/[BackupPC]/trunk/bin/BackupPC_updatedb
This is a repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/bin/BackupPC_updatedb

Parent Directory Parent Directory | Revision Log Revision Log


Revision 117 - (show annotations)
Sun Sep 11 13:05:06 2005 UTC (18 years, 8 months ago) by dpavlin
File size: 14402 byte(s)
added node search

#!/usr/local/bin/perl -w

use strict;
use lib "__INSTALLDIR__/lib";

use DBI;
use BackupPC::Lib;
use BackupPC::View;
use Data::Dumper;
use Getopt::Std;
use Time::HiRes qw/time/;
use File::Pid;
use POSIX qw/strftime/;

# BackupPC attrib type code identifying a directory entry.
use constant BPC_FTYPE_DIR => 5;
# Number of file rows pushed into the HyperEstraier index per batch.
use constant EST_CHUNK => 100000;

# Verbosity level, raised via -v (see option parsing below).
my $debug = 0;
$|=1;	# unbuffer STDOUT so progress output appears immediately

my $start_t = time();

# Guard against concurrent runs with a pidfile (File::Pid derives the
# default pidfile path from the script name).
my $pidfile = new File::Pid;

if (my $pid = $pidfile->running ) {
	die "$0 already running: $pid\n";
} elsif ($pidfile->pid ne $$) {
	# Stale pidfile left by a dead process: discard and recreate.
	$pidfile->remove;
	$pidfile = new File::Pid;
}
$pidfile->write;
print STDERR "$0 using pid ",$pidfile->pid," file ",$pidfile->file,"\n";
# strftime() pattern used for all timestamped log lines.
my $t_fmt = '%Y-%m-%d %H:%M:%S';

my $hosts;
my $bpc = BackupPC::Lib->new || die;
my %Conf = $bpc->Conf();
my $TopDir = $bpc->TopDir();
# Cache of file keys already seen/inserted; reset per host in the main loop.
my $beenThere = {};

my $dsn = $Conf{SearchDSN} || die "Need SearchDSN in config.pl\n";
my $user = $Conf{SearchUser} || '';

# HyperEstraier full-text index target: resolved into either a local
# index path or a node URL, depending on configuration.
# NOTE(review): getHyperEstraier_url() is not defined anywhere in this
# view of the file -- confirm it is provided elsewhere, otherwise this
# call dies at runtime.
my $use_hest = $Conf{HyperEstraierIndex};
my ($index_path, $index_node_url) = getHyperEstraier_url($use_hest);

# AutoCommit off: inserts are batched and committed explicitly per share.
my $dbh = DBI->connect($dsn, $user, "", { RaiseError => 1, AutoCommit => 0 });
49
my %opt;

# Parse command-line switches; on a parse error print usage and abort.
if ( !getopts("cdm:v:i", \%opt ) ) {
	print STDERR <<EOF;
usage: $0 [-c|-d] [-m num] [-v|-v level] [-i]

Options:
-c create database on first use
-d delete database before import
-m num import just num increments for one host
-v num set verbosity (debug) level (default $debug)
-i update HyperEstraier full text index
EOF
	exit 1;
}

if ($opt{v}) {
	print "Debug level at $opt{v}\n";
	$debug = $opt{v};
}
70
71 #---- subs ----
72
# Format a duration in seconds as "[Hh]MM:SS", e.g. 3661 -> "1h01:01".
# Returns undef/empty for a 0 or undef input (original contract kept).
sub fmt_time {
	my $t = shift || return;
	my $out = "";
	# FIX: derive components arithmetically instead of via gmtime(),
	# so durations of 24 hours or more keep their full hour count
	# (gmtime wraps hours at 24 and silently drops whole days).
	my $ss = $t % 60;
	my $mm = int($t / 60) % 60;
	my $hh = int($t / 3600);
	$out .= "${hh}h" if ($hh);
	$out .= sprintf("%02d:%02d", $mm,$ss);
	return $out;
}
81
# Current wall-clock time rendered with the script-wide $t_fmt pattern
# ('%Y-%m-%d %H:%M:%S'); used to prefix progress log lines.
sub curr_time {
	my @now = localtime();
	return strftime( $t_fmt, @now );
}
85
# Handles to an open HyperEstraier index: hest_update() sets one of
# these depending on whether we write the index directly ($hest_db)
# or talk to an estmaster node over HTTP ($hest_node).
my $hest_db;
my $hest_node;

# Signal handler: flush and close the full-text index (if open) before
# exiting, so an interrupted run does not leave it corrupted.
sub signal {
	my($sig) = @_;
	if ($hest_db) {
		print "\nCaught a SIG$sig--syncing database and shutting down\n";
		$hest_db->sync();
		$hest_db->close();
	}
	exit(0);
}

$SIG{'INT'}  = \&signal;
$SIG{'QUIT'} = \&signal;
101
# Push newly inserted file rows into the HyperEstraier full-text index.
#
# With no arguments, indexes every file row not yet present in the
# index; with ($host_id, $share_id, $num) the query is restricted to a
# single backup of a single share. Rows are pulled from the database
# in EST_CHUNK-sized pages; each row becomes one index document keyed
# by the URI "file:///<files.id>". No return value of interest.
sub hest_update {

	my ($host_id, $share_id, $num) = @_;

	unless ($use_hest) {
		print STDERR "HyperEstraier support not enabled in configuration\n";
		return;
	}

	print curr_time," updating HyperEstraier:";

	my $t = time();

	my $offset = 0;
	my $added = 0;

	print " opening index $use_hest";
	if ($index_path) {
		# Local index: open the database file directly for writing.
		$hest_db = HyperEstraier::Database->new();
		$hest_db->open($index_path, $HyperEstraier::Database::DBWRITER | $HyperEstraier::Database::DBCREAT);
		print " directly";
	} elsif ($index_node_url) {
		# Remote index: talk to a running estmaster node.
		$hest_node ||= HyperEstraier::Node->new($index_node_url);
		$hest_node->set_auth('admin', 'admin');
		print " via node URL";
	} else {
		die "don't know how to use HyperEstraier Index $use_hest";
	}
	print " increment is " . EST_CHUNK . " files:";

	# Render a unix timestamp as ISO-8601. Named subs are package-level
	# regardless of nesting; hoisted out of the paging loop where the
	# original defined it, for clarity (behavior identical).
	# NOTE(review): calls timeStamp() as a plain function -- confirm
	# BackupPC::Lib exposes it that way.
	sub fmt_date {
		my $t = shift || return;
		my $iso = BackupPC::Lib::timeStamp($t);
		$iso =~ s/\s/T/;
		return $iso;
	}

	my $results = 0;

	# FIX: the original used do { ... } while with a bare `last` inside.
	# Loop control cannot exit a do-BLOCK; at runtime the `last` would
	# propagate (with a warning) into the caller's enclosing foreach
	# loop, silently skipping the remaining shares. A real while loop
	# makes `last` behave as intended.
	while (1) {

		my $where = '';
		my @data;
		if ($host_id && $share_id && $num) {
			$where = qq{
			WHERE
				hosts.id = ? AND
				shares.id = ? AND
				files.backupnum = ?
			};
			@data = ( $host_id, $share_id, $num );
		}

		my $limit = sprintf('LIMIT '.EST_CHUNK.' OFFSET %d', $offset);

		my $sth = $dbh->prepare(qq{
			SELECT
				files.id AS fid,
				hosts.name AS hname,
				shares.name AS sname,
				-- shares.share AS sharename,
				files.backupnum AS backupnum,
				-- files.name AS filename,
				files.path AS filepath,
				files.date AS date,
				files.type AS type,
				files.size AS size,
				files.shareid AS shareid,
				backups.date AS backup_date
			FROM files
				INNER JOIN shares ON files.shareID=shares.ID
				INNER JOIN hosts ON hosts.ID = shares.hostID
				INNER JOIN backups ON backups.num = files.backupNum and backups.hostID = hosts.ID AND backups.shareID = shares.ID
			$where
			$limit
		});

		$sth->execute(@data);
		# NOTE(review): rows() on an executed SELECT is driver-dependent
		# (works with DBD::Pg, which this project targets) -- confirm if
		# the driver ever changes.
		$results = $sth->rows;

		if ($results == 0) {
			print " - no new files\n";
			last;
		}

		while (my $row = $sth->fetchrow_hashref()) {

			my $fid = $row->{'fid'} || die "no fid?";
			my $uri = 'file:///' . $fid;

			# Skip documents already present in the index.
			my $id = ($hest_db || $hest_node)->uri_to_id($uri);
			next unless ($id == -1);

			# create a document object
			my $doc = HyperEstraier::Document->new;

			# add attributes to the document object
			$doc->add_attr('@uri', $uri);

			foreach my $c (@{ $sth->{NAME} }) {
				$doc->add_attr($c, $row->{$c}) if ($row->{$c});
			}

			#$doc->add_attr('@cdate', fmt_date($row->{'date'}));

			# add the body text to the document object
			my $path = $row->{'filepath'};
			$doc->add_text($path);
			# Space the characters out as hidden text so substring
			# searches can match path fragments.
			$path =~ s/(.)/$1 /g;
			$doc->add_hidden_text($path);

			print STDERR $doc->dump_draft,"\n" if ($debug > 1);

			# register the document object to the database
			if ($hest_db) {
				$hest_db->put_doc($doc, $HyperEstraier::Database::PDCLEAN);
			} elsif ($hest_node) {
				$hest_node->put_doc($doc);
			} else {
				die "not supported";
			}
			$added++;
		}

		print " $added";
		$hest_db->sync() if ($index_path);

		$offset += EST_CHUNK;

		# A short page means the result set is drained.
		last if ($results != EST_CHUNK);
	}

	if ($index_path) {
		print ", close";
		$hest_db->close();
	}

	my $dur = (time() - $t) || 1;
	printf(" [%.2f/s dur: %s]\n",
		( $added / $dur ),
		fmt_time($dur)
	);
}
244
245 #---- /subs ----
246
247
## update index ##
# Refresh the full-text index up front when asked explicitly (-i) or
# when a configured local index file does not exist yet -- but never
# together with -c, since the tables (and their data) are only created
# below.
if (($opt{i} || ($index_path && ! -e $index_path)) && !$opt{c}) {
	# update all
	print "force update of HyperEstraier index ";
	# NOTE(review): -e on an undef $index_path warns under -w when this
	# branch is entered via $opt{i} alone -- confirm intended.
	print "importing existing data" unless (-e $index_path);
	print "by -i flag" if ($opt{i});
	print "\n";
	hest_update();
}
257
## create tables ##
if ($opt{c}) {
	# Create an index from a name of the form table_col[_unique];
	# a comma-separated col yields a multi-column index, and commas in
	# the resulting index *name* are normalized to underscores.
	sub do_index {
		my $index = shift || return;
		my ($table,$col,$unique) = split(/_/, $index);
		$unique ||= '';
		$index =~ s/,/_/g;
		$dbh->do(qq{ create $unique index $index on $table($col) });
	}

	print "creating tables...\n";

	$dbh->do(qq{
		create table hosts (
			ID SERIAL PRIMARY KEY,
			name VARCHAR(30) NOT NULL,
			IP VARCHAR(15)
		);
	});

	$dbh->do(qq{
		create table shares (
			ID SERIAL PRIMARY KEY,
			hostID INTEGER NOT NULL references hosts(id),
			name VARCHAR(30) NOT NULL,
			share VARCHAR(200) NOT NULL,
			localpath VARCHAR(200)
		);
	});

	# One row per (host, backup number, share) import; also serves as
	# the "already imported" marker checked by the main loop.
	$dbh->do(qq{
		create table backups (
			hostID INTEGER NOT NULL references hosts(id),
			num INTEGER NOT NULL,
			date integer NOT NULL,
			type CHAR(4) not null,
			shareID integer not null references shares(id),
			size integer not null,
			PRIMARY KEY(hostID, num, shareID)
		);
	});

	#do_index('backups_hostid,num_unique');

	$dbh->do(qq{
		create table dvds (
			ID SERIAL PRIMARY KEY,
			num INTEGER NOT NULL,
			name VARCHAR(255) NOT NULL,
			mjesto VARCHAR(255)
		);
	});

	$dbh->do(qq{
		create table files (
			ID SERIAL PRIMARY KEY,
			shareID INTEGER NOT NULL references shares(id),
			backupNum INTEGER NOT NULL,
			name VARCHAR(255) NOT NULL,
			path VARCHAR(255) NOT NULL,
			date integer NOT NULL,
			type INTEGER NOT NULL,
			size INTEGER NOT NULL,
			dvdid INTEGER references dvds(id)
		);
	});

	print "creating indexes:";

	foreach my $index (qw(
		hosts_name
		backups_hostID
		backups_num
		shares_hostID
		shares_name
		files_shareID
		files_path
		files_name
		files_date
		files_size
	)) {
		print " $index";
		do_index($index);
	}
	print "...\n";

	$dbh->commit;

}
347
## delete data before inserting ##
# -d: wipe every imported row, children before parents so the foreign
# key references (files -> shares/dvds, shares/backups -> hosts) hold.
if ($opt{d}) {
	print "deleting ";
	for my $table (qw(files dvds backups shares hosts)) {
		print "$table ";
		$dbh->do(qq{ DELETE FROM $table });
	}
	print " done...\n";

	$dbh->commit;
}
359
## insert new values ##

# get hosts
$hosts = $bpc->HostInfoRead();
my $hostID;
my $shareID;

# $sth serves as a hashref of named prepared-statement handles; it
# autovivifies on first assignment below.
my $sth;

$sth->{insert_hosts} = $dbh->prepare(qq{
	INSERT INTO hosts (name, IP) VALUES (?,?)
});

$sth->{hosts_by_name} = $dbh->prepare(qq{
	SELECT ID FROM hosts WHERE name=?
});

# Checks whether a (host, backup, share) triple was already imported.
$sth->{backups_count} = $dbh->prepare(qq{
	SELECT COUNT(*)
	FROM backups
	WHERE hostID=? AND num=? AND shareid=?
});

$sth->{insert_backups} = $dbh->prepare(qq{
	INSERT INTO backups (hostID, num, date, type, shareid, size)
	VALUES (?,?,?,?,?,?)
});

# Executed from found_in_db() for each file row not yet in the table.
$sth->{insert_files} = $dbh->prepare(qq{
	INSERT INTO files
		(shareID, backupNum, name, path, date, type, size)
	VALUES (?,?,?,?,?,?,?)
});
393
# Main import loop: for every configured host, walk each backup
# increment share by share, insert any file rows not yet recorded,
# commit per share, then index the fresh rows.
foreach my $host_key (keys %{$hosts}) {

	my $hostname = $hosts->{$host_key}->{'host'} || die "can't find host for $host_key";

	$sth->{hosts_by_name}->execute($hosts->{$host_key}->{'host'});

	# Insert the host row on first sight, otherwise reuse its ID.
	unless (($hostID) = $sth->{hosts_by_name}->fetchrow_array()) {
		$sth->{insert_hosts}->execute(
			$hosts->{$host_key}->{'host'},
			$hosts->{$host_key}->{'ip'}
		);

		$hostID = $dbh->last_insert_id(undef,undef,'hosts',undef);
	}

	print "host ".$hosts->{$host_key}->{'host'}.": ";

	# get backups for a host
	my @backups = $bpc->BackupInfoRead($hostname);
	my $incs = scalar @backups;
	print "$incs increments\n";

	my $inc_nr = 0;
	# The seen-file cache is per host: reset so keys don't leak across hosts.
	$beenThere = {};

	foreach my $backup (@backups) {

		$inc_nr++;
		# -m caps how many increments are imported per host.
		last if ($opt{m} && $inc_nr > $opt{m});

		my $backupNum = $backup->{'num'};
		my @backupShares = ();

		printf("%-10s %2d/%-2d #%-2d %s %5s/%5s files (date: %s dur: %s)\n",
			$hosts->{$host_key}->{'host'},
			$inc_nr, $incs, $backupNum,
			$backup->{type} || '?',
			$backup->{nFilesNew} || '?', $backup->{nFiles} || '?',
			strftime($t_fmt,localtime($backup->{startTime})),
			fmt_time($backup->{endTime} - $backup->{startTime})
		);

		my $files = BackupPC::View->new($bpc, $hostname, \@backups, 1);
		foreach my $share ($files->shareList($backupNum)) {

			my $t = time();

			$shareID = getShareID($share, $hostID, $hostname);

			$sth->{backups_count}->execute($hostID, $backupNum, $shareID);
			my ($count) = $sth->{backups_count}->fetchrow_array();
			# skip if already in database!
			next if ($count > 0);

			# dump some log
			print curr_time," ", $share;

			# Walk the whole share tree, inserting missing file rows;
			# returns per-share counters and the size of new entries.
			my ($f, $nf, $d, $nd, $size) = recurseDir($bpc, $hostname, $files, $backupNum, $share, "", $shareID);

			# The backups row is written only after the files went in,
			# so backups_count above doubles as a completion marker.
			$sth->{insert_backups}->execute(
				$hostID,
				$backupNum,
				$backup->{'endTime'},
				$backup->{'type'},
				$shareID,
				$size,
			);

			print " commit";
			$dbh->commit();

			my $dur = (time() - $t) || 1;
			printf(" %d/%d files %d/%d dirs %0.2f MB [%.2f/s dur: %s]\n",
				$nf, $f, $nd, $d,
				($size / 1024 / 1024),
				( ($f+$d) / $dur ),
				fmt_time($dur)
			);

			# Index this backup's fresh rows right away.
			hest_update($hostID, $shareID, $backupNum) if ($nf + $nd > 0);
		}

	}
}
undef $sth;	# release prepared handles before disconnecting
$dbh->commit();
$dbh->disconnect();

print "total duration: ",fmt_time(time() - $start_t),"\n";

$pidfile->remove;
485
# Return the shares.ID for ($share on $hostID), inserting a new row
# (and returning its generated ID) when the pair is seen for the
# first time. The human-visible name stored in shares.share is
# "hostname/share" with duplicate slashes collapsed.
#
# FIX: the original declared this as `sub getShareID()` -- an empty
# prototype claiming zero arguments, although it is called with three.
# It was inert only because all call sites compile before this
# definition; prototypes are not argument validation, so it is dropped.
sub getShareID {

	my ($share, $hostID, $hostname) = @_;

	# Statements are prepared lazily and cached in the shared $sth hash.
	$sth->{share_id} ||= $dbh->prepare(qq{
		SELECT ID FROM shares WHERE hostID=? AND name=?
	});

	$sth->{share_id}->execute($hostID,$share);

	my ($id) = $sth->{share_id}->fetchrow_array();

	return $id if (defined($id));

	$sth->{insert_share} ||= $dbh->prepare(qq{
		INSERT INTO shares
			(hostID,name,share,localpath)
		VALUES (?,?,?,?)
	});

	# Display name shown in the web UI drop-down.
	my $drop_down = $hostname . '/' . $share;
	$drop_down =~ s#//+#/#g;

	$sth->{insert_share}->execute($hostID,$share, $drop_down ,undef);
	return $dbh->last_insert_id(undef,undef,'shares',undef);
}
512
# Check whether a file row is already in the files table, consulting
# (and updating) the per-host $beenThere cache first. When the row is
# absent it is inserted via the shared insert_files statement.
#
# Called as found_in_db($key, @row_data); returns true when the file
# was already present (cached or in the database).
sub found_in_db {

	my @data = @_;
	shift @data;	# @data keeps only the insert_files bind values

	my ($key, $shareID,undef,$name,$path,$date,undef,$size) = @_;

	return $beenThere->{$key} if (defined($beenThere->{$key}));

	$sth->{file_in_db} ||= $dbh->prepare(qq{
		SELECT 1 FROM files
		WHERE shareID = ? and
			path = ? and
			date = ? and
			size = ?
		LIMIT 1
	});

	my @param = ($shareID,$path,$date,$size);
	$sth->{file_in_db}->execute(@param);
	# FIX: the original read $sth->rows after executing the SELECT; DBI
	# documents rows() as unreliable for SELECT statements until the
	# rows have been fetched. Fetch the (at most one) row instead,
	# which is correct on every driver.
	my $rows = $sth->{file_in_db}->fetchrow_array() ? 1 : 0;
	$sth->{file_in_db}->finish;
	print STDERR "## found_in_db($shareID,$path,$date,$size) ",( $rows ? '+' : '-' ), join(" ",@param), "\n" if ($debug >= 3);

	# Mark the key seen either way so a second sighting short-circuits.
	$beenThere->{$key}++;

	$sth->{'insert_files'}->execute(@data) unless ($rows);
	return $rows;
}
541
542 ####################################################
543 # recursing through filesystem structure and #
544 # and returning flattened files list #
545 ####################################################
# Walk one backup share starting at $dir, using an explicit stack of
# subdirectories (plus one recursion level per stack drain) instead of
# recursing per directory entry. Each entry not already known (via the
# $beenThere cache / found_in_db) is inserted into the files table.
#
# Returns ($nr_files, $new_files, $nr_dirs, $new_dirs, $size), where
# $size sums only the entries that were new in this run.
#
# FIX: the original carried the prototype ($$$$$$$$) -- eight scalars
# -- while every call site passes seven arguments. On the internal
# recursive call below (compiled after this definition) the prototype
# *is* enforced, making the mismatch an actual compile-time error.
# Prototypes are not argument validation; dropped entirely.
sub recurseDir {

	my ($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID) = @_;

	print STDERR "\nrecurse($hostname,$backupNum,$share,$dir,$shareID)\n" if ($debug >= 1);

	my ($nr_files, $new_files, $nr_dirs, $new_dirs, $size) = (0,0,0,0,0);

	{ # scope
		my @stack;

		print STDERR "# dirAttrib($backupNum, $share, $dir)\n" if ($debug >= 2);
		my $filesInBackup = $files->dirAttrib($backupNum, $share, $dir);

		# first, add all the entries in current directory
		foreach my $path_key (keys %{$filesInBackup}) {
			print STDERR "# file ",Dumper($filesInBackup->{$path_key}),"\n" if ($debug >= 3);
			my @data = (
				$shareID,
				$backupNum,
				$path_key,
				$filesInBackup->{$path_key}->{'relPath'},
				$filesInBackup->{$path_key}->{'mtime'},
				$filesInBackup->{$path_key}->{'type'},
				$filesInBackup->{$path_key}->{'size'}
			);

			# Dedup key: mtime+size identify an unchanged file across
			# increments within this share.
			my $key = join(" ", (
				$shareID,
				$dir,
				$path_key,
				$filesInBackup->{$path_key}->{'mtime'},
				$filesInBackup->{$path_key}->{'size'}
			));

			my $found;
			if (! defined($beenThere->{$key}) && ! ($found = found_in_db($key, @data)) ) {
				print STDERR "# key: $key [", $beenThere->{$key},"]" if ($debug >= 2);

				if ($filesInBackup->{$path_key}->{'type'} == BPC_FTYPE_DIR) {
					$new_dirs++ unless ($found);
					print STDERR " dir\n" if ($debug >= 2);
				} else {
					$new_files++ unless ($found);
					print STDERR " file\n" if ($debug >= 2);
				}
				$size += $filesInBackup->{$path_key}->{'size'} || 0;
			}

			if ($filesInBackup->{$path_key}->{'type'} == BPC_FTYPE_DIR) {
				$nr_dirs++;

				# Defer subdirectories to the stack rather than
				# recursing immediately, keeping call depth shallow.
				my $full_path = $dir . '/' . $path_key;
				push @stack, $full_path;
				print STDERR "### store to stack: $full_path\n" if ($debug >= 3);
			} else {
				$nr_files++;
			}
		}

		print STDERR "## STACK ",join(", ", @stack),"\n" if ($debug >= 2);

		# Then descend into each queued subdirectory and fold its
		# counters into ours.
		while ( my $dir = shift @stack ) {
			my ($f,$nf,$d,$nd, $s) = recurseDir($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID);
			print STDERR "# $dir f: $f nf: $nf d: $d nd: $nd\n" if ($debug >= 1);
			$nr_files += $f;
			$new_files += $nf;
			$nr_dirs += $d;
			$new_dirs += $nd;
			$size += $s;
		}
	}

	return ($nr_files, $new_files, $nr_dirs, $new_dirs, $size);
}
629

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26