/[BackupPC]/trunk/bin/BackupPC_updatedb
This is a repository of my old source code, which is no longer updated. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/bin/BackupPC_updatedb

Parent Directory Parent Directory | Revision Log Revision Log


Revision 305 - (show annotations)
Sat Jan 28 19:49:18 2006 UTC (18 years, 4 months ago) by dpavlin
File size: 17005 byte(s)
be chatty on debug level 3
1 #!/usr/local/bin/perl -w
2
3 use strict;
4 use lib "__INSTALLDIR__/lib";
5
6 use DBI;
7 use BackupPC::Lib;
8 use BackupPC::View;
9 use Data::Dumper;
10 use Getopt::Std;
11 use Time::HiRes qw/time/;
12 use File::Pid;
13 use POSIX qw/strftime/;
14 use BackupPC::SearchLib;
15 use Cwd qw/abs_path/;
16
17 use constant BPC_FTYPE_DIR => 5;
18 use constant EST_CHUNK => 100000;
19
# Startup: global settings, single-instance guard (pid file), BackupPC
# configuration and the database connection.

# daylight saving time change offset for 1h
my $dst_offset = 60 * 60;

my $debug = 0;
$|=1;

my $start_t = time();

# derive a pid file name from the absolute path of this script
my $pid_path = abs_path($0);
$pid_path =~ s/\W+/_/g;

my %pidfile_args = ( file => "/tmp/$pid_path" );
my $pidfile = File::Pid->new({ %pidfile_args });

if (my $pid = $pidfile->running ) {
	die "$0 already running: $pid\n";
} elsif ($pidfile->pid ne $$) {
	# stale pid file: drop it and start over at the same path, so the
	# location of the pid file does not silently change
	$pidfile->remove;
	$pidfile = File::Pid->new({ %pidfile_args });
}
print STDERR "$0 using pid ",$pidfile->pid," file ",$pidfile->file,"\n";
$pidfile->write;

# strftime format used for all timestamps printed by this script
my $t_fmt = '%Y-%m-%d %H:%M:%S';

my $hosts;
my $bpc = BackupPC::Lib->new || die "$0: can't create BackupPC::Lib object\n";
my %Conf = $bpc->Conf();
my $TopDir = $bpc->TopDir();
# cache of file keys already seen, reset for every host in the main loop
my $beenThere = {};

my $dsn = $Conf{SearchDSN} || die "Need SearchDSN in config.pl\n";
my $user = $Conf{SearchUser} || '';

my $index_node_url = $Conf{HyperEstraierIndex};

# explicit transactions: every code path has to commit or roll back itself
my $dbh = DBI->connect($dsn, $user, "", { RaiseError => 1, AutoCommit => 0 });
58
# Command line options; see the usage text below for their meaning.
my %opt;

if ( !getopts("cdm:v:ijf", \%opt ) ) {
	print STDERR <<EOF;
usage: $0 [-c|-d] [-m num] [-v|-v level] [-i|-j|-f]

Options:
	-c	create database on first use
	-d	delete database before import
	-m num	import just num increments for one host
	-v num	set verbosity (debug) level (default $debug)
	-i	update Hyper Estraier full text index
	-j	update full text, don't check existing files
	-f	don't do anything with full text index

Option -j is variation on -i. It will allow faster initial creation
of full-text index from existing database.

Option -f will create database which is out of sync with full text index. You
will have to re-run $0 with -i to fix it.

EOF
	exit 1;
}

if ($opt{v}) {
	print "Debug level at $opt{v}\n";
	$debug = $opt{v};
}

# -f is independent of -v: it must take effect even when a debug level
# was requested on the same command line
if ($opt{f}) {
	print "WARNING: disabling full-text index update. You need to re-run $0 -j !\n";
	$index_node_url = undef;
}
91
92 #---- subs ----
93
# Format a duration given in seconds as "MM:SS", prefixed with "<hours>h"
# when the duration is one hour or longer (e.g. 3661 -> "1h01:01").
#
# Hours are computed arithmetically instead of through gmtime(), so that
# durations of 24 hours and more are not wrapped around (90061 seconds is
# "25h01:01", not "1h01:01"). Fractions of a second are truncated.
#
# Returns nothing (undef in scalar context) for a missing or zero duration.
sub fmt_time {
	my $t = shift || return;

	my $secs = int($t);
	my $sign = $secs < 0 ? '-' : '';
	$secs = abs($secs);

	my $hh = int($secs / 3600);
	my $mm = int(($secs % 3600) / 60);
	my $ss = $secs % 60;

	my $out = $sign;
	$out .= "${hh}h" if ($hh);
	$out .= sprintf("%02d:%02d", $mm, $ss);
	return $out;
}
102
# Current local time rendered with the script-wide $t_fmt format.
sub curr_time {
	my @now = localtime();
	my $stamp = strftime($t_fmt, @now);
	return $stamp;
}
106
# Handles to the full-text index; $hest_node is created lazily by
# hest_update(), $hest_db is only ever read in the code shown here.
my ($hest_db, $hest_node);

# Signal handler: flush and close the index database (when one is open)
# and terminate with exit status 0.
sub signal {
	my $name = $_[0];

	exit(0) unless ($hest_db);

	print "\nCaught a SIG$name--syncing database and shutting down\n";
	$hest_db->sync();
	$hest_db->close();
	exit(0);
}

foreach my $sig_name ('INT', 'QUIT') {
	$SIG{$sig_name} = \&signal;
}
122
# Push file records from the SQL database into the Hyper Estraier
# full-text index, EST_CHUNK rows per query.
#
# Arguments (all optional): $host_id, $share_id, $num. When all three are
# defined, only files of that host / share / backup number are selected;
# otherwise every row of the files table is considered.
#
# Unless -j was given, each file is first looked up in the index by its
# URI and skipped when already present.
#
# Uses the file-level %opt, $index_node_url, $dbh, $debug, $hest_db and
# $hest_node. Returns nothing useful.
sub hest_update {

	my ($host_id, $share_id, $num) = @_;

	my $skip_check = $opt{j} && print STDERR "Skipping check for existing files -- this should be used only with initital import\n";

	unless ($index_node_url) {
		# Report the missing configuration only once; 0 marks "already
		# reported". Later calls return quietly instead of dying, which
		# used to abort the import on the second call.
		unless (defined($index_node_url) && $index_node_url eq '0') {
			print STDERR "HyperEstraier support not enabled in configuration\n";
			$index_node_url = 0;
		}
		return;
	}

	print curr_time," updating Hyper Estraier:";

	my $t = time();

	my $offset = 0;
	my $added = 0;

	print " opening index $index_node_url";
	$hest_node ||= Search::Estraier::Node->new($index_node_url);
	# NOTE(review): credentials are hard-coded here
	$hest_node->set_auth('admin', 'admin');
	print " via node URL";

	# the restriction does not change between chunks, build it once
	my $where = '';
	my @data;
	if (defined($host_id) && defined($share_id) && defined($num)) {
		$where = qq{
		WHERE
			hosts.id = ? AND
			shares.id = ? AND
			files.backupnum = ?
		};
		@data = ( $host_id, $share_id, $num );
	}

	my $results = 0;

	# A real loop block (not do {} while): "last" below has to leave
	# this loop only, never a loop of the caller.
	CHUNK: while (1) {

		my $limit = sprintf('LIMIT '.EST_CHUNK.' OFFSET %d', $offset);

		# ORDER BY keeps LIMIT/OFFSET paging stable between queries
		my $sth = $dbh->prepare(qq{
			SELECT
				files.id			AS fid,
				hosts.name			AS hname,
				shares.name			AS sname,
				-- shares.share			AS sharename,
				files.backupnum			AS backupnum,
				-- files.name			AS filename,
				files.path			AS filepath,
				files.date			AS date,
				files.type			AS type,
				files.size			AS size,
				files.shareid			AS shareid,
				backups.date			AS backup_date
			FROM files
				INNER JOIN shares	ON files.shareID=shares.ID
				INNER JOIN hosts	ON hosts.ID = shares.hostID
				INNER JOIN backups	ON backups.num = files.backupNum and backups.hostID = hosts.ID AND backups.shareID = shares.ID
			$where
			ORDER BY files.id
			$limit
		});

		$sth->execute(@data);
		# NOTE(review): relies on the driver reporting the row count of
		# a SELECT right after execute -- confirm for the driver in use
		$results = $sth->rows;

		if ($results == 0) {
			print " - no new files\n";
			last CHUNK;
		} else {
			print " - $results files: ";
		}

		while (my $row = $sth->fetchrow_hashref()) {

			my $fid = $row->{'fid'} || die "no fid?";
			my $uri = 'file:///' . $fid;

			unless ($skip_check) {
				# -1 means the URI is not in the index yet
				my $id = ($hest_db || $hest_node)->uri_to_id($uri);
				next unless ($id == -1);
			}

			# create a document object
			my $doc = Search::Estraier::Document->new;

			# add attributes to the document object
			$doc->add_attr('@uri', $uri);

			foreach my $c (@{ $sth->{NAME} }) {
				my $value = $row->{$c};
				print STDERR "attr $c = ", (defined($value) ? $value : ''), "\n" if ($debug > 2);
				$doc->add_attr($c, $value) if (defined($value));
			}

			#$doc->add_attr('@cdate', fmt_date($row->{'date'}));

			# add the body text to the document object
			my $path = $row->{'filepath'};
			$doc->add_text($path);
			# hidden text: the path with a space after every character
			$path =~ s/(.)/$1 /g;
			$doc->add_hidden_text($path);

			print STDERR $doc->dump_draft,"\n" if ($debug > 1);

			# register the document object to the database
			if ($hest_node) {
				$hest_node->put_doc($doc);
			} else {
				die "not supported";
			}
			$added++;
		}

		print " $added";

		$offset += EST_CHUNK;

		# a short chunk means there is nothing left to fetch
		last CHUNK unless ($results == EST_CHUNK);
	}

	my $dur = (time() - $t) || 1;
	printf(" [%.2f/s dur: %s]\n",
		( $added / $dur ),
		fmt_time($dur)
	);
}

# Convert an epoch time into an ISO-like timestamp by replacing the first
# whitespace of BackupPC::Lib::timeStamp() output with "T". Returns nothing
# for a missing or zero time. Only referenced from a commented-out line in
# hest_update().
sub fmt_date {
	my $t = shift || return;
	my $iso = BackupPC::Lib::timeStamp($t);
	$iso =~ s/\s/T/;
	return $iso;
}
260
261 #---- /subs ----
262
263
## update index ##
# Forced re-index of everything; not done together with -c because the
# tables do not exist yet at this point in that case.
unless ($opt{c}) {
	if ($opt{i} || $opt{j}) {
		# update all
		my $banner = "force update of Hyper Estraier index ";
		foreach my $flag (qw(i j)) {
			$banner .= "by -$flag flag" if ($opt{$flag});
		}
		print $banner, "\n";
		hest_update();
	}
}
273
## create tables ##
if ($opt{c}) {

	# Create one index from a "table:column[:unique]" specification.
	# The index name is the specification with every run of non-word
	# characters replaced by an underscore.
	sub do_index {
		my $spec = shift;
		return unless ($spec);

		my ($table, $col, $unique) = split(/:/, $spec);
		$unique = '' unless ($unique);

		(my $index = $spec) =~ s/\W+/_/g;

		my $marker = $unique ? "u" : "";
		print $index, " on ", $table, "(", $col, ")", $marker, " ";
		$dbh->do(qq{ create $unique index $index on $table($col) });
	}

	print "creating tables...\n";

	my $schema = qq{
		create table hosts (
			ID	SERIAL		PRIMARY KEY,
			name	VARCHAR(30)	NOT NULL,
			IP	VARCHAR(15)
		);

		create table shares (
			ID	SERIAL		PRIMARY KEY,
			hostID	INTEGER		NOT NULL references hosts(id),
			name	VARCHAR(30)	NOT NULL,
			share	VARCHAR(200)	NOT NULL
		);

		create table dvds (
			ID	SERIAL		PRIMARY KEY,
			num	INTEGER		NOT NULL,
			name	VARCHAR(255)	NOT NULL,
			mjesto	VARCHAR(255)
		);

		create table backups (
			id	serial,
			hostID	INTEGER		NOT NULL references hosts(id),
			num	INTEGER		NOT NULL,
			date	integer		NOT NULL,
			type	CHAR(4)		not null,
			shareID	integer		not null references shares(id),
			size	bigint		not null,
			inc_size bigint		not null default -1,
			inc_deleted boolean	default false,
			parts	integer		not null default 1,
			PRIMARY KEY(id)
		);

		create table files (
			ID		SERIAL,
			shareID		INTEGER	NOT NULL references shares(id),
			backupNum	INTEGER	NOT NULL,
			name		VARCHAR(255) NOT NULL,
			path		VARCHAR(255) NOT NULL,
			date		integer	NOT NULL,
			type		INTEGER	NOT NULL,
			size		bigint	NOT NULL,
			primary key(id)
		);

		create table archive (
			id		serial,
			dvd_nr		int not null,
			total_size	bigint default -1,
			note		text,
			username	varchar(20) not null,
			date		timestamp default now(),
			primary key(id)
		);

		create table archive_backup (
			archive_id	int not null references archive(id) on delete cascade,
			backup_id	int not null references backups(id),
			primary key(archive_id, backup_id)
		);

		create table archive_burned (
			archive_id	int references archive(id),
			date		timestamp default now(),
			part		int not null default 1,
			copy		int not null default 1,
			iso_size bigint default -1
		);

		create table backup_parts (
			id serial,
			backup_id int references backups(id),
			part_nr int not null check (part_nr > 0),
			tar_size bigint not null check (tar_size > 0),
			size bigint not null check (size > 0),
			md5 text not null,
			items int not null check (items > 0),
			date timestamp default now(),
			primary key(id)
		);
	};
	$dbh->do($schema);

	print "creating indexes: ";

	my @index_specs = qw(
		hosts:name
		backups:hostID
		backups:num
		backups:shareID
		shares:hostID
		shares:name
		files:shareID
		files:path
		files:name
		files:date
		files:size
		archive:dvd_nr
		archive_burned:archive_id
		backup_parts:backup_id,part_nr
	);
	for my $spec (@index_specs) {
		do_index($spec);
	}

	print " creating sequence: ";
	my @sequences = ('dvd_nr');
	for my $seq (@sequences) {
		print $seq, " ";
		$dbh->do( qq{ CREATE SEQUENCE $seq } );
	}

	print "...\n";

	# tables, indexes and sequences are created in a single transaction
	$dbh->commit;

}
404
## delete data before inserting ##
if ($opt{d}) {
	print "deleting ";
	# emptied in this fixed order, all within one transaction
	my @tables = ('files', 'dvds', 'backups', 'shares', 'hosts');
	for my $name (@tables) {
		print $name, " ";
		$dbh->do(qq{ DELETE FROM $name });
	}
	print " done...\n";

	$dbh->commit;
}
416
## insert new values ##

# get hosts
$hosts = $bpc->HostInfoRead();
my $hostID;
my $shareID;

# Hash of prepared statement handles, keyed by a short name. The subs
# further down (getShareID, found_in_db) add their own entries lazily.
my $sth;

# name => SQL pairs, prepared in exactly this order
my @statement_sql = (
	insert_hosts => qq{
INSERT INTO hosts (name, IP) VALUES (?,?)
},
	hosts_by_name => qq{
SELECT ID FROM hosts WHERE name=?
},
	backups_count => qq{
SELECT COUNT(*)
FROM backups
WHERE hostID=? AND num=? AND shareid=?
},
	insert_backups => qq{
INSERT INTO backups (hostID, num, date, type, shareid, size)
VALUES (?,?,?,?,?,-1)
},
	update_backups_size => qq{
UPDATE backups SET size = ?
WHERE hostID = ? and num = ? and date = ? and type =? and shareid = ?
},
	insert_files => qq{
INSERT INTO files
	(shareID, backupNum, name, path, date, type, size)
	VALUES (?,?,?,?,?,?,?)
},
);

while (my ($stmt_name, $sql) = splice(@statement_sql, 0, 2)) {
	$sth->{$stmt_name} = $dbh->prepare($sql);
}
455
# Main import loop: for every host known to BackupPC, walk its backups
# and shares, record each not yet imported (host, backup number, share)
# combination in the backups table, import its files through recurseDir()
# and finally feed the new files to the full-text index.
my @hosts = keys %{$hosts};
my $host_nr = 0;

foreach my $host_key (@hosts) {

	my $hostname = $hosts->{$host_key}->{'host'} || die "can't find host for $host_key";

	$sth->{hosts_by_name}->execute($hostname);

	# look the host up, insert it when it is not in the database yet
	unless (($hostID) = $sth->{hosts_by_name}->fetchrow_array()) {
		$sth->{insert_hosts}->execute(
			$hostname,
			$hosts->{$host_key}->{'ip'}
		);

		$hostID = $dbh->last_insert_id(undef,undef,'hosts',undef);
	}

	$host_nr++;
	print "host ", $hostname, " [",
		$host_nr, "/", scalar(@hosts), "]: ";

	# get backups for a host
	my @backups = $bpc->BackupInfoRead($hostname);
	my $incs = scalar @backups;
	print "$incs increments\n";

	my $inc_nr = 0;
	# the seen-files cache is kept per host
	$beenThere = {};

	foreach my $backup (@backups) {

		$inc_nr++;
		# -m limits the number of increments imported per host
		last if ($opt{m} && $inc_nr > $opt{m});

		my $backupNum = $backup->{'num'};

		# fmt_time() returns nothing for a zero duration; "||" forces
		# scalar context so that printf always gets its last argument
		printf("%-10s %2d/%-2d #%-2d %s %5s/%5s files (date: %s dur: %s)\n",
			$hostname,
			$inc_nr, $incs, $backupNum,
			$backup->{type} || '?',
			$backup->{nFilesNew} || '?', $backup->{nFiles} || '?',
			strftime($t_fmt,localtime($backup->{startTime})),
			fmt_time($backup->{endTime} - $backup->{startTime}) || '00:00'
		);

		my $files = BackupPC::View->new($bpc, $hostname, \@backups, 1);
		foreach my $share ($files->shareList($backupNum)) {

			my $t = time();

			$shareID = getShareID($share, $hostID, $hostname);

			$sth->{backups_count}->execute($hostID, $backupNum, $shareID);
			my ($count) = $sth->{backups_count}->fetchrow_array();
			# skip if already in database!
			next if ($count > 0);

			# dump some log
			print curr_time," ", $share;

			# size is inserted as -1 and updated after the import below
			$sth->{insert_backups}->execute(
				$hostID,
				$backupNum,
				$backup->{'endTime'},
				substr($backup->{'type'},0,4),
				$shareID,
			);

			my ($f, $nf, $d, $nd, $size) = recurseDir($bpc, $hostname, $files, $backupNum, $share, "", $shareID);

			eval {
				$sth->{update_backups_size}->execute(
					$size,
					$hostID,
					$backupNum,
					$backup->{'endTime'},
					substr($backup->{'type'},0,4),
					$shareID,
				);
				print " commit";
				$dbh->commit();
			};
			if (my $err = $@) {
				print " rollback";
				# say why the transaction was abandoned instead of
				# dropping the error silently
				print STDERR "\n$hostname:$share #$backupNum failed: $err";
				$dbh->rollback();
			}

			my $dur = (time() - $t) || 1;
			printf(" %d/%d files %d/%d dirs %0.2f MB [%.2f/s dur: %s]\n",
				$nf, $f, $nd, $d,
				($size / 1024 / 1024),
				( ($f+$d) / $dur ),
				fmt_time($dur)
			);

			# only touch the full-text index when something new was imported
			hest_update($hostID, $shareID, $backupNum) if ($nf + $nd > 0);
		}

	}
}
undef $sth;
$dbh->commit();
$dbh->disconnect();

print "total duration: ",fmt_time(time() - $start_t),"\n";

$pidfile->remove;
565
# Return the database ID of a share, inserting the share first when it is
# not known yet.
#
# Arguments:
#   $share    - share name as reported by BackupPC
#   $hostID   - database ID of the host the share belongs to
#   $hostname - host name, used to build the "host/share" label stored in
#               the shares.share column
#
# The sub takes three arguments, so it is declared without the former
# empty "()" prototype, which contradicted every call.
sub getShareID {

	my ($share, $hostID, $hostname) = @_;

	$sth->{share_id} ||= $dbh->prepare(qq{
		SELECT ID FROM shares WHERE hostID=? AND name=?
	});

	$sth->{share_id}->execute($hostID,$share);

	my ($id) = $sth->{share_id}->fetchrow_array();

	return $id if (defined($id));

	$sth->{insert_share} ||= $dbh->prepare(qq{
		INSERT INTO shares
			(hostID,name,share)
		VALUES (?,?,?)
	});

	# label is "hostname/share" with runs of slashes collapsed to one
	my $drop_down = $hostname . '/' . $share;
	$drop_down =~ s#//+#/#g;

	$sth->{insert_share}->execute($hostID,$share, $drop_down);
	return $dbh->last_insert_id(undef,undef,'shares',undef);
}
592
# Check whether a file is already recorded in the files table and insert
# it when it is not.
#
# Arguments: ($key, @data) where $key is the cache key built by the
# caller and @data are the seven insert_files values
# (shareID, backupNum, name, path, date, type, size).
#
# A file counts as present when share, path and size match and the date
# is equal to the given one or differs by exactly $dst_offset seconds
# in either direction.
#
# Returns the cached value when $key was seen before, otherwise 1 when
# the file was found in the database and 0 when it was just inserted.
sub found_in_db {

	my ($key, @data) = @_;

	my ($shareID, undef, undef, $path, $date, undef, $size) = @data;

	return $beenThere->{$key} if (defined($beenThere->{$key}));

	$sth->{file_in_db} ||= $dbh->prepare(qq{
		SELECT 1 FROM files
		WHERE shareID = ? and
			path = ? and
			size = ? and
			( date = ? or date = ? or date = ? )
		LIMIT 1
	});

	my @param = ($shareID,$path,$size,$date, $date-$dst_offset, $date+$dst_offset);
	$sth->{file_in_db}->execute(@param);

	# Fetch the row instead of asking rows(): DBI does not guarantee a
	# meaningful row count for SELECT statements before fetching.
	my ($hit) = $sth->{file_in_db}->fetchrow_array();
	$sth->{file_in_db}->finish();
	my $rows = $hit ? 1 : 0;

	print STDERR "## found_in_db($shareID,$path,$date,$size) ",( $rows ? '+' : '-' ), join(" ",@param), "\n" if ($debug >= 3);

	$beenThere->{$key}++;

	$sth->{'insert_files'}->execute(@data) unless ($rows);
	return $rows;
}
621
####################################################
# recursing through filesystem structure and       #
# and returning flattened files list               #
####################################################
#
# Arguments:
#   $bpc       - BackupPC::Lib object (only passed on to recursive calls)
#   $hostname  - host name (used for debug output and recursion)
#   $files     - BackupPC::View object used to list directory attributes
#   $backupNum - backup number
#   $share     - share name
#   $dir       - directory inside the share, "" for the top level
#   $shareID   - database ID of the share
#
# Every entry that is neither in the $beenThere cache (also checked with
# the mtime shifted by +/- $dst_offset) nor in the database is inserted
# by found_in_db() and counted as new.
#
# Returns ($nr_files, $new_files, $nr_dirs, $new_dirs, $size), where
# $size is the summed size of the new entries only.
#
# Declared without the former 8-slot prototype: all callers pass seven
# arguments.
sub recurseDir {

	my ($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID) = @_;

	print STDERR "\nrecurse($hostname,$backupNum,$share,$dir,$shareID)\n" if ($debug >= 1);

	my ($nr_files, $new_files, $nr_dirs, $new_dirs, $size) = (0,0,0,0,0);

	# sub-directories found on this level, descended into afterwards
	my @stack;

	print STDERR "# dirAttrib($backupNum, $share, $dir)\n" if ($debug >= 2);
	my $filesInBackup = $files->dirAttrib($backupNum, $share, $dir) || {};

	# first, add all the entries in current directory
	foreach my $path_key (keys %{$filesInBackup}) {
		my $attr = $filesInBackup->{$path_key};
		print STDERR "# file ",Dumper($attr),"\n" if ($debug >= 3);

		my $is_dir = ($attr->{'type'} == BPC_FTYPE_DIR);

		my @data = (
			$shareID,
			$backupNum,
			$path_key,
			$attr->{'relPath'},
			$attr->{'mtime'},
			$attr->{'type'},
			$attr->{'size'}
		);

		# cache keys: exact mtime, and mtime one DST offset earlier / later
		my $key = join(" ", (
			$shareID,
			$dir,
			$path_key,
			$attr->{'mtime'},
			$attr->{'size'}
		));

		my $key_dst_prev = join(" ", (
			$shareID,
			$dir,
			$path_key,
			$attr->{'mtime'} - $dst_offset,
			$attr->{'size'}
		));

		my $key_dst_next = join(" ", (
			$shareID,
			$dir,
			$path_key,
			$attr->{'mtime'} + $dst_offset,
			$attr->{'size'}
		));

		# found_in_db() is only consulted (and only inserts) when none of
		# the three keys is cached; inside this branch the entry is new
		if (
			! defined($beenThere->{$key}) &&
			! defined($beenThere->{$key_dst_prev}) &&
			! defined($beenThere->{$key_dst_next}) &&
			! found_in_db($key, @data)
		) {
			print STDERR "# key: $key [", $beenThere->{$key},"]" if ($debug >= 2);

			if ($is_dir) {
				$new_dirs++;
				print STDERR " dir\n" if ($debug >= 2);
			} else {
				$new_files++;
				print STDERR " file\n" if ($debug >= 2);
			}
			$size += $attr->{'size'} || 0;
		}

		if ($is_dir) {
			$nr_dirs++;

			my $full_path = $dir . '/' . $path_key;
			push @stack, $full_path;
			print STDERR "### store to stack: $full_path\n" if ($debug >= 3);
		} else {
			$nr_files++;
		}
	}

	print STDERR "## STACK ",join(", ", @stack),"\n" if ($debug >= 2);

	# then descend into the sub-directories and add up their counters
	while ( defined(my $subdir = shift @stack) ) {
		my ($f,$nf,$d,$nd, $s) = recurseDir($bpc, $hostname, $files, $backupNum, $share, $subdir, $shareID);
		print STDERR "# $subdir f: $f nf: $nf d: $d nd: $nd\n" if ($debug >= 1);
		$nr_files += $f;
		$new_files += $nf;
		$nr_dirs += $d;
		$new_dirs += $nd;
		$size += $s;
	}

	return ($nr_files, $new_files, $nr_dirs, $new_dirs, $size);
}
730

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26