This is a repository of my old source code, which is no longer updated. Go to git.rot13.org for current projects!

Contents of /trunk/bin/BackupPC_updatedb



Revision 326
Tue Jan 31 16:37:14 2006 UTC by dpavlin
File size: 17859 byte(s)
 r9166@llin:  dpavlin | 2006-01-31 17:36:56 +0100
 more fixes for operation without Hyper Estraier

#!/usr/local/bin/perl -w

use strict;
use lib "__INSTALLDIR__/lib";

use DBI;
use BackupPC::Lib;
use BackupPC::View;
use Data::Dumper;
use Getopt::Std;
use Time::HiRes qw/time/;
use File::Pid;
use POSIX qw/strftime/;
use BackupPC::SearchLib;
use Cwd qw/abs_path/;

use constant BPC_FTYPE_DIR => 5;
use constant EST_CHUNK => 4096;

# daylight saving time change offset for 1h
my $dst_offset = 60 * 60;

my $debug = 0;
$|=1;

my $start_t = time();

my $pid_path = abs_path($0);
$pid_path =~ s/\W+/_/g;

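# single-instance lock: write a pid file under /tmp, named after this
# script's absolute path, and refuse to start if another copy is running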
my $pidfile = new File::Pid({
    file => "/tmp/$pid_path",
});

if (my $pid = $pidfile->running ) {
    die "$0 already running: $pid\n";
} elsif ($pidfile->pid ne $$) {
    $pidfile->remove;
    $pidfile = new File::Pid;
}
print STDERR "$0 using pid ",$pidfile->pid," file ",$pidfile->file,"\n";
$pidfile->write;

my $t_fmt = '%Y-%m-%d %H:%M:%S';

my $hosts;
my $bpc = BackupPC::Lib->new || die;
my %Conf = $bpc->Conf();
my $TopDir = $bpc->TopDir();
my $beenThere = {};

my $dsn = $Conf{SearchDSN} || die "Need SearchDSN in config.pl\n";
my $user = $Conf{SearchUser} || '';

my $index_node_url = $Conf{HyperEstraierIndex};

my $dbh = DBI->connect($dsn, $user, "", { RaiseError => 1, AutoCommit => 0 });

my %opt;

if ( !getopts("cdm:v:ijfq", \%opt ) ) {
    print STDERR <<EOF;
usage: $0 [-c|-d] [-m num] [-v num] [-i|-j|-f] [-q]

Options:
    -c      create database on first use
    -d      delete database before import
    -m num  import just num increments for one host
    -v num  set verbosity (debug) level (default $debug)
    -i      update Hyper Estraier full text index
    -j      update full text index, don't check existing files
    -f      don't do anything with full text index
    -q      be quiet for hosts without changes

Option -j is a variation on -i. It allows faster initial creation
of the full-text index from an existing database.

Option -f will create a database which is out of sync with the full-text
index. You will have to re-run $0 with -i to fix it.

EOF
    exit 1;
}

if ($opt{v}) {
    print "Debug level at $opt{v}\n";
    $debug = $opt{v};
}

if ($opt{f}) {
    print "WARNING: disabling full-text index update. You need to re-run $0 -j !\n";
    $index_node_url = undef;
}

#---- subs ----

sub fmt_time {
    my $t = shift || return;
    my $out = "";
    my ($ss,$mm,$hh) = gmtime($t);
    $out .= "${hh}h" if ($hh);
    $out .= sprintf("%02d:%02d", $mm,$ss);
    return $out;
}

sub curr_time {
    return strftime($t_fmt,localtime());
}

my $hest_node;

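# hest_update: push file metadata from the files table into the Hyper
# Estraier index node, in batches of EST_CHUNK rows; with no arguments it
# (re)indexes everything, otherwise only the given host/share/backup number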
sub hest_update {

    my ($host_id, $share_id, $num) = @_;

    my $skip_check = $opt{j};
    print STDERR "Skipping check for existing files -- this should be used only with initial import\n" if ($skip_check);

    unless ($index_node_url && $index_node_url =~ m#^http://#) {
        print STDERR "HyperEstraier support not enabled or index node invalid\n" if ($debug);
        $index_node_url = 0;
        return;
    }

    print curr_time," updating Hyper Estraier:";

    my $t = time();

    my $offset = 0;
    my $added = 0;

    if ($index_node_url) {
        print " opening index $index_node_url";
        # load Search::Estraier only when an index node is actually configured,
        # so the script still runs without Hyper Estraier installed
        require Search::Estraier;
        $hest_node ||= Search::Estraier::Node->new(
            url => $index_node_url,
            user => 'admin',
            passwd => 'admin',
            croak_on_error => 1,
        );
        print " via node URL";
    }

    my $results = 0;

    do {

        my $where = '';
        my @data;
        if (defined($host_id) && defined($share_id) && defined($num)) {
            $where = qq{
                WHERE
                    hosts.id = ? AND
                    shares.id = ? AND
                    files.backupnum = ?
            };
            @data = ( $host_id, $share_id, $num );
        }

        my $limit = sprintf('LIMIT '.EST_CHUNK.' OFFSET %d', $offset);

        my $sth = $dbh->prepare(qq{
            SELECT
                files.id        AS fid,
                hosts.name      AS hname,
                shares.name     AS sname,
                -- shares.share AS sharename,
                files.backupnum AS backupnum,
                -- files.name   AS filename,
                files.path      AS filepath,
                files.date      AS date,
                files.type      AS type,
                files.size      AS size,
                files.shareid   AS shareid,
                backups.date    AS backup_date
            FROM files
                INNER JOIN shares  ON files.shareID = shares.ID
                INNER JOIN hosts   ON hosts.ID = shares.hostID
                INNER JOIN backups ON backups.num = files.backupNum AND backups.hostID = hosts.ID AND backups.shareID = shares.ID
            $where
            $limit
        });

        $sth->execute(@data);
        $results = $sth->rows;

        if ($results == 0) {
            print " - no new files\n";
            return;
        } else {
            print "...";
        }

        sub fmt_date {
            my $t = shift || return;
            # timeStamp() is a BackupPC::Lib method, so call it on $bpc
            my $iso = $bpc->timeStamp($t);
            $iso =~ s/\s/T/;
            return $iso;
        }

        while (my $row = $sth->fetchrow_hashref()) {

            my $uri = $row->{hname} . ':' . $row->{sname} . '#' . $row->{backupnum} . ' ' . $row->{filepath};
            if (! $skip_check && $hest_node) {
                my $id = $hest_node->uri_to_id($uri);
                next if ($id && $id == -1);
            }

            # create a document object
            my $doc = Search::Estraier::Document->new;

            # add attributes to the document object
            $doc->add_attr('@uri', $uri);

            foreach my $c (@{ $sth->{NAME} }) {
                print STDERR "attr $c = $row->{$c}\n" if ($debug > 2);
                $doc->add_attr($c, $row->{$c}) if (defined($row->{$c}));
            }

            #$doc->add_attr('@cdate', fmt_date($row->{'date'}));

            # add the body text to the document object
            my $path = $row->{'filepath'};
            $doc->add_text($path);
            $path =~ s/(.)/$1 /g;
            $doc->add_hidden_text($path);

            print STDERR $doc->dump_draft,"\n" if ($debug > 1);

            # register the document object to the database
            $hest_node->put_doc($doc) if ($hest_node);

            $added++;
        }

        print "$added";

        $offset += EST_CHUNK;

    } while ($results == EST_CHUNK);

    my $dur = (time() - $t) || 1;
    printf(" [%.2f/s dur: %s]\n",
        ( $added / $dur ),
        fmt_time($dur)
    );
}

#---- /subs ----


## update index ##
if ( ( $opt{i} || $opt{j} ) && !$opt{c} ) {
    # update all
    print "force update of Hyper Estraier index ";
    print "by -i flag" if ($opt{i});
    print "by -j flag" if ($opt{j});
    print "\n";
    hest_update();
}

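# schema overview: hosts and shares describe what is backed up, backups has
# one row per (host, share, backup number), files holds the flattened file
# list, and the archive* / backup_parts tables track burning backups to DVD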
## create tables ##
if ($opt{c}) {
    sub do_index {
        my $index = shift || return;
        my ($table,$col,$unique) = split(/:/, $index);
        $unique ||= '';
        $index =~ s/\W+/_/g;
        print "$index on $table($col)" . ( $unique ? "u" : "" ) . " ";
        $dbh->do(qq{ create $unique index $index on $table($col) });
    }

    print "creating tables...\n";

    $dbh->do( qq{
        create table hosts (
            ID      SERIAL          PRIMARY KEY,
            name    VARCHAR(30)     NOT NULL,
            IP      VARCHAR(15)
        );

        create table shares (
            ID      SERIAL          PRIMARY KEY,
            hostID  INTEGER         NOT NULL references hosts(id),
            name    VARCHAR(30)     NOT NULL,
            share   VARCHAR(200)    NOT NULL
        );

        create table dvds (
            ID      SERIAL          PRIMARY KEY,
            num     INTEGER         NOT NULL,
            name    VARCHAR(255)    NOT NULL,
            mjesto  VARCHAR(255)
        );

        create table backups (
            id          serial,
            hostID      INTEGER NOT NULL references hosts(id),
            num         INTEGER NOT NULL,
            date        integer NOT NULL,
            type        CHAR(4) not null,
            shareID     integer not null references shares(id),
            size        bigint not null,
            inc_size    bigint not null default -1,
            inc_deleted boolean default false,
            parts       integer not null default 0,
            PRIMARY KEY(id)
        );

        create table files (
            ID          SERIAL,
            shareID     INTEGER NOT NULL references shares(id),
            backupNum   INTEGER NOT NULL,
            name        VARCHAR(255) NOT NULL,
            path        VARCHAR(255) NOT NULL,
            date        integer NOT NULL,
            type        INTEGER NOT NULL,
            size        bigint NOT NULL,
            primary key(id)
        );

        create table archive (
            id          serial,
            dvd_nr      int not null,
            total_size  bigint default -1,
            note        text,
            username    varchar(20) not null,
            date        timestamp default now(),
            primary key(id)
        );

        create table archive_backup (
            archive_id  int not null references archive(id) on delete cascade,
            backup_id   int not null references backups(id),
            primary key(archive_id, backup_id)
        );

        create table archive_burned (
            archive_id  int references archive(id),
            date        timestamp default now(),
            part        int not null default 1,
            copy        int not null default 1,
            iso_size    bigint default -1
        );

        create table backup_parts (
            id          serial,
            backup_id   int references backups(id),
            part_nr     int not null check (part_nr > 0),
            tar_size    bigint not null check (tar_size > 0),
            size        bigint not null check (size > 0),
            md5         text not null,
            items       int not null check (items > 0),
            date        timestamp default now(),
            primary key(id)
        );
    });

    print "creating indexes: ";

    foreach my $index (qw(
        hosts:name
        backups:hostID
        backups:num
        backups:shareID
        shares:hostID
        shares:name
        files:shareID
        files:path
        files:name
        files:date
        files:size
        archive:dvd_nr
        archive_burned:archive_id
        backup_parts:backup_id,part_nr
    )) {
        do_index($index);
    }

    print " creating sequence: ";
    foreach my $seq (qw/dvd_nr/) {
        print "$seq ";
        $dbh->do( qq{ CREATE SEQUENCE $seq } );
    }

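    # the trigger below keeps backups.parts consistent with the number of
    # rows in backup_parts for that backup, aborting the change otherwise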
    print " creating triggers ";
    $dbh->do( qq{
        create or replace function backup_parts_check() returns trigger as '
        declare
            b_parts integer;
            b_counted integer;
            b_id integer;
        begin
            if (TG_OP = ''UPDATE'') then
                b_id := old.id;
                b_parts := old.parts;
            elsif (TG_OP = ''INSERT'') then
                b_id := new.id;
                b_parts := new.parts;
            end if;
            b_counted := (select count(*) from backup_parts where backup_id = b_id);
            if ( b_parts != b_counted ) then
                raise exception ''Update of backup % aborted, requested % parts and there are really % parts'', b_id, b_parts, b_counted;
            end if;
            return null;
        end;
        ' language plpgsql;

        create trigger do_backup_parts_check
        after insert or update or delete on backups
        for each row execute procedure backup_parts_check();
    });

    print "...\n";

    $dbh->commit;

}

## delete data before inserting ##
if ($opt{d}) {
    print "deleting ";
    foreach my $table (qw(files dvds backups shares hosts)) {
        print "$table ";
        $dbh->do(qq{ DELETE FROM $table });
    }
    print " done...\n";

    $dbh->commit;
}

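# main import: for every BackupPC host, read its backup info, walk each
# share of each backup with BackupPC::View, insert any files not already in
# the database, record the backup size, and then update the full-text index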
## insert new values ##

# get hosts
$hosts = $bpc->HostInfoRead();
my $hostID;
my $shareID;

my $sth;

$sth->{insert_hosts} = $dbh->prepare(qq{
    INSERT INTO hosts (name, IP) VALUES (?,?)
});

$sth->{hosts_by_name} = $dbh->prepare(qq{
    SELECT ID FROM hosts WHERE name=?
});

$sth->{backups_count} = $dbh->prepare(qq{
    SELECT COUNT(*)
    FROM backups
    WHERE hostID=? AND num=? AND shareid=?
});

$sth->{insert_backups} = $dbh->prepare(qq{
    INSERT INTO backups (hostID, num, date, type, shareid, size)
    VALUES (?,?,?,?,?,-1)
});

$sth->{update_backups_size} = $dbh->prepare(qq{
    UPDATE backups SET size = ?
    WHERE hostID = ? and num = ? and date = ? and type = ? and shareid = ?
});

$sth->{insert_files} = $dbh->prepare(qq{
    INSERT INTO files
        (shareID, backupNum, name, path, date, type, size)
    VALUES (?,?,?,?,?,?,?)
});

my @hosts = keys %{$hosts};
my $host_nr = 0;

foreach my $host_key (@hosts) {

    my $hostname = $hosts->{$host_key}->{'host'} || die "can't find host for $host_key";

    $sth->{hosts_by_name}->execute($hosts->{$host_key}->{'host'});

    unless (($hostID) = $sth->{hosts_by_name}->fetchrow_array()) {
        $sth->{insert_hosts}->execute(
            $hosts->{$host_key}->{'host'},
            $hosts->{$host_key}->{'ip'}
        );

        $hostID = $dbh->last_insert_id(undef,undef,'hosts',undef);
    }

    $host_nr++;
    # get backups for a host
    my @backups = $bpc->BackupInfoRead($hostname);
    my $incs = scalar @backups;

    my $host_header = sprintf("host %s [%d/%d]: %d increments\n",
        $hosts->{$host_key}->{'host'},
        $host_nr,
        ($#hosts + 1),
        $incs
    );
    print $host_header unless ($opt{q});

    my $inc_nr = 0;
    $beenThere = {};

    foreach my $backup (@backups) {

        $inc_nr++;
        last if ($opt{m} && $inc_nr > $opt{m});

        my $backupNum = $backup->{'num'};
        my @backupShares = ();

        my $share_header = sprintf("%-10s %2d/%-2d #%-2d %s %5s/%5s files (date: %s dur: %s)\n",
            $hosts->{$host_key}->{'host'},
            $inc_nr, $incs, $backupNum,
            $backup->{type} || '?',
            $backup->{nFilesNew} || '?', $backup->{nFiles} || '?',
            strftime($t_fmt,localtime($backup->{startTime})),
            fmt_time($backup->{endTime} - $backup->{startTime})
        );
        print $share_header unless ($opt{q});

        my $files = BackupPC::View->new($bpc, $hostname, \@backups, 1);
        foreach my $share ($files->shareList($backupNum)) {

            my $t = time();

            $shareID = getShareID($share, $hostID, $hostname);

            $sth->{backups_count}->execute($hostID, $backupNum, $shareID);
            my ($count) = $sth->{backups_count}->fetchrow_array();
            # skip if already in database!
            next if ($count > 0);

            # dump host and share header for -q
            if ($opt{q}) {
                if ($host_header) {
                    print $host_header;
                    $host_header = undef;
                }
                print $share_header;
            }

            # dump some log
            print curr_time," ", $share;

            $sth->{insert_backups}->execute(
                $hostID,
                $backupNum,
                $backup->{'endTime'},
                substr($backup->{'type'},0,4),
                $shareID,
            );

            my ($f, $nf, $d, $nd, $size) = recurseDir($bpc, $hostname, $files, $backupNum, $share, "", $shareID);

            eval {
                $sth->{update_backups_size}->execute(
                    $size,
                    $hostID,
                    $backupNum,
                    $backup->{'endTime'},
                    substr($backup->{'type'},0,4),
                    $shareID,
                );
                print " commit";
                $dbh->commit();
            };
            if ($@) {
                print " rollback";
                $dbh->rollback();
            }

            my $dur = (time() - $t) || 1;
            printf(" %d/%d files %d/%d dirs %0.2f MB [%.2f/s dur: %s]\n",
                $nf, $f, $nd, $d,
                ($size / 1024 / 1024),
                ( ($f+$d) / $dur ),
                fmt_time($dur)
            );

            hest_update($hostID, $shareID, $backupNum) if ($nf + $nd > 0);
        }

    }
}
undef $sth;
$dbh->commit();
$dbh->disconnect();

print "total duration: ",fmt_time(time() - $start_t),"\n";

$pidfile->remove;

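# getShareID: return the ID of a share for this host, creating the shares
# row on first sight (the share column stores "hostname/share")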
sub getShareID {

    my ($share, $hostID, $hostname) = @_;

    $sth->{share_id} ||= $dbh->prepare(qq{
        SELECT ID FROM shares WHERE hostID=? AND name=?
    });

    $sth->{share_id}->execute($hostID,$share);

    my ($id) = $sth->{share_id}->fetchrow_array();

    return $id if (defined($id));

    $sth->{insert_share} ||= $dbh->prepare(qq{
        INSERT INTO shares
            (hostID,name,share)
        VALUES (?,?,?)
    });

    my $drop_down = $hostname . '/' . $share;
    $drop_down =~ s#//+#/#g;

    $sth->{insert_share}->execute($hostID,$share, $drop_down);
    return $dbh->last_insert_id(undef,undef,'shares',undef);
}

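# found_in_db: check whether a file row already exists for this share/path/
# size (matching the date exactly or +/- the DST offset), remember the key
# in $beenThere, and insert the row when it is not found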
sub found_in_db {

    my @data = @_;
    shift @data;

    my ($key, $shareID,undef,$name,$path,$date,undef,$size) = @_;

    return $beenThere->{$key} if (defined($beenThere->{$key}));

    $sth->{file_in_db} ||= $dbh->prepare(qq{
        SELECT 1 FROM files
        WHERE shareID = ? and
            path = ? and
            size = ? and
            ( date = ? or date = ? or date = ? )
        LIMIT 1
    });

    my @param = ($shareID,$path,$size,$date, $date-$dst_offset, $date+$dst_offset);
    $sth->{file_in_db}->execute(@param);
    my $rows = $sth->{file_in_db}->rows;
    print STDERR "## found_in_db($shareID,$path,$date,$size) ",( $rows ? '+' : '-' ), join(" ",@param), "\n" if ($debug >= 3);

    $beenThere->{$key}++;

    $sth->{'insert_files'}->execute(@data) unless ($rows);
    return $rows;
}

####################################################
# recurse through the filesystem structure and     #
# return a flattened list of files                 #
####################################################
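# returns ($nr_files, $new_files, $nr_dirs, $new_dirs, $size) for the
# subtree; subdirectories are queued on a stack and processed recursively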
sub recurseDir($$$$$$$) {

    my ($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID) = @_;

    print STDERR "\nrecurse($hostname,$backupNum,$share,$dir,$shareID)\n" if ($debug >= 1);

    my ($nr_files, $new_files, $nr_dirs, $new_dirs, $size) = (0,0,0,0,0);

    { # scope
        my @stack;

        print STDERR "# dirAttrib($backupNum, $share, $dir)\n" if ($debug >= 2);
        my $filesInBackup = $files->dirAttrib($backupNum, $share, $dir);

        # first, add all the entries in current directory
        foreach my $path_key (keys %{$filesInBackup}) {
            print STDERR "# file ",Dumper($filesInBackup->{$path_key}),"\n" if ($debug >= 3);
            my @data = (
                $shareID,
                $backupNum,
                $path_key,
                $filesInBackup->{$path_key}->{'relPath'},
                $filesInBackup->{$path_key}->{'mtime'},
                $filesInBackup->{$path_key}->{'type'},
                $filesInBackup->{$path_key}->{'size'}
            );

            my $key = join(" ", (
                $shareID,
                $dir,
                $path_key,
                $filesInBackup->{$path_key}->{'mtime'},
                $filesInBackup->{$path_key}->{'size'}
            ));

            my $key_dst_prev = join(" ", (
                $shareID,
                $dir,
                $path_key,
                $filesInBackup->{$path_key}->{'mtime'} - $dst_offset,
                $filesInBackup->{$path_key}->{'size'}
            ));

            my $key_dst_next = join(" ", (
                $shareID,
                $dir,
                $path_key,
                $filesInBackup->{$path_key}->{'mtime'} + $dst_offset,
                $filesInBackup->{$path_key}->{'size'}
            ));

            my $found;
            if (
                ! defined($beenThere->{$key}) &&
                ! defined($beenThere->{$key_dst_prev}) &&
                ! defined($beenThere->{$key_dst_next}) &&
                ! ($found = found_in_db($key, @data))
            ) {
                print STDERR "# key: $key [", $beenThere->{$key},"]" if ($debug >= 2);

                if ($filesInBackup->{$path_key}->{'type'} == BPC_FTYPE_DIR) {
                    $new_dirs++ unless ($found);
                    print STDERR " dir\n" if ($debug >= 2);
                } else {
                    $new_files++ unless ($found);
                    print STDERR " file\n" if ($debug >= 2);
                }
                $size += $filesInBackup->{$path_key}->{'size'} || 0;
            }

            if ($filesInBackup->{$path_key}->{'type'} == BPC_FTYPE_DIR) {
                $nr_dirs++;

                my $full_path = $dir . '/' . $path_key;
                push @stack, $full_path;
                print STDERR "### store to stack: $full_path\n" if ($debug >= 3);

#               my ($f,$nf,$d,$nd) = recurseDir($bpc, $hostname, $backups, $backupNum, $share, $path_key, $shareID) unless ($beenThere->{$key});
#
#               $nr_files += $f;
#               $new_files += $nf;
#               $nr_dirs += $d;
#               $new_dirs += $nd;

            } else {
                $nr_files++;
            }
        }

        print STDERR "## STACK ",join(", ", @stack),"\n" if ($debug >= 2);

        while ( my $dir = shift @stack ) {
            my ($f,$nf,$d,$nd, $s) = recurseDir($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID);
            print STDERR "# $dir f: $f nf: $nf d: $d nd: $nd\n" if ($debug >= 1);
            $nr_files += $f;
            $new_files += $nf;
            $nr_dirs += $d;
            $new_dirs += $nd;
            $size += $s;
        }
    }

    return ($nr_files, $new_files, $nr_dirs, $new_dirs, $size);
}

Properties

Name             Value
svn:executable   *
