/[BackupPC]/trunk/bin/BackupPC_updatedb
This is a repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!

Contents of /trunk/bin/BackupPC_updatedb



Revision 249
Fri Dec 9 16:27:49 2005 UTC by dpavlin
File size: 17381 byte(s)
small fixes

#!/usr/local/bin/perl -w

use strict;
use lib "__INSTALLDIR__/lib";

use DBI;
use BackupPC::Lib;
use BackupPC::View;
use Data::Dumper;
use Getopt::Std;
use Time::HiRes qw/time/;
use File::Pid;
use POSIX qw/strftime/;
use BackupPC::SearchLib;

use constant BPC_FTYPE_DIR => 5;
use constant EST_CHUNK     => 100000;

# daylight saving time change offset for 1h
my $dst_offset = 60 * 60;

my $debug = 0;
$| = 1;

my $start_t = time();

my $pidfile = new File::Pid;

if (my $pid = $pidfile->running) {
    die "$0 already running: $pid\n";
} elsif ($pidfile->pid ne $$) {
    $pidfile->remove;
    $pidfile = new File::Pid;
}
$pidfile->write;
print STDERR "$0 using pid ", $pidfile->pid, " file ", $pidfile->file, "\n";

my $t_fmt = '%Y-%m-%d %H:%M:%S';

my $hosts;
my $bpc = BackupPC::Lib->new || die;
my %Conf = $bpc->Conf();
my $TopDir = $bpc->TopDir();
my $beenThere = {};

my $dsn = $Conf{SearchDSN} || die "Need SearchDSN in config.pl\n";
my $user = $Conf{SearchUser} || '';

my $use_hest = $Conf{HyperEstraierIndex};
my ($index_path, $index_node_url) = BackupPC::SearchLib::getHyperEstraier_url($use_hest);

my $dbh = DBI->connect($dsn, $user, "", { RaiseError => 1, AutoCommit => 0 });

my %opt;

if ( !getopts("cdm:v:ijf", \%opt) ) {
    print STDERR <<EOF;
usage: $0 [-c|-d] [-m num] [-v level] [-i|-j|-f]

Options:
    -c          create database on first use
    -d          delete database before import
    -m num      import just num increments for one host
    -v level    set verbosity (debug) level (default $debug)
    -i          update Hyper Estraier full text index
    -j          update full text index, but don't check existing files
    -f          don't do anything with the full text index

Option -j is a variation on -i which allows faster initial creation
of the full-text index from an existing database.

Option -f will create a database which is out of sync with the full text
index. You will have to re-run $0 with -i to fix it.

EOF
    exit 1;
}

if ($opt{v}) {
    print "Debug level at $opt{v}\n";
    $debug = $opt{v};
} elsif ($opt{f}) {
    print "WARNING: disabling full-text index update. You need to re-run $0 -j !\n";
    ($use_hest, $index_path, $index_node_url) = (undef, undef, undef);
}

#---- subs ----

sub fmt_time {
    my $t = shift || return;
    my $out = "";
    my ($ss, $mm, $hh) = gmtime($t);
    $out .= "${hh}h" if ($hh);
    $out .= sprintf("%02d:%02d", $mm, $ss);
    return $out;
}

sub curr_time {
    return strftime($t_fmt, localtime());
}

my $hest_db;
my $hest_node;

sub signal {
    my ($sig) = @_;
    if ($hest_db) {
        print "\nCaught a SIG$sig--syncing database and shutting down\n";
        $hest_db->sync();
        $hest_db->close();
    }
    exit(0);
}

$SIG{'INT'}  = \&signal;
$SIG{'QUIT'} = \&signal;
sub hest_update {

    my ($host_id, $share_id, $num) = @_;

    my $skip_check = $opt{j};
    print STDERR "Skipping check for existing files -- this should be used only with initial import\n" if ($skip_check);

    unless (defined($use_hest)) {
        print STDERR "HyperEstraier support not enabled in configuration\n";
        $use_hest = 0;
        return;
    }

    return unless ($use_hest);

    print curr_time, " updating HyperEstraier:";

    my $t = time();

    my $offset = 0;
    my $added  = 0;

    print " opening index $use_hest";
    if ($index_path) {
        $hest_db = HyperEstraier::Database->new();
        $hest_db->open($TopDir . $index_path, $HyperEstraier::Database::DBWRITER | $HyperEstraier::Database::DBCREAT);
        print " directly";
    } elsif ($index_node_url) {
        $hest_node ||= HyperEstraier::Node->new($index_node_url);
        $hest_node->set_auth('admin', 'admin');
        print " via node URL";
    } else {
        die "don't know how to use HyperEstraier Index $use_hest";
    }
    print " increment is " . EST_CHUNK . " files:";

    # fetch and index in batches of EST_CHUNK rows
    my $results = EST_CHUNK;

    while ($results == EST_CHUNK) {

        my $where = '';
        my @data;
        if (defined($host_id) && defined($share_id) && defined($num)) {
            $where = qq{
            WHERE
                hosts.id = ? AND
                shares.id = ? AND
                files.backupnum = ?
            };
            @data = ( $host_id, $share_id, $num );
        }

        my $limit = sprintf('LIMIT ' . EST_CHUNK . ' OFFSET %d', $offset);

        my $sth = $dbh->prepare(qq{
            SELECT
                files.id        AS fid,
                hosts.name      AS hname,
                shares.name     AS sname,
                -- shares.share AS sharename,
                files.backupnum AS backupnum,
                -- files.name   AS filename,
                files.path      AS filepath,
                files.date      AS date,
                files.type      AS type,
                files.size      AS size,
                files.shareid   AS shareid,
                backups.date    AS backup_date
            FROM files
                INNER JOIN shares  ON files.shareID = shares.ID
                INNER JOIN hosts   ON hosts.ID = shares.hostID
                INNER JOIN backups ON backups.num = files.backupNum AND backups.hostID = hosts.ID AND backups.shareID = shares.ID
            $where
            $limit
        });

        $sth->execute(@data);
        $results = $sth->rows;

        if ($results == 0) {
            print " - no new files\n";
            last;
        }

        sub fmt_date {
            my $t = shift || return;
            my $iso = BackupPC::Lib::timeStamp($t);
            $iso =~ s/\s/T/;
            return $iso;
        }

        while (my $row = $sth->fetchrow_hashref()) {

            my $fid = $row->{'fid'} || die "no fid?";
            my $uri = 'file:///' . $fid;

            unless ($skip_check) {
                my $id = ($hest_db || $hest_node)->uri_to_id($uri);
                next unless ($id == -1);
            }

            # create a document object
            my $doc = HyperEstraier::Document->new;

            # add attributes to the document object
            $doc->add_attr('@uri', $uri);

            foreach my $c (@{ $sth->{NAME} }) {
                $doc->add_attr($c, $row->{$c}) if (defined($row->{$c}));
            }

            #$doc->add_attr('@cdate', fmt_date($row->{'date'}));

            # add the body text to the document object
            my $path = $row->{'filepath'};
            $doc->add_text($path);
            $path =~ s/(.)/$1 /g;
            $doc->add_hidden_text($path);

            print STDERR $doc->dump_draft, "\n" if ($debug > 1);

            # register the document object to the database
            if ($hest_db) {
                $hest_db->put_doc($doc, $HyperEstraier::Database::PDCLEAN);
            } elsif ($hest_node) {
                $hest_node->put_doc($doc);
            } else {
                die "not supported";
            }
            $added++;
        }

        print " $added";
        $hest_db->sync() if ($index_path);

        $offset += EST_CHUNK;
    }

    if ($index_path) {
        print ", close";
        $hest_db->close();
    }

    my $dur = (time() - $t) || 1;
    printf(" [%.2f/s dur: %s]\n",
        ( $added / $dur ),
        fmt_time($dur)
    );
}

#---- /subs ----


## update index ##
if (($opt{i} || $opt{j} || ($index_path && ! -e $TopDir . $index_path)) && !$opt{c}) {
    # update all
    print "force update of HyperEstraier index ";
    print "importing existing data " unless (-e $TopDir . $index_path);
    print "by -i flag" if ($opt{i});
    print "by -j flag" if ($opt{j});
    print "\n";
    hest_update();
}

## create tables ##
if ($opt{c}) {
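    # do_index("table:column[,column...][:unique]") -- create an index on the
    # given table and column(s); an optional third field is passed through as
    # the index type (e.g. "unique")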
    sub do_index {
        my $index = shift || return;
        my ($table, $col, $unique) = split(/:/, $index);
        $unique ||= '';
        $index =~ s/\W+/_/g;
        print "$index on $table($col)" . ( $unique ? "u" : "" ) . " ";
        $dbh->do(qq{ create $unique index $index on $table($col) });
    }

    print "creating tables...\n";

    $dbh->do( qq{
        create table hosts (
            ID      SERIAL          PRIMARY KEY,
            name    VARCHAR(30)     NOT NULL,
            IP      VARCHAR(15)
        );

        create table shares (
            ID      SERIAL          PRIMARY KEY,
            hostID  INTEGER         NOT NULL references hosts(id),
            name    VARCHAR(30)     NOT NULL,
            share   VARCHAR(200)    NOT NULL
        );

        create table dvds (
            ID      SERIAL          PRIMARY KEY,
            num     INTEGER         NOT NULL,
            name    VARCHAR(255)    NOT NULL,
            mjesto  VARCHAR(255)
        );

        create table backups (
            id          serial,
            hostID      INTEGER NOT NULL references hosts(id),
            num         INTEGER NOT NULL,
            date        integer NOT NULL,
            type        CHAR(4) not null,
            shareID     integer not null references shares(id),
            size        bigint  not null,
            inc_size    bigint  not null default -1,
            inc_deleted boolean default false,
            parts       integer not null default 1,
            PRIMARY KEY(id)
        );

        create table files (
            ID          SERIAL,
            shareID     INTEGER NOT NULL references shares(id),
            backupNum   INTEGER NOT NULL,
            name        VARCHAR(255) NOT NULL,
            path        VARCHAR(255) NOT NULL,
            date        integer NOT NULL,
            type        INTEGER NOT NULL,
            size        bigint  NOT NULL,
            primary key(id)
        );

        create table archive (
            id          serial,
            dvd_nr      int not null,
            total_size  bigint default -1,
            note        text,
            username    varchar(20) not null,
            date        timestamp default now(),
            primary key(id)
        );

        create table archive_backup (
            archive_id  int not null references archive(id) on delete cascade,
            backup_id   int not null references backups(id),
            primary key(archive_id, backup_id)
        );

        create table archive_burned (
            archive_id  int references archive(id),
            date        timestamp default now(),
            part        int not null default 1,
            copy        int not null default 1,
            iso_size    bigint default -1
        );

        create table backup_parts (
            id          serial,
            backup_id   int references backups(id),
            part_nr     int not null check (part_nr > 0),
            tar_size    bigint not null check (tar_size > 0),
            size        bigint not null check (size > 0),
            md5         text not null,
            items       int not null check (items > 0),
            date        timestamp default now(),
            primary key(id)
        );
    });

    print "creating indexes: ";

    foreach my $index (qw(
        hosts:name
        backups:hostID
        backups:num
        backups:shareID
        shares:hostID
        shares:name
        files:shareID
        files:path
        files:name
        files:date
        files:size
        archive:dvd_nr
        archive_burned:archive_id
        backup_parts:backup_id,part_nr
    )) {
        do_index($index);
    }

    print " creating sequence: ";
    foreach my $seq (qw/dvd_nr/) {
        print "$seq ";
        $dbh->do( qq{ CREATE SEQUENCE $seq } );
    }

    print "...\n";

    $dbh->commit;

}

## delete data before inserting ##
if ($opt{d}) {
    print "deleting ";
    foreach my $table (qw(files dvds backups shares hosts)) {
        print "$table ";
        $dbh->do(qq{ DELETE FROM $table });
    }
    print " done...\n";

    $dbh->commit;
}

## insert new values ##

# get hosts
$hosts = $bpc->HostInfoRead();
my $hostID;
my $shareID;

my $sth;

$sth->{insert_hosts} = $dbh->prepare(qq{
    INSERT INTO hosts (name, IP) VALUES (?, ?)
});

$sth->{hosts_by_name} = $dbh->prepare(qq{
    SELECT ID FROM hosts WHERE name = ?
});

$sth->{backups_count} = $dbh->prepare(qq{
    SELECT COUNT(*)
    FROM backups
    WHERE hostID = ? AND num = ? AND shareid = ?
});

$sth->{insert_backups} = $dbh->prepare(qq{
    INSERT INTO backups (hostID, num, date, type, shareid, size)
    VALUES (?, ?, ?, ?, ?, -1)
});

$sth->{update_backups_size} = $dbh->prepare(qq{
    UPDATE backups SET size = ?
    WHERE hostID = ? AND num = ? AND date = ? AND type = ? AND shareid = ?
});

$sth->{insert_files} = $dbh->prepare(qq{
    INSERT INTO files
        (shareID, backupNum, name, path, date, type, size)
    VALUES (?, ?, ?, ?, ?, ?, ?)
});

my @hosts = keys %{$hosts};
my $host_nr = 0;

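# walk every configured host, and for each host every backup increment and
# every share in it, inserting anything that is not in the database yet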
foreach my $host_key (@hosts) {

    my $hostname = $hosts->{$host_key}->{'host'} || die "can't find host for $host_key";

    $sth->{hosts_by_name}->execute($hosts->{$host_key}->{'host'});

    unless (($hostID) = $sth->{hosts_by_name}->fetchrow_array()) {
        $sth->{insert_hosts}->execute(
            $hosts->{$host_key}->{'host'},
            $hosts->{$host_key}->{'ip'}
        );

        $hostID = $dbh->last_insert_id(undef, undef, 'hosts', undef);
    }

    $host_nr++;
    print "host ", $hosts->{$host_key}->{'host'}, " [",
        $host_nr, "/", ($#hosts + 1), "]: ";

    # get backups for a host
    my @backups = $bpc->BackupInfoRead($hostname);
    my $incs = scalar @backups;
    print "$incs increments\n";

    my $inc_nr = 0;
    $beenThere = {};

    foreach my $backup (@backups) {

        $inc_nr++;
        last if ($opt{m} && $inc_nr > $opt{m});

        my $backupNum = $backup->{'num'};
        my @backupShares = ();

        printf("%-10s %2d/%-2d #%-2d %s %5s/%5s files (date: %s dur: %s)\n",
            $hosts->{$host_key}->{'host'},
            $inc_nr, $incs, $backupNum,
            $backup->{type} || '?',
            $backup->{nFilesNew} || '?', $backup->{nFiles} || '?',
            strftime($t_fmt, localtime($backup->{startTime})),
            fmt_time($backup->{endTime} - $backup->{startTime})
        );

        my $files = BackupPC::View->new($bpc, $hostname, \@backups, 1);
        foreach my $share ($files->shareList($backupNum)) {

            my $t = time();

            $shareID = getShareID($share, $hostID, $hostname);

            $sth->{backups_count}->execute($hostID, $backupNum, $shareID);
            my ($count) = $sth->{backups_count}->fetchrow_array();
            # skip if already in database!
            next if ($count > 0);

            # dump some log
            print curr_time, " ", $share;

            $sth->{insert_backups}->execute(
                $hostID,
                $backupNum,
                $backup->{'endTime'},
                substr($backup->{'type'}, 0, 4),
                $shareID,
            );

            my ($f, $nf, $d, $nd, $size) = recurseDir($bpc, $hostname, $files, $backupNum, $share, "", $shareID);

            $sth->{update_backups_size}->execute(
                $size,
                $hostID,
                $backupNum,
                $backup->{'endTime'},
                substr($backup->{'type'}, 0, 4),
                $shareID,
            );

            print " commit";
            $dbh->commit();

            my $dur = (time() - $t) || 1;
            printf(" %d/%d files %d/%d dirs %0.2f MB [%.2f/s dur: %s]\n",
                $nf, $f, $nd, $d,
                ($size / 1024 / 1024),
                ( ($f + $d) / $dur ),
                fmt_time($dur)
            );

            hest_update($hostID, $shareID, $backupNum) if ($nf + $nd > 0);
        }

    }
}
undef $sth;
$dbh->commit();
$dbh->disconnect();

print "total duration: ", fmt_time(time() - $start_t), "\n";

$pidfile->remove;

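# getShareID($share, $hostID, $hostname) -- look up the ID of a share for
# this host, inserting a new row into the shares table if needed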
sub getShareID {

    my ($share, $hostID, $hostname) = @_;

    $sth->{share_id} ||= $dbh->prepare(qq{
        SELECT ID FROM shares WHERE hostID = ? AND name = ?
    });

    $sth->{share_id}->execute($hostID, $share);

    my ($id) = $sth->{share_id}->fetchrow_array();

    return $id if (defined($id));

    $sth->{insert_share} ||= $dbh->prepare(qq{
        INSERT INTO shares
            (hostID, name, share)
        VALUES (?, ?, ?)
    });

    my $drop_down = $hostname . '/' . $share;
    $drop_down =~ s#//+#/#g;

    $sth->{insert_share}->execute($hostID, $share, $drop_down);
    return $dbh->last_insert_id(undef, undef, 'shares', undef);
}

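# found_in_db($key, @file_row) -- return true if this file is already in the
# files table (same share, path and size, with the date matching exactly or
# shifted by one hour either way to allow for DST), otherwise insert it;
# seen keys are cached in $beenThere so each entry is checked only once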
sub found_in_db {

    my @data = @_;
    shift @data;

    my ($key, $shareID, undef, $name, $path, $date, undef, $size) = @_;

    return $beenThere->{$key} if (defined($beenThere->{$key}));

    $sth->{file_in_db} ||= $dbh->prepare(qq{
        SELECT 1 FROM files
        WHERE shareID = ? and
            path = ? and
            size = ? and
            ( date = ? or date = ? or date = ? )
        LIMIT 1
    });

    my @param = ($shareID, $path, $size, $date, $date - $dst_offset, $date + $dst_offset);
    $sth->{file_in_db}->execute(@param);
    my $rows = $sth->{file_in_db}->rows;
    print STDERR "## found_in_db($shareID,$path,$date,$size) ", ( $rows ? '+' : '-' ), join(" ", @param), "\n" if ($debug >= 3);

    $beenThere->{$key}++;

    $sth->{'insert_files'}->execute(@data) unless ($rows);
    return $rows;
}

####################################################
# recurse through the filesystem structure and     #
# return a flattened list of files                 #
####################################################
sub recurseDir($$$$$$$) {

    my ($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID) = @_;

    print STDERR "\nrecurse($hostname,$backupNum,$share,$dir,$shareID)\n" if ($debug >= 1);

    my ($nr_files, $new_files, $nr_dirs, $new_dirs, $size) = (0, 0, 0, 0, 0);

    { # scope
        my @stack;

        print STDERR "# dirAttrib($backupNum, $share, $dir)\n" if ($debug >= 2);
        my $filesInBackup = $files->dirAttrib($backupNum, $share, $dir);

        # first, add all the entries in the current directory
        foreach my $path_key (keys %{$filesInBackup}) {
            print STDERR "# file ", Dumper($filesInBackup->{$path_key}), "\n" if ($debug >= 3);
            my @data = (
                $shareID,
                $backupNum,
                $path_key,
                $filesInBackup->{$path_key}->{'relPath'},
                $filesInBackup->{$path_key}->{'mtime'},
                $filesInBackup->{$path_key}->{'type'},
                $filesInBackup->{$path_key}->{'size'}
            );

            my $key = join(" ", (
                $shareID,
                $dir,
                $path_key,
                $filesInBackup->{$path_key}->{'mtime'},
                $filesInBackup->{$path_key}->{'size'}
            ));

            my $key_dst_prev = join(" ", (
                $shareID,
                $dir,
                $path_key,
                $filesInBackup->{$path_key}->{'mtime'} - $dst_offset,
                $filesInBackup->{$path_key}->{'size'}
            ));

            my $key_dst_next = join(" ", (
                $shareID,
                $dir,
                $path_key,
                $filesInBackup->{$path_key}->{'mtime'} + $dst_offset,
                $filesInBackup->{$path_key}->{'size'}
            ));

            my $found;
            if (
                ! defined($beenThere->{$key}) &&
                ! defined($beenThere->{$key_dst_prev}) &&
                ! defined($beenThere->{$key_dst_next}) &&
                ! ($found = found_in_db($key, @data))
            ) {
                print STDERR "# key: $key [", $beenThere->{$key}, "]" if ($debug >= 2);

                if ($filesInBackup->{$path_key}->{'type'} == BPC_FTYPE_DIR) {
                    $new_dirs++ unless ($found);
                    print STDERR " dir\n" if ($debug >= 2);
                } else {
                    $new_files++ unless ($found);
                    print STDERR " file\n" if ($debug >= 2);
                }
                $size += $filesInBackup->{$path_key}->{'size'} || 0;
            }

            if ($filesInBackup->{$path_key}->{'type'} == BPC_FTYPE_DIR) {
                $nr_dirs++;

                my $full_path = $dir . '/' . $path_key;
                push @stack, $full_path;
                print STDERR "### store to stack: $full_path\n" if ($debug >= 3);

                # my ($f,$nf,$d,$nd) = recurseDir($bpc, $hostname, $backups, $backupNum, $share, $path_key, $shareID) unless ($beenThere->{$key});
                #
                # $nr_files += $f;
                # $new_files += $nf;
                # $nr_dirs += $d;
                # $new_dirs += $nd;

            } else {
                $nr_files++;
            }
        }

        print STDERR "## STACK ", join(", ", @stack), "\n" if ($debug >= 2);

        while ( my $dir = shift @stack ) {
            my ($f, $nf, $d, $nd, $s) = recurseDir($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID);
            print STDERR "# $dir f: $f nf: $nf d: $d nd: $nd\n" if ($debug >= 1);
            $nr_files += $f;
            $new_files += $nf;
            $nr_dirs += $d;
            $new_dirs += $nd;
            $size += $s;
        }
    }

    return ($nr_files, $new_files, $nr_dirs, $new_dirs, $size);
}

Properties

svn:executable = *
