/[BackupPC]/trunk/bin/BackupPC_updatedb
This is a repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!

Contents of /trunk/bin/BackupPC_updatedb



Revision 246
Fri Dec 9 12:49:55 2005 UTC by dpavlin
File size: 17234 byte(s)
Check daylight saving time offset when reading from the database too.
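The change widens the duplicate-file lookup so that a file whose stored mtime is shifted by exactly one hour (a daylight saving time change between dump and import) is still treated as already present. A minimal sketch of that check, mirroring the found_in_db() query in the script below; the seen_before() helper name and the standalone database handle are illustrative, not part of the script:

    use strict;
    use DBI;

    my $dst_offset = 60 * 60;    # one hour, same constant as in the script

    # Return true if a row with the same share, path and size already exists
    # whose date matches exactly or is off by +/- one hour.
    sub seen_before {
        my ($dbh, $share_id, $path, $size, $date) = @_;
        my $sth = $dbh->prepare(q{
            SELECT 1 FROM files
            WHERE shareID = ? AND path = ? AND size = ?
              AND ( date = ? OR date = ? OR date = ? )
            LIMIT 1
        });
        $sth->execute($share_id, $path, $size,
                      $date, $date - $dst_offset, $date + $dst_offset);
        return $sth->fetchrow_arrayref ? 1 : 0;
    }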

#!/usr/local/bin/perl -w

use strict;
use lib "__INSTALLDIR__/lib";

use DBI;
use BackupPC::Lib;
use BackupPC::View;
use Data::Dumper;
use Getopt::Std;
use Time::HiRes qw/time/;
use File::Pid;
use POSIX qw/strftime/;
use BackupPC::SearchLib;

use constant BPC_FTYPE_DIR => 5;
use constant EST_CHUNK => 100000;

# daylight saving time change offset for 1h
my $dst_offset = 60 * 60;

my $debug = 0;
$|=1;

my $start_t = time();

my $pidfile = new File::Pid;

if (my $pid = $pidfile->running ) {
    die "$0 already running: $pid\n";
} elsif ($pidfile->pid ne $$) {
    $pidfile->remove;
    $pidfile = new File::Pid;
}
$pidfile->write;
print STDERR "$0 using pid ",$pidfile->pid," file ",$pidfile->file,"\n";

my $t_fmt = '%Y-%m-%d %H:%M:%S';

my $hosts;
my $bpc = BackupPC::Lib->new || die;
my %Conf = $bpc->Conf();
my $TopDir = $bpc->TopDir();
my $beenThere = {};

my $dsn = $Conf{SearchDSN} || die "Need SearchDSN in config.pl\n";
my $user = $Conf{SearchUser} || '';

my $use_hest = $Conf{HyperEstraierIndex};
my ($index_path, $index_node_url) = BackupPC::SearchLib::getHyperEstraier_url($use_hest);

my $dbh = DBI->connect($dsn, $user, "", { RaiseError => 1, AutoCommit => 0 });

my %opt;

if ( !getopts("cdm:v:ijf", \%opt ) ) {
    print STDERR <<EOF;
usage: $0 [-c|-d] [-m num] [-v level] [-i|-j|-f]

Options:
  -c        create database on first use
  -d        delete database before import
  -m num    import just num increments for one host
  -v num    set verbosity (debug) level (default $debug)
  -i        update Hyper Estraier full text index
  -j        update full text, don't check existing files
  -f        don't do anything with full text index

Option -j is a variation of -i. It allows faster initial creation
of the full-text index from an existing database.

Option -f will create a database which is out of sync with the full-text
index. You will have to re-run $0 with -i to fix it.

EOF
    exit 1;
}

if ($opt{v}) {
    print "Debug level at $opt{v}\n";
    $debug = $opt{v};
}

if ($opt{f}) {
    print "WARNING: disabling full-text index update. You need to re-run $0 -j !\n";
    ($use_hest, $index_path, $index_node_url) = (undef, undef, undef);
}

#---- subs ----

sub fmt_time {
    my $t = shift || return;
    my $out = "";
    my ($ss,$mm,$hh) = gmtime($t);
    $out .= "${hh}h" if ($hh);
    $out .= sprintf("%02d:%02d", $mm,$ss);
    return $out;
}

sub curr_time {
    return strftime($t_fmt,localtime());
}

my $hest_db;
my $hest_node;

sub signal {
    my($sig) = @_;
    if ($hest_db) {
        print "\nCaught a SIG$sig--syncing database and shutting down\n";
        $hest_db->sync();
        $hest_db->close();
    }
    exit(0);
}

$SIG{'INT'} = \&signal;
$SIG{'QUIT'} = \&signal;

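# hest_update() feeds file metadata from the files table into the Hyper
# Estraier full-text index, either directly on disk ($index_path) or through
# a node URL, fetching rows in batches of EST_CHUNK and skipping URIs that
# are already indexed unless -j was given.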
sub hest_update {

    my ($host_id, $share_id, $num) = @_;

    my $skip_check = $opt{j} && print STDERR "Skipping check for existing files -- this should be used only with initial import\n";

    unless ($use_hest) {
        print STDERR "HyperEstraier support not enabled in configuration\n";
        return;
    }

    print curr_time," updating HyperEstraier:";

    my $t = time();

    my $offset = 0;
    my $added = 0;

    print " opening index $use_hest";
    if ($index_path) {
        $hest_db = HyperEstraier::Database->new();
        $hest_db->open($TopDir . $index_path, $HyperEstraier::Database::DBWRITER | $HyperEstraier::Database::DBCREAT);
        print " directly";
    } elsif ($index_node_url) {
        $hest_node ||= HyperEstraier::Node->new($index_node_url);
        $hest_node->set_auth('admin', 'admin');
        print " via node URL";
    } else {
        die "don't know how to use HyperEstraier Index $use_hest";
    }
    print " increment is " . EST_CHUNK . " files:";

    my $results = 0;

    # fetch and index rows in batches of EST_CHUNK until a short batch is returned
    while (1) {

        my $where = '';
        my @data;
        if (defined($host_id) && defined($share_id) && defined($num)) {
            $where = qq{
                WHERE
                    hosts.id = ? AND
                    shares.id = ? AND
                    files.backupnum = ?
            };
            @data = ( $host_id, $share_id, $num );
        }

        my $limit = sprintf('LIMIT '.EST_CHUNK.' OFFSET %d', $offset);

        my $sth = $dbh->prepare(qq{
            SELECT
                files.id AS fid,
                hosts.name AS hname,
                shares.name AS sname,
                -- shares.share AS sharename,
                files.backupnum AS backupnum,
                -- files.name AS filename,
                files.path AS filepath,
                files.date AS date,
                files.type AS type,
                files.size AS size,
                files.shareid AS shareid,
                backups.date AS backup_date
            FROM files
                INNER JOIN shares ON files.shareID=shares.ID
                INNER JOIN hosts ON hosts.ID = shares.hostID
                INNER JOIN backups ON backups.num = files.backupNum and backups.hostID = hosts.ID AND backups.shareID = shares.ID
            $where
            $limit
        });

        $sth->execute(@data);
        $results = $sth->rows;

        if ($results == 0) {
            print " - no new files\n";
            last;
        }

        sub fmt_date {
            my $t = shift || return;
            my $iso = BackupPC::Lib::timeStamp($t);
            $iso =~ s/\s/T/;
            return $iso;
        }

        while (my $row = $sth->fetchrow_hashref()) {

            my $fid = $row->{'fid'} || die "no fid?";
            my $uri = 'file:///' . $fid;

            unless ($skip_check) {
                my $id = ($hest_db || $hest_node)->uri_to_id($uri);
                next unless ($id == -1);
            }

            # create a document object
            my $doc = HyperEstraier::Document->new;

            # add attributes to the document object
            $doc->add_attr('@uri', $uri);

            foreach my $c (@{ $sth->{NAME} }) {
                $doc->add_attr($c, $row->{$c}) if (defined($row->{$c}));
            }

            #$doc->add_attr('@cdate', fmt_date($row->{'date'}));

            # add the body text to the document object
            my $path = $row->{'filepath'};
            $doc->add_text($path);
            $path =~ s/(.)/$1 /g;
            $doc->add_hidden_text($path);

            print STDERR $doc->dump_draft,"\n" if ($debug > 1);

            # register the document object to the database
            if ($hest_db) {
                $hest_db->put_doc($doc, $HyperEstraier::Database::PDCLEAN);
            } elsif ($hest_node) {
                $hest_node->put_doc($doc);
            } else {
                die "not supported";
            }
            $added++;
        }

        print " $added";
        $hest_db->sync() if ($index_path);

        $offset += EST_CHUNK;

        last unless ($results == EST_CHUNK);
    }

    if ($index_path) {
        print ", close";
        $hest_db->close();
    }

    my $dur = (time() - $t) || 1;
    printf(" [%.2f/s dur: %s]\n",
        ( $added / $dur ),
        fmt_time($dur)
    );
}

#---- /subs ----


## update index ##
if (($opt{i} || $opt{j} || ($index_path && ! -e $TopDir . $index_path)) && !$opt{c}) {
    # update all
    print "force update of HyperEstraier index ";
    print "importing existing data " unless (-e $TopDir . $index_path);
    print "by -i flag" if ($opt{i});
    print "by -j flag" if ($opt{j});
    print "\n";
    hest_update();
}

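# The schema below uses SERIAL columns, bigint and CREATE SEQUENCE, i.e. it
# assumes that SearchDSN points at a PostgreSQL database.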
## create tables ##
if ($opt{c}) {
    sub do_index {
        my $index = shift || return;
        my ($table,$col,$unique) = split(/:/, $index);
        $unique ||= '';
        $index =~ s/\W+/_/g;
        print "$index on $table($col)" . ( $unique ? "u" : "" ) . " ";
        $dbh->do(qq{ create $unique index $index on $table($col) });
    }

    print "creating tables...\n";

    $dbh->do( qq{
        create table hosts (
            ID SERIAL PRIMARY KEY,
            name VARCHAR(30) NOT NULL,
            IP VARCHAR(15)
        );

        create table shares (
            ID SERIAL PRIMARY KEY,
            hostID INTEGER NOT NULL references hosts(id),
            name VARCHAR(30) NOT NULL,
            share VARCHAR(200) NOT NULL
        );

        create table dvds (
            ID SERIAL PRIMARY KEY,
            num INTEGER NOT NULL,
            name VARCHAR(255) NOT NULL,
            mjesto VARCHAR(255)
        );

        create table backups (
            id serial,
            hostID INTEGER NOT NULL references hosts(id),
            num INTEGER NOT NULL,
            date integer NOT NULL,
            type CHAR(4) not null,
            shareID integer not null references shares(id),
            size bigint not null,
            inc_size bigint not null default -1,
            inc_deleted boolean default false,
            parts integer not null default 1,
            PRIMARY KEY(id)
        );

        create table files (
            ID SERIAL,
            shareID INTEGER NOT NULL references shares(id),
            backupNum INTEGER NOT NULL,
            name VARCHAR(255) NOT NULL,
            path VARCHAR(255) NOT NULL,
            date integer NOT NULL,
            type INTEGER NOT NULL,
            size bigint NOT NULL,
            primary key(id)
        );

        create table archive (
            id serial,
            dvd_nr int not null,
            total_size bigint default -1,
            note text,
            username varchar(20) not null,
            date timestamp default now(),
            primary key(id)
        );

        create table archive_backup (
            archive_id int not null references archive(id) on delete cascade,
            backup_id int not null references backups(id),
            primary key(archive_id, backup_id)
        );

        create table archive_burned (
            archive_id int references archive(id),
            date timestamp default now(),
            part int not null default 1,
            copy int not null default 1,
            iso_size bigint default -1
        );

        create table backup_parts (
            id serial,
            backup_id int references backups(id),
            part_nr int not null check (part_nr > 0),
            tar_size bigint not null check (tar_size > 0),
            size bigint not null check (size > 0),
            md5 text not null,
            items int not null check (items > 0),
            date timestamp default now(),
            primary key(id)
        );
    });

    print "creating indexes: ";

    foreach my $index (qw(
        hosts:name
        backups:hostID
        backups:num
        backups:shareID
        shares:hostID
        shares:name
        files:shareID
        files:path
        files:name
        files:date
        files:size
        archive:dvd_nr
        archive_burned:archive_id
        backup_parts:backup_id,part_nr
    )) {
        do_index($index);
    }

    print " creating sequence: ";
    foreach my $seq (qw/dvd_nr/) {
        print "$seq ";
        $dbh->do( qq{ CREATE SEQUENCE $seq } );
    }


    print "...\n";

    $dbh->commit;

}

## delete data before inserting ##
if ($opt{d}) {
    print "deleting ";
    foreach my $table (qw(files dvds backups shares hosts)) {
        print "$table ";
        $dbh->do(qq{ DELETE FROM $table });
    }
    print " done...\n";

    $dbh->commit;
}

## insert new values ##
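# For every configured host: make sure a row exists in hosts, read its list
# of backups, and for each backup/share combination that is not yet in the
# backups table walk the dump with recurseDir(), inserting new file rows,
# recording the total size and finally feeding the new rows to hest_update().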

# get hosts
$hosts = $bpc->HostInfoRead();
my $hostID;
my $shareID;

my $sth;

$sth->{insert_hosts} = $dbh->prepare(qq{
    INSERT INTO hosts (name, IP) VALUES (?,?)
});

$sth->{hosts_by_name} = $dbh->prepare(qq{
    SELECT ID FROM hosts WHERE name=?
});

$sth->{backups_count} = $dbh->prepare(qq{
    SELECT COUNT(*)
    FROM backups
    WHERE hostID=? AND num=? AND shareid=?
});

$sth->{insert_backups} = $dbh->prepare(qq{
    INSERT INTO backups (hostID, num, date, type, shareid, size)
    VALUES (?,?,?,?,?,-1)
});

$sth->{update_backups_size} = $dbh->prepare(qq{
    UPDATE backups SET size = ?
    WHERE hostID = ? and num = ? and date = ? and type =? and shareid = ?
});

$sth->{insert_files} = $dbh->prepare(qq{
    INSERT INTO files
        (shareID, backupNum, name, path, date, type, size)
    VALUES (?,?,?,?,?,?,?)
});

foreach my $host_key (keys %{$hosts}) {

    my $hostname = $hosts->{$host_key}->{'host'} || die "can't find host for $host_key";

    $sth->{hosts_by_name}->execute($hosts->{$host_key}->{'host'});

    unless (($hostID) = $sth->{hosts_by_name}->fetchrow_array()) {
        $sth->{insert_hosts}->execute(
            $hosts->{$host_key}->{'host'},
            $hosts->{$host_key}->{'ip'}
        );

        $hostID = $dbh->last_insert_id(undef,undef,'hosts',undef);
    }

    print "host ".$hosts->{$host_key}->{'host'}.": ";

    # get backups for a host
    my @backups = $bpc->BackupInfoRead($hostname);
    my $incs = scalar @backups;
    print "$incs increments\n";

    my $inc_nr = 0;
    $beenThere = {};

    foreach my $backup (@backups) {

        $inc_nr++;
        last if ($opt{m} && $inc_nr > $opt{m});

        my $backupNum = $backup->{'num'};
        my @backupShares = ();

        printf("%-10s %2d/%-2d #%-2d %s %5s/%5s files (date: %s dur: %s)\n",
            $hosts->{$host_key}->{'host'},
            $inc_nr, $incs, $backupNum,
            $backup->{type} || '?',
            $backup->{nFilesNew} || '?', $backup->{nFiles} || '?',
            strftime($t_fmt,localtime($backup->{startTime})),
            fmt_time($backup->{endTime} - $backup->{startTime})
        );

        my $files = BackupPC::View->new($bpc, $hostname, \@backups, 1);
        foreach my $share ($files->shareList($backupNum)) {

            my $t = time();

            $shareID = getShareID($share, $hostID, $hostname);

            $sth->{backups_count}->execute($hostID, $backupNum, $shareID);
            my ($count) = $sth->{backups_count}->fetchrow_array();
            # skip if already in database!
            next if ($count > 0);

            # dump some log
            print curr_time," ", $share;

            $sth->{insert_backups}->execute(
                $hostID,
                $backupNum,
                $backup->{'endTime'},
                substr($backup->{'type'},0,4),
                $shareID,
            );

            my ($f, $nf, $d, $nd, $size) = recurseDir($bpc, $hostname, $files, $backupNum, $share, "", $shareID);

            $sth->{update_backups_size}->execute(
                $size,
                $hostID,
                $backupNum,
                $backup->{'endTime'},
                substr($backup->{'type'},0,4),
                $shareID,
            );

            print " commit";
            $dbh->commit();

            my $dur = (time() - $t) || 1;
            printf(" %d/%d files %d/%d dirs %0.2f MB [%.2f/s dur: %s]\n",
                $nf, $f, $nd, $d,
                ($size / 1024 / 1024),
                ( ($f+$d) / $dur ),
                fmt_time($dur)
            );

            hest_update($hostID, $shareID, $backupNum) if ($nf + $nd > 0);
        }

    }
}
undef $sth;
$dbh->commit();
$dbh->disconnect();

print "total duration: ",fmt_time(time() - $start_t),"\n";

$pidfile->remove;

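# getShareID() returns the shares.ID for ($hostID, $share), creating the row
# first if it does not exist yet.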
sub getShareID {

    my ($share, $hostID, $hostname) = @_;

    $sth->{share_id} ||= $dbh->prepare(qq{
        SELECT ID FROM shares WHERE hostID=? AND name=?
    });

    $sth->{share_id}->execute($hostID,$share);

    my ($id) = $sth->{share_id}->fetchrow_array();

    return $id if (defined($id));

    $sth->{insert_share} ||= $dbh->prepare(qq{
        INSERT INTO shares
            (hostID,name,share)
        VALUES (?,?,?)
    });

    my $drop_down = $hostname . '/' . $share;
    $drop_down =~ s#//+#/#g;

    $sth->{insert_share}->execute($hostID,$share, $drop_down);
    return $dbh->last_insert_id(undef,undef,'shares',undef);
}

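# found_in_db() reports whether a file row is already present, treating an
# mtime that differs by exactly one hour (daylight saving time change) as the
# same file; when the row is missing it is inserted via insert_files.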
sub found_in_db {

    my @data = @_;
    shift @data;

    my ($key, $shareID,undef,$name,$path,$date,undef,$size) = @_;

    return $beenThere->{$key} if (defined($beenThere->{$key}));

    $sth->{file_in_db} ||= $dbh->prepare(qq{
        SELECT 1 FROM files
        WHERE shareID = ? and
            path = ? and
            size = ? and
            ( date = ? or date = ? or date = ? )
        LIMIT 1
    });

    my @param = ($shareID,$path,$size,$date, $date-$dst_offset, $date+$dst_offset);
    $sth->{file_in_db}->execute(@param);
    my $rows = $sth->{file_in_db}->rows;
    print STDERR "## found_in_db($shareID,$path,$date,$size) ",( $rows ? '+' : '-' ), join(" ",@param), "\n" if ($debug >= 3);

    $beenThere->{$key}++;

    $sth->{'insert_files'}->execute(@data) unless ($rows);
    return $rows;
}

####################################################
# recurse through the filesystem structure and     #
# return a flattened list of files                 #
####################################################
sub recurseDir {

    my ($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID) = @_;

    print STDERR "\nrecurse($hostname,$backupNum,$share,$dir,$shareID)\n" if ($debug >= 1);

    my ($nr_files, $new_files, $nr_dirs, $new_dirs, $size) = (0,0,0,0,0);

    { # scope
        my @stack;

        print STDERR "# dirAttrib($backupNum, $share, $dir)\n" if ($debug >= 2);
        my $filesInBackup = $files->dirAttrib($backupNum, $share, $dir);

        # first, add all the entries in current directory
        foreach my $path_key (keys %{$filesInBackup}) {
            print STDERR "# file ",Dumper($filesInBackup->{$path_key}),"\n" if ($debug >= 3);
            my @data = (
                $shareID,
                $backupNum,
                $path_key,
                $filesInBackup->{$path_key}->{'relPath'},
                $filesInBackup->{$path_key}->{'mtime'},
                $filesInBackup->{$path_key}->{'type'},
                $filesInBackup->{$path_key}->{'size'}
            );

            my $key = join(" ", (
                $shareID,
                $dir,
                $path_key,
                $filesInBackup->{$path_key}->{'mtime'},
                $filesInBackup->{$path_key}->{'size'}
            ));

            my $key_dst_prev = join(" ", (
                $shareID,
                $dir,
                $path_key,
                $filesInBackup->{$path_key}->{'mtime'} - $dst_offset,
                $filesInBackup->{$path_key}->{'size'}
            ));

            my $key_dst_next = join(" ", (
                $shareID,
                $dir,
                $path_key,
                $filesInBackup->{$path_key}->{'mtime'} + $dst_offset,
                $filesInBackup->{$path_key}->{'size'}
            ));

            my $found;
            if (
                ! defined($beenThere->{$key}) &&
                ! defined($beenThere->{$key_dst_prev}) &&
                ! defined($beenThere->{$key_dst_next}) &&
                ! ($found = found_in_db($key, @data))
            ) {
                print STDERR "# key: $key [", $beenThere->{$key},"]" if ($debug >= 2);

                if ($filesInBackup->{$path_key}->{'type'} == BPC_FTYPE_DIR) {
                    $new_dirs++ unless ($found);
                    print STDERR " dir\n" if ($debug >= 2);
                } else {
                    $new_files++ unless ($found);
                    print STDERR " file\n" if ($debug >= 2);
                }
                $size += $filesInBackup->{$path_key}->{'size'} || 0;
            }

            if ($filesInBackup->{$path_key}->{'type'} == BPC_FTYPE_DIR) {
                $nr_dirs++;

                my $full_path = $dir . '/' . $path_key;
                push @stack, $full_path;
                print STDERR "### store to stack: $full_path\n" if ($debug >= 3);

                # my ($f,$nf,$d,$nd) = recurseDir($bpc, $hostname, $backups, $backupNum, $share, $path_key, $shareID) unless ($beenThere->{$key});
                #
                # $nr_files += $f;
                # $new_files += $nf;
                # $nr_dirs += $d;
                # $new_dirs += $nd;

            } else {
                $nr_files++;
            }
        }

        print STDERR "## STACK ",join(", ", @stack),"\n" if ($debug >= 2);

        while ( my $dir = shift @stack ) {
            my ($f,$nf,$d,$nd, $s) = recurseDir($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID);
            print STDERR "# $dir f: $f nf: $nf d: $d nd: $nd\n" if ($debug >= 1);
            $nr_files += $f;
            $new_files += $nf;
            $nr_dirs += $d;
            $new_dirs += $nd;
            $size += $s;
        }
    }

    return ($nr_files, $new_files, $nr_dirs, $new_dirs, $size);
}


Properties

svn:executable = *
