This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!

Contents of /trunk/bin/BackupPC_updatedb



Revision 248
Fri Dec 9 14:41:13 2005 UTC by dpavlin
File size: 17353 bytes
added hostnumber and total number of hosts

#!/usr/local/bin/perl -w

use strict;
use lib "__INSTALLDIR__/lib";

use DBI;
use BackupPC::Lib;
use BackupPC::View;
use Data::Dumper;
use Getopt::Std;
use Time::HiRes qw/time/;
use File::Pid;
use POSIX qw/strftime/;
use BackupPC::SearchLib;

use constant BPC_FTYPE_DIR => 5;
use constant EST_CHUNK => 100000;

# daylight saving time change offset for 1h
my $dst_offset = 60 * 60;

my $debug = 0;
$|=1;

my $start_t = time();

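# Refuse to run twice: a pid file is used to detect an already running
# instance; a stale pid file left by a previous run (different pid) is
# removed and recreated before we write our own pid.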
my $pidfile = new File::Pid;

if (my $pid = $pidfile->running ) {
    die "$0 already running: $pid\n";
} elsif ($pidfile->pid ne $$) {
    $pidfile->remove;
    $pidfile = new File::Pid;
}
$pidfile->write;
print STDERR "$0 using pid ",$pidfile->pid," file ",$pidfile->file,"\n";

my $t_fmt = '%Y-%m-%d %H:%M:%S';

my $hosts;
my $bpc = BackupPC::Lib->new || die;
my %Conf = $bpc->Conf();
my $TopDir = $bpc->TopDir();
my $beenThere = {};

my $dsn = $Conf{SearchDSN} || die "Need SearchDSN in config.pl\n";
my $user = $Conf{SearchUser} || '';

my $use_hest = $Conf{HyperEstraierIndex};
my ($index_path, $index_node_url) = BackupPC::SearchLib::getHyperEstraier_url($use_hest);

my $dbh = DBI->connect($dsn, $user, "", { RaiseError => 1, AutoCommit => 0 });
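# Note: AutoCommit is off, so all inserts and updates below stay in an open
# transaction until an explicit $dbh->commit (done after table creation,
# after deletion, per share during import, and once at the end).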

my %opt;

if ( !getopts("cdm:v:ijf", \%opt ) ) {
    print STDERR <<EOF;
usage: $0 [-c|-d] [-m num] [-v level] [-i|-j|-f]

Options:
    -c      create database on first use
    -d      delete database before import
    -m num  import just num increments for one host
    -v num  set verbosity (debug) level (default $debug)
    -i      update Hyper Estraier full text index
    -j      update full text index, don't check existing files
    -f      don't do anything with full text index

Option -j is a variation of -i. It allows faster initial creation
of the full-text index from an existing database.

Option -f will create a database which is out of sync with the full-text
index. You will have to re-run $0 with -i to fix it.

EOF
    exit 1;
}
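
# Illustrative invocations (not from the original source):
#   BackupPC_updatedb -c        # first run: create tables, indexes and sequence
#   BackupPC_updatedb           # regular run: import new backups, update index
#   BackupPC_updatedb -d -v 2   # wipe imported data, re-import with debug output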

if ($opt{v}) {
    print "Debug level at $opt{v}\n";
    $debug = $opt{v};
}

if ($opt{f}) {
    print "WARNING: disabling full-text index update. You need to re-run $0 -j !\n";
    ($use_hest, $index_path, $index_node_url) = (undef, undef, undef);
}

#---- subs ----

sub fmt_time {
    my $t = shift || return;
    my $out = "";
    my ($ss,$mm,$hh) = gmtime($t);
    $out .= "${hh}h" if ($hh);
    $out .= sprintf("%02d:%02d", $mm,$ss);
    return $out;
}

sub curr_time {
    return strftime($t_fmt,localtime());
}

my $hest_db;
my $hest_node;

sub signal {
    my($sig) = @_;
    if ($hest_db) {
        print "\nCaught a SIG$sig--syncing database and shutting down\n";
        $hest_db->sync();
        $hest_db->close();
    }
    exit(0);
}

$SIG{'INT'} = \&signal;
$SIG{'QUIT'} = \&signal;
sub hest_update {

    my ($host_id, $share_id, $num) = @_;

    my $skip_check = $opt{j} && print STDERR "Skipping check for existing files -- this should be used only with initial import\n";

    unless (defined($use_hest)) {
        print STDERR "HyperEstraier support not enabled in configuration\n";
        $use_hest = 0;
        return;
    }

    print curr_time," updating HyperEstraier:";

    my $t = time();

    my $offset = 0;
    my $added = 0;

    print " opening index $use_hest";
    if ($index_path) {
        $hest_db = HyperEstraier::Database->new();
        $hest_db->open($TopDir . $index_path, $HyperEstraier::Database::DBWRITER | $HyperEstraier::Database::DBCREAT);
        print " directly";
    } elsif ($index_node_url) {
        $hest_node ||= HyperEstraier::Node->new($index_node_url);
        $hest_node->set_auth('admin', 'admin');
        print " via node URL";
    } else {
        die "don't know how to use HyperEstraier Index $use_hest";
    }
    print " increment is " . EST_CHUNK . " files:";

    my $results = 0;

    do {

        my $where = '';
        my @data;
        if (defined($host_id) && defined($share_id) && defined($num)) {
            $where = qq{
            WHERE
                hosts.id = ? AND
                shares.id = ? AND
                files.backupnum = ?
            };
            @data = ( $host_id, $share_id, $num );
        }

        my $limit = sprintf('LIMIT '.EST_CHUNK.' OFFSET %d', $offset);

        my $sth = $dbh->prepare(qq{
            SELECT
                files.id AS fid,
                hosts.name AS hname,
                shares.name AS sname,
                -- shares.share AS sharename,
                files.backupnum AS backupnum,
                -- files.name AS filename,
                files.path AS filepath,
                files.date AS date,
                files.type AS type,
                files.size AS size,
                files.shareid AS shareid,
                backups.date AS backup_date
            FROM files
                INNER JOIN shares ON files.shareID=shares.ID
                INNER JOIN hosts ON hosts.ID = shares.hostID
                INNER JOIN backups ON backups.num = files.backupNum and backups.hostID = hosts.ID AND backups.shareID = shares.ID
            $where
            $limit
        });

        $sth->execute(@data);
        $results = $sth->rows;

        if ($results == 0) {
            print " - no new files\n";
            last;
        }

        sub fmt_date {
            my $t = shift || return;
            my $iso = BackupPC::Lib::timeStamp($t);
            $iso =~ s/\s/T/;
            return $iso;
        }

        while (my $row = $sth->fetchrow_hashref()) {

            my $fid = $row->{'fid'} || die "no fid?";
            my $uri = 'file:///' . $fid;

            unless ($skip_check) {
                my $id = ($hest_db || $hest_node)->uri_to_id($uri);
                next unless ($id == -1);
            }

            # create a document object
            my $doc = HyperEstraier::Document->new;

            # add attributes to the document object
            $doc->add_attr('@uri', $uri);

            foreach my $c (@{ $sth->{NAME} }) {
                $doc->add_attr($c, $row->{$c}) if (defined($row->{$c}));
            }

            #$doc->add_attr('@cdate', fmt_date($row->{'date'}));

            # add the body text to the document object
            my $path = $row->{'filepath'};
            $doc->add_text($path);
            $path =~ s/(.)/$1 /g;
            $doc->add_hidden_text($path);

            print STDERR $doc->dump_draft,"\n" if ($debug > 1);

            # register the document object to the database
            if ($hest_db) {
                $hest_db->put_doc($doc, $HyperEstraier::Database::PDCLEAN);
            } elsif ($hest_node) {
                $hest_node->put_doc($doc);
            } else {
                die "not supported";
            }
            $added++;
        }

        print " $added";
        $hest_db->sync() if ($index_path);

        $offset += EST_CHUNK;

    } while ($results == EST_CHUNK);

    if ($index_path) {
        print ", close";
        $hest_db->close();
    }

    my $dur = (time() - $t) || 1;
    printf(" [%.2f/s dur: %s]\n",
        ( $added / $dur ),
        fmt_time($dur)
    );
}

#---- /subs ----

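# Rebuild the full-text index up front when -i or -j was given, or when an
# index path is configured but the index does not exist yet; this is skipped
# when tables are being created with -c.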
## update index ##
if (($opt{i} || $opt{j} || ($index_path && ! -e $TopDir . $index_path)) && !$opt{c}) {
    # update all
    print "force update of HyperEstraier index ";
    print "importing existing data" unless (-e $TopDir . $index_path);
    print "by -i flag" if ($opt{i});
    print "by -j flag" if ($opt{j});
    print "\n";
    hest_update();
}

## create tables ##
if ($opt{c}) {
    sub do_index {
        my $index = shift || return;
        my ($table,$col,$unique) = split(/:/, $index);
        $unique ||= '';
        $index =~ s/\W+/_/g;
        print "$index on $table($col)" . ( $unique ? "u" : "" ) . " ";
        $dbh->do(qq{ create $unique index $index on $table($col) });
    }
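
    # e.g. do_index('files:path') creates an index named files_path on
    # files(path); a third field (such as ':unique') would be passed through
    # and produce "create unique index".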

    print "creating tables...\n";
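
    # Schema outline: shares belong to hosts; backups belong to a host and a
    # share; files belong to a share and a backup number; backup_parts split a
    # backup into pieces; archive, archive_backup and archive_burned track
    # burning backups to DVD; dvds lists the media (mjesto is Croatian for
    # "location").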

    $dbh->do( qq{
        create table hosts (
            ID SERIAL PRIMARY KEY,
            name VARCHAR(30) NOT NULL,
            IP VARCHAR(15)
        );

        create table shares (
            ID SERIAL PRIMARY KEY,
            hostID INTEGER NOT NULL references hosts(id),
            name VARCHAR(30) NOT NULL,
            share VARCHAR(200) NOT NULL
        );

        create table dvds (
            ID SERIAL PRIMARY KEY,
            num INTEGER NOT NULL,
            name VARCHAR(255) NOT NULL,
            mjesto VARCHAR(255)
        );

        create table backups (
            id serial,
            hostID INTEGER NOT NULL references hosts(id),
            num INTEGER NOT NULL,
            date integer NOT NULL,
            type CHAR(4) not null,
            shareID integer not null references shares(id),
            size bigint not null,
            inc_size bigint not null default -1,
            inc_deleted boolean default false,
            parts integer not null default 1,
            PRIMARY KEY(id)
        );

        create table files (
            ID SERIAL,
            shareID INTEGER NOT NULL references shares(id),
            backupNum INTEGER NOT NULL,
            name VARCHAR(255) NOT NULL,
            path VARCHAR(255) NOT NULL,
            date integer NOT NULL,
            type INTEGER NOT NULL,
            size bigint NOT NULL,
            primary key(id)
        );

        create table archive (
            id serial,
            dvd_nr int not null,
            total_size bigint default -1,
            note text,
            username varchar(20) not null,
            date timestamp default now(),
            primary key(id)
        );

        create table archive_backup (
            archive_id int not null references archive(id) on delete cascade,
            backup_id int not null references backups(id),
            primary key(archive_id, backup_id)
        );

        create table archive_burned (
            archive_id int references archive(id),
            date timestamp default now(),
            part int not null default 1,
            copy int not null default 1,
            iso_size bigint default -1
        );

        create table backup_parts (
            id serial,
            backup_id int references backups(id),
            part_nr int not null check (part_nr > 0),
            tar_size bigint not null check (tar_size > 0),
            size bigint not null check (size > 0),
            md5 text not null,
            items int not null check (items > 0),
            date timestamp default now(),
            primary key(id)
        );
    });

    print "creating indexes: ";

    foreach my $index (qw(
        hosts:name
        backups:hostID
        backups:num
        backups:shareID
        shares:hostID
        shares:name
        files:shareID
        files:path
        files:name
        files:date
        files:size
        archive:dvd_nr
        archive_burned:archive_id
        backup_parts:backup_id,part_nr
    )) {
        do_index($index);
    }

    print " creating sequence: ";
    foreach my $seq (qw/dvd_nr/) {
        print "$seq ";
        $dbh->do( qq{ CREATE SEQUENCE $seq } );
    }


    print "...\n";

    $dbh->commit;

}

## delete data before inserting ##
if ($opt{d}) {
    print "deleting ";
    foreach my $table (qw(files dvds backups shares hosts)) {
        print "$table ";
        $dbh->do(qq{ DELETE FROM $table });
    }
    print " done...\n";

    $dbh->commit;
}

## insert new values ##

# get hosts
$hosts = $bpc->HostInfoRead();
my $hostID;
my $shareID;

my $sth;

$sth->{insert_hosts} = $dbh->prepare(qq{
    INSERT INTO hosts (name, IP) VALUES (?,?)
});

$sth->{hosts_by_name} = $dbh->prepare(qq{
    SELECT ID FROM hosts WHERE name=?
});

$sth->{backups_count} = $dbh->prepare(qq{
    SELECT COUNT(*)
    FROM backups
    WHERE hostID=? AND num=? AND shareid=?
});

$sth->{insert_backups} = $dbh->prepare(qq{
    INSERT INTO backups (hostID, num, date, type, shareid, size)
    VALUES (?,?,?,?,?,-1)
});

$sth->{update_backups_size} = $dbh->prepare(qq{
    UPDATE backups SET size = ?
    WHERE hostID = ? and num = ? and date = ? and type = ? and shareid = ?
});

$sth->{insert_files} = $dbh->prepare(qq{
    INSERT INTO files
        (shareID, backupNum, name, path, date, type, size)
    VALUES (?,?,?,?,?,?,?)
});

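# Main import loop: for every host in BackupPC's host list, make sure a hosts
# row exists, then walk its backups; for each backup and share that is not yet
# in the backups table, insert a backups row, recurse the directory tree to
# insert files rows, update the backup size, commit, and refresh the full-text
# index for that share.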
my @hosts = keys %{$hosts};
my $host_nr = 0;

foreach my $host_key (@hosts) {

    my $hostname = $hosts->{$host_key}->{'host'} || die "can't find host for $host_key";

    $sth->{hosts_by_name}->execute($hosts->{$host_key}->{'host'});

    unless (($hostID) = $sth->{hosts_by_name}->fetchrow_array()) {
        $sth->{insert_hosts}->execute(
            $hosts->{$host_key}->{'host'},
            $hosts->{$host_key}->{'ip'}
        );

        $hostID = $dbh->last_insert_id(undef,undef,'hosts',undef);
    }

    $host_nr++;
    print "host ", $hosts->{$host_key}->{'host'}, " [",
        $host_nr, "/", ($#hosts + 1), "]: ";

    # get backups for a host
    my @backups = $bpc->BackupInfoRead($hostname);
    my $incs = scalar @backups;
    print "$incs increments\n";

    my $inc_nr = 0;
    $beenThere = {};

    foreach my $backup (@backups) {

        $inc_nr++;
        last if ($opt{m} && $inc_nr > $opt{m});

        my $backupNum = $backup->{'num'};
        my @backupShares = ();

        printf("%-10s %2d/%-2d #%-2d %s %5s/%5s files (date: %s dur: %s)\n",
            $hosts->{$host_key}->{'host'},
            $inc_nr, $incs, $backupNum,
            $backup->{type} || '?',
            $backup->{nFilesNew} || '?', $backup->{nFiles} || '?',
            strftime($t_fmt,localtime($backup->{startTime})),
            fmt_time($backup->{endTime} - $backup->{startTime})
        );

        my $files = BackupPC::View->new($bpc, $hostname, \@backups, 1);
        foreach my $share ($files->shareList($backupNum)) {

            my $t = time();

            $shareID = getShareID($share, $hostID, $hostname);

            $sth->{backups_count}->execute($hostID, $backupNum, $shareID);
            my ($count) = $sth->{backups_count}->fetchrow_array();
            # skip if already in database!
            next if ($count > 0);

            # dump some log
            print curr_time," ", $share;

            $sth->{insert_backups}->execute(
                $hostID,
                $backupNum,
                $backup->{'endTime'},
                substr($backup->{'type'},0,4),
                $shareID,
            );

            my ($f, $nf, $d, $nd, $size) = recurseDir($bpc, $hostname, $files, $backupNum, $share, "", $shareID);

            $sth->{update_backups_size}->execute(
                $size,
                $hostID,
                $backupNum,
                $backup->{'endTime'},
                substr($backup->{'type'},0,4),
                $shareID,
            );

            print " commit";
            $dbh->commit();

            my $dur = (time() - $t) || 1;
            printf(" %d/%d files %d/%d dirs %0.2f MB [%.2f/s dur: %s]\n",
                $nf, $f, $nd, $d,
                ($size / 1024 / 1024),
                ( ($f+$d) / $dur ),
                fmt_time($dur)
            );

            hest_update($hostID, $shareID, $backupNum) if ($nf + $nd > 0);
        }

    }
}
undef $sth;
$dbh->commit();
$dbh->disconnect();

print "total duration: ",fmt_time(time() - $start_t),"\n";

$pidfile->remove;

sub getShareID {

    my ($share, $hostID, $hostname) = @_;

    $sth->{share_id} ||= $dbh->prepare(qq{
        SELECT ID FROM shares WHERE hostID=? AND name=?
    });

    $sth->{share_id}->execute($hostID,$share);

    my ($id) = $sth->{share_id}->fetchrow_array();

    return $id if (defined($id));

    $sth->{insert_share} ||= $dbh->prepare(qq{
        INSERT INTO shares
            (hostID,name,share)
        VALUES (?,?,?)
    });

    my $drop_down = $hostname . '/' . $share;
    $drop_down =~ s#//+#/#g;

    $sth->{insert_share}->execute($hostID,$share, $drop_down);
    return $dbh->last_insert_id(undef,undef,'shares',undef);
}

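# found_in_db() decides whether a file row already exists: it first consults
# the in-memory $beenThere cache, then looks for a files row with the same
# shareID, path and size whose date matches exactly or is +/- one hour
# ($dst_offset, to tolerate daylight saving time shifts); if nothing is
# found, the row is inserted via the insert_files statement.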
sub found_in_db {

    my @data = @_;
    shift @data;

    my ($key, $shareID,undef,$name,$path,$date,undef,$size) = @_;

    return $beenThere->{$key} if (defined($beenThere->{$key}));

    $sth->{file_in_db} ||= $dbh->prepare(qq{
        SELECT 1 FROM files
        WHERE shareID = ? and
            path = ? and
            size = ? and
            ( date = ? or date = ? or date = ? )
        LIMIT 1
    });

    my @param = ($shareID,$path,$size,$date, $date-$dst_offset, $date+$dst_offset);
    $sth->{file_in_db}->execute(@param);
    my $rows = $sth->{file_in_db}->rows;
    print STDERR "## found_in_db($shareID,$path,$date,$size) ",( $rows ? '+' : '-' ), join(" ",@param), "\n" if ($debug >= 3);

    $beenThere->{$key}++;

    $sth->{'insert_files'}->execute(@data) unless ($rows);
    return $rows;
}

####################################################
# recurse through filesystem structure and return  #
# a flattened list of files                        #
####################################################
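# recurseDir() reads one directory level with BackupPC::View::dirAttrib(),
# records entries not yet known (see found_in_db), pushes subdirectories onto
# a stack and recurses into each stacked directory only after the whole level
# has been processed; it returns (files, new files, dirs, new dirs, size).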
sub recurseDir {

    my ($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID) = @_;

    print STDERR "\nrecurse($hostname,$backupNum,$share,$dir,$shareID)\n" if ($debug >= 1);

    my ($nr_files, $new_files, $nr_dirs, $new_dirs, $size) = (0,0,0,0,0);

    { # scope
        my @stack;

        print STDERR "# dirAttrib($backupNum, $share, $dir)\n" if ($debug >= 2);
        my $filesInBackup = $files->dirAttrib($backupNum, $share, $dir);

        # first, add all the entries in current directory
        foreach my $path_key (keys %{$filesInBackup}) {
            print STDERR "# file ",Dumper($filesInBackup->{$path_key}),"\n" if ($debug >= 3);
            my @data = (
                $shareID,
                $backupNum,
                $path_key,
                $filesInBackup->{$path_key}->{'relPath'},
                $filesInBackup->{$path_key}->{'mtime'},
                $filesInBackup->{$path_key}->{'type'},
                $filesInBackup->{$path_key}->{'size'}
            );

            my $key = join(" ", (
                $shareID,
                $dir,
                $path_key,
                $filesInBackup->{$path_key}->{'mtime'},
                $filesInBackup->{$path_key}->{'size'}
            ));

            my $key_dst_prev = join(" ", (
                $shareID,
                $dir,
                $path_key,
                $filesInBackup->{$path_key}->{'mtime'} - $dst_offset,
                $filesInBackup->{$path_key}->{'size'}
            ));

            my $key_dst_next = join(" ", (
                $shareID,
                $dir,
                $path_key,
                $filesInBackup->{$path_key}->{'mtime'} + $dst_offset,
                $filesInBackup->{$path_key}->{'size'}
            ));

            my $found;
            if (
                ! defined($beenThere->{$key}) &&
                ! defined($beenThere->{$key_dst_prev}) &&
                ! defined($beenThere->{$key_dst_next}) &&
                ! ($found = found_in_db($key, @data))
            ) {
                print STDERR "# key: $key [", $beenThere->{$key},"]" if ($debug >= 2);

                if ($filesInBackup->{$path_key}->{'type'} == BPC_FTYPE_DIR) {
                    $new_dirs++ unless ($found);
                    print STDERR " dir\n" if ($debug >= 2);
                } else {
                    $new_files++ unless ($found);
                    print STDERR " file\n" if ($debug >= 2);
                }
                $size += $filesInBackup->{$path_key}->{'size'} || 0;
            }

            if ($filesInBackup->{$path_key}->{'type'} == BPC_FTYPE_DIR) {
                $nr_dirs++;

                my $full_path = $dir . '/' . $path_key;
                push @stack, $full_path;
                print STDERR "### store to stack: $full_path\n" if ($debug >= 3);

#               my ($f,$nf,$d,$nd) = recurseDir($bpc, $hostname, $backups, $backupNum, $share, $path_key, $shareID) unless ($beenThere->{$key});
#
#               $nr_files += $f;
#               $new_files += $nf;
#               $nr_dirs += $d;
#               $new_dirs += $nd;

            } else {
                $nr_files++;
            }
        }

        print STDERR "## STACK ",join(", ", @stack),"\n" if ($debug >= 2);

        while ( my $dir = shift @stack ) {
            my ($f,$nf,$d,$nd, $s) = recurseDir($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID);
            print STDERR "# $dir f: $f nf: $nf d: $d nd: $nd\n" if ($debug >= 1);
            $nr_files += $f;
            $new_files += $nf;
            $nr_dirs += $d;
            $new_dirs += $nd;
            $size += $s;
        }
    }

    return ($nr_files, $new_files, $nr_dirs, $new_dirs, $size);
}

Properties

svn:executable = *
