/[BackupPC]/trunk/bin/BackupPC_updatedb
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/bin/BackupPC_updatedb

Parent Directory Parent Directory | Revision Log Revision Log


Revision 311 - (show annotations)
Sun Jan 29 14:54:17 2006 UTC (18 years, 4 months ago) by dpavlin
File size: 17122 byte(s)
added -q option to suppress output for hosts which aren't changed
(useful for invocation from cron -- this produces report of hosts
processed)

1 #!/usr/local/bin/perl -w
2
3 use strict;
4 use lib "__INSTALLDIR__/lib";
5
6 use DBI;
7 use BackupPC::Lib;
8 use BackupPC::View;
9 use Data::Dumper;
10 use Getopt::Std;
11 use Time::HiRes qw/time/;
12 use File::Pid;
13 use POSIX qw/strftime/;
14 use BackupPC::SearchLib;
15 use Cwd qw/abs_path/;
16
# Value of a file's 'type' attribute that marks a directory (compared in recurseDir).
use constant BPC_FTYPE_DIR => 5;
# Number of rows hest_update() reads per LIMIT/OFFSET chunk.
use constant EST_CHUNK => 100000;

# daylight saving time change offset for 1h
my $dst_offset = 60 * 60;

my $debug = 0;
# unbuffered STDOUT so progress output appears immediately
$|=1;

my $start_t = time();

# Pid file name: absolute script path with every run of non-word
# characters replaced by an underscore.
my $pid_path = abs_path($0);
$pid_path =~ s/\W+/_/g;

my $pidfile = File::Pid->new({
	file => "/tmp/$pid_path",
});

# Refuse to start while another instance is alive.
if (my $pid = $pidfile->running ) {
	die "$0 already running: $pid\n";
} elsif ($pidfile->pid ne $$) {
	$pidfile->remove;
	$pidfile = File::Pid->new;
}
print STDERR "$0 using pid ",$pidfile->pid," file ",$pidfile->file,"\n";
$pidfile->write;

# strftime() pattern used for every timestamp this script prints
my $t_fmt = '%Y-%m-%d %H:%M:%S';

my $hosts;
my $bpc = BackupPC::Lib->new || die;
my %Conf = $bpc->Conf();
my $TopDir = $bpc->TopDir();
# cache of file keys already handled for the current host (see found_in_db)
my $beenThere = {};

my $dsn = $Conf{SearchDSN} || die "Need SearchDSN in config.pl\n";
my $user = $Conf{SearchUser} || '';

my $index_node_url = $Conf{HyperEstraierIndex};

# Explicit transactions: the script commits/rolls back itself.
my $dbh = DBI->connect($dsn, $user, "", { RaiseError => 1, AutoCommit => 0 });

my %opt;

if ( !getopts("cdm:v:ijfq", \%opt ) ) {
	print STDERR <<EOF;
usage: $0 [-c|-d] [-m num] [-v|-v level] [-i|-j|-f]

Options:
	-c	create database on first use
	-d	delete database before import
	-m num	import just num increments for one host
	-v num	set verbosity (debug) level (default $debug)
	-i	update Hyper Estraier full text index
	-j	update full text, don't check existing files
	-f	don't do anything with full text index
	-q	be quiet for hosts without changes

Option -j is variation on -i. It will allow faster initial creation
of full-text index from existing database.

Option -f will create database which is out of sync with full text index. You
will have to re-run $0 with -i to fix it.

EOF
	exit 1;
}

if ($opt{v}) {
	print "Debug level at $opt{v}\n";
	$debug = $opt{v};
}

# Checked independently of -v: the two options are unrelated, and chaining
# them with elsif made -f a silent no-op whenever -v was also given.
if ($opt{f}) {
	print "WARNING: disabling full-text index update. You need to re-run $0 -j !\n";
	$index_node_url = undef;
}
92
93 #---- subs ----
94
# Format a duration in seconds as "MM:SS", prefixed with "<hours>h" when
# the duration is one hour or longer (e.g. 3661 -> "1h01:01").
#
# Returns nothing (empty list / undef) when the argument is missing or zero.
# Fractional seconds are truncated.
sub fmt_time {
	my $t = shift || return;

	# Plain integer arithmetic instead of gmtime(): gmtime() yields the
	# hour *of the day*, which silently drops whole days from durations
	# of 24 hours or more.
	my $secs = int($t);
	my $hh = int($secs / 3600);
	my $mm = int(($secs % 3600) / 60);
	my $ss = $secs % 60;

	my $out = "";
	$out .= "${hh}h" if ($hh);
	$out .= sprintf("%02d:%02d", $mm, $ss);
	return $out;
}
103
# Current local time rendered with the script-wide $t_fmt strftime pattern.
sub curr_time {
	my @now = localtime();
	return strftime($t_fmt, @now);
}
107
# Search::Estraier::Node connection, created on first use by hest_update()
# and reused by later calls.
my $hest_node;

# Push file rows from the database into the Hyper Estraier index.
#
# Arguments (all optional):
#   $host_id, $share_id, $num - when all three are defined, only files of
#   that host / share / backup number are sent; otherwise every file row is.
#
# Rows are read in chunks of EST_CHUNK with LIMIT/OFFSET. Progress is printed
# to STDOUT. Returns nothing.
sub hest_update {

	my ($host_id, $share_id, $num) = @_;

	my $skip_check = $opt{j} && print STDERR "Skipping check for existing files -- this should be used only with initital import\n";

	# Index disabled (nothing configured, or switched off with -f).
	# The message is printed once; $index_node_url is then set to the
	# sentinel 0 so later calls return quietly. Previously the second
	# call passed the defined() test and died on the bogus URL "0".
	unless ($index_node_url) {
		my $already_warned = defined($index_node_url) && $index_node_url eq '0';
		print STDERR "HyperEstraier support not enabled in configuration\n"
			unless ($already_warned);
		$index_node_url = 0;
		return;
	}

	print curr_time," updating Hyper Estraier:";

	my $t = time();

	my $offset = 0;
	my $added = 0;

	print " opening index $index_node_url";
	$hest_node ||= Search::Estraier::Node->new(
		url => $index_node_url,
		user => 'admin',
		passwd => 'admin',
		croak_on_error => 1,
	);
	print " via node URL";

	# The optional filter does not change between chunks, so build it once.
	my $where = '';
	my @data;
	if (defined($host_id) && defined($share_id) && defined($num)) {
		$where = qq{
		WHERE
			hosts.id = ? AND
			shares.id = ? AND
			files.backupnum = ?
		};
		@data = ( $host_id, $share_id, $num );
	}

	my $results = 0;

	do {

		my $limit = sprintf('LIMIT '.EST_CHUNK.' OFFSET %d', $offset);

		# ORDER BY gives the rows a stable order; without it consecutive
		# LIMIT/OFFSET chunks are not guaranteed to be disjoint or complete.
		my $sth = $dbh->prepare(qq{
			SELECT
				files.id			AS fid,
				hosts.name			AS hname,
				shares.name			AS sname,
				-- shares.share			AS sharename,
				files.backupnum			AS backupnum,
				-- files.name			AS filename,
				files.path			AS filepath,
				files.date			AS date,
				files.type			AS type,
				files.size			AS size,
				files.shareid			AS shareid,
				backups.date			AS backup_date
			FROM files
				INNER JOIN shares	ON files.shareID=shares.ID
				INNER JOIN hosts	ON hosts.ID = shares.hostID
				INNER JOIN backups	ON backups.num = files.backupNum and backups.hostID = hosts.ID AND backups.shareID = shares.ID
			$where
			ORDER BY files.id
			$limit
		});

		$sth->execute(@data);
		$results = $sth->rows;

		if ($results == 0) {
			print " - no new files\n";
			return;
		} else {
			print " - $results files: ";
		}

		while (my $row = $sth->fetchrow_hashref()) {

			my $fid = $row->{'fid'} || die "no fid?";
			my $uri = 'file:///' . $fid;

			# NOTE(review): the row is skipped when uri_to_id() yields -1;
			# confirm against Search::Estraier what -1 signals here.
			unless ($skip_check) {
				my $id = $hest_node->uri_to_id($uri);
				next if ($id && $id == -1);
			}

			# create a document object
			my $doc = Search::Estraier::Document->new;

			# add attributes to the document object
			$doc->add_attr('@uri', $uri);

			# every selected column with a defined value becomes an attribute
			foreach my $c (@{ $sth->{NAME} }) {
				print STDERR "attr $c = $row->{$c}\n" if ($debug > 2);
				$doc->add_attr($c, $row->{$c}) if (defined($row->{$c}));
			}

			#$doc->add_attr('@cdate', fmt_date($row->{'date'}));

			# add the body text to the document object
			my $path = $row->{'filepath'};
			$doc->add_text($path);
			# hidden text: the same path with a space after every character
			$path =~ s/(.)/$1 /g;
			$doc->add_hidden_text($path);

			print STDERR $doc->dump_draft,"\n" if ($debug > 1);

			# register the document object to the database
			$hest_node->put_doc($doc);
			$added++;
		}

		print " $added";

		$offset += EST_CHUNK;

	} while ($results == EST_CHUNK);	# a short chunk means the last one

	my $dur = (time() - $t) || 1;
	printf(" [%.2f/s dur: %s]\n",
		( $added / $dur ),
		fmt_time($dur)
	);
}

# Convert an epoch time to BackupPC::Lib::timeStamp() output with the first
# whitespace character replaced by "T". Returns nothing for a false argument.
# Only referenced from a commented-out line in hest_update().
sub fmt_date {
	my $t = shift || return;
	my $iso = BackupPC::Lib::timeStamp($t);
	$iso =~ s/\s/T/;
	return $iso;
}
251
252 #---- /subs ----
253
254
## update index ##
# -i or -j without -c: push every file row to the full-text index up front.
unless ( $opt{c} || !( $opt{i} || $opt{j} ) ) {
	# update all
	my @reasons = map { "by -$_ flag" } grep { $opt{$_} } qw(i j);
	print "force update of Hyper Estraier index ", @reasons, "\n";
	hest_update();
}
264
## create tables ##
# -c: create the schema, its indexes and the dvd_nr sequence, then commit.
if ($opt{c}) {

	# Create one index from a "table:column[,column...][:unique]" spec.
	# The index name is the spec with non-word runs turned into "_".
	sub do_index {
		my $spec = shift || return;
		my ($table, $col, $unique) = split(/:/, $spec);
		$unique ||= '';
		(my $index_name = $spec) =~ s/\W+/_/g;
		print "$index_name on $table($col)" . ( $unique ? "u" : "" ) . " ";
		$dbh->do(qq{ create $unique index $index_name on $table($col) });
	}

	print "creating tables...\n";

	# the whole schema is sent to the database as a single statement batch
	my $schema_sql = qq{
		create table hosts (
			ID	SERIAL		PRIMARY KEY,
			name	VARCHAR(30)	NOT NULL,
			IP	VARCHAR(15)
		);

		create table shares (
			ID	SERIAL		PRIMARY KEY,
			hostID	INTEGER		NOT NULL references hosts(id),
			name	VARCHAR(30)	NOT NULL,
			share	VARCHAR(200)	NOT NULL
		);

		create table dvds (
			ID	SERIAL		PRIMARY KEY,
			num	INTEGER		NOT NULL,
			name	VARCHAR(255)	NOT NULL,
			mjesto	VARCHAR(255)
		);

		create table backups (
			id	serial,
			hostID	INTEGER		NOT NULL references hosts(id),
			num	INTEGER		NOT NULL,
			date	integer		NOT NULL,
			type	CHAR(4)		not null,
			shareID	integer		not null references shares(id),
			size	bigint		not null,
			inc_size bigint		not null default -1,
			inc_deleted boolean	default false,
			parts	integer		not null default 1,
			PRIMARY KEY(id)
		);

		create table files (
			ID		SERIAL,
			shareID		INTEGER NOT NULL references shares(id),
			backupNum	INTEGER NOT NULL,
			name		VARCHAR(255) NOT NULL,
			path		VARCHAR(255) NOT NULL,
			date		integer NOT NULL,
			type		INTEGER NOT NULL,
			size		bigint NOT NULL,
			primary key(id)
		);

		create table archive (
			id		serial,
			dvd_nr		int not null,
			total_size	bigint default -1,
			note		text,
			username	varchar(20) not null,
			date		timestamp default now(),
			primary key(id)
		);

		create table archive_backup (
			archive_id	int not null references archive(id) on delete cascade,
			backup_id	int not null references backups(id),
			primary key(archive_id, backup_id)
		);

		create table archive_burned (
			archive_id	int references archive(id),
			date		timestamp default now(),
			part		int not null default 1,
			copy		int not null default 1,
			iso_size	bigint default -1
		);

		create table backup_parts (
			id		serial,
			backup_id	int references backups(id),
			part_nr		int not null check (part_nr > 0),
			tar_size	bigint not null check (tar_size > 0),
			size		bigint not null check (size > 0),
			md5		text not null,
			items		int not null check (items > 0),
			date		timestamp default now(),
			primary key(id)
		);
	};
	$dbh->do($schema_sql);

	print "creating indexes: ";

	my @index_specs = qw(
		hosts:name
		backups:hostID
		backups:num
		backups:shareID
		shares:hostID
		shares:name
		files:shareID
		files:path
		files:name
		files:date
		files:size
		archive:dvd_nr
		archive_burned:archive_id
		backup_parts:backup_id,part_nr
	);
	do_index($_) for @index_specs;

	print " creating sequence: ";
	for my $seq ('dvd_nr') {
		print "$seq ";
		$dbh->do( qq{ CREATE SEQUENCE $seq } );
	}


	print "...\n";

	$dbh->commit;

}
395
## delete data before inserting ##
# -d: empty the import tables, then commit. The tables are listed so that
# rows referencing other tables go first (files -> shares -> hosts).
if ($opt{d}) {
	print "deleting ";
	my @tables = qw(files dvds backups shares hosts);
	for my $table (@tables) {
		print "$table ";
		$dbh->do(qq{ DELETE FROM $table });
	}
	print " done...\n";

	$dbh->commit;
}
407
## insert new values ##
# For every host known to BackupPC, walk its backups and shares, import
# file records of backups that are not in the database yet, and push the
# new rows to the full-text index.

# get hosts
$hosts = $bpc->HostInfoRead();
my $hostID;
my $shareID;

# Prepared statement handles keyed by name; getShareID() and found_in_db()
# add their own handles to the same hash.
my $sth;

$sth->{insert_hosts} = $dbh->prepare(qq{
INSERT INTO hosts (name, IP) VALUES (?,?)
});

$sth->{hosts_by_name} = $dbh->prepare(qq{
SELECT ID FROM hosts WHERE name=?
});

$sth->{backups_count} = $dbh->prepare(qq{
SELECT COUNT(*)
FROM backups
WHERE hostID=? AND num=? AND shareid=?
});

# size is inserted as -1 and filled in once the share has been walked
$sth->{insert_backups} = $dbh->prepare(qq{
INSERT INTO backups (hostID, num, date, type, shareid, size)
VALUES (?,?,?,?,?,-1)
});

$sth->{update_backups_size} = $dbh->prepare(qq{
UPDATE backups SET size = ?
WHERE hostID = ? and num = ? and date = ? and type =? and shareid = ?
});

$sth->{insert_files} = $dbh->prepare(qq{
INSERT INTO files
	(shareID, backupNum, name, path, date, type, size)
	VALUES (?,?,?,?,?,?,?)
});

my @hosts = keys %{$hosts};
my $host_nr = 0;

foreach my $host_key (@hosts) {

	my $hostname = $hosts->{$host_key}->{'host'} || die "can't find host for $host_key";

	# look the host up, inserting it on first sight
	$sth->{hosts_by_name}->execute($hostname);

	unless (($hostID) = $sth->{hosts_by_name}->fetchrow_array()) {
		$sth->{insert_hosts}->execute(
			$hostname,
			$hosts->{$host_key}->{'ip'}
		);

		$hostID = $dbh->last_insert_id(undef,undef,'hosts',undef);
	}

	$host_nr++;
	# get backups for a host
	my @backups = $bpc->BackupInfoRead($hostname);
	my $incs = scalar @backups;

	my $host_header = sprintf("host %s [%d/%d]: %d increments\n",
		$hostname,
		$host_nr,
		scalar(@hosts),
		$incs
	);
	print $host_header unless ($opt{q});

	my $inc_nr = 0;
	# the seen-files cache is per host
	$beenThere = {};

	foreach my $backup (@backups) {

		$inc_nr++;
		# -m num: stop after num increments of this host
		last if ($opt{m} && $inc_nr > $opt{m});

		my $backupNum = $backup->{'num'};
		my @backupShares = ();

		my $share_header = sprintf("%-10s %2d/%-2d #%-2d %s %5s/%5s files (date: %s dur: %s)\n",
			$hostname,
			$inc_nr, $incs, $backupNum,
			$backup->{type} || '?',
			$backup->{nFilesNew} || '?', $backup->{nFiles} || '?',
			strftime($t_fmt,localtime($backup->{startTime})),
			fmt_time($backup->{endTime} - $backup->{startTime})
		);
		print $share_header unless ($opt{q});

		my $files = BackupPC::View->new($bpc, $hostname, \@backups, 1);
		foreach my $share ($files->shareList($backupNum)) {

			my $t = time();

			$shareID = getShareID($share, $hostID, $hostname);

			$sth->{backups_count}->execute($hostID, $backupNum, $shareID);
			my ($count) = $sth->{backups_count}->fetchrow_array();
			# skip if already in database!
			next if ($count > 0);

			# dump host and share header for -q
			# (the host header is printed only once per host)
			if ($opt{q}) {
				if ($host_header) {
					print $host_header;
					$host_header = undef;
				}
				print $share_header;
			}

			# dump some log
			print curr_time," ", $share;

			my $backup_type = substr($backup->{'type'},0,4);

			$sth->{insert_backups}->execute(
				$hostID,
				$backupNum,
				$backup->{'endTime'},
				$backup_type,
				$shareID,
			);

			my ($f, $nf, $d, $nd, $size) = recurseDir($bpc, $hostname, $files, $backupNum, $share, "", $shareID);

			# The trailing 1 makes the eval result a reliable success flag.
			my $committed = eval {
				$sth->{update_backups_size}->execute(
					$size,
					$hostID,
					$backupNum,
					$backup->{'endTime'},
					$backup_type,
					$shareID,
				);
				print " commit";
				$dbh->commit();
				1;
			};
			unless ($committed) {
				# Report why the transaction failed instead of
				# discarding the error silently.
				chomp(my $err = $@ || 'unknown error');
				print " rollback";
				print STDERR "\n$0: import of $hostname backup $backupNum share $share failed: $err\n";
				$dbh->rollback();
			}

			my $dur = (time() - $t) || 1;
			printf(" %d/%d files %d/%d dirs %0.2f MB [%.2f/s dur: %s]\n",
				$nf, $f, $nd, $d,
				($size / 1024 / 1024),
				( ($f+$d) / $dur ),
				fmt_time($dur)
			);

			# index only when something new was imported
			hest_update($hostID, $shareID, $backupNum) if ($nf + $nd > 0);
		}

	}
}
undef $sth;
$dbh->commit();
$dbh->disconnect();

print "total duration: ",fmt_time(time() - $start_t),"\n";

$pidfile->remove;
570
# Return the shares.ID for ($hostID, $share), inserting a new shares row
# when none exists yet.
#
#   $share    - share name
#   $hostID   - hosts.ID the share belongs to
#   $hostname - host name; used only to build the "share" column value
#               ("hostname/share" with repeated slashes collapsed)
#
# No prototype: the sub takes three arguments, so the former empty "()"
# prototype was wrong and merely never got applied because the only call
# is compiled before this definition.
sub getShareID {

	my ($share, $hostID, $hostname) = @_;

	$sth->{share_id} ||= $dbh->prepare(qq{
		SELECT ID FROM shares WHERE hostID=? AND name=?
	});

	$sth->{share_id}->execute($hostID,$share);

	my ($id) = $sth->{share_id}->fetchrow_array();

	return $id if (defined($id));

	$sth->{insert_share} ||= $dbh->prepare(qq{
		INSERT INTO shares
			(hostID,name,share)
		VALUES (?,?,?)
	});

	my $drop_down = $hostname . '/' . $share;
	$drop_down =~ s#//+#/#g;

	$sth->{insert_share}->execute($hostID,$share, $drop_down);
	return $dbh->last_insert_id(undef,undef,'shares',undef);
}
597
# Check whether a file is already recorded in the files table and insert
# it when it is not.
#
#   $key  - cache key for $beenThere
#   @data - (shareID, backupNum, name, path, date, type, size), i.e. the
#           bind values of the insert_files statement, in that order
#
# A row counts as the same file when shareID, path and size match and its
# date equals $date or differs by exactly $dst_offset in either direction.
#
# Returns the cached $beenThere value when $key was seen before; otherwise
# 1 if a matching row exists, 0 if the file was inserted. In both of the
# latter cases the key is recorded in $beenThere.
sub found_in_db {

	my ($key, @data) = @_;
	my ($shareID, undef, undef, $path, $date, undef, $size) = @data;

	return $beenThere->{$key} if (defined($beenThere->{$key}));

	$sth->{file_in_db} ||= $dbh->prepare(qq{
		SELECT 1 FROM files
		WHERE shareID = ? and
			path = ? and
			size = ? and
			( date = ? or date = ? or date = ? )
		LIMIT 1
	});

	my @param = ($shareID,$path,$size,$date, $date-$dst_offset, $date+$dst_offset);
	$sth->{file_in_db}->execute(@param);

	# Fetch the row instead of trusting ->rows, which DBI does not
	# guarantee to be meaningful for SELECT statements.
	my ($hit) = $sth->{file_in_db}->fetchrow_array();
	$sth->{file_in_db}->finish();
	my $rows = $hit ? 1 : 0;

	print STDERR "## found_in_db($shareID,$path,$date,$size) ",( $rows ? '+' : '-' ), join(" ",@param), "\n" if ($debug >= 3);

	$beenThere->{$key}++;

	$sth->{'insert_files'}->execute(@data) unless ($rows);
	return $rows;
}
626
####################################################
# recursing through filesystem structure and       #
# and returning flattened files list               #
####################################################
#
# Walk one directory of a backup share and, recursively, everything below
# it. Entries not seen before are inserted through found_in_db().
#
#   $bpc, $hostname - passed through to recursive calls ($hostname also
#                     appears in debug output)
#   $files          - object providing dirAttrib($backupNum, $share, $dir)
#   $backupNum      - backup number being imported
#   $share          - share name
#   $dir            - directory inside the share ("" for the top level)
#   $shareID        - shares.ID stored with every file row
#
# Returns ($nr_files, $new_files, $nr_dirs, $new_dirs, $size): totals of
# all / new files and directories, and the summed size of new entries.
#
# No prototype: the former one declared eight scalars for a sub that takes
# seven, and was never applied to any call.
sub recurseDir {

	my ($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID) = @_;

	print STDERR "\nrecurse($hostname,$backupNum,$share,$dir,$shareID)\n" if ($debug >= 1);

	my ($nr_files, $new_files, $nr_dirs, $new_dirs, $size) = (0,0,0,0,0);

	# sub-directories found here, descended into after this directory
	my @stack;

	print STDERR "# dirAttrib($backupNum, $share, $dir)\n" if ($debug >= 2);
	my $filesInBackup = $files->dirAttrib($backupNum, $share, $dir);

	# first, add all the entries in current directory
	foreach my $path_key (keys %{$filesInBackup}) {
		my $attr = $filesInBackup->{$path_key};
		print STDERR "# file ",Dumper($attr),"\n" if ($debug >= 3);

		my $is_dir = ($attr->{'type'} == BPC_FTYPE_DIR);

		# bind values for the insert_files statement
		my @data = (
			$shareID,
			$backupNum,
			$path_key,
			$attr->{'relPath'},
			$attr->{'mtime'},
			$attr->{'type'},
			$attr->{'size'}
		);

		# Cache keys for the exact mtime and for mtime shifted by the
		# DST offset in either direction.
		my ($key, $key_dst_prev, $key_dst_next) = map {
			join(" ", $shareID, $dir, $path_key, $_, $attr->{'size'})
		} (
			$attr->{'mtime'},
			$attr->{'mtime'} - $dst_offset,
			$attr->{'mtime'} + $dst_offset,
		);

		# found_in_db() is consulted (and inserts the row) only when
		# none of the three keys is cached yet.
		if (
			! defined($beenThere->{$key}) &&
			! defined($beenThere->{$key_dst_prev}) &&
			! defined($beenThere->{$key_dst_next}) &&
			! found_in_db($key, @data)
		) {
			print STDERR "# key: $key [", $beenThere->{$key},"]" if ($debug >= 2);

			if ($is_dir) {
				$new_dirs++;
				print STDERR " dir\n" if ($debug >= 2);
			} else {
				$new_files++;
				print STDERR " file\n" if ($debug >= 2);
			}
			$size += $attr->{'size'} || 0;
		}

		if ($is_dir) {
			$nr_dirs++;

			my $full_path = $dir . '/' . $path_key;
			push @stack, $full_path;
			print STDERR "### store to stack: $full_path\n" if ($debug >= 3);
		} else {
			$nr_files++;
		}
	}

	print STDERR "## STACK ",join(", ", @stack),"\n" if ($debug >= 2);

	# then descend into every sub-directory and add up its totals
	while ( defined(my $subdir = shift @stack) ) {
		my ($f,$nf,$d,$nd, $s) = recurseDir($bpc, $hostname, $files, $backupNum, $share, $subdir, $shareID);
		print STDERR "# $subdir f: $f nf: $nf d: $d nd: $nd\n" if ($debug >= 1);
		$nr_files += $f;
		$new_files += $nf;
		$nr_dirs += $d;
		$new_dirs += $nd;
		$size += $s;
	}

	return ($nr_files, $new_files, $nr_dirs, $new_dirs, $size);
}
735

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26