/[BackupPC]/trunk/bin/BackupPC_updatedb
This is a repository of my old source code, which is no longer updated. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/bin/BackupPC_updatedb

Parent Directory Parent Directory | Revision Log Revision Log


Revision 250 - (show annotations)
Sun Dec 11 14:27:45 2005 UTC (18 years, 5 months ago) by dpavlin
File size: 17577 byte(s)
create pid file from full path (to enable multiple instances to run in
parallel), update backup in eval, and rollback if needed (for partial
backups mostly)

1 #!/usr/local/bin/perl -w
2
3 use strict;
4 use lib "__INSTALLDIR__/lib";
5
6 use DBI;
7 use BackupPC::Lib;
8 use BackupPC::View;
9 use Data::Dumper;
10 use Getopt::Std;
11 use Time::HiRes qw/time/;
12 use File::Pid;
13 use POSIX qw/strftime/;
14 use BackupPC::SearchLib;
15 use Cwd qw/abs_path/;
16
use constant BPC_FTYPE_DIR => 5;
use constant EST_CHUNK => 100000;

# daylight saving time change offset for 1h
my $dst_offset = 60 * 60;

my $debug = 0;
$|=1;

my $start_t = time();

# Derive the pid-file name from the full path of this script, so that
# multiple instances installed under different paths can run in parallel.
my $pid_path = abs_path($0);
$pid_path =~ s/\W+/_/g;

# Direct method call instead of indirect object syntax ("new File::Pid"),
# which is ambiguous to the Perl parser and discouraged.
my $pidfile = File::Pid->new({
	file => "/tmp/$pid_path",
});

if (my $pid = $pidfile->running ) {
	die "$0 already running: $pid\n";
} elsif ($pidfile->pid ne $$) {
	# stale pid file left behind by a dead process: remove and recreate
	$pidfile->remove;
	$pidfile = File::Pid->new;
}
print STDERR "$0 using pid ",$pidfile->pid," file ",$pidfile->file,"\n";
$pidfile->write;
43
# timestamp format used for all log output (see curr_time below)
my $t_fmt = '%Y-%m-%d %H:%M:%S';

my $hosts;
my $bpc = BackupPC::Lib->new || die;
my %Conf = $bpc->Conf();
my $TopDir = $bpc->TopDir();
my $beenThere = {};	# in-memory dedup cache; reset per host in the main loop

# search database connection parameters come from BackupPC's config.pl
my $dsn = $Conf{SearchDSN} || die "Need SearchDSN in config.pl\n";
my $user = $Conf{SearchUser} || '';

# Hyper Estraier full-text index: configured either as a path under
# $TopDir or as a node URL (exactly one of the two is returned defined)
my $use_hest = $Conf{HyperEstraierIndex};
my ($index_path, $index_node_url) = BackupPC::SearchLib::getHyperEstraier_url($use_hest);

# AutoCommit off: all imports run in explicit transactions with
# commit/rollback handled per backup further below
my $dbh = DBI->connect($dsn, $user, "", { RaiseError => 1, AutoCommit => 0 });
59
my %opt;

# Parse command-line switches; print usage and exit on any unknown option.
# FIX: usage line previously read "[-v|-v level]" (duplicated -v).
if ( !getopts("cdm:v:ijf", \%opt ) ) {
	print STDERR <<EOF;
usage: $0 [-c|-d] [-m num] [-v level] [-i|-j|-f]

Options:
	-c	create database on first use
	-d	delete database before import
	-m num	import just num increments for one host
	-v num	set verbosity (debug) level (default $debug)
	-i	update Hyper Estraier full text index
	-j	update full text, don't check existing files
	-f	don't do anything with full text index

Option -j is variation on -i. It will allow faster initial creation
of full-text index from existing database.

Option -f will create database which is out of sync with full text index. You
will have to re-run $0 with -i to fix it.

EOF
	exit 1;
}
84
# -v and -f are independent options, so handle each with its own if:
# the original "elsif" silently ignored -f whenever -v was also given,
# leaving the full-text index enabled against the user's request.
if ($opt{v}) {
	print "Debug level at $opt{v}\n";
	$debug = $opt{v};
}

if ($opt{f}) {
	print "WARNING: disabling full-text index update. You need to re-run $0 -j !\n";
	($use_hest, $index_path, $index_node_url) = (undef, undef, undef);
}
92
93 #---- subs ----
94
# Format a duration (seconds) as "MM:SS", prefixed with "Hh" when the
# duration reaches a full hour. Returns nothing for 0 or undef input.
sub fmt_time {
	my ($duration) = @_;
	return unless $duration;
	my ( $sec, $min, $hour ) = gmtime($duration);
	my $formatted = $hour ? "${hour}h" : "";
	$formatted .= sprintf( "%02d:%02d", $min, $sec );
	return $formatted;
}
103
# Current local time rendered with the global $t_fmt pattern.
sub curr_time {
	my $stamp = strftime( $t_fmt, localtime() );
	return $stamp;
}
107
my $hest_db;
my $hest_node;

# Graceful-shutdown handler for SIGINT/SIGQUIT: if a Hyper Estraier
# database is open, flush and close it so the index is not corrupted,
# then exit.
sub signal {
	my ($caught) = @_;
	if ($hest_db) {
		print "\nCaught a SIG$caught--syncing database and shutting down\n";
		$hest_db->sync();
		$hest_db->close();
	}
	exit(0);
}

$SIG{INT}  = \&signal;
$SIG{QUIT} = \&signal;
123
# Push file rows from the search database into the Hyper Estraier
# full-text index, either directly (local index under $TopDir) or through
# an estmaster node URL. Called with no arguments it indexes everything;
# called with ($host_id, $share_id, $num) it indexes only that one backup.
# Processes the result set in pages of EST_CHUNK rows.
sub hest_update {

	my ($host_id, $share_id, $num) = @_;

	# With -j, skip the per-URI existence check. The "&&" chains print's
	# return value (1) into $skip_check, emitting the warning as a side
	# effect on every call.
	my $skip_check = $opt{j} && print STDERR "Skipping check for existing files -- this should be used only with initital import\n";

	unless (defined($use_hest)) {
		print STDERR "HyperEstraier support not enabled in configuration\n";
		$use_hest = 0;	# remember, so the message is printed only once
		return;
	}

	return unless($use_hest);

	print curr_time," updating HyperEstraier:";

	my $t = time();

	my $offset = 0;	# paging offset into the files result set
	my $added = 0;	# documents actually written to the index

	print " opening index $use_hest";
	if ($index_path) {
		# open the index files on disk directly, creating if needed
		$hest_db = HyperEstraier::Database->new();
		$hest_db->open($TopDir . $index_path, $HyperEstraier::Database::DBWRITER | $HyperEstraier::Database::DBCREAT);
		print " directly";
	} elsif ($index_node_url) {
		# reuse an already-created node connection across calls
		$hest_node ||= HyperEstraier::Node->new($index_node_url);
		$hest_node->set_auth('admin', 'admin');
		print " via node URL";
	} else {
		die "don't know how to use HyperEstraier Index $use_hest";
	}
	print " increment is " . EST_CHUNK . " files:";

	my $results = 0;

	do {

		# restrict to a single backup when invoked with arguments
		my $where = '';
		my @data;
		if (defined($host_id) && defined($share_id) && defined($num)) {
			$where = qq{
			WHERE
				hosts.id = ? AND
				shares.id = ? AND
				files.backupnum = ?
			};
			@data = ( $host_id, $share_id, $num );
		}

		my $limit = sprintf('LIMIT '.EST_CHUNK.' OFFSET %d', $offset);

		my $sth = $dbh->prepare(qq{
			SELECT
				files.id AS fid,
				hosts.name AS hname,
				shares.name AS sname,
				-- shares.share AS sharename,
				files.backupnum AS backupnum,
				-- files.name AS filename,
				files.path AS filepath,
				files.date AS date,
				files.type AS type,
				files.size AS size,
				files.shareid AS shareid,
				backups.date AS backup_date
			FROM files
				INNER JOIN shares ON files.shareID=shares.ID
				INNER JOIN hosts ON hosts.ID = shares.hostID
				INNER JOIN backups ON backups.num = files.backupNum and backups.hostID = hosts.ID AND backups.shareID = shares.ID
			$where
			$limit
		});

		$sth->execute(@data);
		# NOTE(review): DBI documents rows() as unreliable for SELECT
		# before all rows are fetched; presumably works with the DBD in
		# use here -- verify against the deployed driver.
		$results = $sth->rows;

		if ($results == 0) {
			print " - no new files\n";
			# NOTE(review): do{}...while is not a loop in Perl, so this
			# `last` does not terminate the do-block; confirm intended
			# behavior (perl warns "Exiting subroutine via last").
			last;
		}

		# NOTE(review): a named sub nested in a block is compiled once at
		# compile time, not per iteration; placement here is cosmetic.
		sub fmt_date {
			my $t = shift || return;
			my $iso = BackupPC::Lib::timeStamp($t);
			$iso =~ s/\s/T/;
			return $iso;
		}

		while (my $row = $sth->fetchrow_hashref()) {

			# each document is addressed by a synthetic URI built from
			# the files.id primary key
			my $fid = $row->{'fid'} || die "no fid?";
			my $uri = 'file:///' . $fid;

			unless ($skip_check) {
				# skip documents already present in the index
				my $id = ($hest_db || $hest_node)->uri_to_id($uri);
				next unless ($id == -1);
			}

			# create a document object
			my $doc = HyperEstraier::Document->new;

			# add attributes to the document object
			$doc->add_attr('@uri', $uri);

			# copy every non-NULL column of the row as a document attribute
			foreach my $c (@{ $sth->{NAME} }) {
				$doc->add_attr($c, $row->{$c}) if (defined($row->{$c}));
			}

			#$doc->add_attr('@cdate', fmt_date($row->{'date'}));

			# add the body text to the document object; the space-separated
			# copy is hidden text so partial path components also match
			my $path = $row->{'filepath'};
			$doc->add_text($path);
			$path =~ s/(.)/$1 /g;
			$doc->add_hidden_text($path);

			print STDERR $doc->dump_draft,"\n" if ($debug > 1);

			# register the document object to the database
			if ($hest_db) {
				$hest_db->put_doc($doc, $HyperEstraier::Database::PDCLEAN);
			} elsif ($hest_node) {
				$hest_node->put_doc($doc);
			} else {
				die "not supported";
			}
			$added++;
		}

		print " $added";
		# flush to disk after each chunk when writing the index directly
		$hest_db->sync() if ($index_path);

		$offset += EST_CHUNK;

	} while ($results == EST_CHUNK);	# a short page means we are done

	if ($index_path) {
		print ", close";
		$hest_db->close();
	}

	my $dur = (time() - $t) || 1;	# avoid division by zero below
	printf(" [%.2f/s dur: %s]\n",
		( $added / $dur ),
		fmt_time($dur)
	);
}
273
274 #---- /subs ----
275
276
## update index ##
# Force a full index update when requested (-i/-j), or when a local index
# path is configured but the index does not exist yet. Skipped with -c:
# a freshly created database has nothing to index yet.
if (($opt{i} || $opt{j} || ($index_path && ! -e $TopDir . $index_path)) && !$opt{c}) {
	# update all
	print "force update of HyperEstraier index ";
	# FIX: concatenate '' when $index_path is undef (node-URL setups) to
	# avoid "use of uninitialized value" warnings; the -e test result is
	# unchanged, since undef concatenation already behaved like ''.
	my $index_file = $TopDir . ( $index_path || '' );
	print "importing existing data" unless (-e $index_file);
	print "by -i flag" if ($opt{i});
	print "by -j flag" if ($opt{j});
	print "\n";
	hest_update();
}
287
## create tables ##
# -c: bootstrap an empty search database: tables, indexes and sequences.
if ($opt{c}) {
	# Create one index from a "table:column[,col...][:unique]" spec; the
	# index name is the spec with non-word characters replaced by '_'.
	sub do_index {
		my $index = shift || return;
		my ($table,$col,$unique) = split(/:/, $index);
		$unique ||= '';
		$index =~ s/\W+/_/g;
		print "$index on $table($col)" . ( $unique ? "u" : "" ) . " ";
		$dbh->do(qq{ create $unique index $index on $table($col) });
	}

	print "creating tables...\n";

	# All DDL in one call; executed inside the open transaction and
	# committed at the end of this block.
	$dbh->do( qq{
		create table hosts (
			ID SERIAL PRIMARY KEY,
			name VARCHAR(30) NOT NULL,
			IP VARCHAR(15)
		);

		create table shares (
			ID SERIAL PRIMARY KEY,
			hostID INTEGER NOT NULL references hosts(id),
			name VARCHAR(30) NOT NULL,
			share VARCHAR(200) NOT NULL
		);

		create table dvds (
			ID SERIAL PRIMARY KEY,
			num INTEGER NOT NULL,
			name VARCHAR(255) NOT NULL,
			mjesto VARCHAR(255)
		);

		create table backups (
			id serial,
			hostID INTEGER NOT NULL references hosts(id),
			num INTEGER NOT NULL,
			date integer NOT NULL,
			type CHAR(4) not null,
			shareID integer not null references shares(id),
			size bigint not null,
			inc_size bigint not null default -1,
			inc_deleted boolean default false,
			parts integer not null default 1,
			PRIMARY KEY(id)
		);

		create table files (
			ID SERIAL,
			shareID INTEGER NOT NULL references shares(id),
			backupNum INTEGER NOT NULL,
			name VARCHAR(255) NOT NULL,
			path VARCHAR(255) NOT NULL,
			date integer NOT NULL,
			type INTEGER NOT NULL,
			size bigint NOT NULL,
			primary key(id)
		);

		create table archive (
			id serial,
			dvd_nr int not null,
			total_size bigint default -1,
			note text,
			username varchar(20) not null,
			date timestamp default now(),
			primary key(id)
		);

		create table archive_backup (
			archive_id int not null references archive(id) on delete cascade,
			backup_id int not null references backups(id),
			primary key(archive_id, backup_id)
		);

		create table archive_burned (
			archive_id int references archive(id),
			date timestamp default now(),
			part int not null default 1,
			copy int not null default 1,
			iso_size bigint default -1
		);

		create table backup_parts (
			id serial,
			backup_id int references backups(id),
			part_nr int not null check (part_nr > 0),
			tar_size bigint not null check (tar_size > 0),
			size bigint not null check (size > 0),
			md5 text not null,
			items int not null check (items > 0),
			date timestamp default now(),
			primary key(id)
		);
	});

	print "creating indexes: ";

	# indexes matching the lookups performed by this script and the search UI
	foreach my $index (qw(
		hosts:name
		backups:hostID
		backups:num
		backups:shareID
		shares:hostID
		shares:name
		files:shareID
		files:path
		files:name
		files:date
		files:size
		archive:dvd_nr
		archive_burned:archive_id
		backup_parts:backup_id,part_nr
	)) {
		do_index($index);
	}

	print " creating sequence: ";
	foreach my $seq (qw/dvd_nr/) {
		print "$seq ";
		$dbh->do( qq{ CREATE SEQUENCE $seq } );
	}


	print "...\n";

	$dbh->commit;

}
418
## delete data before inserting ##
# -d: wipe existing rows; child tables first so foreign keys are satisfied
if ($opt{d}) {
	print "deleting ";
	foreach my $table (qw(files dvds backups shares hosts)) {
		print "$table ";
		$dbh->do(qq{ DELETE FROM $table });
	}
	print " done...\n";

	$dbh->commit;
}
430
## insert new values ##

# get hosts
$hosts = $bpc->HostInfoRead();
my $hostID;
my $shareID;

# $sth is not a statement handle itself: it is autovivified into a
# hash-ref holding all named, pre-prepared statement handles below
my $sth;

$sth->{insert_hosts} = $dbh->prepare(qq{
INSERT INTO hosts (name, IP) VALUES (?,?)
});

$sth->{hosts_by_name} = $dbh->prepare(qq{
SELECT ID FROM hosts WHERE name=?
});

# has this (host, backup number, share) combination been imported already?
$sth->{backups_count} = $dbh->prepare(qq{
SELECT COUNT(*)
FROM backups
WHERE hostID=? AND num=? AND shareid=?
});

# size is inserted as a -1 placeholder and corrected by
# update_backups_size once recurseDir has computed the real total
$sth->{insert_backups} = $dbh->prepare(qq{
INSERT INTO backups (hostID, num, date, type, shareid, size)
VALUES (?,?,?,?,?,-1)
});

$sth->{update_backups_size} = $dbh->prepare(qq{
UPDATE backups SET size = ?
WHERE hostID = ? and num = ? and date = ? and type =? and shareid = ?
});

# executed by found_in_db() for every file/dir not already known
$sth->{insert_files} = $dbh->prepare(qq{
INSERT INTO files
	(shareID, backupNum, name, path, date, type, size)
VALUES (?,?,?,?,?,?,?)
});

my @hosts = keys %{$hosts};
my $host_nr = 0;
472
# Main import loop: for every configured host, walk its backups and every
# share inside each backup, inserting new rows and updating the index.
foreach my $host_key (@hosts) {

	my $hostname = $hosts->{$host_key}->{'host'} || die "can't find host for $host_key";

	$sth->{hosts_by_name}->execute($hosts->{$host_key}->{'host'});

	# insert the host row if this host is not in the database yet
	unless (($hostID) = $sth->{hosts_by_name}->fetchrow_array()) {
		$sth->{insert_hosts}->execute(
			$hosts->{$host_key}->{'host'},
			$hosts->{$host_key}->{'ip'}
		);

		$hostID = $dbh->last_insert_id(undef,undef,'hosts',undef);
	}

	$host_nr++;
	print "host ", $hosts->{$host_key}->{'host'}, " [",
		$host_nr, "/", ($#hosts + 1), "]: ";

	# get backups for a host
	my @backups = $bpc->BackupInfoRead($hostname);
	my $incs = scalar @backups;
	print "$incs increments\n";

	my $inc_nr = 0;
	$beenThere = {};	# dedup cache is per-host

	foreach my $backup (@backups) {

		$inc_nr++;
		# -m limits how many increments are imported per host
		last if ($opt{m} && $inc_nr > $opt{m});

		my $backupNum = $backup->{'num'};
		my @backupShares = ();	# NOTE(review): never used below

		printf("%-10s %2d/%-2d #%-2d %s %5s/%5s files (date: %s dur: %s)\n",
			$hosts->{$host_key}->{'host'},
			$inc_nr, $incs, $backupNum,
			$backup->{type} || '?',
			$backup->{nFilesNew} || '?', $backup->{nFiles} || '?',
			strftime($t_fmt,localtime($backup->{startTime})),
			fmt_time($backup->{endTime} - $backup->{startTime})
		);

		my $files = BackupPC::View->new($bpc, $hostname, \@backups, 1);
		foreach my $share ($files->shareList($backupNum)) {

			my $t = time();

			$shareID = getShareID($share, $hostID, $hostname);

			$sth->{backups_count}->execute($hostID, $backupNum, $shareID);
			my ($count) = $sth->{backups_count}->fetchrow_array();
			# skip if already in database!
			next if ($count > 0);

			# dump some log
			print curr_time," ", $share;

			# placeholder row (size = -1); updated after recursion below
			$sth->{insert_backups}->execute(
				$hostID,
				$backupNum,
				$backup->{'endTime'},
				substr($backup->{'type'},0,4),
				$shareID,
			);

			my ($f, $nf, $d, $nd, $size) = recurseDir($bpc, $hostname, $files, $backupNum, $share, "", $shareID);

			# commit the whole share inside eval; roll back the partial
			# import on any failure (e.g. interrupted/partial backups)
			eval {
				$sth->{update_backups_size}->execute(
					$size,
					$hostID,
					$backupNum,
					$backup->{'endTime'},
					substr($backup->{'type'},0,4),
					$shareID,
				);
				print " commit";
				$dbh->commit();
			};
			if ($@) {
				print " rollback";
				$dbh->rollback();
			}

			my $dur = (time() - $t) || 1;	# avoid division by zero
			printf(" %d/%d files %d/%d dirs %0.2f MB [%.2f/s dur: %s]\n",
				$nf, $f, $nd, $d,
				($size / 1024 / 1024),
				( ($f+$d) / $dur ),
				fmt_time($dur)
			);

			# refresh the full-text index only when something was added
			hest_update($hostID, $shareID, $backupNum) if ($nf + $nd > 0);
		}

	}
}
# Drop statement handles before disconnecting (avoids DBI warnings about
# active handles), flush any remaining work, and release the pid file.
undef $sth;
$dbh->commit();
$dbh->disconnect();

print "total duration: ",fmt_time(time() - $start_t),"\n";

$pidfile->remove;
579
# Return the shares.ID for ($share, $hostID), inserting a new row when the
# share is not known yet. The "share" column stores "hostname/share" with
# duplicate slashes collapsed.
# FIX: the sub was declared "sub getShareID()" -- an empty prototype on a
# sub called with three arguments. Perl prototypes are parser hints, not
# argument validation, and would reject the call if it were compiled after
# this definition; the prototype is removed.
sub getShareID {

	my ($share, $hostID, $hostname) = @_;

	# prepare lazily, once, and cache in the shared handle hash
	$sth->{share_id} ||= $dbh->prepare(qq{
		SELECT ID FROM shares WHERE hostID=? AND name=?
	});

	$sth->{share_id}->execute($hostID,$share);

	my ($id) = $sth->{share_id}->fetchrow_array();

	return $id if (defined($id));

	$sth->{insert_share} ||= $dbh->prepare(qq{
		INSERT INTO shares
			(hostID,name,share)
		VALUES (?,?,?)
	});

	# label shown in the search UI drop-down: "hostname/share", deduped slashes
	my $drop_down = $hostname . '/' . $share;
	$drop_down =~ s#//+#/#g;

	$sth->{insert_share}->execute($hostID,$share, $drop_down);
	return $dbh->last_insert_id(undef,undef,'shares',undef);
}
606
# Check whether a file row already exists, consulting first the in-memory
# $beenThere cache and then the files table; insert the row when absent.
# Arguments: ($key, shareID, backupNum, name, path, date, type, size) --
# everything after $key is exactly the insert_files placeholder tuple.
# Returns a true value when the row was already present.
sub found_in_db {

	my @data = @_;
	shift @data;	# drop $key so @data matches insert_files placeholders

	my ($key, $shareID,undef,$name,$path,$date,undef,$size) = @_;

	# cache hit: this (share, dir, name, mtime, size) was handled this run
	return $beenThere->{$key} if (defined($beenThere->{$key}));

	# match timestamps shifted by +-1h so a daylight-saving change between
	# backups does not make an unchanged file look new
	$sth->{file_in_db} ||= $dbh->prepare(qq{
		SELECT 1 FROM files
		WHERE shareID = ? and
			path = ? and
			size = ? and
			( date = ? or date = ? or date = ? )
		LIMIT 1
	});

	my @param = ($shareID,$path,$size,$date, $date-$dst_offset, $date+$dst_offset);
	$sth->{file_in_db}->execute(@param);
	# NOTE(review): DBI documents rows() as unreliable for SELECT before
	# the rows are fetched; presumably works with the DBD driver in use
	# here -- verify.
	my $rows = $sth->{file_in_db}->rows;
	print STDERR "## found_in_db($shareID,$path,$date,$size) ",( $rows ? '+' : '-' ), join(" ",@param), "\n" if ($debug >= 3);

	$beenThere->{$key}++;

	$sth->{'insert_files'}->execute(@data) unless ($rows);
	return $rows;
}
635
####################################################
# recurse through the filesystem structure and     #
# return a flattened files list                    #
####################################################
# Walk one directory of one backup share, inserting every file/directory
# row not already known (via found_in_db / the $beenThere cache, including
# +-1h key variants to tolerate daylight-saving shifts), then recurse into
# subdirectories collected on an explicit stack.
# Returns ($nr_files, $new_files, $nr_dirs, $new_dirs, $size).
# FIX: the original declaration "sub recurseDir($$$$$$$$)" carried a
# prototype of eight mandatory scalars for a sub taking (and recursively
# called with) seven arguments -- a compile-time prototype violation on
# the recursive call below. Prototypes are not argument validation in
# Perl, so the prototype is removed.
sub recurseDir {

	my ($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID) = @_;

	print STDERR "\nrecurse($hostname,$backupNum,$share,$dir,$shareID)\n" if ($debug >= 1);

	my ($nr_files, $new_files, $nr_dirs, $new_dirs, $size) = (0,0,0,0,0);

	{ # scope
		my @stack;

		print STDERR "# dirAttrib($backupNum, $share, $dir)\n" if ($debug >= 2);
		my $filesInBackup = $files->dirAttrib($backupNum, $share, $dir);

		# first, add all the entries in current directory
		foreach my $path_key (keys %{$filesInBackup}) {
			print STDERR "# file ",Dumper($filesInBackup->{$path_key}),"\n" if ($debug >= 3);
			# tuple matching the insert_files placeholders
			my @data = (
				$shareID,
				$backupNum,
				$path_key,
				$filesInBackup->{$path_key}->{'relPath'},
				$filesInBackup->{$path_key}->{'mtime'},
				$filesInBackup->{$path_key}->{'type'},
				$filesInBackup->{$path_key}->{'size'}
			);

			# dedup-cache key plus variants shifted by +-$dst_offset, so
			# an entry recorded around a DST change is still recognized
			my $key = join(" ", (
				$shareID,
				$dir,
				$path_key,
				$filesInBackup->{$path_key}->{'mtime'},
				$filesInBackup->{$path_key}->{'size'}
			));

			my $key_dst_prev = join(" ", (
				$shareID,
				$dir,
				$path_key,
				$filesInBackup->{$path_key}->{'mtime'} - $dst_offset,
				$filesInBackup->{$path_key}->{'size'}
			));

			my $key_dst_next = join(" ", (
				$shareID,
				$dir,
				$path_key,
				$filesInBackup->{$path_key}->{'mtime'} + $dst_offset,
				$filesInBackup->{$path_key}->{'size'}
			));

			my $found;
			if (
				! defined($beenThere->{$key}) &&
				! defined($beenThere->{$key_dst_prev}) &&
				! defined($beenThere->{$key_dst_next}) &&
				! ($found = found_in_db($key, @data))
			) {
				print STDERR "# key: $key [", $beenThere->{$key},"]" if ($debug >= 2);

				if ($filesInBackup->{$path_key}->{'type'} == BPC_FTYPE_DIR) {
					$new_dirs++ unless ($found);
					print STDERR " dir\n" if ($debug >= 2);
				} else {
					$new_files++ unless ($found);
					print STDERR " file\n" if ($debug >= 2);
				}
				# only newly-inserted entries count toward the backup size
				$size += $filesInBackup->{$path_key}->{'size'} || 0;
			}

			if ($filesInBackup->{$path_key}->{'type'} == BPC_FTYPE_DIR) {
				$nr_dirs++;

				# defer descent: collect subdirectories on a stack instead
				# of recursing per entry, bounding recursion depth
				my $full_path = $dir . '/' . $path_key;
				push @stack, $full_path;
				print STDERR "### store to stack: $full_path\n" if ($debug >= 3);

			} else {
				$nr_files++;
			}
		}

		print STDERR "## STACK ",join(", ", @stack),"\n" if ($debug >= 2);

		# descend into the collected subdirectories, accumulating totals
		while ( my $dir = shift @stack ) {
			my ($f,$nf,$d,$nd, $s) = recurseDir($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID);
			print STDERR "# $dir f: $f nf: $nf d: $d nd: $nd\n" if ($debug >= 1);
			$nr_files += $f;
			$new_files += $nf;
			$nr_dirs += $d;
			$new_dirs += $nd;
			$size += $s;
		}
	}

	return ($nr_files, $new_files, $nr_dirs, $new_dirs, $size);
}
744

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26