/[BackupPC]/trunk/bin/BackupPC_updatedb
This is a repository of my old source code, which is no longer updated. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/bin/BackupPC_updatedb

Parent Directory Parent Directory | Revision Log Revision Log


Revision 303 - (show annotations)
Sat Jan 28 16:45:46 2006 UTC (18 years, 3 months ago) by dpavlin
File size: 16946 byte(s)
switch to Search::Estraier. Needs more testing for sure!
1 #!/usr/local/bin/perl -w
2
3 use strict;
4 use lib "__INSTALLDIR__/lib";
5
6 use DBI;
7 use BackupPC::Lib;
8 use BackupPC::View;
9 use Data::Dumper;
10 use Getopt::Std;
11 use Time::HiRes qw/time/;
12 use File::Pid;
13 use POSIX qw/strftime/;
14 use BackupPC::SearchLib;
15 use Cwd qw/abs_path/;
16
17 use constant BPC_FTYPE_DIR => 5;
18 use constant EST_CHUNK => 100000;
19
# One hour in seconds: the offset applied when comparing file times across
# a daylight saving time change.
my $dst_offset = 60 * 60;

my $debug = 0;
$| = 1;

my $start_t = time();

# The pid file name is derived from the absolute path of this script, with
# every run of non-word characters collapsed to a single underscore.
my $pid_path = abs_path($0);
$pid_path =~ s/\W+/_/g;

my $pidfile = File::Pid->new({ file => "/tmp/$pid_path" });

# Refuse to start while another instance is alive.
my $pid = $pidfile->running;
die "$0 already running: $pid\n" if ($pid);

if ($pidfile->pid ne $$) {
	# the pid held by the object is not ours: discard it and start over
	$pidfile->remove;
	$pidfile = File::Pid->new;
}
print STDERR "$0 using pid ", $pidfile->pid, " file ", $pidfile->file, "\n";
$pidfile->write;

# strftime format used for every timestamp printed by this script
my $t_fmt = '%Y-%m-%d %H:%M:%S';

my $hosts;
my $bpc = BackupPC::Lib->new || die;
my %Conf = $bpc->Conf();
my $TopDir = $bpc->TopDir();

# cache of file keys already handled for the current host
my $beenThere = {};

# database connection settings come from the BackupPC configuration
my $dsn = $Conf{SearchDSN} || die "Need SearchDSN in config.pl\n";
my $user = $Conf{SearchUser} || '';

my $index_node_url = $Conf{HyperEstraierIndex};

# explicit transactions: errors raise exceptions and nothing is auto-committed
my $dbh = DBI->connect(
	$dsn, $user, "",
	{ RaiseError => 1, AutoCommit => 0 },
);
58
# Command line options, filled in by getopts below.
my %opt;

if ( !getopts("cdm:v:ijf", \%opt ) ) {
	print STDERR <<EOF;
usage: $0 [-c|-d] [-m num] [-v|-v level] [-i|-j|-f]

Options:
	-c	create database on first use
	-d	delete database before import
	-m num	import just num increments for one host
	-v num	set verbosity (debug) level (default $debug)
	-i	update Hyper Estraier full text index
	-j	update full text, don't check existing files
	-f	don't do anything with full text index

Option -j is variation on -i. It will allow faster initial creation
of full-text index from existing database.

Option -f will create database which is out of sync with full text index. You
will have to re-run $0 with -i to fix it.

EOF
	exit 1;
}

# -v and -f are independent switches, so each one is tested on its own
# (chaining them with elsif made -f a no-op whenever -v was also given).
if ($opt{v}) {
	print "Debug level at $opt{v}\n";
	$debug = $opt{v};
}

if ($opt{f}) {
	print "WARNING: disabling full-text index update. You need to re-run $0 -j !\n";
	# an undefined index URL makes hest_update() skip the full-text index
	$index_node_url = undef;
}
91
92 #---- subs ----
93
# Format a duration given in seconds as "MM:SS", prefixed with "<hours>h"
# once the duration reaches an hour (for example "1h01:01").
#
# Fractional seconds are truncated.  Hours are not wrapped at 24, so long
# runs are reported correctly (90000 seconds gives "25h00:00").  A single
# string is always returned, also for a zero or undefined duration, so the
# result is safe to use inside a print/printf argument list.
sub fmt_time {
	my $t = shift;

	my $secs = int( $t || 0 );

	# a negative duration is shown with a leading minus sign
	my $out = '';
	if ($secs < 0) {
		$out  = '-';
		$secs = -$secs;
	}

	my $hh = int( $secs / 3600 );
	my $mm = int( ($secs % 3600) / 60 );
	my $ss = $secs % 60;

	$out .= "${hh}h" if ($hh);
	$out .= sprintf("%02d:%02d", $mm, $ss);
	return $out;
}
102
# Return the current local time formatted with the script-wide $t_fmt.
sub curr_time {
	my @now = localtime();
	return strftime($t_fmt, @now);
}
106
# Handles to the full-text index.  Only $hest_node is assigned anywhere in
# this file (in hest_update); $hest_db is consulted but never set here.
my $hest_db;
my $hest_node;

# Signal handler for INT and QUIT: flush and close the index database if one
# is open, remove our pid file and exit.
#   $sig - name of the signal, as passed in by perl
sub signal {
	my($sig) = @_;
	if ($hest_db) {
		print "\nCaught a SIG$sig--syncing database and shutting down\n";
		$hest_db->sync();
		$hest_db->close();
	}
	# do not leave the pid file behind when the run is interrupted
	$pidfile->remove if ($pidfile);
	exit(0);
}

$SIG{'INT'}  = \&signal;
$SIG{'QUIT'} = \&signal;
122
# Turn an epoch time into the string produced by BackupPC::Lib::timeStamp
# with its first whitespace character replaced by "T".  Returns nothing for
# a false argument.  Currently only referenced from a commented-out line in
# hest_update.
sub fmt_date {
	my $t = shift || return;
	my $iso = BackupPC::Lib::timeStamp($t);
	$iso =~ s/\s/T/;
	return $iso;
}

# Push rows of the files table into the Hyper Estraier node index.
#
#   hest_update()                          - index every file in the database
#   hest_update($host_id, $share_id, $num) - index only files of one backup
#
# Rows are read in pages of EST_CHUNK.  Unless -j was given, a file whose
# URI is already known to the index is skipped.  Progress is printed to
# STDOUT.  Does nothing when no index URL is configured (or -f was given).
sub hest_update {

	my ($host_id, $share_id, $num) = @_;

	my $skip_check = $opt{j} && print STDERR "Skipping check for existing files -- this should be used only with initital import\n";

	unless ($index_node_url) {
		# Announce the disabled state once, then remember it with a
		# defined-but-false sentinel so later calls return quietly
		# instead of falling through and dying below.
		unless (defined($index_node_url) && $index_node_url eq '0') {
			print STDERR "HyperEstraier support not enabled in configuration\n";
			$index_node_url = 0;
		}
		return;
	}

	print curr_time," updating Hyper Estraier:";

	my $t = time();

	my $offset = 0;
	my $added = 0;

	print " opening index $index_node_url";

	# This file never loads Search::Estraier itself; require is a no-op
	# when another module has already loaded it.
	require Search::Estraier;
	$hest_node ||= Search::Estraier::Node->new($index_node_url);
	# NOTE(review): credentials are hard-coded; consider moving to config.pl
	$hest_node->set_auth('admin', 'admin');
	print " via node URL";

	# the optional filter is the same for every page, so build it once
	my $where = '';
	my @data;
	if (defined($host_id) && defined($share_id) && defined($num)) {
		$where = qq{
		WHERE
			hosts.id = ? AND
			shares.id = ? AND
			files.backupnum = ?
		};
		@data = ( $host_id, $share_id, $num );
	}

	# A real loop is required here: "last" cannot be used to leave a
	# do {} while block.
	while (1) {

		my $limit = sprintf('LIMIT '.EST_CHUNK.' OFFSET %d', $offset);

		# ORDER BY gives LIMIT/OFFSET a stable row order, so consecutive
		# pages neither repeat nor skip rows.
		my $sth = $dbh->prepare(qq{
			SELECT
				files.id			AS fid,
				hosts.name			AS hname,
				shares.name			AS sname,
				-- shares.share			AS sharename,
				files.backupnum			AS backupnum,
				-- files.name			AS filename,
				files.path			AS filepath,
				files.date			AS date,
				files.type			AS type,
				files.size			AS size,
				files.shareid			AS shareid,
				backups.date			AS backup_date
			FROM files
				INNER JOIN shares	ON files.shareID=shares.ID
				INNER JOIN hosts	ON hosts.ID = shares.hostID
				INNER JOIN backups	ON backups.num = files.backupNum and backups.hostID = hosts.ID AND backups.shareID = shares.ID
			$where
			ORDER BY files.id
			$limit
		});

		$sth->execute(@data);
		# NOTE(review): relies on the driver reporting the row count of a
		# SELECT right after execute -- confirm for the driver in use.
		my $results = $sth->rows;

		if ($results == 0) {
			print " - no new files\n";
			last;
		}
		print " - $results files: ";

		while (my $row = $sth->fetchrow_hashref()) {

			my $fid = $row->{'fid'} || die "no fid?";
			my $uri = 'file:///' . $fid;

			unless ($skip_check) {
				# NOTE(review): assumes uri_to_id returns -1 for a URI
				# that is not indexed yet -- confirm against the library.
				my $id = ($hest_db || $hest_node)->uri_to_id($uri);
				next unless ($id == -1);
			}

			# create a document object
			my $doc = Search::Estraier::Document->new;

			# every selected column with a value becomes an attribute
			$doc->add_attr('@uri', $uri);

			foreach my $c (@{ $sth->{NAME} }) {
				$doc->add_attr($c, $row->{$c}) if (defined($row->{$c}));
			}

			#$doc->add_attr('@cdate', fmt_date($row->{'date'}));

			# the path is the body text; a copy with a space after each
			# character is added as hidden text
			my $path = $row->{'filepath'};
			$doc->add_text($path);
			$path =~ s/(.)/$1 /g;
			$doc->add_hidden_text($path);

			print STDERR $doc->dump_draft,"\n" if ($debug > 1);

			# register the document object to the database
			if ($hest_node) {
				$hest_node->put_doc($doc);
			} else {
				die "not supported";
			}
			$added++;
		}

		print " $added";

		$offset += EST_CHUNK;

		# a short page means there is nothing left to read
		last unless ($results == EST_CHUNK);
	}

	my $dur = (time() - $t) || 1;
	printf(" [%.2f/s dur: %s]\n",
		( $added / $dur ),
		fmt_time($dur)
	);
}
259
260 #---- /subs ----
261
262
## update index ##
# -i or -j re-index everything now, except when -c is creating a fresh
# database in this same run.
if ( !$opt{c} && ( $opt{i} || $opt{j} ) ) {
	my $banner = "force update of Hyper Estraier index ";
	$banner .= "by -i flag" if ($opt{i});
	$banner .= "by -j flag" if ($opt{j});
	print $banner, "\n";

	# no arguments: update all
	hest_update();
}
272
## create tables ##
# With -c: create all tables, their indexes and the dvd_nr sequence, then
# commit.
if ($opt{c}) {

	# Create one index from a "table:column[,column...][:unique]" spec.
	# The index name is the spec with every run of non-word characters
	# replaced by an underscore.
	sub do_index {
		my $spec = shift || return;
		my ($tbl, $cols, $uniq) = split(/:/, $spec);
		$uniq = '' unless ($uniq);
		(my $index = $spec) =~ s/\W+/_/g;
		print $index, ' on ', $tbl, '(', $cols, ')', ( $uniq ? "u" : "" ), " ";
		$dbh->do(qq{ create $uniq index $index on $tbl($cols) });
	}

	print "creating tables...\n";

	my $schema = qq{
		create table hosts (
			ID	SERIAL		PRIMARY KEY,
			name	VARCHAR(30)	NOT NULL,
			IP	VARCHAR(15)
		);

		create table shares (
			ID	SERIAL		PRIMARY KEY,
			hostID	INTEGER		NOT NULL references hosts(id),
			name	VARCHAR(30)	NOT NULL,
			share	VARCHAR(200)	NOT NULL
		);

		create table dvds (
			ID	SERIAL		PRIMARY KEY,
			num	INTEGER		NOT NULL,
			name	VARCHAR(255)	NOT NULL,
			mjesto	VARCHAR(255)
		);

		create table backups (
			id	serial,
			hostID	INTEGER		NOT NULL references hosts(id),
			num	INTEGER		NOT NULL,
			date	integer		NOT NULL,
			type	CHAR(4)		not null,
			shareID	integer		not null references shares(id),
			size	bigint		not null,
			inc_size bigint		not null default -1,
			inc_deleted boolean	default false,
			parts	integer		not null default 1,
			PRIMARY KEY(id)
		);

		create table files (
			ID		SERIAL,
			shareID		INTEGER NOT NULL references shares(id),
			backupNum	INTEGER NOT NULL,
			name		VARCHAR(255) NOT NULL,
			path		VARCHAR(255) NOT NULL,
			date		integer NOT NULL,
			type		INTEGER NOT NULL,
			size		bigint NOT NULL,
			primary key(id)
		);

		create table archive (
			id		serial,
			dvd_nr		int not null,
			total_size	bigint default -1,
			note		text,
			username	varchar(20) not null,
			date		timestamp default now(),
			primary key(id)
		);

		create table archive_backup (
			archive_id	int not null references archive(id) on delete cascade,
			backup_id	int not null references backups(id),
			primary key(archive_id, backup_id)
		);

		create table archive_burned (
			archive_id	int references archive(id),
			date		timestamp default now(),
			part		int not null default 1,
			copy		int not null default 1,
			iso_size	bigint default -1
		);

		create table backup_parts (
			id serial,
			backup_id int references backups(id),
			part_nr int not null check (part_nr > 0),
			tar_size bigint not null check (tar_size > 0),
			size bigint not null check (size > 0),
			md5 text not null,
			items int not null check (items > 0),
			date timestamp default now(),
			primary key(id)
		);
	};
	$dbh->do($schema);

	print "creating indexes: ";

	my @index_specs = qw(
		hosts:name
		backups:hostID
		backups:num
		backups:shareID
		shares:hostID
		shares:name
		files:shareID
		files:path
		files:name
		files:date
		files:size
		archive:dvd_nr
		archive_burned:archive_id
		backup_parts:backup_id,part_nr
	);
	do_index($_) for (@index_specs);

	print " creating sequence: ";
	for my $sequence (qw/dvd_nr/) {
		print $sequence, " ";
		$dbh->do( qq{ CREATE SEQUENCE $sequence } );
	}

	print "...\n";

	$dbh->commit;

}
403
## delete data before inserting ##
# With -d: empty the listed tables in the order given, then commit.
if ($opt{d}) {
	my @tables = qw(files dvds backups shares hosts);

	print "deleting ";
	for my $name (@tables) {
		print $name, " ";
		$dbh->do(qq{ DELETE FROM $name });
	}
	print " done...\n";

	$dbh->commit;
}
415
## insert new values ##

# IDs of the host and share currently being imported
my ($hostID, $shareID);

# get hosts
$hosts = $bpc->HostInfoRead();

# Prepared statement handles, keyed by a short name.  The subs further down
# add more handles to this hash on first use.
my $sth = {};

# name => SQL pairs, prepared in the order listed
my @statements = (
	insert_hosts => qq{
INSERT INTO hosts (name, IP) VALUES (?,?)
},
	hosts_by_name => qq{
SELECT ID FROM hosts WHERE name=?
},
	backups_count => qq{
SELECT COUNT(*)
FROM backups
WHERE hostID=? AND num=? AND shareid=?
},
	insert_backups => qq{
INSERT INTO backups (hostID, num, date, type, shareid, size)
VALUES (?,?,?,?,?,-1)
},
	update_backups_size => qq{
UPDATE backups SET size = ?
WHERE hostID = ? and num = ? and date = ? and type =? and shareid = ?
},
	insert_files => qq{
INSERT INTO files
(shareID, backupNum, name, path, date, type, size)
VALUES (?,?,?,?,?,?,?)
},
);

while (my ($name, $sql) = splice(@statements, 0, 2)) {
	$sth->{$name} = $dbh->prepare($sql);
}
454
# Main import loop: for every host known to BackupPC, walk its backups and,
# for every share of a backup that is not in the database yet, record the
# backup, import its file list and update the full-text index.
my @hosts = keys %{$hosts};
my $host_nr = 0;

foreach my $host_key (@hosts) {

	my $hostname = $hosts->{$host_key}->{'host'} || die "can't find host for $host_key";

	# look the host up, inserting it on first sight
	$sth->{hosts_by_name}->execute($hostname);

	unless (($hostID) = $sth->{hosts_by_name}->fetchrow_array()) {
		$sth->{insert_hosts}->execute(
			$hostname,
			$hosts->{$host_key}->{'ip'}
		);

		$hostID = $dbh->last_insert_id(undef,undef,'hosts',undef);
	}

	$host_nr++;
	print "host ", $hostname, " [",
		$host_nr, "/", scalar(@hosts), "]: ";

	# get backups for a host
	my @backups = $bpc->BackupInfoRead($hostname);
	my $incs = scalar @backups;
	print "$incs increments\n";

	my $inc_nr = 0;
	# the seen-files cache is kept per host
	$beenThere = {};

	foreach my $backup (@backups) {

		$inc_nr++;
		# -m limits the number of increments imported per host
		last if ($opt{m} && $inc_nr > $opt{m});

		my $backupNum = $backup->{'num'};
		my @backupShares = ();

		# The "|| '00:00'" forces scalar context and guards against an
		# empty return from fmt_time, so the printf argument list stays
		# aligned with the format.
		printf("%-10s %2d/%-2d #%-2d %s %5s/%5s files (date: %s dur: %s)\n",
			$hostname,
			$inc_nr, $incs, $backupNum,
			$backup->{type} || '?',
			$backup->{nFilesNew} || '?', $backup->{nFiles} || '?',
			strftime($t_fmt,localtime($backup->{startTime})),
			( fmt_time($backup->{endTime} - $backup->{startTime}) || '00:00' )
		);

		my $files = BackupPC::View->new($bpc, $hostname, \@backups, 1);
		foreach my $share ($files->shareList($backupNum)) {

			my $t = time();

			$shareID = getShareID($share, $hostID, $hostname);

			$sth->{backups_count}->execute($hostID, $backupNum, $shareID);
			my ($count) = $sth->{backups_count}->fetchrow_array();
			# skip if already in database!
			next if ($count > 0);

			# dump some log
			print curr_time," ", $share;

			# the backup row is inserted with size -1 and corrected below
			$sth->{insert_backups}->execute(
				$hostID,
				$backupNum,
				$backup->{'endTime'},
				substr($backup->{'type'},0,4),
				$shareID,
			);

			my ($f, $nf, $d, $nd, $size) = recurseDir($bpc, $hostname, $files, $backupNum, $share, "", $shareID);

			my $committed = 1;
			eval {
				$sth->{update_backups_size}->execute(
					$size,
					$hostID,
					$backupNum,
					$backup->{'endTime'},
					substr($backup->{'type'},0,4),
					$shareID,
				);
				print " commit";
				$dbh->commit();
			};
			if (my $err = $@) {
				# report the reason instead of swallowing it
				print " rollback";
				warn "\n$hostname:$share #$backupNum not saved: $err";
				$dbh->rollback();
				$committed = 0;
			}

			my $dur = (time() - $t) || 1;
			printf(" %d/%d files %d/%d dirs %0.2f MB [%.2f/s dur: %s]\n",
				$nf, $f, $nd, $d,
				($size / 1024 / 1024),
				( ($f+$d) / $dur ),
				fmt_time($dur)
			);

			# after a rollback there is nothing of this backup to index
			hest_update($hostID, $shareID, $backupNum) if ($committed && $nf + $nd > 0);
		}

	}
}
# Drop the prepared statement handles, commit whatever is still pending and
# close the database connection.
$sth = undef;
$dbh->commit;
$dbh->disconnect;

my $elapsed = time() - $start_t;
print "total duration: ", fmt_time($elapsed), "\n";

# normal end of run: release the pid file
$pidfile->remove();
564
# Return the ID of a share in the shares table, inserting the share first
# when it is not there yet.
#
#   $share    - share name as reported for the backup
#   $hostID   - ID of the owning row in the hosts table
#   $hostname - host name, used to build the "host/share" label stored in
#               the share column of a newly inserted row
#
# Declared without a prototype: it is always called with three arguments,
# which the former empty prototype "()" contradicted.
sub getShareID {

	my ($share, $hostID, $hostname) = @_;

	$sth->{share_id} ||= $dbh->prepare(qq{
		SELECT ID FROM shares WHERE hostID=? AND name=?
	});

	$sth->{share_id}->execute($hostID,$share);

	my ($id) = $sth->{share_id}->fetchrow_array();

	return $id if (defined($id));

	$sth->{insert_share} ||= $dbh->prepare(qq{
		INSERT INTO shares
			(hostID,name,share)
		VALUES (?,?,?)
	});

	# "host/share" with runs of slashes collapsed to a single one
	my $drop_down = $hostname . '/' . $share;
	$drop_down =~ s#//+#/#g;

	$sth->{insert_share}->execute($hostID,$share, $drop_down);
	return $dbh->last_insert_id(undef,undef,'shares',undef);
}
591
# Check whether a file is already recorded in the files table and insert it
# when it is not.
#
#   $key  - cache key for $beenThere
#   @data - the seven values of a files row, in insert order:
#           shareID, backupNum, name, path, date, type, size
#
# A file counts as present when a row with the same share, path and size
# exists whose date equals the given date or differs from it by exactly
# $dst_offset in either direction.  Returns the cached value when $key was
# seen before, otherwise 1 if the file was found and 0 if it was inserted.
sub found_in_db {

	my ($key, @data) = @_;

	my ($shareID, undef, undef, $path, $date, undef, $size) = @data;

	return $beenThere->{$key} if (defined($beenThere->{$key}));

	$sth->{file_in_db} ||= $dbh->prepare(qq{
		SELECT 1 FROM files
		WHERE shareID = ? and
			path = ? and
			size = ? and
			( date = ? or date = ? or date = ? )
		LIMIT 1
	});

	my @param = ($shareID,$path,$size,$date, $date-$dst_offset, $date+$dst_offset);
	$sth->{file_in_db}->execute(@param);

	# Fetch the row instead of trusting the row count of a SELECT, then
	# release the handle for the next lookup.
	my ($hit) = $sth->{file_in_db}->fetchrow_array();
	$sth->{file_in_db}->finish();
	my $rows = $hit ? 1 : 0;

	print STDERR "## found_in_db($shareID,$path,$date,$size) ",( $rows ? '+' : '-' ), join(" ",@param), "\n" if ($debug >= 3);

	$beenThere->{$key}++;

	$sth->{'insert_files'}->execute(@data) unless ($rows);
	return $rows;
}
620
####################################################
# recursing through filesystem structure and       #
# and returning flattened files list               #
####################################################
# Walk one directory of a backup share, record every entry that has not been
# seen before (through found_in_db) and descend into its subdirectories.
#
#   $bpc       - BackupPC::Lib object (passed along, not used directly here)
#   $hostname  - host name, used in debug output
#   $files     - view object queried with dirAttrib()
#   $backupNum - backup number
#   $share     - share name
#   $dir       - directory inside the share ("" for the top level)
#   $shareID   - ID of the share in the shares table
#
# Returns (files, new files, dirs, new dirs, size of new entries), each
# summed over this directory and everything below it.
#
# Declared without a prototype: the former one listed eight scalars while
# every call passes seven arguments.
sub recurseDir {

	my ($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID) = @_;

	print STDERR "\nrecurse($hostname,$backupNum,$share,$dir,$shareID)\n" if ($debug >= 1);

	my ($nr_files, $new_files, $nr_dirs, $new_dirs, $size) = (0,0,0,0,0);

	# subdirectories found in this directory, visited after the listing
	my @stack;

	print STDERR "# dirAttrib($backupNum, $share, $dir)\n" if ($debug >= 2);
	my $filesInBackup = $files->dirAttrib($backupNum, $share, $dir);

	# first, add all the entries in current directory
	foreach my $path_key (keys %{$filesInBackup}) {
		my $entry = $filesInBackup->{$path_key};
		print STDERR "# file ",Dumper($entry),"\n" if ($debug >= 3);

		my $mtime  = $entry->{'mtime'};
		my $fsize  = $entry->{'size'};
		my $is_dir = ($entry->{'type'} == BPC_FTYPE_DIR);

		my @data = (
			$shareID,
			$backupNum,
			$path_key,
			$entry->{'relPath'},
			$mtime,
			$entry->{'type'},
			$fsize
		);

		# cache keys for the exact mtime and for the mtime shifted by
		# $dst_offset in either direction
		my ($key, $key_dst_prev, $key_dst_next) = map {
			join(" ", $shareID, $dir, $path_key, $_, $fsize)
		} ($mtime, $mtime - $dst_offset, $mtime + $dst_offset);

		# found_in_db is consulted (and inserts the row) only when none
		# of the three keys is cached; the order of the tests matters
		if (
			! defined($beenThere->{$key}) &&
			! defined($beenThere->{$key_dst_prev}) &&
			! defined($beenThere->{$key_dst_next}) &&
			! found_in_db($key, @data)
		) {
			print STDERR "# key: $key [", $beenThere->{$key},"]" if ($debug >= 2);

			if ($is_dir) {
				$new_dirs++;
				print STDERR " dir\n" if ($debug >= 2);
			} else {
				$new_files++;
				print STDERR " file\n" if ($debug >= 2);
			}
			$size += $fsize || 0;
		}

		if ($is_dir) {
			$nr_dirs++;

			my $full_path = $dir . '/' . $path_key;
			push @stack, $full_path;
			print STDERR "### store to stack: $full_path\n" if ($debug >= 3);
		} else {
			$nr_files++;
		}
	}

	print STDERR "## STACK ",join(", ", @stack),"\n" if ($debug >= 2);

	# then descend into every subdirectory and add up its totals
	while ( defined(my $subdir = shift @stack) ) {
		my ($f,$nf,$d,$nd, $s) = recurseDir($bpc, $hostname, $files, $backupNum, $share, $subdir, $shareID);
		print STDERR "# $subdir f: $f nf: $nf d: $d nd: $nd\n" if ($debug >= 1);
		$nr_files += $f;
		$new_files += $nf;
		$nr_dirs += $d;
		$new_dirs += $nd;
		$size += $s;
	}

	return ($nr_files, $new_files, $nr_dirs, $new_dirs, $size);
}
729

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26