/[BackupPC]/trunk/bin/BackupPC_updatedb
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/bin/BackupPC_updatedb

Parent Directory Parent Directory | Revision Log Revision Log


Revision 306 - (show annotations)
Sat Jan 28 20:42:42 2006 UTC (18 years, 3 months ago) by dpavlin
File size: 16800 byte(s)
fix uri_to_id (and thus updating of Hyper Estraier)
1 #!/usr/local/bin/perl -w
2
3 use strict;
4 use lib "__INSTALLDIR__/lib";
5
6 use DBI;
7 use BackupPC::Lib;
8 use BackupPC::View;
9 use Data::Dumper;
10 use Getopt::Std;
11 use Time::HiRes qw/time/;
12 use File::Pid;
13 use POSIX qw/strftime/;
14 use BackupPC::SearchLib;
15 use Cwd qw/abs_path/;
16
17 use constant BPC_FTYPE_DIR => 5;
18 use constant EST_CHUNK => 100000;
19
# Offset, in seconds, of a one-hour daylight saving time change. File dates
# that differ by exactly this much are matched as the same file further down.
my $dst_offset = 60 * 60;

# Verbosity level; overridden by -v.
my $debug = 0;
$|=1;

my $start_t = time();

# Name the pid file after the absolute path of this script.
my $pid_path = abs_path($0);
$pid_path =~ s/\W+/_/g;
my $pid_file_name = "/tmp/$pid_path";

my $pidfile = File::Pid->new({
    file => $pid_file_name,
});

if (my $pid = $pidfile->running ) {
    die "$0 already running: $pid\n";
} elsif ($pidfile->pid ne $$) {
    # Presumably a stale pid file left behind by an earlier run (verify
    # against File::Pid): drop it and start over on the SAME file, so that
    # later invocations keep checking the path we actually write to.
    $pidfile->remove;
    $pidfile = File::Pid->new({
        file => $pid_file_name,
    });
}
print STDERR "$0 using pid ",$pidfile->pid," file ",$pidfile->file,"\n";
$pidfile->write;

# strftime() format used for all timestamps printed by this script.
my $t_fmt = '%Y-%m-%d %H:%M:%S';

my $hosts;
my $bpc = BackupPC::Lib->new || die;
my %Conf = $bpc->Conf();
my $TopDir = $bpc->TopDir();

# Cache of file keys already handled for the current host.
my $beenThere = {};

my $dsn = $Conf{SearchDSN} || die "Need SearchDSN in config.pl\n";
my $user = $Conf{SearchUser} || '';

# URL of the Hyper Estraier node; undef disables full-text indexing.
my $index_node_url = $Conf{HyperEstraierIndex};

my $dbh = DBI->connect($dsn, $user, "", { RaiseError => 1, AutoCommit => 0 });

my %opt;

if ( !getopts("cdm:v:ijf", \%opt ) ) {
    print STDERR <<EOF;
usage: $0 [-c|-d] [-m num] [-v|-v level] [-i|-j|-f]

Options:
-c create database on first use
-d delete database before import
-m num import just num increments for one host
-v num set verbosity (debug) level (default $debug)
-i update Hyper Estraier full text index
-j update full text, don't check existing files
-f don't do anything with full text index

Option -j is variation on -i. It will allow faster initial creation
of full-text index from existing database.

Option -f will create database which is out of sync with full text index. You
will have to re-run $0 with -i to fix it.

EOF
    exit 1;
}

if ($opt{v}) {
    print "Debug level at $opt{v}\n";
    $debug = $opt{v};
}

# -f is independent of -v: it must take effect even when a debug level is set.
if ($opt{f}) {
    print "WARNING: disabling full-text index update. You need to re-run $0 -j !\n";
    $index_node_url = undef;
}
91
92 #---- subs ----
93
# Format a duration given in seconds as "MM:SS", prefixed with "<hours>h"
# when it lasts an hour or more (e.g. 3661 -> "1h01:01").
#
# Hours are not wrapped at 24, so a 25 hour duration prints as "25h00:00".
# Fractional seconds are truncated. An undefined, zero or negative duration
# yields "00:00"; the sub always returns exactly one string, so it is safe
# to use inside a printf() argument list.
sub fmt_time {
    my $t = shift;
    $t = 0 if (!defined($t) || $t < 0);
    $t = int($t);

    my $hh = int($t / 3600);
    my $mm = int(($t % 3600) / 60);
    my $ss = $t % 60;

    my $out = "";
    $out .= "${hh}h" if ($hh);
    $out .= sprintf("%02d:%02d", $mm, $ss);
    return $out;
}
102
# Return the current local time as a string formatted according to $t_fmt.
sub curr_time {
    my @now = localtime();
    return strftime($t_fmt, @now);
}
106
# Handle of the Hyper Estraier node; created by the first hest_update() call
# that needs it and reused afterwards.
my $hest_node;

# Set once the "not enabled" notice has been printed, so it appears only once.
my $hest_disabled_notice_shown;

# Return BackupPC::Lib::timeStamp($t) with its first whitespace character
# replaced by "T". Returns nothing for a false $t.
sub fmt_date {
    my $t = shift || return;
    my $iso = BackupPC::Lib::timeStamp($t);
    $iso =~ s/\s/T/;
    return $iso;
}

# Push file records from the database into the Hyper Estraier index.
#
# Arguments (all optional):
#   $host_id, $share_id, $num - when all three are defined, only files of
#                               that host, share and backup number are
#                               processed; otherwise every file is.
#
# Rows are read in chunks of EST_CHUNK. Unless -j was given, each file is
# first looked up in the index by its URI. Prints progress on STDOUT and
# returns nothing useful. When no index URL is configured (or -f disabled
# it) the sub prints a notice once and returns without doing anything.
sub hest_update {

    my ($host_id, $share_id, $num) = @_;

    my $skip_check = $opt{j} && print STDERR "Skipping check for existing files -- this should be used only with initital import\n";

    unless ($index_node_url) {
        # Indexing is disabled. Earlier code set the URL to 0 here and then
        # died on the next call; return quietly on every call instead.
        print STDERR "HyperEstraier support not enabled in configuration\n"
            unless ($hest_disabled_notice_shown++);
        $index_node_url = 0;
        return;
    }

    print curr_time," updating Hyper Estraier:";

    my $t = time();

    my $offset = 0;
    my $added = 0;

    print " opening index $index_node_url";
    $hest_node ||= Search::Estraier::Node->new(
        url => $index_node_url,
        user => 'admin',
        passwd => 'admin',
        croak_on_error => 1,
    );
    print " via node URL";

    # The filter does not change between chunks, so build it once.
    my $where = '';
    my @data;
    if (defined($host_id) && defined($share_id) && defined($num)) {
        $where = qq{
            WHERE
                hosts.id = ? AND
                shares.id = ? AND
                files.backupnum = ?
        };
        @data = ( $host_id, $share_id, $num );
    }

    my $results = 0;

    while (1) {

        my $limit = sprintf('LIMIT '.EST_CHUNK.' OFFSET %d', $offset);

        # ORDER BY makes LIMIT/OFFSET paging deterministic; without it the
        # database may return rows in a different order for each chunk, so
        # rows could be repeated or skipped.
        my $sel = $dbh->prepare(qq{
            SELECT
                files.id AS fid,
                hosts.name AS hname,
                shares.name AS sname,
                -- shares.share AS sharename,
                files.backupnum AS backupnum,
                -- files.name AS filename,
                files.path AS filepath,
                files.date AS date,
                files.type AS type,
                files.size AS size,
                files.shareid AS shareid,
                backups.date AS backup_date
            FROM files
                INNER JOIN shares ON files.shareID=shares.ID
                INNER JOIN hosts ON hosts.ID = shares.hostID
                INNER JOIN backups ON backups.num = files.backupNum and backups.hostID = hosts.ID AND backups.shareID = shares.ID
            $where
            ORDER BY files.id
            $limit
        });

        $sel->execute(@data);
        # NOTE(review): DBI does not guarantee rows() for SELECT statements
        # on every driver; this relies on the driver reporting the count
        # right after execute().
        $results = $sel->rows;

        if ($results == 0) {
            # An empty chunk after earlier chunks only means the total was
            # an exact multiple of EST_CHUNK: finish and print the summary.
            last if ($offset > 0);
            print " - no new files\n";
            return;
        }
        print " - $results files: ";

        while (my $row = $sel->fetchrow_hashref()) {

            my $fid = $row->{'fid'} || die "no fid?";
            my $uri = 'file:///' . $fid;

            unless ($skip_check) {
                # NOTE(review): the row is skipped when uri_to_id() returns
                # -1 -- confirm against the Search::Estraier documentation
                # that this is the "already indexed" result.
                my $id = $hest_node->uri_to_id($uri);
                next if ($id && $id == -1);
            }

            # create a document object
            my $doc = Search::Estraier::Document->new;

            # add attributes to the document object
            $doc->add_attr('@uri', $uri);

            foreach my $c (@{ $sel->{NAME} }) {
                print STDERR "attr $c = $row->{$c}\n" if ($debug > 2);
                $doc->add_attr($c, $row->{$c}) if (defined($row->{$c}));
            }

            #$doc->add_attr('@cdate', fmt_date($row->{'date'}));

            # add the body text to the document object; the hidden text is
            # the same path with a space inserted after every character
            my $path = $row->{'filepath'};
            $doc->add_text($path);
            $path =~ s/(.)/$1 /g;
            $doc->add_hidden_text($path);

            print STDERR $doc->dump_draft,"\n" if ($debug > 1);

            # register the document object to the database
            $hest_node->put_doc($doc);
            $added++;
        }

        print " $added";

        $offset += EST_CHUNK;

        # A short chunk means there is nothing left to read.
        last if ($results != EST_CHUNK);
    }

    my $dur = (time() - $t) || 1;
    printf(" [%.2f/s dur: %s]\n",
        ( $added / $dur ),
        fmt_time($dur)
    );
}
250
251 #---- /subs ----
252
253
## update index ##
# With -i or -j (and without -c) re-index everything already in the database.
unless ($opt{c}) {
    if ($opt{i} || $opt{j}) {
        my @reasons;
        push @reasons, "by -i flag" if ($opt{i});
        push @reasons, "by -j flag" if ($opt{j});
        print "force update of Hyper Estraier index ", @reasons, "\n";
        hest_update();
    }
}
263
## create tables ##
# With -c: create all tables, their indexes and the dvd_nr sequence, then
# commit.
if ($opt{c}) {

    # Create one index from a "table:column[,column...][:unique]" spec.
    # The index name is the spec with every run of non-word characters
    # turned into "_". Does nothing for an empty spec.
    sub do_index {
        my ($spec) = @_;
        return unless ($spec);

        my ($table, $col, $unique) = split(/:/, $spec);
        $unique = '' unless ($unique);

        (my $index_name = $spec) =~ s/\W+/_/g;
        my $marker = $unique ? "u" : "";

        print "$index_name on $table($col)$marker ";
        return $dbh->do(qq{ create $unique index $index_name on $table($col) });
    }

    print "creating tables...\n";

    my $schema_sql = qq{
        create table hosts (
            ID SERIAL PRIMARY KEY,
            name VARCHAR(30) NOT NULL,
            IP VARCHAR(15)
        );

        create table shares (
            ID SERIAL PRIMARY KEY,
            hostID INTEGER NOT NULL references hosts(id),
            name VARCHAR(30) NOT NULL,
            share VARCHAR(200) NOT NULL
        );

        create table dvds (
            ID SERIAL PRIMARY KEY,
            num INTEGER NOT NULL,
            name VARCHAR(255) NOT NULL,
            mjesto VARCHAR(255)
        );

        create table backups (
            id serial,
            hostID INTEGER NOT NULL references hosts(id),
            num INTEGER NOT NULL,
            date integer NOT NULL,
            type CHAR(4) not null,
            shareID integer not null references shares(id),
            size bigint not null,
            inc_size bigint not null default -1,
            inc_deleted boolean default false,
            parts integer not null default 1,
            PRIMARY KEY(id)
        );

        create table files (
            ID SERIAL,
            shareID INTEGER NOT NULL references shares(id),
            backupNum INTEGER NOT NULL,
            name VARCHAR(255) NOT NULL,
            path VARCHAR(255) NOT NULL,
            date integer NOT NULL,
            type INTEGER NOT NULL,
            size bigint NOT NULL,
            primary key(id)
        );

        create table archive (
            id serial,
            dvd_nr int not null,
            total_size bigint default -1,
            note text,
            username varchar(20) not null,
            date timestamp default now(),
            primary key(id)
        );

        create table archive_backup (
            archive_id int not null references archive(id) on delete cascade,
            backup_id int not null references backups(id),
            primary key(archive_id, backup_id)
        );

        create table archive_burned (
            archive_id int references archive(id),
            date timestamp default now(),
            part int not null default 1,
            copy int not null default 1,
            iso_size bigint default -1
        );

        create table backup_parts (
            id serial,
            backup_id int references backups(id),
            part_nr int not null check (part_nr > 0),
            tar_size bigint not null check (tar_size > 0),
            size bigint not null check (size > 0),
            md5 text not null,
            items int not null check (items > 0),
            date timestamp default now(),
            primary key(id)
        );
    };
    $dbh->do($schema_sql);

    print "creating indexes: ";

    my @index_specs = qw(
        hosts:name
        backups:hostID
        backups:num
        backups:shareID
        shares:hostID
        shares:name
        files:shareID
        files:path
        files:name
        files:date
        files:size
        archive:dvd_nr
        archive_burned:archive_id
        backup_parts:backup_id,part_nr
    );
    do_index($_) for (@index_specs);

    print " creating sequence: ";
    for my $seq ('dvd_nr') {
        print "$seq ";
        $dbh->do( qq{ CREATE SEQUENCE $seq } );
    }

    print "...\n";

    $dbh->commit;

}
394
## delete data before inserting ##
# With -d: empty the listed tables, in this order, then commit.
if ($opt{d}) {
    my @tables = qw(files dvds backups shares hosts);

    print "deleting ";
    for my $table (@tables) {
        print "$table ";
        $dbh->do(qq{ DELETE FROM $table });
    }
    print " done...\n";

    $dbh->commit;
}
406
## insert new values ##
# Walk every host known to BackupPC, every backup of that host and every
# share of that backup; import shares that are not in the database yet and
# push the newly imported files to the full-text index.

# get hosts
$hosts = $bpc->HostInfoRead();
my $hostID;
my $shareID;

# Prepared statements, keyed by purpose. getShareID() and found_in_db() add
# their own entries to this hash on first use.
my $sth;

$sth->{insert_hosts} = $dbh->prepare(qq{
    INSERT INTO hosts (name, IP) VALUES (?,?)
});

$sth->{hosts_by_name} = $dbh->prepare(qq{
    SELECT ID FROM hosts WHERE name=?
});

$sth->{backups_count} = $dbh->prepare(qq{
    SELECT COUNT(*)
    FROM backups
    WHERE hostID=? AND num=? AND shareid=?
});

# size is inserted as -1 and filled in once the share has been walked
$sth->{insert_backups} = $dbh->prepare(qq{
    INSERT INTO backups (hostID, num, date, type, shareid, size)
    VALUES (?,?,?,?,?,-1)
});

$sth->{update_backups_size} = $dbh->prepare(qq{
    UPDATE backups SET size = ?
    WHERE hostID = ? and num = ? and date = ? and type =? and shareid = ?
});

$sth->{insert_files} = $dbh->prepare(qq{
    INSERT INTO files
        (shareID, backupNum, name, path, date, type, size)
    VALUES (?,?,?,?,?,?,?)
});

my @hosts = keys %{$hosts};
my $host_nr = 0;

foreach my $host_key (@hosts) {

    my $hostname = $hosts->{$host_key}->{'host'} || die "can't find host for $host_key";

    $sth->{hosts_by_name}->execute($hostname);

    # look the host up, inserting it first if it is not known yet
    unless (($hostID) = $sth->{hosts_by_name}->fetchrow_array()) {
        $sth->{insert_hosts}->execute(
            $hostname,
            $hosts->{$host_key}->{'ip'}
        );

        $hostID = $dbh->last_insert_id(undef,undef,'hosts',undef);
    }

    $host_nr++;
    print "host ", $hostname, " [",
        $host_nr, "/", scalar(@hosts), "]: ";

    # get backups for a host
    my @backups = $bpc->BackupInfoRead($hostname);
    my $incs = scalar @backups;
    print "$incs increments\n";

    my $inc_nr = 0;
    # the seen-files cache is kept per host
    $beenThere = {};

    foreach my $backup (@backups) {

        $inc_nr++;
        last if ($opt{m} && $inc_nr > $opt{m});

        my $backupNum = $backup->{'num'};

        # "|| '00:00'" forces a single value: an older fmt_time() returns an
        # empty list for a zero duration, which would shift printf arguments.
        printf("%-10s %2d/%-2d #%-2d %s %5s/%5s files (date: %s dur: %s)\n",
            $hostname,
            $inc_nr, $incs, $backupNum,
            $backup->{type} || '?',
            $backup->{nFilesNew} || '?', $backup->{nFiles} || '?',
            strftime($t_fmt,localtime($backup->{startTime})),
            fmt_time($backup->{endTime} - $backup->{startTime}) || '00:00'
        );

        my $files = BackupPC::View->new($bpc, $hostname, \@backups, 1);
        foreach my $share ($files->shareList($backupNum)) {

            my $t = time();

            $shareID = getShareID($share, $hostID, $hostname);

            $sth->{backups_count}->execute($hostID, $backupNum, $shareID);
            my ($count) = $sth->{backups_count}->fetchrow_array();
            # skip if already in database!
            next if ($count > 0);

            # dump some log
            print curr_time," ", $share;

            $sth->{insert_backups}->execute(
                $hostID,
                $backupNum,
                $backup->{'endTime'},
                substr($backup->{'type'},0,4),
                $shareID,
            );

            my ($f, $nf, $d, $nd, $size) = recurseDir($bpc, $hostname, $files, $backupNum, $share, "", $shareID);

            # the trailing 1 makes success detectable independently of $@
            my $committed = eval {
                $sth->{update_backups_size}->execute(
                    $size,
                    $hostID,
                    $backupNum,
                    $backup->{'endTime'},
                    substr($backup->{'type'},0,4),
                    $shareID,
                );
                print " commit";
                $dbh->commit();
                1;
            };
            unless ($committed) {
                my $err = $@ || "unknown error\n";
                print " rollback";
                warn "\nimport of $hostname $share #$backupNum failed: $err";
                $dbh->rollback();
                # The rolled-back files are still marked as seen in the
                # cache; forget them so later backups insert them again.
                $beenThere = {};
            }

            my $dur = (time() - $t) || 1;
            printf(" %d/%d files %d/%d dirs %0.2f MB [%.2f/s dur: %s]\n",
                $nf, $f, $nd, $d,
                ($size / 1024 / 1024),
                ( ($f+$d) / $dur ),
                fmt_time($dur)
            );

            # nothing was stored after a rollback, so there is nothing to index
            hest_update($hostID, $shareID, $backupNum) if ($committed && $nf + $nd > 0);
        }

    }
}
undef $sth;
$dbh->commit();
$dbh->disconnect();

print "total duration: ",fmt_time(time() - $start_t),"\n";

$pidfile->remove;
555
# Return the ID of the "shares" row for share $share of host $hostID,
# inserting the row first when it does not exist yet.
#
#   $share    - share name as reported by BackupPC
#   $hostID   - ID of the owning row in "hosts"
#   $hostname - host name; only used to build the "share" column of a new
#               row ("hostname/share" with repeated slashes collapsed)
#
# Declared without a prototype: the former empty prototype "()" claimed the
# sub takes no arguments although it needs three.
sub getShareID {

    my ($share, $hostID, $hostname) = @_;

    $sth->{share_id} ||= $dbh->prepare(qq{
        SELECT ID FROM shares WHERE hostID=? AND name=?
    });

    $sth->{share_id}->execute($hostID,$share);

    my ($id) = $sth->{share_id}->fetchrow_array();

    return $id if (defined($id));

    $sth->{insert_share} ||= $dbh->prepare(qq{
        INSERT INTO shares
            (hostID,name,share)
        VALUES (?,?,?)
    });

    my $drop_down = $hostname . '/' . $share;
    $drop_down =~ s#//+#/#g;

    $sth->{insert_share}->execute($hostID,$share, $drop_down);
    return $dbh->last_insert_id(undef,undef,'shares',undef);
}
582
# Check whether a file is already stored in "files"; insert it when not.
#
# Arguments: ($key, @data)
#   $key  - cache key for $beenThere
#   @data - the seven bind values of the insert_files statement:
#           shareID, backupNum, name, path, date, type, size
#
# A file counts as present when a row with the same share, path and size
# exists whose date equals the given date or differs from it by exactly
# $dst_offset in either direction.
#
# Returns the cached value when $key was seen before, otherwise 1 if the
# file was found in the database and 0 if it was not (and has been inserted).
sub found_in_db {

    my ($key, @data) = @_;

    my ($shareID, undef, undef, $path, $date, undef, $size) = @data;

    return $beenThere->{$key} if (defined($beenThere->{$key}));

    $sth->{file_in_db} ||= $dbh->prepare(qq{
        SELECT 1 FROM files
        WHERE shareID = ? and
            path = ? and
            size = ? and
            ( date = ? or date = ? or date = ? )
        LIMIT 1
    });

    my @param = ($shareID,$path,$size,$date, $date-$dst_offset, $date+$dst_offset);
    $sth->{file_in_db}->execute(@param);

    # Fetch the row instead of asking rows(): DBI does not guarantee a
    # meaningful rows() value for SELECT statements (some drivers report -1,
    # which would make every file look like it is already stored).
    my ($hit) = $sth->{file_in_db}->fetchrow_array();
    $sth->{file_in_db}->finish();
    my $rows = $hit ? 1 : 0;

    print STDERR "## found_in_db($shareID,$path,$date,$size) ",( $rows ? '+' : '-' ), join(" ",@param), "\n" if ($debug >= 3);

    $beenThere->{$key}++;

    $sth->{'insert_files'}->execute(@data) unless ($rows);
    return $rows;
}
611
####################################################
# Walk one directory of a backup, recursively, and #
# store entries that are not in the database yet.  #
####################################################
#
# Arguments:
#   $bpc       - BackupPC::Lib object (only passed on to recursive calls)
#   $hostname  - host name (used for debug output and recursion)
#   $files     - BackupPC::View object used to list directory contents
#   $backupNum - backup number being imported
#   $share     - share name
#   $dir       - directory inside the share ("" for the top level)
#   $shareID   - ID of the share's row in "shares"
#
# Returns ($nr_files, $new_files, $nr_dirs, $new_dirs, $size): total and
# new counts of files and directories below $dir, and the summed size of
# the new entries.
#
# Declared without a prototype: the former prototype asked for eight
# scalars although the sub takes seven.
sub recurseDir {

    my ($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID) = @_;

    print STDERR "\nrecurse($hostname,$backupNum,$share,$dir,$shareID)\n" if ($debug >= 1);

    my ($nr_files, $new_files, $nr_dirs, $new_dirs, $size) = (0,0,0,0,0);

    # sub-directories found here; descended into after this directory is done
    my @stack;

    print STDERR "# dirAttrib($backupNum, $share, $dir)\n" if ($debug >= 2);
    my $filesInBackup = $files->dirAttrib($backupNum, $share, $dir);

    # first, add all the entries in current directory
    foreach my $path_key (keys %{$filesInBackup}) {
        my $attr = $filesInBackup->{$path_key};
        print STDERR "# file ",Dumper($attr),"\n" if ($debug >= 3);

        my $mtime = $attr->{'mtime'};
        my $is_dir = ($attr->{'type'} == BPC_FTYPE_DIR);

        # bind values for the insert_files statement
        my @data = (
            $shareID,
            $backupNum,
            $path_key,
            $attr->{'relPath'},
            $mtime,
            $attr->{'type'},
            $attr->{'size'}
        );

        # Cache keys for this entry as-is and with its mtime shifted by the
        # DST offset in either direction.
        my @key_head = ($shareID, $dir, $path_key);
        my $key          = join(" ", @key_head, $mtime,               $attr->{'size'});
        my $key_dst_prev = join(" ", @key_head, $mtime - $dst_offset, $attr->{'size'});
        my $key_dst_next = join(" ", @key_head, $mtime + $dst_offset, $attr->{'size'});

        # New entry: not in the cache under any of the three keys and not in
        # the database (found_in_db() inserts it as a side effect).
        unless (
            defined($beenThere->{$key}) ||
            defined($beenThere->{$key_dst_prev}) ||
            defined($beenThere->{$key_dst_next}) ||
            found_in_db($key, @data)
        ) {
            print STDERR "# key: $key [", $beenThere->{$key},"]" if ($debug >= 2);

            if ($is_dir) {
                $new_dirs++;
                print STDERR " dir\n" if ($debug >= 2);
            } else {
                $new_files++;
                print STDERR " file\n" if ($debug >= 2);
            }
            $size += $attr->{'size'} || 0;
        }

        if ($is_dir) {
            $nr_dirs++;

            my $full_path = $dir . '/' . $path_key;
            push @stack, $full_path;
            print STDERR "### store to stack: $full_path\n" if ($debug >= 3);
        } else {
            $nr_files++;
        }
    }

    print STDERR "## STACK ",join(", ", @stack),"\n" if ($debug >= 2);

    # then descend into every sub-directory and add up its counters
    foreach my $subdir (@stack) {
        my ($f,$nf,$d,$nd, $s) = recurseDir($bpc, $hostname, $files, $backupNum, $share, $subdir, $shareID);
        print STDERR "# $subdir f: $f nf: $nf d: $d nd: $nd\n" if ($debug >= 1);
        $nr_files += $f;
        $new_files += $nf;
        $nr_dirs += $d;
        $new_dirs += $nd;
        $size += $s;
    }

    return ($nr_files, $new_files, $nr_dirs, $new_dirs, $size);
}
720

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26