/[BackupPC]/trunk/bin/BackupPC_updatedb
This is a repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/bin/BackupPC_updatedb

Parent Directory Parent Directory | Revision Log Revision Log


Revision 242 - (show annotations)
Fri Nov 18 19:40:47 2005 UTC (18 years, 6 months ago) by dpavlin
File size: 16833 byte(s)
insert first backup and update size at end

#!/usr/local/bin/perl -w

# BackupPC_updatedb -- walk every BackupPC host/backup/share and mirror the
# file metadata into a SQL database (and optionally a Hyper Estraier
# full-text index) used by the BackupPC search interface.

use strict;
use lib "__INSTALLDIR__/lib";

use DBI;
use BackupPC::Lib;
use BackupPC::View;
use Data::Dumper;
use Getopt::Std;
use Time::HiRes qw/time/;
use File::Pid;
use POSIX qw/strftime/;
use BackupPC::SearchLib;

# BackupPC file-type constant for directories (matches BackupPC's attrib type).
use constant BPC_FTYPE_DIR => 5;
# Page size for the full-text index update (LIMIT/OFFSET stepping).
use constant EST_CHUNK => 100000;

my $debug = 0;
$|=1;   # unbuffered STDOUT so progress output appears immediately

my $start_t = time();

# Single-instance guard: refuse to start if another copy holds the pid file.
my $pidfile = new File::Pid;

if (my $pid = $pidfile->running ) {
die "$0 already running: $pid\n";
} elsif ($pidfile->pid ne $$) {
# stale pid file from a dead process -- replace it with our own
$pidfile->remove;
$pidfile = new File::Pid;
}
$pidfile->write;
print STDERR "$0 using pid ",$pidfile->pid," file ",$pidfile->file,"\n";

# Timestamp format used for all log lines (see curr_time()).
my $t_fmt = '%Y-%m-%d %H:%M:%S';

my $hosts;
my $bpc = BackupPC::Lib->new || die;
my %Conf = $bpc->Conf();
my $TopDir = $bpc->TopDir();
# In-memory dedup cache of already-seen file keys; reset per host in the main loop.
my $beenThere = {};

# Database connection info comes from BackupPC's config.pl.
my $dsn = $Conf{SearchDSN} || die "Need SearchDSN in config.pl\n";
my $user = $Conf{SearchUser} || '';

# Hyper Estraier index location: either a path under $TopDir or a node URL.
my $use_hest = $Conf{HyperEstraierIndex};
my ($index_path, $index_node_url) = BackupPC::SearchLib::getHyperEstraier_url($use_hest);

# AutoCommit off: work is committed explicitly per share / per DDL batch.
my $dbh = DBI->connect($dsn, $user, "", { RaiseError => 1, AutoCommit => 0 });

my %opt;

if ( !getopts("cdm:v:ij", \%opt ) ) {
print STDERR <<EOF;
usage: $0 [-c|-d] [-m num] [-v|-v level] [-i]

Options:
-c create database on first use
-d delete database before import
-m num import just num increments for one host
-v num set verbosity (debug) level (default $debug)
-i update Hyper Estraier full text index
-j update full text, don't check existing files

Option -j is variation on -i. It will allow faster initial creation
of full-text index from existing database.

EOF
exit 1;
}

if ($opt{v}) {
print "Debug level at $opt{v}\n";
$debug = $opt{v};
}
78
# fmt_time($seconds) -- format a duration as "MM:SS", prefixed with "Hh"
# when the duration reaches a full hour (e.g. 125 -> "02:05", 3725 -> "1h02:05").
# Returns undef for a zero/false argument.
#
# Fix: the original took the hour from gmtime(), which wraps at 24h, so a
# 25-hour duration printed as "1h00:00".  Hours are now derived from the
# total second count.
sub fmt_time {
    my $t = shift || return;
    my $out = "";
    my ($ss, $mm) = ( gmtime($t) )[0, 1];   # second/minute within the hour
    my $hh = int( $t / 3600 );              # full hours, no 24h wrap
    $out .= "${hh}h" if ($hh);
    $out .= sprintf("%02d:%02d", $mm, $ss);
    return $out;
}
87
# curr_time() -- current local time rendered with the script-wide $t_fmt
# pattern ('%Y-%m-%d %H:%M:%S'); used as the prefix of progress log lines.
sub curr_time {
    my @now = localtime();
    return strftime( $t_fmt, @now );
}
91
# Hyper Estraier handles, shared between hest_update() and the signal
# handler below.  $hest_db is set only when writing the index directly.
my $hest_db;
my $hest_node;

# Handler for SIGINT/SIGQUIT: flush and close the full-text index (if one
# is open) so an interrupted run does not leave it corrupt, then exit.
sub signal {
    my $sig = shift;
    if ($hest_db) {
        print "\nCaught a SIG$sig--syncing database and shutting down\n";
        $hest_db->sync();
        $hest_db->close();
    }
    exit(0);
}

$SIG{INT}  = \&signal;
$SIG{QUIT} = \&signal;
107
# hest_update( [$host_id, $share_id, $num] )
#
# Push file metadata from the SQL `files` table into the Hyper Estraier
# full-text index.  With no arguments all rows are considered; with
# ($host_id, $share_id, $num) only the rows belonging to that one backup.
# Rows are fetched in EST_CHUNK-sized pages via LIMIT/OFFSET.
#
# Writes either directly to a local index ($TopDir . $index_path) or
# through a node URL; uses the script globals $use_hest, $index_path,
# $index_node_url, $dbh, $hest_db, $hest_node, $TopDir and %opt.
sub hest_update {

	my ($host_id, $share_id, $num) = @_;

	# -j: trust the database and skip the per-URI "already indexed" lookup.
	# (The && chains print's true return value into $skip_check.)
	my $skip_check = $opt{j} && print STDERR "Skipping check for existing files -- this should be used only with initital import\n";

	unless ($use_hest) {
		print STDERR "HyperEstraier support not enabled in configuration\n";
		return;
	}

	print curr_time," updating HyperEstraier:";

	my $t = time();

	my $offset = 0;   # paging cursor into the SELECT result set
	my $added = 0;    # documents actually written to the index

	# Open the index: direct database access takes precedence over node URL.
	print " opening index $use_hest";
	if ($index_path) {
		$hest_db = HyperEstraier::Database->new();
		$hest_db->open($TopDir . $index_path, $HyperEstraier::Database::DBWRITER | $HyperEstraier::Database::DBCREAT);
		print " directly";
	} elsif ($index_node_url) {
		$hest_node ||= HyperEstraier::Node->new($index_node_url);
		# NOTE(review): credentials are hard-coded -- presumably matches the
		# estmaster default setup; confirm against the node configuration.
		$hest_node->set_auth('admin', 'admin');
		print " via node URL";
	} else {
		die "don't know how to use HyperEstraier Index $use_hest";
	}
	print " increment is " . EST_CHUNK . " files:";

	my $results = 0;

	do {

		# Restrict to a single backup when the three IDs were supplied.
		my $where = '';
		my @data;
		if (defined($host_id) && defined($share_id) && defined($num)) {
			$where = qq{
			WHERE
				hosts.id = ? AND
				shares.id = ? AND
				files.backupnum = ?
			};
			@data = ( $host_id, $share_id, $num );
		}

		my $limit = sprintf('LIMIT '.EST_CHUNK.' OFFSET %d', $offset);

		my $sth = $dbh->prepare(qq{
			SELECT
				files.id			AS fid,
				hosts.name			AS hname,
				shares.name			AS sname,
				-- shares.share			AS sharename,
				files.backupnum			AS backupnum,
				-- files.name			AS filename,
				files.path			AS filepath,
				files.date			AS date,
				files.type			AS type,
				files.size			AS size,
				files.shareid			AS shareid,
				backups.date			AS backup_date
			FROM files
				INNER JOIN shares	ON files.shareID=shares.ID
				INNER JOIN hosts	ON hosts.ID = shares.hostID
				INNER JOIN backups	ON backups.num = files.backupNum and backups.hostID = hosts.ID AND backups.shareID = shares.ID
			$where
			$limit
		});

		$sth->execute(@data);
		# NOTE(review): ->rows after execute() on a SELECT is driver-
		# dependent (works with DBD::Pg, may be -1 elsewhere) -- confirm
		# if this script is ever pointed at another driver.
		$results = $sth->rows;

		if ($results == 0) {
			print " - no new files\n";
			last;
		}

		# Helper: BackupPC timestamp -> ISO-8601-ish ("T" separator).
		# Currently unused -- the @cdate line below is commented out.
		sub fmt_date {
			my $t = shift || return;
			my $iso = BackupPC::Lib::timeStamp($t);
			$iso =~ s/\s/T/;
			return $iso;
		}

		while (my $row = $sth->fetchrow_hashref()) {

			# Document URI is derived from the files.id primary key.
			my $fid = $row->{'fid'} || die "no fid?";
			my $uri = 'file:///' . $fid;

			# Skip rows already present in the index (unless -j).
			unless ($skip_check) {
				my $id = ($hest_db || $hest_node)->uri_to_id($uri);
				next unless ($id == -1);
			}

			# create a document object
			my $doc = HyperEstraier::Document->new;

			# add attributes to the document object
			$doc->add_attr('@uri', $uri);

			# Every selected column becomes a document attribute.
			foreach my $c (@{ $sth->{NAME} }) {
				$doc->add_attr($c, $row->{$c}) if (defined($row->{$c}));
			}

			#$doc->add_attr('@cdate', fmt_date($row->{'date'}));

			# add the body text to the document object
			# The path is indexed twice: verbatim, and space-separated
			# per character (hidden) so substring-ish searches match.
			my $path = $row->{'filepath'};
			$doc->add_text($path);
			$path =~ s/(.)/$1 /g;
			$doc->add_hidden_text($path);

			print STDERR $doc->dump_draft,"\n" if ($debug > 1);

			# register the document object to the database
			if ($hest_db) {
				$hest_db->put_doc($doc, $HyperEstraier::Database::PDCLEAN);
			} elsif ($hest_node) {
				$hest_node->put_doc($doc);
			} else {
				die "not supported";
			}
			$added++;
		}

		print " $added";
		# Flush the direct-access index after each chunk.
		$hest_db->sync() if ($index_path);

		$offset += EST_CHUNK;

	# A short page means we have drained the result set.
	} while ($results == EST_CHUNK);

	if ($index_path) {
		print ", close";
		$hest_db->close();
	}

	my $dur = (time() - $t) || 1;   # avoid division by zero on sub-second runs
	printf(" [%.2f/s dur: %s]\n",
		( $added / $dur ),
		fmt_time($dur)
	);
}
254
255 #---- /subs ----
256
257
## update index ##
# Run a full index update when -i or -j was given, or when a direct-access
# index is configured but does not exist yet.  Skipped under -c (the tables
# are only being created, there is nothing to index).
#
# Fix: the "importing existing data" message used `-e $TopDir . $index_path`
# unguarded, which concatenates undef (and warns under -w) when the index is
# reached via a node URL; it is now guarded on $index_path like the
# condition above it.
if (($opt{i} || $opt{j} || ($index_path && ! -e $TopDir . $index_path)) && !$opt{c}) {
    # update all
    print "force update of HyperEstraier index ";
    print "importing existing data" if ($index_path && ! -e $TopDir . $index_path);
    print "by -i flag" if ($opt{i});
    print "by -j flag" if ($opt{j});
    print "\n";
    hest_update();
}
268
## create tables ##
# -c: create the schema from scratch (tables, indexes, sequence).
# The DDL below targets PostgreSQL (SERIAL, sequences, timestamp/now()).
if ($opt{c}) {
	# do_index("table:col[,col][:unique]") -- create an index from a compact
	# spec string.  The third colon-separated field, when present, is placed
	# verbatim before "index" (i.e. "unique").  The index name is the spec
	# with all non-word characters squashed to underscores.
	sub do_index {
		my $index = shift || return;
		my ($table,$col,$unique) = split(/:/, $index);
		$unique ||= '';
		$index =~ s/\W+/_/g;
		print "$index on $table($col)" . ( $unique ? "u" : "" ) . " ";
		$dbh->do(qq{ create $unique index $index on $table($col) });
	}

	print "creating tables...\n";

	$dbh->do( qq{
		create table hosts (
			ID	SERIAL		PRIMARY KEY,
			name	VARCHAR(30)	NOT NULL,
			IP	VARCHAR(15)
		);            

		create table shares (
			ID	SERIAL		PRIMARY KEY,
			hostID	INTEGER		NOT NULL references hosts(id),
			name	VARCHAR(30)	NOT NULL,
			share	VARCHAR(200)	NOT NULL
		);            

		create table dvds (
			ID	SERIAL		PRIMARY KEY, 
			num	INTEGER		NOT NULL,
			name	VARCHAR(255)	NOT NULL,
			mjesto	VARCHAR(255)
		);

		create table backups (
			id	serial,
			hostID	INTEGER		NOT NULL references hosts(id),
			num	INTEGER		NOT NULL,
			date	integer		NOT NULL, 
			type	CHAR(4)		not null,
			shareID	integer		not null references shares(id),
			size	bigint		not null,
			inc_size bigint		not null default -1,
			inc_deleted boolean	default false,
			parts	integer		not null default 1,
			PRIMARY KEY(id)
		);            

		create table files (
			ID	SERIAL,
			shareID	INTEGER		NOT NULL references shares(id),
			backupNum  INTEGER	NOT NULL,
			name       VARCHAR(255) NOT NULL,
			path       VARCHAR(255) NOT NULL,
			date       integer	NOT NULL,
			type       INTEGER	NOT NULL,
			size       bigint	NOT NULL,
			primary key(id)
		);

		create table archive (
			id		serial,
			dvd_nr		int not null,
			total_size	bigint default -1,
			note		text,
			username	varchar(20) not null,
			date		timestamp default now(),
			primary key(id)
		);      

		create table archive_backup (
			archive_id	int not null references archive(id) on delete cascade,
			backup_id	int not null references backups(id),
			primary key(archive_id, backup_id)
		);

		create table archive_burned (
			archive_id	int references archive(id),
			date		timestamp default now(),
			part		int not null default 1,
			copy		int not null default 1,
			iso_size	bigint default -1
		);

		create table backup_parts (
			id serial,
			backup_id int references backups(id),
			part_nr int not null check (part_nr > 0),
			tar_size bigint not null check (tar_size > 0),
			size bigint not null check (size > 0),
			md5 text not null,
			items int not null check (items > 0),
			date timestamp default now(),
			primary key(id)
		);
	});

	print "creating indexes: ";

	# Specs are table:column(s); note the composite index on backup_parts.
	foreach my $index (qw(
		hosts:name
		backups:hostID
		backups:num
		backups:shareID
		shares:hostID
		shares:name
		files:shareID
		files:path
		files:name
		files:date
		files:size
		archive:dvd_nr
		archive_burned:archive_id
		backup_parts:backup_id,part_nr
	)) {
		do_index($index);
	}

	# Stand-alone sequence for allocating DVD numbers.
	print " creating sequence: ";
	foreach my $seq (qw/dvd_nr/) {
		print "$seq ";
		$dbh->do( qq{ CREATE SEQUENCE $seq } );
	}


	print "...\n";

	$dbh->commit;

}
399
## wipe existing data before re-import (-d flag) ##
# Tables are emptied child-first (files ... hosts) so that foreign-key
# references are never left dangling mid-run.
if ($opt{d}) {
    print "deleting ";
    for my $table (qw(files dvds backups shares hosts)) {
        print "$table ";
        $dbh->do(qq{ DELETE FROM $table });
    }
    print " done...\n";

    $dbh->commit;
}
411
## insert new values ##

# get hosts
# Host list comes from BackupPC's own config (hosts file).
$hosts = $bpc->HostInfoRead();
my $hostID;
my $shareID;

# $sth starts undef and autovivifies into a hashref holding all prepared
# statement handles, keyed by purpose.  It is undef'd before disconnect.
my $sth;

$sth->{insert_hosts} = $dbh->prepare(qq{
INSERT INTO hosts (name, IP) VALUES (?,?)
});

$sth->{hosts_by_name} = $dbh->prepare(qq{
SELECT ID FROM hosts WHERE name=?
});

# Used to skip backups that were imported by a previous run.
$sth->{backups_count} = $dbh->prepare(qq{
SELECT COUNT(*)
FROM backups
WHERE hostID=? AND num=? AND shareid=?
});

# Size is inserted as -1 and fixed up by update_backups_size after the
# share has been fully recursed.
$sth->{insert_backups} = $dbh->prepare(qq{
INSERT INTO backups (hostID, num, date, type, shareid, size)
VALUES (?,?,?,?,?,-1)
});

$sth->{update_backups_size} = $dbh->prepare(qq{
UPDATE backups SET size = ?
WHERE hostID = ? and num = ? and date = ? and type =? and shareid = ?
});

$sth->{insert_files} = $dbh->prepare(qq{
INSERT INTO files
	(shareID, backupNum, name, path, date, type, size)
	VALUES (?,?,?,?,?,?,?)
});
450
# Main import loop: for every configured host, walk its backups and every
# share inside each backup, inserting hosts/backups/files rows as needed.
foreach my $host_key (keys %{$hosts}) {

	my $hostname = $hosts->{$host_key}->{'host'} || die "can't find host for $host_key";

	$sth->{hosts_by_name}->execute($hosts->{$host_key}->{'host'});

	# Insert the host row on first sight, otherwise reuse its ID.
	unless (($hostID) = $sth->{hosts_by_name}->fetchrow_array()) {
		$sth->{insert_hosts}->execute(
			$hosts->{$host_key}->{'host'},
			$hosts->{$host_key}->{'ip'}
		);

		$hostID = $dbh->last_insert_id(undef,undef,'hosts',undef);
	}

	print "host ".$hosts->{$host_key}->{'host'}.": ";
 
	# get backups for a host
	my @backups = $bpc->BackupInfoRead($hostname);
	my $incs = scalar @backups;
	print  "$incs increments\n";

	my $inc_nr = 0;
	# Reset the dedup cache per host so it does not grow unbounded.
	$beenThere = {};

	foreach my $backup (@backups) {

		# -m N: import at most N increments per host.
		$inc_nr++;
		last if ($opt{m} && $inc_nr > $opt{m});

		my $backupNum = $backup->{'num'};
		# NOTE(review): @backupShares is never used below -- leftover.
		my @backupShares = ();

		printf("%-10s %2d/%-2d #%-2d %s %5s/%5s files (date: %s dur: %s)\n", 
			$hosts->{$host_key}->{'host'},
			$inc_nr, $incs, $backupNum, 
			$backup->{type} || '?',
			$backup->{nFilesNew} || '?', $backup->{nFiles} || '?',
			strftime($t_fmt,localtime($backup->{startTime})),
			fmt_time($backup->{endTime} - $backup->{startTime})
		);

		my $files = BackupPC::View->new($bpc, $hostname, \@backups, 1);
		foreach my $share ($files->shareList($backupNum)) {

			my $t = time();

			$shareID = getShareID($share, $hostID, $hostname);
        
			# skip if allready in database!
			$sth->{backups_count}->execute($hostID, $backupNum, $shareID);
			my ($count) = $sth->{backups_count}->fetchrow_array();
			next if ($count > 0);

			# dump some log
			print curr_time," ", $share;

			# Size -1 placeholder; corrected after the recursion below.
			$sth->{insert_backups}->execute(
				$hostID,
				$backupNum,
				$backup->{'endTime'},
				substr($backup->{'type'},0,4),
				$shareID,
			);

			my ($f, $nf, $d, $nd, $size) = recurseDir($bpc, $hostname, $files, $backupNum, $share, "", $shareID);

			# Key must match the insert above exactly (incl. truncated type).
			$sth->{update_backups_size}->execute(
				$size,
				$hostID,
				$backupNum,
				$backup->{'endTime'},
				substr($backup->{'type'},0,4),
				$shareID,
			);

			# One transaction per share: commit files + backup row together.
			print " commit";
			$dbh->commit();

			my $dur = (time() - $t) || 1;
			printf(" %d/%d files %d/%d dirs %0.2f MB [%.2f/s dur: %s]\n",
				$nf, $f, $nd, $d,
				($size / 1024 / 1024),
				( ($f+$d) / $dur ),
				fmt_time($dur)
			);

			# Index only this backup's rows if anything new was inserted.
			hest_update($hostID, $shareID, $backupNum) if ($nf + $nd > 0);
		}

	}
}
# Release statement handles before disconnect to avoid DBI warnings about
# active handles.
undef $sth;
$dbh->commit();
$dbh->disconnect();

print "total duration: ",fmt_time(time() - $start_t),"\n";

$pidfile->remove;
550
# getShareID($share, $hostID, $hostname)
#
# Return the database ID of the given share for $hostID, inserting the
# shares row on first sight.  The `share` column stores a display label
# "hostname/share" with duplicate slashes collapsed.
#
# Fix: the original was declared `sub getShareID()` -- an *empty* prototype
# on a three-argument sub.  Perl prototypes are parser hints, not argument
# checks (it was never enforced here anyway because the call sites precede
# the definition), so the bogus prototype is dropped.
sub getShareID {

	my ($share, $hostID, $hostname) = @_;

	$sth->{share_id} ||= $dbh->prepare(qq{
		SELECT ID FROM shares WHERE hostID=? AND name=?
	});

	$sth->{share_id}->execute($hostID,$share);

	my ($id) = $sth->{share_id}->fetchrow_array();

	return $id if (defined($id));

	$sth->{insert_share} ||= $dbh->prepare(qq{
		INSERT INTO shares
			(hostID,name,share) 
		VALUES (?,?,?)
	});

	# Human-readable label used by the web UI drop-down.
	my $drop_down = $hostname . '/' . $share;
	$drop_down =~ s#//+#/#g;

	$sth->{insert_share}->execute($hostID,$share, $drop_down);
	return $dbh->last_insert_id(undef,undef,'shares',undef);
}
577
# found_in_db($key, @insert_files_args)
#
# Despite the name, this both checks and inserts: it returns true when a
# matching row already exists in `files`, and otherwise inserts the row
# using @insert_files_args (the bind list for the insert_files statement:
# shareID, backupNum, name, path, date, type, size).  $key is the caller's
# in-memory dedup cache key; it is marked seen in $beenThere either way.
sub found_in_db {

	my @data = @_;
	shift @data;   # drop $key -- @data is now exactly the insert bind list

	my ($key, $shareID,undef,$name,$path,$date,undef,$size) = @_;

	# Fast path: already decided for this key during this host's run.
	return $beenThere->{$key} if (defined($beenThere->{$key}));

	$sth->{file_in_db} ||= $dbh->prepare(qq{
		SELECT 1 FROM files
		WHERE shareID = ? and
			path = ? and 
			date = ? and
			size = ?
		LIMIT 1
	});

	my @param = ($shareID,$path,$date,$size);
	$sth->{file_in_db}->execute(@param);
	# NOTE(review): ->rows after a SELECT is driver-dependent (reliable with
	# DBD::Pg, may be -1 with other drivers) -- confirm if the DSN changes.
	my $rows = $sth->{file_in_db}->rows;
	print STDERR "## found_in_db($shareID,$path,$date,$size) ",( $rows ? '+' : '-' ), join(" ",@param), "\n" if ($debug >= 3);

	$beenThere->{$key}++;

	# Insert the row when no existing match was found.
	$sth->{'insert_files'}->execute(@data) unless ($rows);
	return $rows;
}
606
####################################################
# recursing through filesystem structure and       #
# and returning flattened files list               #
####################################################
# recurseDir($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID)
#
# Walk one directory of a backup share: insert every not-yet-seen entry via
# found_in_db(), then recurse into subdirectories collected on an explicit
# stack.  Entries whose mtime differs by exactly +/- 1h from an already-seen
# entry are treated as duplicates (daylight-saving-time shift).
#
# Returns ($nr_files, $new_files, $nr_dirs, $new_dirs, $size) where $size
# counts bytes of newly considered entries only.
#
# Fixes: the original carried the prototype ($$$$$$$$) -- eight scalar slots
# for a seven-argument sub; Perl prototypes are not argument validation and
# this one was simply wrong, so it is removed.  The stack loop now tests
# defined() so a path that stringifies falsy cannot end the walk early.
sub recurseDir {

	my ($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID) = @_;

	print STDERR "\nrecurse($hostname,$backupNum,$share,$dir,$shareID)\n" if ($debug >= 1);

	my ($nr_files, $new_files, $nr_dirs, $new_dirs, $size) = (0,0,0,0,0);

	{ # scope
		my @stack;   # subdirectories of $dir, visited after all entries

		print STDERR "# dirAttrib($backupNum, $share, $dir)\n" if ($debug >= 2);
		my $filesInBackup = $files->dirAttrib($backupNum, $share, $dir);

		# first, add all the entries in current directory
		foreach my $path_key (keys %{$filesInBackup}) {
			print STDERR "# file ",Dumper($filesInBackup->{$path_key}),"\n" if ($debug >= 3);
			# Bind list for insert_files (via found_in_db).
			my @data = (
				$shareID,
				$backupNum,
				$path_key,
				$filesInBackup->{$path_key}->{'relPath'},
				$filesInBackup->{$path_key}->{'mtime'},
				$filesInBackup->{$path_key}->{'type'},
				$filesInBackup->{$path_key}->{'size'}
			);

			# In-memory dedup key: share + dir + name + mtime + size.
			my $key = join(" ", (
				$shareID,
				$dir,
				$path_key,
				$filesInBackup->{$path_key}->{'mtime'},
				$filesInBackup->{$path_key}->{'size'}
			));

			# daylight saving time change offset for 1h
			my $dst_offset = 60 * 60;

			my $key_dst_prev = join(" ", (
				$shareID,
				$dir,
				$path_key,
				$filesInBackup->{$path_key}->{'mtime'} - $dst_offset,
				$filesInBackup->{$path_key}->{'size'}
			));

			my $key_dst_next = join(" ", (
				$shareID,
				$dir,
				$path_key,
				$filesInBackup->{$path_key}->{'mtime'} + $dst_offset,
				$filesInBackup->{$path_key}->{'size'}
			));

			my $found;
			if (
				! defined($beenThere->{$key}) &&
				! defined($beenThere->{$key_dst_prev}) &&
				! defined($beenThere->{$key_dst_next}) &&
				! ($found = found_in_db($key, @data))
			) {
				print STDERR "# key: $key [", $beenThere->{$key},"]" if ($debug >= 2);

				if ($filesInBackup->{$path_key}->{'type'} == BPC_FTYPE_DIR) {
					$new_dirs++ unless ($found);
					print STDERR " dir\n" if ($debug >= 2);
				} else {
					$new_files++ unless ($found);
					print STDERR " file\n" if ($debug >= 2);
				}
				$size += $filesInBackup->{$path_key}->{'size'} || 0;
			}

			# Totals and stack maintenance happen for every entry, new or not.
			if ($filesInBackup->{$path_key}->{'type'} == BPC_FTYPE_DIR) {
				$nr_dirs++;

				my $full_path = $dir . '/' . $path_key;
				push @stack, $full_path;
				print STDERR "### store to stack: $full_path\n" if ($debug >= 3);
			} else {
				$nr_files++;
			}
		}

		print STDERR "## STACK ",join(", ", @stack),"\n" if ($debug >= 2);

		# Descend into collected subdirectories, aggregating their counts.
		while ( defined( my $subdir = shift @stack ) ) {
			my ($f,$nf,$d,$nd, $s) = recurseDir($bpc, $hostname, $files, $backupNum, $share, $subdir, $shareID);
			print STDERR "# $subdir f: $f nf: $nf d: $d nd: $nd\n" if ($debug >= 1);
			$nr_files += $f;
			$new_files += $nf;
			$nr_dirs += $d;
			$new_dirs += $nd;
			$size += $s;
		}
	}

	return ($nr_files, $new_files, $nr_dirs, $new_dirs, $size);
}
718

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26