/[BackupPC]/trunk/bin/BackupPC_updatedb
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/bin/BackupPC_updatedb

Parent Directory | Revision Log


Revision 160 - (show annotations)
Mon Oct 10 13:39:11 2005 UTC (18 years, 6 months ago) by dpavlin
File size: 15417 byte(s)
 r8423@llin:  dpavlin | 2005-10-10 15:30:15 +0200
 make delete from archives work

1 #!/usr/local/bin/perl -w
2
3 use strict;
4 use lib "__INSTALLDIR__/lib";
5
6 use DBI;
7 use BackupPC::Lib;
8 use BackupPC::View;
9 use Data::Dumper;
10 use Getopt::Std;
11 use Time::HiRes qw/time/;
12 use File::Pid;
13 use POSIX qw/strftime/;
14 use BackupPC::SearchLib;
15
16 use constant BPC_FTYPE_DIR => 5;
17 use constant EST_CHUNK => 100000;
18
# Verbosity level used by the "if ($debug ...)" diagnostics; overridden by -v.
my $debug = 0;
$| = 1;    # turn on autoflush for the currently selected output handle

# Start of the run, used for the final "total duration" line.
my $start_t = time();

# Refuse to start while another instance is recorded as running.
my $pidfile = File::Pid->new;

if (my $pid = $pidfile->running) {
    die "$0 already running: $pid\n";
}
elsif ($pidfile->pid ne $$) {
    # The recorded pid is not ours and is not running -- presumably a stale
    # pid file, so drop it and start with a fresh object.
    $pidfile->remove;
    $pidfile = File::Pid->new;
}
$pidfile->write;
print STDERR "$0 using pid ", $pidfile->pid, " file ", $pidfile->file, "\n";

# strftime() format shared by all timestamps this script prints.
my $t_fmt = '%Y-%m-%d %H:%M:%S';

my $hosts;
my $bpc       = BackupPC::Lib->new || die;
my %Conf      = $bpc->Conf();
my $TopDir    = $bpc->TopDir();
my $beenThere = {};    # keys of file entries already handled (reset per host)

my $dsn  = $Conf{SearchDSN} || die "Need SearchDSN in config.pl\n";
my $user = $Conf{SearchUser} || '';

# Full-text index location: either a path (direct access) or a node URL.
my $use_hest = $Conf{HyperEstraierIndex};
my ($index_path, $index_node_url) = BackupPC::SearchLib::getHyperEstraier_url($use_hest);

# Errors raise exceptions; nothing is stored until an explicit commit.
my $dbh = DBI->connect($dsn, $user, "", { RaiseError => 1, AutoCommit => 0 });

my %opt;

unless (getopts("cdm:v:ij", \%opt)) {
    print STDERR <<EOF;
usage: $0 [-c|-d] [-m num] [-v|-v level] [-i]

Options:
-c create database on first use
-d delete database before import
-m num import just num increments for one host
-v num set verbosity (debug) level (default $debug)
-i update Hyper Estraier full text index
-j update full text, don't check existing files

Option -j is variation on -i. It will allow faster initial creation
of full-text index from existing database.

EOF
    exit 1;
}

if ($opt{v}) {
    print "Debug level at $opt{v}\n";
    $debug = $opt{v};
}
76
77 #---- subs ----
78
# Format a duration in seconds as "MM:SS", prefixed by "<hours>h" when the
# duration is one hour or longer (e.g. 3661 -> "1h01:01").
#
# Hours are not wrapped at 24, so long runs are reported correctly
# (90000 -> "25h00:00").  Fractional seconds are truncated and negative
# durations are clamped to zero.
#
# Returns nothing for an undefined or empty argument.  A defined zero
# yields "00:00", so the result can always be used as a printf argument.
sub fmt_time {
    my $t = shift;
    return unless defined $t && length $t;

    my $total = int($t);
    $total = 0 if $total < 0;

    my $hh = int($total / 3600);
    my $mm = int(($total % 3600) / 60);
    my $ss = $total % 60;

    my $out = "";
    $out .= "${hh}h" if ($hh);
    $out .= sprintf("%02d:%02d", $mm, $ss);
    return $out;
}
87
# Current local time rendered with the script-wide $t_fmt format.
sub curr_time {
    my @now = localtime();
    return strftime($t_fmt, @now);
}
91
# Handles for the full-text index, set by hest_update(): $hest_db when the
# index is opened directly, $hest_node when it is reached via a node URL.
my $hest_db;
my $hest_node;

# INT/QUIT handler: if an index database handle is set, sync and close it
# before exiting.  Always exits with status 0.
sub signal {
    my $sig = $_[0];

    if ($hest_db) {
        print "\nCaught a SIG$sig--syncing database and shutting down\n";
        $hest_db->sync();
        $hest_db->close();
    }

    exit(0);
}

$SIG{$_} = \&signal for qw(INT QUIT);
107
# Turn an epoch time into the BackupPC::Lib::timeStamp() string with its
# first whitespace character replaced by "T".  Returns nothing for a false
# argument.  Only referenced from a commented-out line in hest_update().
sub fmt_date {
    my $t = shift || return;
    my $iso = BackupPC::Lib::timeStamp($t);
    $iso =~ s/\s/T/;
    return $iso;
}

# Copy file records from the SQL database into the Hyper Estraier index.
#
# Arguments (all optional):
#   $host_id, $share_id, $num - when all three are defined, only files of
#       that host/share/backup number are indexed; otherwise every file row
#       is considered.
#
# Rows are read in pages of EST_CHUNK.  Unless -j was given, a row whose URI
# is already present in the index is skipped.  Progress is printed to STDOUT.
# The return value is not meaningful.
sub hest_update {

    my ($host_id, $share_id, $num) = @_;

    my $skip_check = $opt{j};
    print STDERR "Skipping check for existing files -- this should be used only with initital import\n"
        if ($skip_check);

    unless ($use_hest) {
        print STDERR "HyperEstraier support not enabled in configuration\n";
        return;
    }

    print curr_time(), " updating HyperEstraier:";

    my $t = time();

    my $offset = 0;
    my $added  = 0;

    print " opening index $use_hest";
    if ($index_path) {
        $hest_db = HyperEstraier::Database->new();
        $hest_db->open($TopDir . $index_path, $HyperEstraier::Database::DBWRITER | $HyperEstraier::Database::DBCREAT);
        print " directly";
    } elsif ($index_node_url) {
        $hest_node ||= HyperEstraier::Node->new($index_node_url);
        # NOTE(review): credentials are hard-coded here -- consider moving
        # them to the configuration.
        $hest_node->set_auth('admin', 'admin');
        print " via node URL";
    } else {
        die "don't know how to use HyperEstraier Index $use_hest";
    }
    print " increment is " . EST_CHUNK . " files:";

    # The filter does not change between pages, so build it once.
    # defined() rather than truth: backup number 0 must still select a
    # single backup instead of silently falling back to "everything".
    my $where = '';
    my @data;
    if (defined($host_id) && defined($share_id) && defined($num)) {
        $where = qq{
            WHERE
                hosts.id = ? AND
                shares.id = ? AND
                files.backupnum = ?
        };
        @data = ( $host_id, $share_id, $num );
    }

    # A real loop (not do {} while) so that "last" is valid loop control.
    CHUNK: while (1) {

        my $limit = sprintf('LIMIT ' . EST_CHUNK . ' OFFSET %d', $offset);

        # ORDER BY keeps LIMIT/OFFSET pages stable, so no row is skipped or
        # seen twice between pages.
        my $sth = $dbh->prepare(qq{
            SELECT
                files.id AS fid,
                hosts.name AS hname,
                shares.name AS sname,
                -- shares.share AS sharename,
                files.backupnum AS backupnum,
                -- files.name AS filename,
                files.path AS filepath,
                files.date AS date,
                files.type AS type,
                files.size AS size,
                files.shareid AS shareid,
                backups.date AS backup_date
            FROM files
                INNER JOIN shares ON files.shareID=shares.ID
                INNER JOIN hosts ON hosts.ID = shares.hostID
                INNER JOIN backups ON backups.num = files.backupNum and backups.hostID = hosts.ID AND backups.shareID = shares.ID
            $where
            ORDER BY files.id
            $limit
        });

        $sth->execute(@data);

        # Count rows as they are fetched; the row count of a SELECT is not
        # reliably known before fetching.
        my $results = 0;

        while (my $row = $sth->fetchrow_hashref()) {
            $results++;

            my $fid = $row->{'fid'} || die "no fid?";
            my $uri = 'file:///' . $fid;

            unless ($skip_check) {
                # -1 means the URI is not in the index yet
                my $id = ($hest_db || $hest_node)->uri_to_id($uri);
                next unless ($id == -1);
            }

            # create a document object
            my $doc = HyperEstraier::Document->new;

            # add attributes to the document object
            $doc->add_attr('@uri', $uri);

            # NOTE(review): false values (0, '') are not stored as
            # attributes, which includes backupnum 0 and size 0 -- confirm
            # that this is intended.
            foreach my $c (@{ $sth->{NAME} }) {
                $doc->add_attr($c, $row->{$c}) if ($row->{$c});
            }

            #$doc->add_attr('@cdate', fmt_date($row->{'date'}));

            # add the body text to the document object; the hidden text is
            # the same path with a space after every character
            my $path = $row->{'filepath'};
            $doc->add_text($path);
            $path =~ s/(.)/$1 /g;
            $doc->add_hidden_text($path);

            print STDERR $doc->dump_draft,"\n" if ($debug > 1);

            # register the document object to the database
            if ($hest_db) {
                $hest_db->put_doc($doc, $HyperEstraier::Database::PDCLEAN);
            } elsif ($hest_node) {
                $hest_node->put_doc($doc);
            } else {
                die "not supported";
            }
            $added++;
        }

        if ($results == 0) {
            print " - no new files\n";
            last CHUNK;
        }

        print " $added";
        $hest_db->sync() if ($index_path);

        $offset += EST_CHUNK;

        # a short page means there is nothing left to read
        last CHUNK if ($results != EST_CHUNK);
    }

    if ($index_path) {
        print ", close";
        $hest_db->close();
        # forget the closed handle so the signal handler does not touch it
        undef $hest_db;
    }

    my $dur = (time() - $t) || 1;
    printf(" [%.2f/s dur: %s]\n",
        ( $added / $dur ),
        fmt_time($dur)
    );
}
254
255 #---- /subs ----
256
257
## update index ##
# Run a full index update when requested with -i or -j, or when a directly
# accessed index does not exist yet.  Skipped when -c (create tables) is given.
#
# hest_update() opens the index at $TopDir . $index_path, so the existence
# test uses that same path.  When the index is reached through a node URL
# there is no path to test and "missing" is never assumed.
my $hest_index_file    = $index_path ? $TopDir . $index_path : undef;
my $hest_index_missing = defined($hest_index_file) && !-e $hest_index_file;

if (($opt{i} || $opt{j} || $hest_index_missing) && !$opt{c}) {
    # update all
    print "force update of HyperEstraier index ";
    print "importing existing data" if ($hest_index_missing);
    print "by -i flag" if ($opt{i});
    print "by -j flag" if ($opt{j});
    print "\n";
    hest_update();
}
268
## create tables ##

# Create one index from a "table:column[:unique]" specification.  The index
# name is the specification with every run of non-word characters replaced
# by "_" (e.g. "hosts:name" -> "hosts_name").
sub do_index {
    my $spec = shift || return;
    my ($table, $col, $unique) = split(/:/, $spec);
    $unique ||= '';
    (my $name = $spec) =~ s/\W+/_/g;
    print "$name on $table($col)" . ( $unique ? "u" : "" ) . " ";
    $dbh->do(qq{ create $unique index $name on $table($col) });
}

if ($opt{c}) {

    # Table definitions, in creation order (referenced tables first).
    my @tables = (
        qq{
            create table hosts (
                ID SERIAL PRIMARY KEY,
                name VARCHAR(30) NOT NULL,
                IP VARCHAR(15)
            );
        },
        qq{
            create table shares (
                ID SERIAL PRIMARY KEY,
                hostID INTEGER NOT NULL references hosts(id),
                name VARCHAR(30) NOT NULL,
                share VARCHAR(200) NOT NULL
            );
        },
        qq{
            create table dvds (
                ID SERIAL PRIMARY KEY,
                num INTEGER NOT NULL,
                name VARCHAR(255) NOT NULL,
                mjesto VARCHAR(255)
            );
        },
        qq{
            create table backups (
                id serial,
                hostID INTEGER NOT NULL references hosts(id),
                num INTEGER NOT NULL,
                date integer NOT NULL,
                type CHAR(4) not null,
                shareID integer not null references shares(id),
                size bigint not null,
                inc_size bigint not null default -1,
                inc_deleted boolean default false,
                PRIMARY KEY(id)
            );
        },
        qq{
            create table files (
                ID SERIAL,
                shareID INTEGER NOT NULL references shares(id),
                backupNum INTEGER NOT NULL,
                name VARCHAR(255) NOT NULL,
                path VARCHAR(255) NOT NULL,
                date integer NOT NULL,
                type INTEGER NOT NULL,
                size bigint NOT NULL,
                primary key(id)
            );
        },
        qq{
            create table archive (
                id serial,
                dvd_nr int not null,
                total_size bigint default -1,
                note text,
                username varchar(20) not null,
                date timestamp default now(),
                primary key(id)
            );
        },
        qq{
            create table archive_backup
            (
                archive_id int not null references archive(id) on delete cascade,
                backup_id int not null references backups(id),
                primary key(archive_id, backup_id)
            );
        },
    );

    # Index specifications understood by do_index().
    my @indexes = qw(
        hosts:name
        backups:hostID
        backups:num
        backups:shareID
        shares:hostID
        shares:name
        files:shareID
        files:path
        files:name
        files:date
        files:size
        archive:dvd_nr
    );

    my @sequences = qw/dvd_nr/;

    print "creating tables...\n";
    $dbh->do($_) for @tables;

    print "creating indexes: ";
    do_index($_) for @indexes;

    print " creating sequence: ";
    for my $seq (@sequences) {
        print "$seq ";
        $dbh->do( qq{ CREATE SEQUENCE $seq } );
    }

    print "...\n";

    $dbh->commit;

}
391
## delete data before inserting ##
# With -d, empty the listed tables (in this order) and commit before any
# new data is imported.
if ($opt{d}) {
    my @tables = qw(files dvds backups shares hosts);

    print "deleting ";
    for my $table (@tables) {
        print "$table ";
        $dbh->do(qq{ DELETE FROM $table });
    }
    print " done...\n";

    $dbh->commit;
}
403
## insert new values ##

# get hosts
$hosts = $bpc->HostInfoRead();
my $hostID;
my $shareID;

# Prepared statement handles keyed by purpose.  getShareID() and
# found_in_db() add their own entries to this hash on first use.
my $sth = {
    insert_hosts => $dbh->prepare(qq{
        INSERT INTO hosts (name, IP) VALUES (?,?)
    }),
    hosts_by_name => $dbh->prepare(qq{
        SELECT ID FROM hosts WHERE name=?
    }),
    backups_count => $dbh->prepare(qq{
        SELECT COUNT(*)
        FROM backups
        WHERE hostID=? AND num=? AND shareid=?
    }),
    insert_backups => $dbh->prepare(qq{
        INSERT INTO backups (hostID, num, date, type, shareid, size)
        VALUES (?,?,?,?,?,?)
    }),
    insert_files => $dbh->prepare(qq{
        INSERT INTO files
            (shareID, backupNum, name, path, date, type, size)
            VALUES (?,?,?,?,?,?,?)
    }),
};

foreach my $host_key (keys %{$hosts}) {

    my $host     = $hosts->{$host_key};
    my $hostname = $host->{'host'} || die "can't find host for $host_key";

    # look the host up by name, inserting it on first sight
    $sth->{hosts_by_name}->execute($hostname);
    my @known = $sth->{hosts_by_name}->fetchrow_array();

    if (@known) {
        ($hostID) = @known;
    } else {
        $sth->{insert_hosts}->execute($hostname, $host->{'ip'});
        $hostID = $dbh->last_insert_id(undef,undef,'hosts',undef);
    }

    print "host " . $hostname . ": ";

    # get backups for a host
    my @backups = $bpc->BackupInfoRead($hostname);
    my $incs    = scalar @backups;
    print "$incs increments\n";

    my $inc_nr = 0;
    $beenThere = {};

    foreach my $backup (@backups) {

        $inc_nr++;
        # -m limits how many increments are imported per host
        last if ($opt{m} && $inc_nr > $opt{m});

        my $backupNum = $backup->{'num'};

        printf("%-10s %2d/%-2d #%-2d %s %5s/%5s files (date: %s dur: %s)\n",
            $hostname,
            $inc_nr, $incs, $backupNum,
            $backup->{type} || '?',
            $backup->{nFilesNew} || '?', $backup->{nFiles} || '?',
            strftime($t_fmt,localtime($backup->{startTime})),
            fmt_time($backup->{endTime} - $backup->{startTime})
        );

        my $files = BackupPC::View->new($bpc, $hostname, \@backups, 1);

        foreach my $share ($files->shareList($backupNum)) {

            my $t = time();

            $shareID = getShareID($share, $hostID, $hostname);

            # skip if already in database!
            $sth->{backups_count}->execute($hostID, $backupNum, $shareID);
            my ($count) = $sth->{backups_count}->fetchrow_array();
            next if ($count > 0);

            # dump some log
            print curr_time(), " ", $share;

            my ($f, $nf, $d, $nd, $size) = recurseDir($bpc, $hostname, $files, $backupNum, $share, "", $shareID);

            $sth->{insert_backups}->execute(
                $hostID,
                $backupNum,
                $backup->{'endTime'},
                substr($backup->{'type'},0,4),
                $shareID,
                $size,
            );

            # one transaction per host/backup/share
            print " commit";
            $dbh->commit();

            my $dur = (time() - $t) || 1;
            printf(" %d/%d files %d/%d dirs %0.2f MB [%.2f/s dur: %s]\n",
                $nf, $f, $nd, $d,
                ($size / 1024 / 1024),
                ( ($f+$d) / $dur ),
                fmt_time($dur)
            );

            # index the new entries of this backup only when there are any
            hest_update($hostID, $shareID, $backupNum) if ($nf + $nd > 0);
        }

    }
}

# release the statement handles before disconnecting
undef $sth;
$dbh->commit();
$dbh->disconnect();

print "total duration: ", fmt_time(time() - $start_t), "\n";

$pidfile->remove;
529
# Return the database ID of a share, inserting the share first if needed.
#
# Arguments:
#   $share    - share name as stored in shares.name
#   $hostID   - ID of the owning row in hosts
#   $hostname - host name, used only to build the shares.share value
#               ("hostname/share" with repeated slashes collapsed)
#
# Declared without a prototype: the former empty prototype "()" contradicted
# the three arguments this sub takes.
sub getShareID {

    my ($share, $hostID, $hostname) = @_;

    $sth->{share_id} ||= $dbh->prepare(qq{
        SELECT ID FROM shares WHERE hostID=? AND name=?
    });

    $sth->{share_id}->execute($hostID,$share);

    my ($id) = $sth->{share_id}->fetchrow_array();

    return $id if (defined($id));

    # not known yet: insert it and return the newly assigned ID
    $sth->{insert_share} ||= $dbh->prepare(qq{
        INSERT INTO shares
            (hostID,name,share)
        VALUES (?,?,?)
    });

    my $drop_down = $hostname . '/' . $share;
    $drop_down =~ s#//+#/#g;

    $sth->{insert_share}->execute($hostID,$share, $drop_down);
    return $dbh->last_insert_id(undef,undef,'shares',undef);
}
556
# Check whether a file entry is already stored, inserting it when it is not.
#
# Arguments:
#   $key  - cache key for $beenThere
#   @data - (shareID, backupNum, name, path, date, type, size), exactly the
#           bind values of the insert_files statement
#
# Returns the cached $beenThere value when the key was seen before.
# Otherwise returns 1 if a row with the same shareID, path, date and size
# exists, or 0 after inserting the entry.  The key is recorded in
# $beenThere in both cases.
sub found_in_db {

    my ($key, @data) = @_;

    my ($shareID, undef, undef, $path, $date, undef, $size) = @data;

    return $beenThere->{$key} if (defined($beenThere->{$key}));

    $sth->{file_in_db} ||= $dbh->prepare(qq{
        SELECT 1 FROM files
        WHERE shareID = ? and
            path = ? and
            date = ? and
            size = ?
        LIMIT 1
    });

    my @param = ($shareID,$path,$date,$size);
    $sth->{file_in_db}->execute(@param);

    # Fetch the row instead of trusting the row count, which a driver is
    # not required to know for a SELECT before the rows are fetched.
    my ($hit) = $sth->{file_in_db}->fetchrow_array();
    $sth->{file_in_db}->finish();
    my $rows = $hit ? 1 : 0;

    print STDERR "## found_in_db($shareID,$path,$date,$size) ",( $rows ? '+' : '-' ), join(" ",@param), "\n" if ($debug >= 3);

    $beenThere->{$key}++;

    $sth->{'insert_files'}->execute(@data) unless ($rows);
    return $rows;
}
585
####################################################
# Walk one directory of a backup and everything    #
# below it, storing entries that are not yet known #
####################################################
#
# Arguments:
#   $bpc       - BackupPC::Lib object (only passed on to recursive calls)
#   $hostname  - host name (debug output and recursive calls)
#   $files     - object providing dirAttrib($backupNum, $share, $dir)
#   $backupNum - backup number
#   $share     - share name
#   $dir       - directory inside the share ("" for the top level)
#   $shareID   - database ID of the share
#
# Returns ($nr_files, $new_files, $nr_dirs, $new_dirs, $size): counts of all
# and of new file and directory entries, and the summed size of new entries.
#
# Declared without a prototype: the former one listed eight scalars for the
# seven arguments above.
sub recurseDir {

    my ($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID) = @_;

    print STDERR "\nrecurse($hostname,$backupNum,$share,$dir,$shareID)\n" if ($debug >= 1);

    my ($nr_files, $new_files, $nr_dirs, $new_dirs, $size) = (0,0,0,0,0);

    # sub-directories found here, visited after this directory is done
    my @stack;

    print STDERR "# dirAttrib($backupNum, $share, $dir)\n" if ($debug >= 2);
    my $filesInBackup = $files->dirAttrib($backupNum, $share, $dir);

    # first, add all the entries in current directory
    foreach my $path_key (keys %{$filesInBackup}) {
        my $entry = $filesInBackup->{$path_key};
        print STDERR "# file ",Dumper($entry),"\n" if ($debug >= 3);

        # bind values for the insert_files statement
        my @data = (
            $shareID,
            $backupNum,
            $path_key,
            $entry->{'relPath'},
            $entry->{'mtime'},
            $entry->{'type'},
            $entry->{'size'}
        );

        my $key = join(" ", (
            $shareID,
            $dir,
            $path_key,
            $entry->{'mtime'},
            $entry->{'size'}
        ));

        # New means: not seen during this run and not found in the database
        # (found_in_db inserts the entry in that case).
        if (! defined($beenThere->{$key}) && ! found_in_db($key, @data)) {
            print STDERR "# key: $key [", $beenThere->{$key},"]" if ($debug >= 2);

            if ($entry->{'type'} == BPC_FTYPE_DIR) {
                $new_dirs++;
                print STDERR " dir\n" if ($debug >= 2);
            } else {
                $new_files++;
                print STDERR " file\n" if ($debug >= 2);
            }
            $size += $entry->{'size'} || 0;
        }

        if ($entry->{'type'} == BPC_FTYPE_DIR) {
            $nr_dirs++;

            my $full_path = $dir . '/' . $path_key;
            push @stack, $full_path;
            print STDERR "### store to stack: $full_path\n" if ($debug >= 3);
        } else {
            $nr_files++;
        }
    }

    print STDERR "## STACK ",join(", ", @stack),"\n" if ($debug >= 2);

    # then descend into each sub-directory and add up its totals
    while ( my $subdir = shift @stack ) {
        my ($f,$nf,$d,$nd, $s) = recurseDir($bpc, $hostname, $files, $backupNum, $share, $subdir, $shareID);
        print STDERR "# $subdir f: $f nf: $nf d: $d nd: $nd\n" if ($debug >= 1);
        $nr_files += $f;
        $new_files += $nf;
        $nr_dirs += $d;
        $new_dirs += $nd;
        $size += $s;
    }

    return ($nr_files, $new_files, $nr_dirs, $new_dirs, $size);
}
673

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26