This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!

Contents of /trunk/bin/BackupPC_updatedb



Revision 119
Wed Sep 14 13:20:03 2005 UTC (18 years, 8 months ago) by dpavlin
File size: 14474 bytes
small fixes and improvements

#!/usr/local/bin/perl -w

use strict;
use lib "__INSTALLDIR__/lib";

use DBI;
use BackupPC::Lib;
use BackupPC::View;
use Data::Dumper;
use Getopt::Std;
use Time::HiRes qw/time/;
use File::Pid;
use POSIX qw/strftime/;
use BackupPC::SearchLib;

use constant BPC_FTYPE_DIR => 5;
use constant EST_CHUNK => 100000;

my $debug = 0;
$|=1;

my $start_t = time();

my $pidfile = new File::Pid;

if (my $pid = $pidfile->running ) {
    die "$0 already running: $pid\n";
} elsif ($pidfile->pid ne $$) {
    $pidfile->remove;
    $pidfile = new File::Pid;
}
$pidfile->write;
print STDERR "$0 using pid ",$pidfile->pid," file ",$pidfile->file,"\n";

my $t_fmt = '%Y-%m-%d %H:%M:%S';

my $hosts;
my $bpc = BackupPC::Lib->new || die;
my %Conf = $bpc->Conf();
my $TopDir = $bpc->TopDir();
my $beenThere = {};

my $dsn = $Conf{SearchDSN} || die "Need SearchDSN in config.pl\n";
my $user = $Conf{SearchUser} || '';

my $use_hest = $Conf{HyperEstraierIndex};
my ($index_path, $index_node_url) = BackupPC::SearchLib::getHyperEstraier_url($use_hest);

my $dbh = DBI->connect($dsn, $user, "", { RaiseError => 1, AutoCommit => 0 });

my %opt;

if ( !getopts("cdm:v:i", \%opt ) ) {
    print STDERR <<EOF;
usage: $0 [-c|-d] [-m num] [-v level] [-i]

Options:
    -c      create database on first use
    -d      delete database before import
    -m num  import just num increments for one host
    -v num  set verbosity (debug) level (default $debug)
    -i      update HyperEstraier full text index
EOF
    exit 1;
}

if ($opt{v}) {
    print "Debug level at $opt{v}\n";
    $debug = $opt{v};
}

#---- subs ----

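# fmt_time: format a duration in seconds as an optional "<h>h" prefix followed by MM:SS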
sub fmt_time {
    my $t = shift || return;
    my $out = "";
    my ($ss,$mm,$hh) = gmtime($t);
    $out .= "${hh}h" if ($hh);
    $out .= sprintf("%02d:%02d", $mm,$ss);
    return $out;
}

sub curr_time {
    return strftime($t_fmt,localtime());
}

my $hest_db;
my $hest_node;

sub signal {
    my($sig) = @_;
    if ($hest_db) {
        print "\nCaught a SIG$sig--syncing database and shutting down\n";
        $hest_db->sync();
        $hest_db->close();
    }
    exit(0);
}

$SIG{'INT'} = \&signal;
$SIG{'QUIT'} = \&signal;

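# hest_update: push file metadata from the SQL database into the HyperEstraier
# full-text index, either directly (on-disk index under $TopDir) or through a
# node URL, fetching rows in chunks of EST_CHUNK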
sub hest_update {

    my ($host_id, $share_id, $num) = @_;

    unless ($use_hest) {
        print STDERR "HyperEstraier support not enabled in configuration\n";
        return;
    }

    print curr_time," updating HyperEstraier:";

    my $t = time();

    my $offset = 0;
    my $added = 0;

    print " opening index";
    if ($index_path) {
        $hest_db = HyperEstraier::Database->new();
        $hest_db->open($TopDir . $index_path, $HyperEstraier::Database::DBWRITER | $HyperEstraier::Database::DBCREAT);
        print "$index_path directly";
    } elsif ($index_node_url) {
        $hest_node ||= HyperEstraier::Node->new($index_node_url);
        $hest_node->set_auth('admin', 'admin');
        print "$index_node_url via node URL";
    } else {
        die "don't know how to use HyperEstraier Index $use_hest";
    }
    print " increment is " . EST_CHUNK . " files:";

    my $results = 0;

    # a plain while loop (instead of do/while) so that "last" below actually
    # exits the loop; a do BLOCK is not a loop block in Perl
    while (1) {

        my $where = '';
        my @data;
        if ($host_id && $share_id && $num) {
            $where = qq{
                WHERE
                    hosts.id = ? AND
                    shares.id = ? AND
                    files.backupnum = ?
            };
            @data = ( $host_id, $share_id, $num );
        }

        my $limit = sprintf('LIMIT '.EST_CHUNK.' OFFSET %d', $offset);

        my $sth = $dbh->prepare(qq{
            SELECT
                files.id AS fid,
                hosts.name AS hname,
                shares.name AS sname,
                -- shares.share AS sharename,
                files.backupnum AS backupnum,
                -- files.name AS filename,
                files.path AS filepath,
                files.date AS date,
                files.type AS type,
                files.size AS size,
                files.shareid AS shareid,
                backups.date AS backup_date
            FROM files
                INNER JOIN shares ON files.shareID=shares.ID
                INNER JOIN hosts ON hosts.ID = shares.hostID
                INNER JOIN backups ON backups.num = files.backupNum and backups.hostID = hosts.ID AND backups.shareID = shares.ID
            $where
            $limit
        });

        $sth->execute(@data);
        $results = $sth->rows;

        if ($results == 0) {
            print " - no new files\n";
            last;
        }

        sub fmt_date {
            my $t = shift || return;
            my $iso = BackupPC::Lib::timeStamp($t);
            $iso =~ s/\s/T/;
            return $iso;
        }

        while (my $row = $sth->fetchrow_hashref()) {

            my $fid = $row->{'fid'} || die "no fid?";
            my $uri = 'file:///' . $fid;

            my $id = ($hest_db || $hest_node)->uri_to_id($uri);
            next unless ($id == -1);

            # create a document object
            my $doc = HyperEstraier::Document->new;

            # add attributes to the document object
            $doc->add_attr('@uri', $uri);

            foreach my $c (@{ $sth->{NAME} }) {
                $doc->add_attr($c, $row->{$c}) if ($row->{$c});
            }

            #$doc->add_attr('@cdate', fmt_date($row->{'date'}));

            # add the body text to the document object
            my $path = $row->{'filepath'};
            $doc->add_text($path);
            $path =~ s/(.)/$1 /g;
            $doc->add_hidden_text($path);

            print STDERR $doc->dump_draft,"\n" if ($debug > 1);

            # register the document object to the database
            if ($hest_db) {
                $hest_db->put_doc($doc, $HyperEstraier::Database::PDCLEAN);
            } elsif ($hest_node) {
                $hest_node->put_doc($doc);
            } else {
                die "not supported";
            }
            $added++;
        }

        print " $added";
        $hest_db->sync() if ($index_path);

        $offset += EST_CHUNK;

        last unless ($results == EST_CHUNK);
    }

    if ($index_path) {
        print ", close";
        $hest_db->close();
    }

    my $dur = (time() - $t) || 1;
    printf(" [%.2f/s dur: %s]\n",
        ( $added / $dur ),
        fmt_time($dur)
    );
}

#---- /subs ----


## update index ##
if (($opt{i} || ($index_path && ! -e $index_path)) && !$opt{c}) {
    # update all
    print "force update of HyperEstraier index ";
    print "importing existing data" unless (-e $index_path);
    print "by -i flag" if ($opt{i});
    print "\n";
    hest_update();
}

## create tables ##
if ($opt{c}) {
    sub do_index {
        my $index = shift || return;
        my ($table,$col,$unique) = split(/_/, $index);
        $unique ||= '';
        $index =~ s/,/_/g;
        $dbh->do(qq{ create $unique index $index on $table($col) });
    }

    print "creating tables...\n";

    $dbh->do(qq{
        create table hosts (
            ID      SERIAL      PRIMARY KEY,
            name    VARCHAR(30) NOT NULL,
            IP      VARCHAR(15)
        );
    });

    $dbh->do(qq{
        create table shares (
            ID          SERIAL       PRIMARY KEY,
            hostID      INTEGER      NOT NULL references hosts(id),
            name        VARCHAR(30)  NOT NULL,
            share       VARCHAR(200) NOT NULL,
            localpath   VARCHAR(200)
        );
    });

    $dbh->do(qq{
        create table backups (
            hostID  INTEGER NOT NULL references hosts(id),
            num     INTEGER NOT NULL,
            date    integer NOT NULL,
            type    CHAR(4) not null,
            shareID integer not null references shares(id),
            size    integer not null,
            PRIMARY KEY(hostID, num, shareID)
        );
    });

    #do_index('backups_hostid,num_unique');

    $dbh->do(qq{
        create table dvds (
            ID      SERIAL       PRIMARY KEY,
            num     INTEGER      NOT NULL,
            name    VARCHAR(255) NOT NULL,
            mjesto  VARCHAR(255)
        );
    });

    $dbh->do(qq{
        create table files (
            ID          SERIAL       PRIMARY KEY,
            shareID     INTEGER      NOT NULL references shares(id),
            backupNum   INTEGER      NOT NULL,
            name        VARCHAR(255) NOT NULL,
            path        VARCHAR(255) NOT NULL,
            date        integer      NOT NULL,
            type        INTEGER      NOT NULL,
            size        INTEGER      NOT NULL,
            dvdid       INTEGER      references dvds(id)
        );
    });

    print "creating indexes:";

    foreach my $index (qw(
        hosts_name
        backups_hostID
        backups_num
        shares_hostID
        shares_name
        files_shareID
        files_path
        files_name
        files_date
        files_size
    )) {
        print " $index";
        do_index($index);
    }
    print "...\n";

    $dbh->commit;

}

## delete data before inserting ##
if ($opt{d}) {
    print "deleting ";
    foreach my $table (qw(files dvds backups shares hosts)) {
        print "$table ";
        $dbh->do(qq{ DELETE FROM $table });
    }
    print " done...\n";

    $dbh->commit;
}

## insert new values ##

# get hosts
$hosts = $bpc->HostInfoRead();
my $hostID;
my $shareID;

my $sth;

$sth->{insert_hosts} = $dbh->prepare(qq{
    INSERT INTO hosts (name, IP) VALUES (?,?)
});

$sth->{hosts_by_name} = $dbh->prepare(qq{
    SELECT ID FROM hosts WHERE name=?
});

$sth->{backups_count} = $dbh->prepare(qq{
    SELECT COUNT(*)
    FROM backups
    WHERE hostID=? AND num=? AND shareid=?
});

$sth->{insert_backups} = $dbh->prepare(qq{
    INSERT INTO backups (hostID, num, date, type, shareid, size)
    VALUES (?,?,?,?,?,?)
});

$sth->{insert_files} = $dbh->prepare(qq{
    INSERT INTO files
        (shareID, backupNum, name, path, date, type, size)
    VALUES (?,?,?,?,?,?,?)
});

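# walk every configured host, its backups and their shares, inserting any
# backups and files that are not yet in the database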
foreach my $host_key (keys %{$hosts}) {

    my $hostname = $hosts->{$host_key}->{'host'} || die "can't find host for $host_key";

    $sth->{hosts_by_name}->execute($hosts->{$host_key}->{'host'});

    unless (($hostID) = $sth->{hosts_by_name}->fetchrow_array()) {
        $sth->{insert_hosts}->execute(
            $hosts->{$host_key}->{'host'},
            $hosts->{$host_key}->{'ip'}
        );

        $hostID = $dbh->last_insert_id(undef,undef,'hosts',undef);
    }

    print "host ".$hosts->{$host_key}->{'host'}.": ";

    # get backups for a host
    my @backups = $bpc->BackupInfoRead($hostname);
    my $incs = scalar @backups;
    print "$incs increments\n";

    my $inc_nr = 0;
    $beenThere = {};

    foreach my $backup (@backups) {

        $inc_nr++;
        last if ($opt{m} && $inc_nr > $opt{m});

        my $backupNum = $backup->{'num'};
        my @backupShares = ();

        printf("%-10s %2d/%-2d #%-2d %s %5s/%5s files (date: %s dur: %s)\n",
            $hosts->{$host_key}->{'host'},
            $inc_nr, $incs, $backupNum,
            $backup->{type} || '?',
            $backup->{nFilesNew} || '?', $backup->{nFiles} || '?',
            strftime($t_fmt,localtime($backup->{startTime})),
            fmt_time($backup->{endTime} - $backup->{startTime})
        );

        my $files = BackupPC::View->new($bpc, $hostname, \@backups, 1);
        foreach my $share ($files->shareList($backupNum)) {

            my $t = time();

            $shareID = getShareID($share, $hostID, $hostname);

            $sth->{backups_count}->execute($hostID, $backupNum, $shareID);
            my ($count) = $sth->{backups_count}->fetchrow_array();
            # skip if already in database!
            next if ($count > 0);

            # log progress
            print curr_time," ", $share;

            my ($f, $nf, $d, $nd, $size) = recurseDir($bpc, $hostname, $files, $backupNum, $share, "", $shareID);

            $sth->{insert_backups}->execute(
                $hostID,
                $backupNum,
                $backup->{'endTime'},
                $backup->{'type'},
                $shareID,
                $size,
            );

            print " commit";
            $dbh->commit();

            my $dur = (time() - $t) || 1;
            printf(" %d/%d files %d/%d dirs %0.2f MB [%.2f/s dur: %s]\n",
                $nf, $f, $nd, $d,
                ($size / 1024 / 1024),
                ( ($f+$d) / $dur ),
                fmt_time($dur)
            );

            hest_update($hostID, $shareID, $backupNum) if ($nf + $nd > 0);
        }

    }
}
undef $sth;
$dbh->commit();
$dbh->disconnect();

print "total duration: ",fmt_time(time() - $start_t),"\n";

$pidfile->remove;

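# getShareID: return the shares.ID for this host and share name, creating the
# row (with a "hostname/share" path) if it doesn't exist yet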
sub getShareID {

    my ($share, $hostID, $hostname) = @_;

    $sth->{share_id} ||= $dbh->prepare(qq{
        SELECT ID FROM shares WHERE hostID=? AND name=?
    });

    $sth->{share_id}->execute($hostID,$share);

    my ($id) = $sth->{share_id}->fetchrow_array();

    return $id if (defined($id));

    $sth->{insert_share} ||= $dbh->prepare(qq{
        INSERT INTO shares
            (hostID,name,share,localpath)
        VALUES (?,?,?,?)
    });

    my $drop_down = $hostname . '/' . $share;
    $drop_down =~ s#//+#/#g;

    $sth->{insert_share}->execute($hostID,$share, $drop_down ,undef);
    return $dbh->last_insert_id(undef,undef,'shares',undef);
}

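# found_in_db: check whether a file with the same shareID, path, date and size
# is already in the files table (memoized in $beenThere); insert it when it is
# missing, and return true if it was already there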
sub found_in_db {

    my @data = @_;
    shift @data;

    my ($key, $shareID,undef,$name,$path,$date,undef,$size) = @_;

    return $beenThere->{$key} if (defined($beenThere->{$key}));

    $sth->{file_in_db} ||= $dbh->prepare(qq{
        SELECT 1 FROM files
        WHERE shareID = ? and
            path = ? and
            date = ? and
            size = ?
        LIMIT 1
    });

    my @param = ($shareID,$path,$date,$size);
    $sth->{file_in_db}->execute(@param);
    my $rows = $sth->{file_in_db}->rows;
    print STDERR "## found_in_db($shareID,$path,$date,$size) ",( $rows ? '+' : '-' ), join(" ",@param), "\n" if ($debug >= 3);

    $beenThere->{$key}++;

    $sth->{'insert_files'}->execute(@data) unless ($rows);
    return $rows;
}

####################################################
# recurse through the filesystem structure and     #
# return a flattened list of files                 #
####################################################
sub recurseDir($$$$$$$) {

    my ($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID) = @_;

    print STDERR "\nrecurse($hostname,$backupNum,$share,$dir,$shareID)\n" if ($debug >= 1);

    my ($nr_files, $new_files, $nr_dirs, $new_dirs, $size) = (0,0,0,0,0);

    { # scope
        my @stack;

        print STDERR "# dirAttrib($backupNum, $share, $dir)\n" if ($debug >= 2);
        my $filesInBackup = $files->dirAttrib($backupNum, $share, $dir);

        # first, add all the entries in current directory
        foreach my $path_key (keys %{$filesInBackup}) {
            print STDERR "# file ",Dumper($filesInBackup->{$path_key}),"\n" if ($debug >= 3);
            my @data = (
                $shareID,
                $backupNum,
                $path_key,
                $filesInBackup->{$path_key}->{'relPath'},
                $filesInBackup->{$path_key}->{'mtime'},
                $filesInBackup->{$path_key}->{'type'},
                $filesInBackup->{$path_key}->{'size'}
            );

            my $key = join(" ", (
                $shareID,
                $dir,
                $path_key,
                $filesInBackup->{$path_key}->{'mtime'},
                $filesInBackup->{$path_key}->{'size'}
            ));

            my $found;
            if (! defined($beenThere->{$key}) && ! ($found = found_in_db($key, @data)) ) {
                print STDERR "# key: $key [", $beenThere->{$key},"]" if ($debug >= 2);

                if ($filesInBackup->{$path_key}->{'type'} == BPC_FTYPE_DIR) {
                    $new_dirs++ unless ($found);
                    print STDERR " dir\n" if ($debug >= 2);
                } else {
                    $new_files++ unless ($found);
                    print STDERR " file\n" if ($debug >= 2);
                }
                $size += $filesInBackup->{$path_key}->{'size'} || 0;
            }

            if ($filesInBackup->{$path_key}->{'type'} == BPC_FTYPE_DIR) {
                $nr_dirs++;

                my $full_path = $dir . '/' . $path_key;
                push @stack, $full_path;
                print STDERR "### store to stack: $full_path\n" if ($debug >= 3);

#               my ($f,$nf,$d,$nd) = recurseDir($bpc, $hostname, $backups, $backupNum, $share, $path_key, $shareID) unless ($beenThere->{$key});
#
#               $nr_files += $f;
#               $new_files += $nf;
#               $nr_dirs += $d;
#               $new_dirs += $nd;

            } else {
                $nr_files++;
            }
        }

        print STDERR "## STACK ",join(", ", @stack),"\n" if ($debug >= 2);

        while ( my $dir = shift @stack ) {
            my ($f,$nf,$d,$nd, $s) = recurseDir($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID);
            print STDERR "# $dir f: $f nf: $nf d: $d nd: $nd\n" if ($debug >= 1);
            $nr_files += $f;
            $new_files += $nf;
            $nr_dirs += $d;
            $new_dirs += $nd;
            $size += $s;
        }
    }

    return ($nr_files, $new_files, $nr_dirs, $new_dirs, $size);
}

Properties

svn:executable = *
