This is a repository of my old source code, which is no longer updated. Go to git.rot13.org for current projects!

Contents of /trunk/bin/BackupPC_updatedb



Revision 116
Sun Sep 11 12:39:24 2005 UTC by dpavlin
File size: 14617 byte(s)
Log message: support for node API
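
Typical invocations (illustrative only, inferred from the option parsing in the script below):

    BackupPC_updatedb -c          # create the database tables and indexes on first use
    BackupPC_updatedb -d          # delete existing rows before a fresh import
    BackupPC_updatedb -v 1        # import new backups with progress output
    BackupPC_updatedb -i          # force an update of the HyperEstraier full-text index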

#!/usr/local/bin/perl -w

use strict;
use lib "__INSTALLDIR__/lib";

use DBI;
use BackupPC::Lib;
use BackupPC::View;
use Data::Dumper;
use Getopt::Std;
use Time::HiRes qw/time/;
use File::Pid;
use POSIX qw/strftime/;

use constant BPC_FTYPE_DIR => 5;
use constant EST_CHUNK => 100000;

my $debug = 0;
$|=1;

my $start_t = time();

my $pidfile = new File::Pid;

if (my $pid = $pidfile->running ) {
    die "$0 already running: $pid\n";
} elsif ($pidfile->pid ne $$) {
    $pidfile->remove;
    $pidfile = new File::Pid;
}
$pidfile->write;
print STDERR "$0 using pid ",$pidfile->pid," file ",$pidfile->file,"\n";

my $t_fmt = '%Y-%m-%d %H:%M:%S';

my $hosts;
my $bpc = BackupPC::Lib->new || die;
my %Conf = $bpc->Conf();
my $TopDir = $bpc->TopDir();
my $beenThere = {};

my $dsn = $Conf{SearchDSN} || die "Need SearchDSN in config.pl\n";
my $user = $Conf{SearchUser} || '';

my $use_hest = $Conf{HyperEstraierIndex};
my ($index_path, $index_node_url);
if ($use_hest) {
    # require instead of use: load HyperEstraier only when the index is configured
    require HyperEstraier;
    if ($use_hest =~ m#^http://#) {
        $index_node_url = $use_hest;
    } else {
        # the configured value is an index path relative to TopDir
        $index_path = $TopDir . '/' . $use_hest;
        $index_path =~ s#//#/#g;
    }
}
print "-- $use_hest : ", ( $index_path || $index_node_url ), " --\n" if ($use_hest);

my $dbh = DBI->connect($dsn, $user, "", { RaiseError => 1, AutoCommit => 0 });

my %opt;

if ( !getopts("cdm:v:i", \%opt ) ) {
    print STDERR <<EOF;
usage: $0 [-c|-d] [-m num] [-v level] [-i]

Options:
    -c      create database on first use
    -d      delete database before import
    -m num  import just num increments for one host
    -v num  set verbosity (debug) level (default $debug)
    -i      update HyperEstraier full text index
EOF
    exit 1;
}

if ($opt{v}) {
    print "Debug level at $opt{v}\n";
    $debug = $opt{v};
}

#---- subs ----

# format a duration in seconds as [Hh]MM:SS
sub fmt_time {
    my $t = shift || return;
    my $out = "";
    my ($ss,$mm,$hh) = gmtime($t);
    $out .= "${hh}h" if ($hh);
    $out .= sprintf("%02d:%02d", $mm, $ss);
    return $out;
}

sub curr_time {
    return strftime($t_fmt, localtime());
}

my $hest_db;
my $hest_node;

sub signal {
    my ($sig) = @_;
    if ($hest_db) {
        print "\nCaught a SIG$sig--syncing database and shutting down\n";
        $hest_db->sync();
        $hest_db->close();
    }
    exit(0);
}

$SIG{'INT'}  = \&signal;
$SIG{'QUIT'} = \&signal;
sub hest_update {

    my ($host_id, $share_id, $num) = @_;

    unless ($use_hest) {
        print STDERR "HyperEstraier support not enabled in configuration\n";
        return;
    }

    print curr_time," updating HyperEstraier:";

    my $t = time();

    my $offset = 0;
    my $added = 0;

    print " opening index $use_hest";
    if ($index_path) {
        $hest_db = HyperEstraier::Database->new();
        $hest_db->open($index_path, $HyperEstraier::Database::DBWRITER | $HyperEstraier::Database::DBCREAT);
        print " directly";
    } elsif ($index_node_url) {
        $hest_node ||= HyperEstraier::Node->new($index_node_url);
        $hest_node->set_auth('admin', 'admin');
        print " via node URL";
    } else {
        die "don't know how to use HyperEstraier Index $use_hest";
    }
    print " increment is " . EST_CHUNK . " files:";

    my $results = 0;

    # a plain while(1) loop instead of do/while, because "last" is not
    # allowed to exit a do{} block
    while (1) {

        my $where = '';
        my @data;
        if ($host_id && $share_id && $num) {
            $where = qq{
            WHERE
                hosts.id = ? AND
                shares.id = ? AND
                files.backupnum = ?
            };
            @data = ( $host_id, $share_id, $num );
        }

        my $limit = sprintf('LIMIT '.EST_CHUNK.' OFFSET %d', $offset);

        my $sth = $dbh->prepare(qq{
            SELECT
                files.id        AS fid,
                hosts.name      AS hname,
                shares.name     AS sname,
                -- shares.share AS sharename,
                files.backupnum AS backupnum,
                -- files.name   AS filename,
                files.path      AS filepath,
                files.date      AS date,
                files.type      AS type,
                files.size      AS size,
                files.shareid   AS shareid,
                backups.date    AS backup_date
            FROM files
                INNER JOIN shares  ON files.shareID = shares.ID
                INNER JOIN hosts   ON hosts.ID = shares.hostID
                INNER JOIN backups ON backups.num = files.backupNum AND backups.hostID = hosts.ID AND backups.shareID = shares.ID
            $where
            $limit
        });

        $sth->execute(@data);
        $results = $sth->rows;

        if ($results == 0) {
            print " - no new files\n";
            last;
        }

        sub fmt_date {
            my $t = shift || return;
            my $iso = BackupPC::Lib::timeStamp($t);
            $iso =~ s/\s/T/;
            return $iso;
        }

        while (my $row = $sth->fetchrow_hashref()) {

            my $fid = $row->{'fid'} || die "no fid?";
            my $uri = 'file:///' . $fid;

            my $id = ($hest_db || $hest_node)->uri_to_id($uri);
            next unless ($id == -1);

            # create a document object
            my $doc = HyperEstraier::Document->new;

            # add attributes to the document object
            $doc->add_attr('@uri', $uri);

            foreach my $c (@{ $sth->{NAME} }) {
                $doc->add_attr($c, $row->{$c}) if ($row->{$c});
            }

            #$doc->add_attr('@cdate', fmt_date($row->{'date'}));

            # add the body text to the document object
            my $path = $row->{'filepath'};
            $doc->add_text($path);
            $path =~ s/(.)/$1 /g;
            $doc->add_hidden_text($path);

            print STDERR $doc->dump_draft,"\n" if ($debug > 1);

            # register the document object to the database
            if ($hest_db) {
                $hest_db->put_doc($doc, $HyperEstraier::Database::PDCLEAN);
            } elsif ($hest_node) {
                $hest_node->put_doc($doc);
            } else {
                die "not supported";
            }
            $added++;
        }

        print " $added";
        $hest_db->sync() if ($index_path);

        $offset += EST_CHUNK;

        last unless ($results == EST_CHUNK);
    }

    if ($index_path) {
        print ", close";
        $hest_db->close();
    }

    my $dur = (time() - $t) || 1;
    printf(" [%.2f/s dur: %s]\n",
        ( $added / $dur ),
        fmt_time($dur)
    );
}

#---- /subs ----


## update index ##
if ( ($opt{i} || ($index_path && ! -e $index_path)) && !$opt{c} ) {
    # update all
    print "force update of HyperEstraier index ";
    print "importing existing data" unless (-e $index_path);
    print "by -i flag" if ($opt{i});
    print "\n";
    hest_update();
}

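# The schema below uses SERIAL primary keys (PostgreSQL-style). hosts, shares,
# backups and files mirror BackupPC's own structures; dvds describes archive
# media (DVDs) that file rows can reference through files.dvdid.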
## create tables ##
if ($opt{c}) {
    sub do_index {
        my $index = shift || return;
        my ($table,$col,$unique) = split(/_/, $index);
        $unique ||= '';
        $index =~ s/,/_/g;
        $dbh->do(qq{ create $unique index $index on $table($col) });
    }

    print "creating tables...\n";

    $dbh->do(qq{
        create table hosts (
            ID      SERIAL          PRIMARY KEY,
            name    VARCHAR(30)     NOT NULL,
            IP      VARCHAR(15)
        );
    });

    $dbh->do(qq{
        create table shares (
            ID      SERIAL          PRIMARY KEY,
            hostID  INTEGER         NOT NULL references hosts(id),
            name    VARCHAR(30)     NOT NULL,
            share   VARCHAR(200)    NOT NULL,
            localpath VARCHAR(200)
        );
    });

    $dbh->do(qq{
        create table backups (
            hostID  INTEGER         NOT NULL references hosts(id),
            num     INTEGER         NOT NULL,
            date    integer         NOT NULL,
            type    CHAR(4)         not null,
            shareID integer         not null references shares(id),
            size    integer         not null,
            PRIMARY KEY(hostID, num, shareID)
        );
    });

    #do_index('backups_hostid,num_unique');

    $dbh->do(qq{
        create table dvds (
            ID      SERIAL          PRIMARY KEY,
            num     INTEGER         NOT NULL,
            name    VARCHAR(255)    NOT NULL,
            mjesto  VARCHAR(255)    -- mjesto means location (Croatian)
        );
    });

    $dbh->do(qq{
        create table files (
            ID      SERIAL          PRIMARY KEY,
            shareID INTEGER         NOT NULL references shares(id),
            backupNum INTEGER       NOT NULL,
            name    VARCHAR(255)    NOT NULL,
            path    VARCHAR(255)    NOT NULL,
            date    integer         NOT NULL,
            type    INTEGER         NOT NULL,
            size    INTEGER         NOT NULL,
            dvdid   INTEGER         references dvds(id)
        );
    });

    print "creating indexes:";

    foreach my $index (qw(
        hosts_name
        backups_hostID
        backups_num
        shares_hostID
        shares_name
        files_shareID
        files_path
        files_name
        files_date
        files_size
    )) {
        print " $index";
        do_index($index);
    }
    print "...\n";

    $dbh->commit;

}

## delete data before inserting ##
if ($opt{d}) {
    print "deleting ";
    foreach my $table (qw(files dvds backups shares hosts)) {
        print "$table ";
        $dbh->do(qq{ DELETE FROM $table });
    }
    print " done...\n";

    $dbh->commit;
}

## insert new values ##

# get hosts
$hosts = $bpc->HostInfoRead();
my $hostID;
my $shareID;

my $sth;

$sth->{insert_hosts} = $dbh->prepare(qq{
    INSERT INTO hosts (name, IP) VALUES (?,?)
});

$sth->{hosts_by_name} = $dbh->prepare(qq{
    SELECT ID FROM hosts WHERE name=?
});

$sth->{backups_count} = $dbh->prepare(qq{
    SELECT COUNT(*)
    FROM backups
    WHERE hostID=? AND num=? AND shareid=?
});

$sth->{insert_backups} = $dbh->prepare(qq{
    INSERT INTO backups (hostID, num, date, type, shareid, size)
    VALUES (?,?,?,?,?,?)
});

$sth->{insert_files} = $dbh->prepare(qq{
    INSERT INTO files
        (shareID, backupNum, name, path, date, type, size)
    VALUES (?,?,?,?,?,?,?)
});

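# Main import loop: for every host known to BackupPC, walk each backup
# increment and each share in it, and insert anything that is not already
# present in the database.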
foreach my $host_key (keys %{$hosts}) {

    my $hostname = $hosts->{$host_key}->{'host'} || die "can't find host for $host_key";

    $sth->{hosts_by_name}->execute($hosts->{$host_key}->{'host'});

    unless (($hostID) = $sth->{hosts_by_name}->fetchrow_array()) {
        $sth->{insert_hosts}->execute(
            $hosts->{$host_key}->{'host'},
            $hosts->{$host_key}->{'ip'}
        );

        $hostID = $dbh->last_insert_id(undef,undef,'hosts',undef);
    }

    print "host ".$hosts->{$host_key}->{'host'}.": ";

    # get backups for a host
    my @backups = $bpc->BackupInfoRead($hostname);
    my $incs = scalar @backups;
    print "$incs increments\n";

    my $inc_nr = 0;
    $beenThere = {};

    foreach my $backup (@backups) {

        $inc_nr++;
        last if ($opt{m} && $inc_nr > $opt{m});

        my $backupNum = $backup->{'num'};
        my @backupShares = ();

        printf("%-10s %2d/%-2d #%-2d %s %5s/%5s files (date: %s dur: %s)\n",
            $hosts->{$host_key}->{'host'},
            $inc_nr, $incs, $backupNum,
            $backup->{type} || '?',
            $backup->{nFilesNew} || '?', $backup->{nFiles} || '?',
            strftime($t_fmt,localtime($backup->{startTime})),
            fmt_time($backup->{endTime} - $backup->{startTime})
        );

        my $files = BackupPC::View->new($bpc, $hostname, \@backups, 1);
        foreach my $share ($files->shareList($backupNum)) {

            my $t = time();

            $shareID = getShareID($share, $hostID, $hostname);

            $sth->{backups_count}->execute($hostID, $backupNum, $shareID);
            my ($count) = $sth->{backups_count}->fetchrow_array();
            # skip if already in database!
            next if ($count > 0);

            # dump some log
            print curr_time," ", $share;

            my ($f, $nf, $d, $nd, $size) = recurseDir($bpc, $hostname, $files, $backupNum, $share, "", $shareID);

            $sth->{insert_backups}->execute(
                $hostID,
                $backupNum,
                $backup->{'endTime'},
                $backup->{'type'},
                $shareID,
                $size,
            );

            print " commit";
            $dbh->commit();

            my $dur = (time() - $t) || 1;
            printf(" %d/%d files %d/%d dirs %0.2f MB [%.2f/s dur: %s]\n",
                $nf, $f, $nd, $d,
                ($size / 1024 / 1024),
                ( ($f+$d) / $dur ),
                fmt_time($dur)
            );

            hest_update($hostID, $shareID, $backupNum) if ($nf + $nd > 0);
        }

    }
}
undef $sth;
$dbh->commit();
$dbh->disconnect();

print "total duration: ",fmt_time(time() - $start_t),"\n";

$pidfile->remove;

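# getShareID: return shares.ID for a (host, share) pair, inserting a new
# shares row the first time the share is seen.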
sub getShareID {

    my ($share, $hostID, $hostname) = @_;

    $sth->{share_id} ||= $dbh->prepare(qq{
        SELECT ID FROM shares WHERE hostID=? AND name=?
    });

    $sth->{share_id}->execute($hostID,$share);

    my ($id) = $sth->{share_id}->fetchrow_array();

    return $id if (defined($id));

    $sth->{insert_share} ||= $dbh->prepare(qq{
        INSERT INTO shares
            (hostID,name,share,localpath)
        VALUES (?,?,?,?)
    });

    my $drop_down = $hostname . '/' . $share;
    $drop_down =~ s#//+#/#g;

    $sth->{insert_share}->execute($hostID,$share, $drop_down ,undef);
    return $dbh->last_insert_id(undef,undef,'shares',undef);
}

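# found_in_db: check whether a file row is already in the database, using the
# in-memory $beenThere cache first and a SELECT on files as a fallback; when
# the row is missing it is inserted via the prepared insert_files statement.
# Returns a true value if the row was already known.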
sub found_in_db {

    my @data = @_;
    shift @data;    # drop the key; @data now holds the insert_files bind values

    my ($key, $shareID,undef,$name,$path,$date,undef,$size) = @_;

    return $beenThere->{$key} if (defined($beenThere->{$key}));

    $sth->{file_in_db} ||= $dbh->prepare(qq{
        SELECT 1 FROM files
        WHERE shareID = ? and
            path = ? and
            date = ? and
            size = ?
        LIMIT 1
    });

    my @param = ($shareID,$path,$date,$size);
    $sth->{file_in_db}->execute(@param);
    my $rows = $sth->{file_in_db}->rows;
    print STDERR "## found_in_db($shareID,$path,$date,$size) ",( $rows ? '+' : '-' ), join(" ",@param), "\n" if ($debug >= 3);

    $beenThere->{$key}++;

    $sth->{'insert_files'}->execute(@data) unless ($rows);
    return $rows;
}

####################################################
# recursing through filesystem structure and       #
# returning flattened files list                   #
####################################################
sub recurseDir($$$$$$$) {

    my ($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID) = @_;

    print STDERR "\nrecurse($hostname,$backupNum,$share,$dir,$shareID)\n" if ($debug >= 1);

    my ($nr_files, $new_files, $nr_dirs, $new_dirs, $size) = (0,0,0,0,0);

    { # scope
        my @stack;

        print STDERR "# dirAttrib($backupNum, $share, $dir)\n" if ($debug >= 2);
        my $filesInBackup = $files->dirAttrib($backupNum, $share, $dir);

        # first, add all the entries in current directory
        foreach my $path_key (keys %{$filesInBackup}) {
            print STDERR "# file ",Dumper($filesInBackup->{$path_key}),"\n" if ($debug >= 3);
            my @data = (
                $shareID,
                $backupNum,
                $path_key,
                $filesInBackup->{$path_key}->{'relPath'},
                $filesInBackup->{$path_key}->{'mtime'},
                $filesInBackup->{$path_key}->{'type'},
                $filesInBackup->{$path_key}->{'size'}
            );

            my $key = join(" ", (
                $shareID,
                $dir,
                $path_key,
                $filesInBackup->{$path_key}->{'mtime'},
                $filesInBackup->{$path_key}->{'size'}
            ));

            my $found;
            if (! defined($beenThere->{$key}) && ! ($found = found_in_db($key, @data)) ) {
                print STDERR "# key: $key [", $beenThere->{$key},"]" if ($debug >= 2);

                if ($filesInBackup->{$path_key}->{'type'} == BPC_FTYPE_DIR) {
                    $new_dirs++ unless ($found);
                    print STDERR " dir\n" if ($debug >= 2);
                } else {
                    $new_files++ unless ($found);
                    print STDERR " file\n" if ($debug >= 2);
                }
                $size += $filesInBackup->{$path_key}->{'size'} || 0;
            }

            if ($filesInBackup->{$path_key}->{'type'} == BPC_FTYPE_DIR) {
                $nr_dirs++;

                my $full_path = $dir . '/' . $path_key;
                push @stack, $full_path;
                print STDERR "### store to stack: $full_path\n" if ($debug >= 3);

#               my ($f,$nf,$d,$nd) = recurseDir($bpc, $hostname, $backups, $backupNum, $share, $path_key, $shareID) unless ($beenThere->{$key});
#
#               $nr_files += $f;
#               $new_files += $nf;
#               $nr_dirs += $d;
#               $new_dirs += $nd;

            } else {
                $nr_files++;
            }
        }

        print STDERR "## STACK ",join(", ", @stack),"\n" if ($debug >= 2);

        while ( my $dir = shift @stack ) {
            my ($f,$nf,$d,$nd, $s) = recurseDir($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID);
            print STDERR "# $dir f: $f nf: $nf d: $d nd: $nd\n" if ($debug >= 1);
            $nr_files += $f;
            $new_files += $nf;
            $nr_dirs += $d;
            $new_dirs += $nd;
            $size += $s;
        }
    }

    return ($nr_files, $new_files, $nr_dirs, $new_dirs, $size);
}

Properties

svn:executable = *
