/[BackupPC]/trunk/bin/BackupPC_updatedb
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/bin/BackupPC_updatedb

Parent Directory Parent Directory | Revision Log Revision Log


Revision 99 - (show annotations)
Tue Aug 30 14:45:33 2005 UTC (18 years, 8 months ago) by dpavlin
File size: 13799 byte(s)
increased increment to 100000 files (37Mb of working memory on my system)

1 #!/usr/local/bin/perl -w
2
3 use strict;
4 use lib "__INSTALLDIR__/lib";
5
6 use DBI;
7 use BackupPC::Lib;
8 use BackupPC::View;
9 use Data::Dumper;
10 use Getopt::Std;
11 use Time::HiRes qw/time/;
12 use File::Pid;
13 use POSIX qw/strftime/;
14
15 use constant BPC_FTYPE_DIR => 5;
16 use constant EST_CHUNK => 100000;
17
# Verbosity level; raised by the -v option below.
my $debug = 0;
$| = 1;

# Wall-clock start, used for the "total duration" line at the end of the run.
my $start_t = time();

# Parse the command line before touching the pid file or the database, so
# that a usage error exits without leaving either of them behind.
my %opt;

if ( !getopts("cdm:v:i", \%opt) ) {
    print STDERR <<EOF;
usage: $0 [-c|-d] [-m num] [-v level] [-i]

Options:
-c create database on first use
-d delete database before import
-m num import just num increments for one host
-v num set verbosity (debug) level (default $debug)
-i update HyperEstraier full text index
EOF
    exit 1;
}

if ($opt{v}) {
    print "Debug level at $opt{v}\n";
    $debug = $opt{v};
}

# Refuse to start while another instance is running; otherwise make sure the
# pid file we write belongs to this process.
my $pidfile = File::Pid->new;

if (my $pid = $pidfile->running) {
    die "$0 already running: $pid\n";
} elsif ($pidfile->pid ne $$) {
    $pidfile->remove;
    $pidfile = File::Pid->new;
}
$pidfile->write;
print STDERR "$0 using pid ",$pidfile->pid," file ",$pidfile->file,"\n";

# strftime() format used for every timestamp this script prints.
my $t_fmt = '%Y-%m-%d %H:%M:%S';

my $hosts;
my $bpc = BackupPC::Lib->new || die;
my %Conf = $bpc->Conf();
my $TopDir = $bpc->TopDir();

# Keys of files already handled for the current host (see found_in_db).
my $beenThere = {};

my $dsn = $Conf{SearchDSN} || die "Need SearchDSN in config.pl\n";
my $user = $Conf{SearchUser} || '';

# The index path is taken relative to $TopDir.
# NOTE(review): when HyperEstraierIndex is not configured this still yields
# "$TopDir/" (plus an "uninitialized" warning), so $index_path is never false.
# Confirm whether an unset option is meant to disable indexing.
my $index_path = $Conf{HyperEstraierIndex};
$index_path = $TopDir . '/' . $index_path;
$index_path =~ s#//#/#g;

# "use" is processed at compile time, so the module is loaded whether or not
# an index path is set; wrapping it in a run-time "if" had no effect.
use HyperEstraier;

# AutoCommit is off: every change has to be committed explicitly.
my $dbh = DBI->connect($dsn, $user, "", { RaiseError => 1, AutoCommit => 0 });
74
75 #---- subs ----
76
# Format a duration as "MM:SS", prefixed with "<hours>h" once it reaches an
# hour (e.g. 3661 -> "1h01:01").
#
# Parameters:
#   $t - duration in seconds; fractions are truncated.
# Returns:
#   the formatted string, or nothing (empty list / undef) when $t is undefined.
#
# Hours are computed from the total number of seconds rather than with
# gmtime(), which wraps around after 24 hours. A zero duration is reported as
# "00:00" so that callers passing the result to printf always get an argument.
sub fmt_time {
    my $t = shift;
    return unless defined $t;

    my $secs = int(abs($t));
    # Keep a sign for negative input instead of printing a wrapped value.
    my $sign = ($t < 0 && $secs) ? '-' : '';

    my $hh = int($secs / 3600);
    my $mm = int($secs / 60) % 60;
    my $ss = $secs % 60;

    my $out = $sign;
    $out .= "${hh}h" if ($hh);
    $out .= sprintf("%02d:%02d", $mm, $ss);
    return $out;
}
85
# Return the current local time as a string, formatted with the file-wide
# $t_fmt pattern.
sub curr_time {
    my @now = localtime();
    return strftime($t_fmt, @now);
}
89
# HyperEstraier database handle opened by hest_update(); kept at file scope so
# that the signal handler below can flush it.
my $hest_db;

# Handler for SIGINT and SIGQUIT: sync and close the HyperEstraier database if
# one is set, remove the pid file, then exit with status 0.
#
# Parameters:
#   $sig - signal name as passed in by perl (e.g. "INT").
sub signal {
    my ($sig) = @_;
    if ($hest_db) {
        print "\nCaught a SIG$sig--syncing database and shutting down\n";
        $hest_db->sync();
        $hest_db->close();
    }
    # The normal end of the script removes the pid file; do the same here so
    # that an interrupted run does not leave one behind.
    $pidfile->remove if ($pidfile);
    exit(0);
}

$SIG{'INT'}  = \&signal;
$SIG{'QUIT'} = \&signal;
104
# Convert a time value into the string produced by BackupPC::Lib::timeStamp(),
# with the first whitespace character replaced by "T". Returns nothing for a
# false argument. Its only call site in hest_update() is commented out.
sub fmt_date {
    my $t = shift || return;
    my $iso = BackupPC::Lib::timeStamp($t);
    $iso =~ s/\s/T/;
    return $iso;
}

# Add rows of the files table to the HyperEstraier index at $index_path.
#
# Parameters (all optional):
#   $host_id, $share_id, $num - when all three are given, only files of that
#       host, share and backup number are indexed; otherwise every file is.
#
# Rows are read in pages of EST_CHUNK; rows whose URI is already known to the
# index are skipped. Progress and timing are printed to STDOUT.
sub hest_update {

    my ($host_id, $share_id, $num) = @_;

    print curr_time," updating HyperEstraier:";

    my $t = time();

    my $offset = 0;
    my $added = 0;

    print " opening index $index_path";
    $hest_db = HyperEstraier::Database->new();
    $hest_db->open($index_path, $HyperEstraier::Database::DBWRITER | $HyperEstraier::Database::DBCREAT);

    print " increment is " . EST_CHUNK . " files";

    # Build the optional restriction once. $num is tested with defined() so
    # that a backup number of 0 still restricts the query, and the bind
    # values are kept next to the placeholders they belong to instead of
    # passing @_ through unconditionally.
    my $where = '';
    my @bind;
    if ($host_id && $share_id && defined($num)) {
        $where = qq{
            WHERE
                hosts.id = ? AND
                shares.id = ? AND
                files.backupnum = ?
        };
        @bind = ($host_id, $share_id, $num);
    }

    # A real loop is needed here: "last" is not allowed inside do {} while.
    while (1) {

        my $limit = sprintf('LIMIT '.EST_CHUNK.' OFFSET %d', $offset);

        # ORDER BY makes the LIMIT/OFFSET pages stable between queries.
        my $sth = $dbh->prepare(qq{
            SELECT
                files.id AS fid,
                hosts.name AS hname,
                shares.name AS sname,
                -- shares.share AS sharename,
                files.backupnum AS backupnum,
                -- files.name AS filename,
                files.path AS filepath,
                files.date AS date,
                files.type AS type,
                files.size AS size,
                files.shareid AS shareid,
                backups.date AS backup_date
            FROM files
                INNER JOIN shares ON files.shareID=shares.ID
                INNER JOIN hosts ON hosts.ID = shares.hostID
                INNER JOIN backups ON backups.num = files.backupNum and backups.hostID = hosts.ID AND backups.shareID = shares.ID
            $where
            ORDER BY files.id
            $limit
        });

        $sth->execute(@bind);

        # Count the rows actually fetched; the value of $sth->rows for a
        # SELECT is driver-dependent until all rows have been read.
        my $results = 0;

        while (my $row = $sth->fetchrow_hashref()) {

            $results++;

            my $fid = $row->{'fid'} || die "no fid?";
            my $uri = 'file:///' . $fid;

            # Only add documents for which uri_to_id() returns -1
            # (presumably "not in the index yet").
            my $id = $hest_db->uri_to_id($uri);
            next unless ($id == -1);

            # create a document object
            my $doc = HyperEstraier::Document->new;

            # add attributes to the document object
            $doc->add_attr('@uri', $uri);

            # NOTE(review): the truth test also skips columns whose value is
            # 0 (e.g. backupnum 0 or size 0) -- confirm this is intended.
            foreach my $c (@{ $sth->{NAME} }) {
                $doc->add_attr($c, $row->{$c}) if ($row->{$c});
            }

            #$doc->add_attr('@cdate', fmt_date($row->{'date'}));

            # add the body text to the document object
            my $path = $row->{'filepath'};
            $doc->add_text($path);
            # hidden text is the same path with a space after each character
            $path =~ s/(.)/$1 /g;
            $doc->add_hidden_text($path);

            print STDERR $doc->dump_draft,"\n" if ($debug > 1);

            # register the document object to the database
            $hest_db->put_doc($doc, $HyperEstraier::Database::PDCLEAN);
            $added++;
        }

        if ($results == 0) {
            print " - no more files\n";
            last;
        }

        print " $added";
        $hest_db->sync();

        $offset += EST_CHUNK;

        # a short page means the end of the result set has been reached
        last if ($results != EST_CHUNK);
    }

    print ", close";
    $hest_db->close();
    # Forget the closed handle so the signal handler does not sync/close it
    # a second time.
    undef $hest_db;

    my $dur = (time() - $t) || 1;
    printf(" [%.2f/s dur: %s]\n",
        ( $added / $dur ),
        fmt_time($dur)
    );
}
224
225 #---- /subs ----
226
227
## update index ##
# Re-index everything when -i was given, or when an index path is set but
# nothing exists at that path yet. Skipped entirely when -c is in effect.
if (!$opt{c}) {
    my $wanted = $opt{i} ? 1 : ($index_path && !-e $index_path);
    if ($wanted) {
        # update all
        my @msg = ("force update of HyperEstraier index ");
        push @msg, "importing existing data" unless (-e $index_path);
        push @msg, "by -i flag" if ($opt{i});
        print @msg, "\n";
        hest_update();
    }
}
237
## create tables ##

# Create one index from a specification of the form "table_column[_unique]".
# The specification (with commas turned into underscores) is also used as the
# index name. Does nothing for a false argument.
sub do_index {
    my ($spec) = @_;
    return unless ($spec);

    my @part = split(/_/, $spec);
    my $table = $part[0];
    my $col = $part[1];
    my $unique = $part[2] || '';

    (my $index = $spec) =~ s/,/_/g;
    $dbh->do(qq{ create $unique index $index on $table($col) });
}

# With -c: create all tables and their indexes, then commit.
if ($opt{c}) {

    print "creating tables...\n";

    # Statements are run in this order; later tables reference earlier ones.
    my @ddl = (
        qq{
            create table hosts (
                ID SERIAL PRIMARY KEY,
                name VARCHAR(30) NOT NULL,
                IP VARCHAR(15)
            );
        },
        qq{
            create table shares (
                ID SERIAL PRIMARY KEY,
                hostID INTEGER NOT NULL references hosts(id),
                name VARCHAR(30) NOT NULL,
                share VARCHAR(200) NOT NULL,
                localpath VARCHAR(200)
            );
        },
        qq{
            create table backups (
                hostID INTEGER NOT NULL references hosts(id),
                num INTEGER NOT NULL,
                date integer NOT NULL,
                type CHAR(4) not null,
                shareID integer not null references shares(id),
                size integer not null,
                PRIMARY KEY(hostID, num, shareID)
            );
        },
        qq{
            create table dvds (
                ID SERIAL PRIMARY KEY,
                num INTEGER NOT NULL,
                name VARCHAR(255) NOT NULL,
                mjesto VARCHAR(255)
            );
        },
        qq{
            create table files (
                ID SERIAL PRIMARY KEY,
                shareID INTEGER NOT NULL references shares(id),
                backupNum INTEGER NOT NULL,
                name VARCHAR(255) NOT NULL,
                path VARCHAR(255) NOT NULL,
                date integer NOT NULL,
                type INTEGER NOT NULL,
                size INTEGER NOT NULL,
                dvdid INTEGER references dvds(id)
            );
        },
    );

    for my $statement (@ddl) {
        $dbh->do($statement);
    }

    #do_index('backups_hostid,num_unique');

    print "creating indexes:";

    my @index_specs = qw(
        hosts_name
        backups_hostID
        backups_num
        shares_hostID
        shares_name
        files_shareID
        files_path
        files_name
        files_date
        files_size
    );

    for my $spec (@index_specs) {
        print " $spec";
        do_index($spec);
    }
    print "...\n";

    $dbh->commit;

}
327
## delete data before inserting ##
# With -d: empty every table, in the order listed here (tables that hold
# references come before the tables they reference), then commit.
if ($opt{d}) {
    my @tables = qw(files dvds backups shares hosts);

    print "deleting ";
    for my $name (@tables) {
        print $name, " ";
        $dbh->do(qq{ DELETE FROM $name });
    }
    print " done...\n";

    $dbh->commit;
}
339
## insert new values ##

# get hosts
$hosts = $bpc->HostInfoRead();
my $hostID;
my $shareID;

# Prepared statement handles by name; getShareID() and found_in_db() add
# their own entries on first use.
my $sth;

{
    # name => SQL pairs, prepared in the order listed
    my @prepared_sql = (
        insert_hosts => qq{
            INSERT INTO hosts (name, IP) VALUES (?,?)
        },
        hosts_by_name => qq{
            SELECT ID FROM hosts WHERE name=?
        },
        backups_count => qq{
            SELECT COUNT(*)
            FROM backups
            WHERE hostID=? AND num=? AND shareid=?
        },
        insert_backups => qq{
            INSERT INTO backups (hostID, num, date, type, shareid, size)
            VALUES (?,?,?,?,?,?)
        },
        insert_files => qq{
            INSERT INTO files
                (shareID, backupNum, name, path, date, type, size)
            VALUES (?,?,?,?,?,?,?)
        },
    );

    while (my ($name, $sql) = splice(@prepared_sql, 0, 2)) {
        $sth->{$name} = $dbh->prepare($sql);
    }
}

# Walk every host, every backup of that host and every share of that backup;
# shares already recorded in the backups table are skipped.
for my $host_key (keys %{$hosts}) {

    my $host = $hosts->{$host_key};
    my $hostname = $host->{'host'} || die "can't find host for $host_key";

    # look the host up, inserting it when it is not known yet
    $sth->{hosts_by_name}->execute($hostname);
    my @known = $sth->{hosts_by_name}->fetchrow_array();

    if (@known) {
        ($hostID) = @known;
    } else {
        $sth->{insert_hosts}->execute($hostname, $host->{'ip'});
        $hostID = $dbh->last_insert_id(undef,undef,'hosts',undef);
    }

    print "host ".$hostname.": ";

    # get backups for a host
    my @backups = $bpc->BackupInfoRead($hostname);
    my $incs = scalar @backups;
    print "$incs increments\n";

    my $inc_nr = 0;
    # start with an empty "already seen" cache for each host
    $beenThere = {};

    for my $backup (@backups) {

        $inc_nr++;
        # -m limits the number of increments imported per host
        last if ($opt{m} && $inc_nr > $opt{m});

        my $backupNum = $backup->{'num'};

        printf("%-10s %2d/%-2d #%-2d %s %5s/%5s files (date: %s dur: %s)\n",
            $hostname,
            $inc_nr, $incs, $backupNum,
            $backup->{type} || '?',
            $backup->{nFilesNew} || '?', $backup->{nFiles} || '?',
            strftime($t_fmt,localtime($backup->{startTime})),
            fmt_time($backup->{endTime} - $backup->{startTime})
        );

        my $view = BackupPC::View->new($bpc, $hostname, \@backups, 1);

        for my $share ($view->shareList($backupNum)) {

            my $t = time();

            $shareID = getShareID($share, $hostID, $hostname);

            # skip if already in database!
            $sth->{backups_count}->execute($hostID, $backupNum, $shareID);
            my ($count) = $sth->{backups_count}->fetchrow_array();
            next if ($count > 0);

            # dump some log
            print curr_time," ", $share;

            my ($f, $nf, $d, $nd, $size) = recurseDir($bpc, $hostname, $view, $backupNum, $share, "", $shareID);

            $sth->{insert_backups}->execute(
                $hostID,
                $backupNum,
                $backup->{'endTime'},
                $backup->{'type'},
                $shareID,
                $size,
            );

            print " commit";
            $dbh->commit();

            my $dur = (time() - $t) || 1;
            printf(" %d/%d files %d/%d dirs %0.2f MB [%.2f/s dur: %s]\n",
                $nf, $f, $nd, $d,
                ($size / 1024 / 1024),
                ( ($f+$d) / $dur ),
                fmt_time($dur)
            );

            # index the files of the share that was just imported
            hest_update($hostID, $shareID, $backupNum);
        }

    }
}

# release the statement handles, then finish the database session
undef $sth;
$dbh->commit();
$dbh->disconnect();

print "total duration: ",fmt_time(time() - $start_t),"\n";

$pidfile->remove;
465
# Return the ID of the shares row for the given host and share name,
# inserting the row first when it does not exist.
#
# Parameters:
#   $share    - share name, stored in shares.name
#   $hostID   - ID of the owning row in hosts
#   $hostname - host name, used to build the value stored in shares.share
# Returns:
#   the share ID (existing, or the one just inserted).
#
# The sub takes three arguments, so it is declared without the former empty
# "()" prototype, which stated that it takes none.
sub getShareID {

    my ($share, $hostID, $hostname) = @_;

    # statement handles are prepared on first use and cached in $sth
    $sth->{share_id} ||= $dbh->prepare(qq{
        SELECT ID FROM shares WHERE hostID=? AND name=?
    });

    $sth->{share_id}->execute($hostID,$share);

    my ($id) = $sth->{share_id}->fetchrow_array();

    return $id if (defined($id));

    $sth->{insert_share} ||= $dbh->prepare(qq{
        INSERT INTO shares
            (hostID,name,share,localpath)
        VALUES (?,?,?,?)
    });

    # "<hostname>/<share>" with runs of slashes collapsed into one
    my $drop_down = $hostname . '/' . $share;
    $drop_down =~ s#//+#/#g;

    $sth->{insert_share}->execute($hostID,$share, $drop_down ,undef);
    return $dbh->last_insert_id(undef,undef,'shares',undef);
}
492
# Check whether a file is already stored in the files table and insert it
# when it is not.
#
# Parameters:
#   $key  - cache key identifying the file in $beenThere
#   @data - (shareID, backupNum, name, path, date, type, size), passed
#           unchanged to the insert_files statement
# Returns:
#   a true value when the file was already known (the $beenThere counter for
#   a cached key, otherwise 1 for a match in the database), and 0 when the
#   file was new and has just been inserted.
sub found_in_db {

    my ($key, @data) = @_;

    my ($shareID, undef, undef, $path, $date, undef, $size) = @data;

    return $beenThere->{$key} if (defined($beenThere->{$key}));

    $sth->{file_in_db} ||= $dbh->prepare(qq{
        SELECT 1 FROM files
        WHERE shareID = ? and
            path = ? and
            date = ? and
            size = ?
        LIMIT 1
    });

    my @param = ($shareID,$path,$date,$size);
    $sth->{file_in_db}->execute(@param);

    # Fetch the row instead of asking for $sth->rows: the row count of a
    # SELECT is driver-dependent before its rows have been read.
    my ($hit) = $sth->{file_in_db}->fetchrow_array();
    $sth->{file_in_db}->finish();
    my $rows = $hit ? 1 : 0;

    print STDERR "## found_in_db($shareID,$path,$date,$size) ",( $rows ? '+' : '-' ), join(" ",@param), "\n" if ($debug >= 3);

    # remember the key so the next lookup is answered from the cache
    $beenThere->{$key}++;

    $sth->{'insert_files'}->execute(@data) unless ($rows);
    return $rows;
}
521
####################################################
# recursing through filesystem structure and       #
# returning flattened files list                   #
####################################################
# Store every entry below one directory of a backup and return counters.
#
# Parameters:
#   $bpc       - BackupPC::Lib object (only passed on to recursive calls)
#   $hostname  - host name (used in debug output and recursive calls)
#   $files     - object whose dirAttrib() lists the entries of a directory
#   $backupNum - backup number
#   $share     - share name
#   $dir       - directory to list ("" at the first call)
#   $shareID   - ID of the share in the shares table
# Returns:
#   ($nr_files, $new_files, $nr_dirs, $new_dirs, $size), where the "new"
#   counters and $size only cover entries that found_in_db() reported as not
#   yet known.
#
# The sub takes seven arguments, so it is declared without the former
# eight-scalar prototype.
sub recurseDir {

    my ($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID) = @_;

    print STDERR "\nrecurse($hostname,$backupNum,$share,$dir,$shareID)\n" if ($debug >= 1);

    my ($nr_files, $new_files, $nr_dirs, $new_dirs, $size) = (0,0,0,0,0);

    # sub-directories found here, visited after this directory is done
    my @stack;

    print STDERR "# dirAttrib($backupNum, $share, $dir)\n" if ($debug >= 2);
    my $filesInBackup = $files->dirAttrib($backupNum, $share, $dir);

    # first, add all the entries in current directory
    foreach my $path_key (keys %{$filesInBackup}) {
        my $attr = $filesInBackup->{$path_key};

        print STDERR "# file ",Dumper($attr),"\n" if ($debug >= 3);

        # column values for the insert_files statement
        my @data = (
            $shareID,
            $backupNum,
            $path_key,
            $attr->{'relPath'},
            $attr->{'mtime'},
            $attr->{'type'},
            $attr->{'size'}
        );

        my $key = join(" ", (
            $shareID,
            $dir,
            $path_key,
            $attr->{'mtime'},
            $attr->{'size'}
        ));

        my $is_dir = ($attr->{'type'} == BPC_FTYPE_DIR);

        # found_in_db() inserts the entry when it is new, so this branch is
        # only taken for entries that were not known before
        if (! defined($beenThere->{$key}) && ! found_in_db($key, @data)) {
            print STDERR "# key: $key [", $beenThere->{$key},"]" if ($debug >= 2);

            if ($is_dir) {
                $new_dirs++;
                print STDERR " dir\n" if ($debug >= 2);
            } else {
                $new_files++;
                print STDERR " file\n" if ($debug >= 2);
            }
            $size += $attr->{'size'} || 0;
        }

        if ($is_dir) {
            $nr_dirs++;

            my $full_path = $dir . '/' . $path_key;
            push @stack, $full_path;
            print STDERR "### store to stack: $full_path\n" if ($debug >= 3);
        } else {
            $nr_files++;
        }
    }

    print STDERR "## STACK ",join(", ", @stack),"\n" if ($debug >= 2);

    # then descend into each sub-directory and add up its counters
    while (defined(my $subdir = shift @stack)) {
        my ($f,$nf,$d,$nd, $s) = recurseDir($bpc, $hostname, $files, $backupNum, $share, $subdir, $shareID);
        print STDERR "# $subdir f: $f nf: $nf d: $d nd: $nd\n" if ($debug >= 1);
        $nr_files += $f;
        $new_files += $nf;
        $nr_dirs += $d;
        $new_dirs += $nd;
        $size += $s;
    }

    return ($nr_files, $new_files, $nr_dirs, $new_dirs, $size);
}
609

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26