/[BackupPC]/trunk/bin/BackupPC_updatedb
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/bin/BackupPC_updatedb

Parent Directory Parent Directory | Revision Log Revision Log


Revision 98 - (show annotations)
Tue Aug 30 14:19:54 2005 UTC (18 years, 8 months ago) by dpavlin
File size: 13749 byte(s)
update HyperEstraier index in chunks of EST_CHUNK (default is 10000) to
reduce memory usage with huge backup sets (because PostgreSQL tries to load
whole result set in memory).

1 #!/usr/local/bin/perl -w
2
3 use strict;
4 use lib "__INSTALLDIR__/lib";
5
6 use DBI;
7 use BackupPC::Lib;
8 use BackupPC::View;
9 use Data::Dumper;
10 use Getopt::Std;
11 use Time::HiRes qw/time/;
12 use File::Pid;
13 use POSIX qw/strftime/;
14
# File type code compared against a file's 'type' attribute to detect directories.
use constant BPC_FTYPE_DIR => 5;
# Number of rows fetched per query while updating the HyperEstraier index.
use constant EST_CHUNK => 10000;

my $debug = 0;
$| = 1;

my $start_t = time();

# Parse the command line first, so that a usage error exits before the
# pid file is written or a database connection is opened.
my %opt;

if ( !getopts("cdm:v:i", \%opt ) ) {
	print STDERR <<EOF;
usage: $0 [-c|-d] [-m num] [-v|-v level] [-i]

Options:
	-c	create database on first use
	-d	delete database before import
	-m num	import just num increments for one host
	-v num	set verbosity (debug) level (default $debug)
	-i	update HyperEstraier full text index
EOF
	exit 1;
}

if ($opt{v}) {
	print "Debug level at $opt{v}\n";
	$debug = $opt{v};
}

# Refuse to start while another instance is running; replace a pid file
# that belongs to some other (no longer running) process.
my $pidfile = File::Pid->new;

if (my $pid = $pidfile->running ) {
	die "$0 already running: $pid\n";
} elsif ($pidfile->pid ne $$) {
	$pidfile->remove;
	$pidfile = File::Pid->new;
}
$pidfile->write;
print STDERR "$0 using pid ",$pidfile->pid," file ",$pidfile->file,"\n";

# strftime() format used for all timestamps printed by this script
my $t_fmt = '%Y-%m-%d %H:%M:%S';

my $hosts;
my $bpc = BackupPC::Lib->new || die;
my %Conf = $bpc->Conf();
my $TopDir = $bpc->TopDir();
# keys of files already handled for the current host (see found_in_db)
my $beenThere = {};

my $dsn = $Conf{SearchDSN} || die "Need SearchDSN in config.pl\n";
my $user = $Conf{SearchUser} || '';

# The index path is taken relative to TopDir. Because TopDir is always
# prepended, $index_path is never empty, even without HyperEstraierIndex
# in the configuration.
my $index_path = $Conf{HyperEstraierIndex};
$index_path = $TopDir . '/' . $index_path;
$index_path =~ s#//#/#g;

# "use" is executed at compile time, so the module is loaded no matter
# what; it is therefore written as a plain statement instead of being
# wrapped in a run-time condition.
use HyperEstraier;


my $dbh = DBI->connect($dsn, $user, "", { RaiseError => 1, AutoCommit => 0 });
74
75 #---- subs ----
76
# Format a duration in seconds as "MM:SS", prefixed with "<hours>h" when
# the duration is one hour or longer (e.g. 3661 -> "1h01:01").
#
# Fractional seconds are truncated. An undefined, zero or negative duration
# gives "00:00", so the function always returns exactly one string and is
# safe to use inside a printf() argument list. Hours are not wrapped at 24.
sub fmt_time {
	my $t = shift;
	$t = 0 unless (defined($t) && $t > 0);
	$t = int($t);

	my $hh = int($t / 3600);
	my $mm = int(($t % 3600) / 60);
	my $ss = $t % 60;

	my $out = "";
	$out .= "${hh}h" if ($hh);
	$out .= sprintf("%02d:%02d", $mm, $ss);
	return $out;
}
85
# Return the current local time as a string formatted according to $t_fmt.
sub curr_time {
	my @now = localtime();
	return strftime($t_fmt, @now);
}
89
# HyperEstraier database handle; assigned by hest_update() when it opens
# the index and inspected by the signal handler below.
my $hest_db;

# Handler for SIGINT and SIGQUIT: when an index handle is set, announce the
# signal, sync and close the index; in every case exit with status 0.
sub signal {
	my ($sig) = @_;

	exit(0) unless ($hest_db);

	print "\nCaught a SIG$sig--syncing database and shutting down\n";
	$hest_db->sync();
	$hest_db->close();
	exit(0);
}

@SIG{qw(INT QUIT)} = (\&signal) x 2;
104
# Turn the string returned by BackupPC::Lib::timeStamp() into a "T"-separated
# form by replacing its first whitespace character. Returns nothing for an
# undefined or zero argument. Only referenced from a commented-out line in
# hest_update(); kept at file level instead of being nested inside a loop.
sub fmt_date {
	my $t = shift || return;
	my $iso = BackupPC::Lib::timeStamp($t);
	$iso =~ s/\s/T/;
	return $iso;
}

# Add files from the SQL database to the HyperEstraier index.
#
# Arguments (all optional): $host_id, $share_id, $num (backup number).
# When all three are defined only files of that host/share/backup are
# considered; otherwise every file in the database is.
#
# Rows are read in pages of EST_CHUNK so that the whole result set never
# has to be held in memory. Files whose URI is already present in the
# index are skipped. Progress and timing information is printed to STDOUT.
sub hest_update {

	my ($host_id, $share_id, $num) = @_;

	print curr_time," updating HyperEstraier:";

	my $t = time();

	my $offset = 0;
	my $added = 0;

	print " opening index $index_path";
	$hest_db = HyperEstraier::Database->new();
	$hest_db->open($index_path, $HyperEstraier::Database::DBWRITER | $HyperEstraier::Database::DBCREAT);

	# Test with defined(): backup number 0 is a valid value and must not
	# disable the filter. Bind values are passed only together with the
	# placeholders, so their count always matches the statement.
	my $where = '';
	my @bind;
	if (defined($host_id) && defined($share_id) && defined($num)) {
		$where = qq{
			WHERE
				hosts.id = ? AND
				shares.id = ? AND
				files.backupnum = ?
		};
		@bind = ($host_id, $share_id, $num);
	}

	# A real loop is required here: "last" does not terminate a
	# do { } while block, it would propagate to a loop in the caller.
	while (1) {

		my $limit = sprintf('LIMIT '.EST_CHUNK.' OFFSET %d', $offset);

		# ORDER BY makes LIMIT/OFFSET paging deterministic, so that no row
		# is skipped or returned twice between pages.
		my $sth = $dbh->prepare(qq{
			SELECT
				files.id			AS fid,
				hosts.name			AS hname,
				shares.name			AS sname,
				files.backupnum			AS backupnum,
				files.path			AS filepath,
				files.date			AS date,
				files.type			AS type,
				files.size			AS size,
				files.shareid			AS shareid,
				backups.date			AS backup_date
			FROM files
				INNER JOIN shares	ON files.shareID=shares.ID
				INNER JOIN hosts	ON hosts.ID = shares.hostID
				INNER JOIN backups	ON backups.num = files.backupNum and backups.hostID = hosts.ID AND backups.shareID = shares.ID
			$where
			ORDER BY files.id
			$limit
		});

		$sth->execute(@bind);

		# count rows as they are fetched instead of trusting rows() on a
		# SELECT statement
		my $results = 0;

		while (my $row = $sth->fetchrow_hashref()) {

			$results++;

			my $fid = $row->{'fid'} || die "no fid?";
			my $uri = 'file:///' . $fid;

			# skip documents which are already in the index
			my $id = $hest_db->uri_to_id($uri);
			next unless ($id == -1);

			# create a document object
			my $doc = HyperEstraier::Document->new;

			# add attributes to the document object
			$doc->add_attr('@uri', $uri);

			foreach my $c (@{ $sth->{NAME} }) {
				my $val = $row->{$c};
				# keep legitimate zero values (backup number 0, size 0)
				$doc->add_attr($c, $val) if (defined($val) && length($val));
			}

			#$doc->add_attr('@cdate', fmt_date($row->{'date'}));

			# add the body text to the document object
			my $path = $row->{'filepath'};
			$doc->add_text($path);
			# hidden text: the path again with a space after every character
			$path =~ s/(.)/$1 /g;
			$doc->add_hidden_text($path);

			print STDERR $doc->dump_draft,"\n" if ($debug > 1);

			# register the document object to the database
			$hest_db->put_doc($doc, $HyperEstraier::Database::PDCLEAN);
			$added++;
		}

		if ($results == 0) {
			print " - no more files\n";
			last;
		}

		print " $added";
		$hest_db->sync();

		# a short page means that this was the last one
		last unless ($results == EST_CHUNK);

		$offset += EST_CHUNK;
	}

	print ", close";
	$hest_db->close();
	# forget the closed handle so that the signal handler does not try to
	# sync and close it a second time
	undef $hest_db;

	my $dur = (time() - $t) || 1;
	printf(" [%.2f/s dur: %s]\n",
		( $added / $dur ),
		fmt_time($dur)
	);
}
222
223 #---- /subs ----
224
225
## update index ##
# Re-index everything when asked to with -i, or when the index does not
# exist yet; never while tables are being created with -c.
unless ($opt{c}) {
	my $index_exists = -e $index_path;

	if ($opt{i} || ($index_path && !$index_exists)) {
		# update all
		my $msg = "force update of HyperEstraier index ";
		$msg .= "importing existing data" unless ($index_exists);
		$msg .= "by -i flag" if ($opt{i});
		print $msg, "\n";
		hest_update();
	}
}
235
## create tables ##

# Create an index described by a name of the form table_column[_unique].
# The name is split on "_" into table, column list and an optional UNIQUE
# keyword; commas in the column list are turned into "_" to build the
# index name (e.g. 'backups_hostid,num_unique'). Does nothing when called
# without a name.
sub do_index {
	my $index = shift || return;
	my ($table,$col,$unique) = split(/_/, $index);
	$unique ||= '';
	$index =~ s/,/_/g;
	$dbh->do(qq{ create $unique index $index on $table($col) });
}

if ($opt{c}) {

	print "creating tables...\n";

	$dbh->do(qq{
		create table hosts (
			ID	SERIAL		PRIMARY KEY,
			name	VARCHAR(30)	NOT NULL,
			IP	VARCHAR(15)
		);
	});

	$dbh->do(qq{
		create table shares (
			ID	SERIAL		PRIMARY KEY,
			hostID	INTEGER		NOT NULL references hosts(id),
			name	VARCHAR(30)	NOT NULL,
			share	VARCHAR(200)	NOT NULL,
			localpath VARCHAR(200)
		);
	});

	# size is BIGINT: a byte count of 2 GiB or more does not fit into a
	# 32-bit INTEGER column and would make the insert fail
	$dbh->do(qq{
		create table backups (
			hostID	INTEGER		NOT NULL references hosts(id),
			num	INTEGER		NOT NULL,
			date	integer		NOT NULL,
			type	CHAR(4)		not null,
			shareID	integer		not null references shares(id),
			size	bigint		not null,
			PRIMARY KEY(hostID, num, shareID)
		);
	});

	#do_index('backups_hostid,num_unique');

	$dbh->do(qq{
		create table dvds (
			ID	SERIAL		PRIMARY KEY,
			num	INTEGER		NOT NULL,
			name	VARCHAR(255)	NOT NULL,
			mjesto	VARCHAR(255)
		);
	});

	# size is BIGINT here as well, single files can exceed 2 GiB
	$dbh->do(qq{
		create table files (
			ID	SERIAL		PRIMARY KEY,
			shareID	INTEGER		NOT NULL references shares(id),
			backupNum  INTEGER	NOT NULL,
			name       VARCHAR(255)	NOT NULL,
			path       VARCHAR(255)	NOT NULL,
			date	   integer	NOT NULL,
			type       INTEGER	NOT NULL,
			size       BIGINT	NOT NULL,
			dvdid	   INTEGER	references dvds(id)
		);
	});

	print "creating indexes:";

	foreach my $index (qw(
		hosts_name
		backups_hostID
		backups_num
		shares_hostID
		shares_name
		files_shareID
		files_path
		files_name
		files_date
		files_size
	)) {
		print " $index";
		do_index($index);
	}
	print "...\n";

	$dbh->commit;

}
325
## delete data before inserting ##
# With -d every table is emptied, referencing tables before the tables
# they reference, and the deletion is committed.
if ($opt{d}) {
	my @tables = ('files', 'dvds', 'backups', 'shares', 'hosts');

	print "deleting ";
	for my $table (@tables) {
		print "$table ";
		$dbh->do(qq{ DELETE FROM $table });
	}
	print " done...\n";

	$dbh->commit;
}
337
## insert new values ##

# get hosts
$hosts = $bpc->HostInfoRead();
my $hostID;
my $shareID;

# Prepared statement handles keyed by purpose. getShareID() and
# found_in_db() add further entries to this hash on first use.
my $sth;

$sth->{insert_hosts} = $dbh->prepare(qq{
INSERT INTO hosts (name, IP) VALUES (?,?)
});

$sth->{hosts_by_name} = $dbh->prepare(qq{
SELECT ID FROM hosts WHERE name=?
});

$sth->{backups_count} = $dbh->prepare(qq{
SELECT COUNT(*)
FROM backups
WHERE hostID=? AND num=? AND shareid=?
});

$sth->{insert_backups} = $dbh->prepare(qq{
INSERT INTO backups (hostID, num, date, type, shareid, size)
VALUES (?,?,?,?,?,?)
});

$sth->{insert_files} = $dbh->prepare(qq{
INSERT INTO files
	(shareID, backupNum, name, path, date, type, size)
	VALUES (?,?,?,?,?,?,?)
});

foreach my $host_key (keys %{$hosts}) {

	my $host = $hosts->{$host_key};
	my $hostname = $host->{'host'} || die "can't find host for $host_key";

	# look the host up, inserting it when it is not known yet
	$sth->{hosts_by_name}->execute($hostname);

	unless (($hostID) = $sth->{hosts_by_name}->fetchrow_array()) {
		$sth->{insert_hosts}->execute(
			$hostname,
			$host->{'ip'}
		);

		$hostID = $dbh->last_insert_id(undef,undef,'hosts',undef);
	}

	print "host ".$hostname.": ";

	# get backups for a host
	my @backups = $bpc->BackupInfoRead($hostname);
	my $incs = scalar @backups;
	print "$incs increments\n";

	my $inc_nr = 0;
	# the cache of already seen files is kept per host
	$beenThere = {};

	foreach my $backup (@backups) {

		$inc_nr++;
		# -m limits the number of increments imported for each host
		last if ($opt{m} && $inc_nr > $opt{m});

		my $backupNum = $backup->{'num'};

		# "|| '00:00'" forces scalar context, so the argument list stays
		# aligned with the format even if fmt_time() returns nothing for
		# a zero duration
		printf("%-10s %2d/%-2d #%-2d %s %5s/%5s files (date: %s dur: %s)\n",
			$hostname,
			$inc_nr, $incs, $backupNum,
			$backup->{type} || '?',
			$backup->{nFilesNew} || '?', $backup->{nFiles} || '?',
			strftime($t_fmt,localtime($backup->{startTime})),
			fmt_time($backup->{endTime} - $backup->{startTime}) || '00:00'
		);

		my $files = BackupPC::View->new($bpc, $hostname, \@backups, 1);
		foreach my $share ($files->shareList($backupNum)) {

			my $t = time();

			$shareID = getShareID($share, $hostID, $hostname);

			$sth->{backups_count}->execute($hostID, $backupNum, $shareID);
			my ($count) = $sth->{backups_count}->fetchrow_array();
			# skip if already in database!
			next if ($count > 0);

			# dump some log
			print curr_time," ", $share;

			my ($f, $nf, $d, $nd, $size) = recurseDir($bpc, $hostname, $files, $backupNum, $share, "", $shareID);

			$sth->{insert_backups}->execute(
				$hostID,
				$backupNum,
				$backup->{'endTime'},
				$backup->{'type'},
				$shareID,
				$size,
			);

			print " commit";
			$dbh->commit();

			my $dur = (time() - $t) || 1;
			printf(" %d/%d files %d/%d dirs %0.2f MB [%.2f/s dur: %s]\n",
				$nf, $f, $nd, $d,
				($size / 1024 / 1024),
				( ($f+$d) / $dur ),
				fmt_time($dur)
			);

			# index the files of the share which was just imported
			hest_update($hostID, $shareID, $backupNum);
		}

	}
}
# release the statement handles before disconnecting
undef $sth;
$dbh->commit();
$dbh->disconnect();

print "total duration: ",fmt_time(time() - $start_t),"\n";

$pidfile->remove;
463
# Return the database ID of a share, inserting the share when it is not
# in the shares table yet.
#
# Arguments:
#   $share    - share name
#   $hostID   - database ID of the host the share belongs to
#   $hostname - host name, used to build the stored "hostname/share" value
#
# The sub is declared without a prototype: the former empty prototype "()"
# stated that it takes no arguments although it is always called with three.
sub getShareID {

	my ($share, $hostID, $hostname) = @_;

	$sth->{share_id} ||= $dbh->prepare(qq{
		SELECT ID FROM shares WHERE hostID=? AND name=?
	});

	$sth->{share_id}->execute($hostID,$share);

	my ($id) = $sth->{share_id}->fetchrow_array();

	return $id if (defined($id));

	$sth->{insert_share} ||= $dbh->prepare(qq{
		INSERT INTO shares
			(hostID,name,share,localpath)
		VALUES (?,?,?,?)
	});

	# "hostname/share" with runs of slashes collapsed into a single one
	my $drop_down = $hostname . '/' . $share;
	$drop_down =~ s#//+#/#g;

	$sth->{insert_share}->execute($hostID,$share, $drop_down ,undef);
	return $dbh->last_insert_id(undef,undef,'shares',undef);
}
490
# Check whether a file is already stored in the files table and insert it
# when it is not.
#
# Arguments: $key (cache key for $beenThere) followed by the bind values
# for the insert_files statement, in order:
#   shareID, backupNum, name, path, date, type, size
#
# Returns the cached value when $key has been seen before; otherwise 1 if
# a row with the same shareID, path, date and size exists, or 0 after the
# file has been inserted.
sub found_in_db {

	my ($key, @data) = @_;

	my ($shareID, undef, undef, $path, $date, undef, $size) = @data;

	return $beenThere->{$key} if (defined($beenThere->{$key}));

	$sth->{file_in_db} ||= $dbh->prepare(qq{
		SELECT 1 FROM files
		WHERE shareID = ? and
			path = ? and
			date = ? and
			size = ?
		LIMIT 1
	});

	my @param = ($shareID,$path,$date,$size);
	$sth->{file_in_db}->execute(@param);

	# Fetch the row instead of relying on rows(), which is not guaranteed
	# to be meaningful for a SELECT before its rows have been fetched.
	my ($found) = $sth->{file_in_db}->fetchrow_array();
	$sth->{file_in_db}->finish();
	my $rows = $found ? 1 : 0;

	print STDERR "## found_in_db($shareID,$path,$date,$size) ",( $rows ? '+' : '-' ), join(" ",@param), "\n" if ($debug >= 3);

	$beenThere->{$key}++;

	$sth->{'insert_files'}->execute(@data) unless ($rows);
	return $rows;
}
519
####################################################
# recursing through filesystem structure and       #
# and returning flattened files list               #
####################################################
#
# Arguments:
#   $bpc, $hostname - passed through to the recursive calls
#   $files          - object providing dirAttrib($backupNum, $share, $dir)
#   $backupNum      - number of the backup being imported
#   $share          - share name
#   $dir            - directory inside the share ("" for its top level)
#   $shareID        - database ID of the share
#
# Every entry of $dir is handed to found_in_db(), which inserts entries
# not yet in the database; sub-directories are then processed recursively.
#
# Returns a list: (files seen, new files, directories seen,
# new directories, total size of the new entries).
#
# Declared without a prototype: the former "($$$$$$$$)" announced eight
# arguments although every call passes seven.
sub recurseDir {

	my ($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID) = @_;

	print STDERR "\nrecurse($hostname,$backupNum,$share,$dir,$shareID)\n" if ($debug >= 1);

	my ($nr_files, $new_files, $nr_dirs, $new_dirs, $size) = (0,0,0,0,0);

	# sub-directories found in $dir, visited after all of its entries
	my @stack;

	print STDERR "# dirAttrib($backupNum, $share, $dir)\n" if ($debug >= 2);
	my $filesInBackup = $files->dirAttrib($backupNum, $share, $dir);

	# first, add all the entries in current directory
	foreach my $path_key (keys %{$filesInBackup}) {
		my $attr = $filesInBackup->{$path_key};
		print STDERR "# file ",Dumper($attr),"\n" if ($debug >= 3);

		my $is_dir = ($attr->{'type'} == BPC_FTYPE_DIR);

		# bind values for the insert_files statement
		my @data = (
			$shareID,
			$backupNum,
			$path_key,
			$attr->{'relPath'},
			$attr->{'mtime'},
			$attr->{'type'},
			$attr->{'size'}
		);

		# cache key for entries already handled for this host
		my $key = join(" ", (
			$shareID,
			$dir,
			$path_key,
			$attr->{'mtime'},
			$attr->{'size'}
		));

		# counted as new only when neither cached nor found in the database
		if (! defined($beenThere->{$key}) && ! found_in_db($key, @data)) {
			print STDERR "# key: $key [", $beenThere->{$key},"]" if ($debug >= 2);

			if ($is_dir) {
				$new_dirs++;
				print STDERR " dir\n" if ($debug >= 2);
			} else {
				$new_files++;
				print STDERR " file\n" if ($debug >= 2);
			}
			$size += $attr->{'size'} || 0;
		}

		if ($is_dir) {
			$nr_dirs++;

			my $full_path = $dir . '/' . $path_key;
			push @stack, $full_path;
			print STDERR "### store to stack: $full_path\n" if ($debug >= 3);
		} else {
			$nr_files++;
		}
	}

	print STDERR "## STACK ",join(", ", @stack),"\n" if ($debug >= 2);

	# then descend into the sub-directories and add up their counters
	while ( my $subdir = shift @stack ) {
		my ($f,$nf,$d,$nd, $s) = recurseDir($bpc, $hostname, $files, $backupNum, $share, $subdir, $shareID);
		print STDERR "# $subdir f: $f nf: $nf d: $d nd: $nd\n" if ($debug >= 1);
		$nr_files += $f;
		$new_files += $nf;
		$nr_dirs += $d;
		$new_dirs += $nd;
		$size += $s;
	}

	return ($nr_files, $new_files, $nr_dirs, $new_dirs, $size);
}
607

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26