/[BackupPC]/trunk/bin/BackupPC_updatedb
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/bin/BackupPC_updatedb

Parent Directory | Revision Log


Revision 89 - (show annotations)
Sun Aug 28 17:04:12 2005 UTC (18 years, 8 months ago) by dpavlin
File size: 13476 byte(s)
automatic update of HyperEstraier index after each increment,
added -i flag to force re-scan of all files and insert missing
ones into index

1 #!/usr/local/bin/perl -w
2
3 use strict;
4 use lib "__INSTALLDIR__/lib";
5
6 use DBI;
7 use BackupPC::Lib;
8 use BackupPC::View;
9 use Data::Dumper;
10 use Getopt::Std;
11 use Time::HiRes qw/time/;
12 use File::Pid;
13 use POSIX qw/strftime/;
14
15 use constant BPC_FTYPE_DIR => 5;
16
# verbosity level; raised with -v
my $debug = 0;
$| = 1;

# wall-clock start, used for the "total duration" report at the end
my $start_t = time();

# Make sure only one instance runs at a time.
my $pidfile = File::Pid->new;

if (my $pid = $pidfile->running) {
    die "$0 already running: $pid\n";
}
elsif ($pidfile->pid ne $$) {
    # pid file left behind by another process: replace it with our own
    $pidfile->remove;
    $pidfile = File::Pid->new;
}
$pidfile->write;
print STDERR "$0 using pid ", $pidfile->pid, " file ", $pidfile->file, "\n";

# strftime format used for all timestamps in the log output
my $t_fmt = '%Y-%m-%d %H:%M:%S';

my $hosts;
my $bpc = BackupPC::Lib->new || die;
my %Conf = $bpc->Conf();
my $TopDir = $bpc->TopDir();

# cache of file keys already seen, reset for every host
my $beenThere = {};

my $dsn = $Conf{SearchDSN} || die "Need SearchDSN in config.pl\n";
my $user = $Conf{SearchUser} || '';

# the index location is configured relative to TopDir
my $index_path = $Conf{HyperEstraierIndex};
$index_path = $TopDir . '/' . $index_path;
$index_path =~ s#//#/#g;


my $dbh = DBI->connect($dsn, $user, "", { RaiseError => 1, AutoCommit => 0 });

my %opt;

unless (getopts("cdm:v:i", \%opt)) {
    print STDERR <<EOF;
usage: $0 [-c|-d] [-m num] [-v|-v level] [-i]

Options:
-c create database on first use
-d delete database before import
-m num import just num increments for one host
-v num set verbosity (debug) level (default $debug)
-i update HyperEstraier full text index
EOF
    exit 1;
}

if ($opt{v}) {
    print "Debug level at $opt{v}\n";
    $debug = $opt{v};
}
70
71 #---- subs ----
72
# Format a duration given in seconds as "MM:SS", prefixed with "<hours>h"
# once the duration reaches one hour (3725 -> "1h02:05").
#
# Always returns exactly one string, so it can be used directly inside a
# printf argument list: an undefined, zero or negative duration gives "00:00"
# instead of an empty list.  Hours are computed arithmetically rather than
# through gmtime(), so they do not wrap around after 24 hours.
sub fmt_time {
    my $t = shift;
    $t = 0 unless (defined($t) && $t > 0);

    # fractional seconds (Time::HiRes) are truncated
    $t = int($t);

    my $hh = int($t / 3600);
    my $mm = int(($t % 3600) / 60);
    my $ss = $t % 60;

    my $out = "";
    $out .= "${hh}h" if ($hh);
    $out .= sprintf("%02d:%02d", $mm, $ss);
    return $out;
}
81
# Current local time as a string, formatted with the file-level $t_fmt.
sub curr_time {
    my @now = localtime();
    return strftime($t_fmt, @now);
}
85
my $hest_db;

# Add rows from the files table to the HyperEstraier full-text index.
#
# Called as hest_update($host_id, $share_id, $num) it indexes only the files
# of that host, share and backup number; called without (complete) arguments
# it scans every file in the database.  A file whose URI
# (file:///<files.id>) is already present in the index is skipped, so the
# function can be re-run safely.
#
# Uses the file-level $dbh, $index_path and $debug, prints progress to STDOUT
# and returns nothing.
sub hest_update {

    my ($host_id, $share_id, $num) = @_;

    print curr_time," updating HyperEstraier: files";

    my $t = time();

    # Restrict the query only when all three values were supplied.  The test
    # uses defined() rather than truth: a value of 0 (for example a backup
    # numbered 0) used to drop the WHERE clause while the three bind values
    # were still handed to execute(), which DBI rejects.
    my $where = '';
    my @bind;
    if (defined($host_id) && defined($share_id) && defined($num)) {
        $where = qq{
            WHERE
                hosts.id = ? AND
                shares.id = ? AND
                files.backupnum = ?
        };
        @bind = ($host_id, $share_id, $num);
    }

    my $sth = $dbh->prepare(qq{
        SELECT
            files.id AS fid,
            hosts.name AS hname,
            shares.name AS sname,
            -- shares.share AS sharename,
            files.backupnum AS backupnum,
            -- files.name AS filename,
            files.path AS filepath,
            files.date AS date,
            files.type AS filetype,
            files.size AS size,
            files.shareid AS shareid,
            backups.date AS backup_date
        FROM files
            INNER JOIN shares ON files.shareID=shares.ID
            INNER JOIN hosts ON hosts.ID = shares.hostID
            INNER JOIN backups ON backups.num = files.backupNum and backups.hostID = hosts.ID AND backups.shareID = shares.ID
        $where
    });

    $sth->execute(@bind);

    # NOTE(review): the row count of a SELECT right after execute() is
    # driver-dependent in DBI -- confirm the driver in use reports it.
    my $results = $sth->rows;

    if ($results == 0) {
        print " no files\n";
        return;
    }

    # print a countdown marker roughly 15 times over the whole run
    my $dot = int($results / 15) || 1;

    print " $results ($dot/#)";

    my $max = int($results / $dot);

    print " index $index_path...";
    use HyperEstraier;
    my $db = HyperEstraier::Database->new();

#   unless ($hest_db) {
#       print " open reader";
#       $hest_db = HyperEstraier::Database->new();
#
#   }


    $db->open($index_path, $HyperEstraier::Database::DBWRITER | $HyperEstraier::Database::DBCREAT);

    my $added = 0;

    while (my $row = $sth->fetchrow_hashref()) {

        my $fid = $row->{'fid'} || die "no fid?";
        my $uri = 'file:///' . $fid;

        # already indexed
        next if ($db->uri_to_id($uri));

        # create a document object
        my $doc = HyperEstraier::Document->new;

        # add attributes to the document object
        $doc->add_attr('@uri', $uri);

        # one attribute per selected column; only NULLs are left out, so
        # legitimate zero values (size 0, backup number 0) are indexed too
        foreach my $c (@{ $sth->{NAME} }) {
            $doc->add_attr($c, $row->{$c}) if (defined($row->{$c}));
        }

        #$doc->add_attr('@cdate', fmt_date($row->{'date'}));

        # add the body text to the document object
        my $path = $row->{'filepath'};
        $doc->add_text($path);
        # hidden copy of the path with a space after every character
        $path =~ s/(.)/$1 /g;
        $doc->add_hidden_text($path);

        print STDERR $doc->dump_draft,"\n" if ($debug > 1);

        # register the document object to the database
        $db->put_doc($doc, $HyperEstraier::Database::PDCLEAN);

        $added++;
        if ($added % $dot == 0) {
            print "$max ";
            $max--;
        }

    }

    print "sync $added new files";
    $db->sync();
    print " close";
    $db->close();

    # avoid division by zero for very short runs
    my $dur = (time() - $t) || 1;
    printf(" [%.2f/s new %.2f/s dur: %s]\n",
        ( $results / $dur ),
        ( $added / $dur ),
        fmt_time($dur)
    );

    return;
}

# Convert an epoch time to an ISO-like timestamp: the first whitespace
# character of BackupPC::Lib::timeStamp() output is replaced by "T".
# Only referenced from a commented-out line in hest_update().
sub fmt_date {
    my $t = shift || return;
    my $iso = BackupPC::Lib::timeStamp($t);
    $iso =~ s/\s/T/;
    return $iso;
}
213
214 #---- /subs ----
215
216
## update index ##
# Rebuild the HyperEstraier index from rows already in the database when -i
# is given or when the index does not exist yet.  This is skipped together
# with -c: the tables are only created further down, so on first use the
# query inside hest_update() would fail against a database without tables.
if (($opt{i} || ($index_path && ! -e $index_path)) && !$opt{c}) {
    # update all
    print "force update of HyperEstraier index ";
    print "importing existing data" unless (-e $index_path);
    print "by -i flag" if ($opt{i});
    print "\n";
    hest_update();
}
226
## create tables ##
if ($opt{c}) {
    # Create one index from a spec of the form "table_column[_unique]".
    # The spec doubles as the index name (commas replaced by underscores);
    # an optional third part is placed before the INDEX keyword.
    sub do_index {
        my $index = shift || return;
        my ($table,$col,$unique) = split(/_/, $index);
        $unique ||= '';
        $index =~ s/,/_/g;
        $dbh->do(qq{ create $unique index $index on $table($col) });
    }

    print "creating tables...\n";

    $dbh->do(qq{
        create table hosts (
            ID      SERIAL      PRIMARY KEY,
            name    VARCHAR(30) NOT NULL,
            IP      VARCHAR(15)
        );
    });

    $dbh->do(qq{
        create table shares (
            ID          SERIAL          PRIMARY KEY,
            hostID      INTEGER         NOT NULL references hosts(id),
            name        VARCHAR(30)     NOT NULL,
            share       VARCHAR(200)    NOT NULL,
            localpath   VARCHAR(200)
        );
    });

    # size is the sum of all file sizes in one share of a backup, which
    # overflows a 32-bit INTEGER beyond 2 GiB, hence BIGINT
    $dbh->do(qq{
        create table backups (
            hostID  INTEGER NOT NULL references hosts(id),
            num     INTEGER NOT NULL,
            date    integer NOT NULL,
            type    CHAR(4) not null,
            shareID integer not null references shares(id),
            size    bigint  not null,
            PRIMARY KEY(hostID, num, shareID)
        );
    });

    #do_index('backups_hostid,num_unique');

    $dbh->do(qq{
        create table dvds (
            ID      SERIAL          PRIMARY KEY,
            num     INTEGER         NOT NULL,
            name    VARCHAR(255)    NOT NULL,
            mjesto  VARCHAR(255)
        );
    });

    # size is BIGINT so that single files larger than 2 GiB can be stored
    $dbh->do(qq{
        create table files (
            ID          SERIAL          PRIMARY KEY,
            shareID     INTEGER         NOT NULL references shares(id),
            backupNum   INTEGER         NOT NULL,
            name        VARCHAR(255)    NOT NULL,
            path        VARCHAR(255)    NOT NULL,
            date        integer         NOT NULL,
            type        INTEGER         NOT NULL,
            size        BIGINT          NOT NULL,
            dvdid       INTEGER         references dvds(id)
        );
    });

    print "creating indexes:";

    foreach my $index (qw(
        hosts_name
        backups_hostID
        backups_num
        shares_hostID
        shares_name
        files_shareID
        files_path
        files_name
        files_date
        files_size
    )) {
        print " $index";
        do_index($index);
    }
    print "...\n";

    $dbh->commit;

}
316
## delete data before inserting ##
if ($opt{d}) {
    # referencing tables are listed before the tables they reference
    my @tables = qw(files dvds backups shares hosts);

    print "deleting ";
    for my $table (@tables) {
        print "$table ";
        $dbh->do(qq{ DELETE FROM $table });
    }
    print " done...\n";

    $dbh->commit;
}
328
## insert new values ##

# get hosts
$hosts = $bpc->HostInfoRead();
my $hostID;
my $shareID;

# prepared statements keyed by name; getShareID() and found_in_db() add
# their own entries to the same hash on first use
my $sth;

$sth->{insert_hosts} = $dbh->prepare(qq{
    INSERT INTO hosts (name, IP) VALUES (?,?)
});

$sth->{hosts_by_name} = $dbh->prepare(qq{
    SELECT ID FROM hosts WHERE name=?
});

$sth->{backups_count} = $dbh->prepare(qq{
    SELECT COUNT(*)
    FROM backups
    WHERE hostID=? AND num=? AND shareid=?
});

$sth->{insert_backups} = $dbh->prepare(qq{
    INSERT INTO backups (hostID, num, date, type, shareid, size)
    VALUES (?,?,?,?,?,?)
});

$sth->{insert_files} = $dbh->prepare(qq{
    INSERT INTO files
        (shareID, backupNum, name, path, date, type, size)
        VALUES (?,?,?,?,?,?,?)
});

foreach my $host_key (keys %{$hosts}) {

    my $hostname = $hosts->{$host_key}->{'host'} || die "can't find host for $host_key";

    # look the host up, inserting it on first sight
    $sth->{hosts_by_name}->execute($hosts->{$host_key}->{'host'});

    unless (($hostID) = $sth->{hosts_by_name}->fetchrow_array()) {
        $sth->{insert_hosts}->execute(
            $hosts->{$host_key}->{'host'},
            $hosts->{$host_key}->{'ip'}
        );

        $hostID = $dbh->last_insert_id(undef,undef,'hosts',undef);
    }

    print "host ".$hosts->{$host_key}->{'host'}.": ";

    # get backups for a host
    my @backups = $bpc->BackupInfoRead($hostname);
    my $incs = scalar @backups;
    print "$incs increments\n";

    my $inc_nr = 0;
    $beenThere = {};

    foreach my $backup (@backups) {

        $inc_nr++;
        # -m limits the number of increments imported per host
        last if ($opt{m} && $inc_nr > $opt{m});

        my $backupNum = $backup->{'num'};
        my @backupShares = ();

        # "|| '00:00'" forces scalar context and guarantees one argument:
        # a bare fmt_time() call may return an empty list for a zero
        # duration, which would leave printf one argument short
        printf("%-10s %2d/%-2d #%-2d %s %5s/%5s files (date: %s dur: %s)\n",
            $hosts->{$host_key}->{'host'},
            $inc_nr, $incs, $backupNum,
            $backup->{type} || '?',
            $backup->{nFilesNew} || '?', $backup->{nFiles} || '?',
            strftime($t_fmt,localtime($backup->{startTime})),
            fmt_time($backup->{endTime} - $backup->{startTime}) || '00:00'
        );

        my $files = BackupPC::View->new($bpc, $hostname, \@backups, 1);
        foreach my $share ($files->shareList($backupNum)) {

            my $t = time();

            $shareID = getShareID($share, $hostID, $hostname);

            $sth->{backups_count}->execute($hostID, $backupNum, $shareID);
            my ($count) = $sth->{backups_count}->fetchrow_array();
            # skip if already in database!
            next if ($count > 0);

            # dump some log
            print curr_time," ", $share;

            my ($f, $nf, $d, $nd, $size) = recurseDir($bpc, $hostname, $files, $backupNum, $share, "", $shareID);

            $sth->{insert_backups}->execute(
                $hostID,
                $backupNum,
                $backup->{'endTime'},
                $backup->{'type'},
                $shareID,
                $size,
            );

            print " commit";
            $dbh->commit();

            # avoid division by zero for very short runs
            my $dur = (time() - $t) || 1;
            printf(" %d/%d files %d/%d dirs %0.2f MB [%.2f/s dur: %s]\n",
                $nf, $f, $nd, $d,
                ($size / 1024 / 1024),
                ( ($f+$d) / $dur ),
                fmt_time($dur)
            );

            # index the files of the increment that was just committed
            hest_update($hostID, $shareID, $backupNum);
        }

    }
}
undef $sth;
$dbh->commit();
$dbh->disconnect();

print "total duration: ",fmt_time(time() - $start_t),"\n";

$pidfile->remove;
454
# Return the ID of the shares row for ($hostID, $share), inserting the row
# first when it does not exist yet.
#
#   $share    - share name, stored in shares.name
#   $hostID   - ID of the owning row in hosts
#   $hostname - host name, used to build the value of shares.share
#               ("hostname/share" with repeated slashes collapsed)
#
# Statement handles are prepared on first use and cached in the file-level
# $sth hash.  The sub takes three arguments, so it is declared without the
# former empty "()" prototype.
sub getShareID {

    my ($share, $hostID, $hostname) = @_;

    $sth->{share_id} ||= $dbh->prepare(qq{
        SELECT ID FROM shares WHERE hostID=? AND name=?
    });

    $sth->{share_id}->execute($hostID,$share);

    my ($id) = $sth->{share_id}->fetchrow_array();

    return $id if (defined($id));

    $sth->{insert_share} ||= $dbh->prepare(qq{
        INSERT INTO shares
            (hostID,name,share,localpath)
        VALUES (?,?,?,?)
    });

    my $drop_down = $hostname . '/' . $share;
    $drop_down =~ s#//+#/#g;

    $sth->{insert_share}->execute($hostID,$share, $drop_down ,undef);
    return $dbh->last_insert_id(undef,undef,'shares',undef);
}
481
# Check whether a file is already recorded in the files table and insert it
# when it is not.
#
# Arguments: ($key, @data) where $key is the caller's cache key and @data is
# the bind list for the insert_files statement:
#   (shareID, backupNum, name, path, date, type, size)
#
# A file counts as known when a row with the same shareID, path, date and
# size exists.  Returns a true value when the file was already known (from
# the $beenThere cache or from the database) and 0 when it was inserted now.
sub found_in_db {

    my ($key, @data) = @_;

    my ($shareID, undef, undef, $path, $date, undef, $size) = @data;

    return $beenThere->{$key} if (defined($beenThere->{$key}));

    $sth->{file_in_db} ||= $dbh->prepare(qq{
        SELECT 1 FROM files
        WHERE shareID = ? and
            path = ? and
            date = ? and
            size = ?
        LIMIT 1
    });

    my @param = ($shareID,$path,$date,$size);
    $sth->{file_in_db}->execute(@param);

    # Fetch the row instead of trusting ->rows: DBI does not guarantee a
    # row count for SELECT statements before the rows have been fetched.
    my ($hit) = $sth->{file_in_db}->fetchrow_array();
    $sth->{file_in_db}->finish;
    my $rows = $hit ? 1 : 0;

    print STDERR "## found_in_db($shareID,$path,$date,$size) ",( $rows ? '+' : '-' ), join(" ",@param), "\n" if ($debug >= 3);

    $beenThere->{$key}++;

    $sth->{'insert_files'}->execute(@data) unless ($rows);
    return $rows;
}
510
####################################################
# recursing through filesystem structure and       #
# and returning flattened files list               #
####################################################
# Walk one directory of a backup, record every entry through found_in_db()
# and descend into its sub-directories.
#
#   $bpc, $hostname - passed through unchanged to the recursive calls
#   $files          - object providing dirAttrib($backupNum, $share, $dir)
#   $backupNum      - backup number being imported
#   $share          - share name
#   $dir            - directory inside the share ("" for the top level)
#   $shareID        - ID of the share in the shares table
#
# Returns ($nr_files, $new_files, $nr_dirs, $new_dirs, $size): all files and
# directories seen, how many of them were newly inserted, and the summed
# size of the newly inserted entries.
#
# The sub takes seven arguments; the former eight-scalar prototype did not
# match and has been dropped.
sub recurseDir {

    my ($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID) = @_;

    print STDERR "\nrecurse($hostname,$backupNum,$share,$dir,$shareID)\n" if ($debug >= 1);

    my ($nr_files, $new_files, $nr_dirs, $new_dirs, $size) = (0,0,0,0,0);

    # sub-directories found at this level, visited after all entries of the
    # current directory have been handled
    my @stack;

    print STDERR "# dirAttrib($backupNum, $share, $dir)\n" if ($debug >= 2);
    my $filesInBackup = $files->dirAttrib($backupNum, $share, $dir);

    # first, add all the entries in current directory
    foreach my $path_key (keys %{$filesInBackup}) {
        my $entry = $filesInBackup->{$path_key};
        print STDERR "# file ",Dumper($entry),"\n" if ($debug >= 3);

        # bind values for the insert_files statement
        my @data = (
            $shareID,
            $backupNum,
            $path_key,
            $entry->{'relPath'},
            $entry->{'mtime'},
            $entry->{'type'},
            $entry->{'size'}
        );

        # cache key identifying this entry within the current host
        my $key = join(" ", (
            $shareID,
            $dir,
            $path_key,
            $entry->{'mtime'},
            $entry->{'size'}
        ));

        my $is_dir = ($entry->{'type'} == BPC_FTYPE_DIR);

        # entries neither cached nor present in the database are new;
        # found_in_db() inserts them as a side effect
        if (! defined($beenThere->{$key}) && ! found_in_db($key, @data)) {
            print STDERR "# key: $key [", $beenThere->{$key},"]" if ($debug >= 2);

            if ($is_dir) {
                $new_dirs++;
                print STDERR " dir\n" if ($debug >= 2);
            } else {
                $new_files++;
                print STDERR " file\n" if ($debug >= 2);
            }
            $size += $entry->{'size'} || 0;
        }

        if ($is_dir) {
            $nr_dirs++;

            my $full_path = $dir . '/' . $path_key;
            push @stack, $full_path;
            print STDERR "### store to stack: $full_path\n" if ($debug >= 3);
        } else {
            $nr_files++;
        }
    }

    print STDERR "## STACK ",join(", ", @stack),"\n" if ($debug >= 2);

    # then descend into the collected sub-directories and add up their totals
    while ( my $subdir = shift @stack ) {
        my ($f,$nf,$d,$nd, $s) = recurseDir($bpc, $hostname, $files, $backupNum, $share, $subdir, $shareID);
        print STDERR "# $subdir f: $f nf: $nf d: $d nd: $nd\n" if ($debug >= 1);
        $nr_files += $f;
        $new_files += $nf;
        $nr_dirs += $d;
        $new_dirs += $nd;
        $size += $s;
    }

    return ($nr_files, $new_files, $nr_dirs, $new_dirs, $size);
}
598

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26