/[BackupPC]/trunk/bin/BackupPC_updatedb
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/bin/BackupPC_updatedb

Parent Directory Parent Directory | Revision Log Revision Log


Revision 95 - (show annotations)
Tue Aug 30 09:55:34 2005 UTC (18 years, 8 months ago) by dpavlin
File size: 13541 byte(s)
don't try to create HyperEstraier index if creating database

1 #!/usr/local/bin/perl -w
2
3 use strict;
4 use lib "__INSTALLDIR__/lib";
5
6 use DBI;
7 use BackupPC::Lib;
8 use BackupPC::View;
9 use Data::Dumper;
10 use Getopt::Std;
11 use Time::HiRes qw/time/;
12 use File::Pid;
13 use POSIX qw/strftime/;
14
# Type code that recurseDir() compares against each entry's 'type'
# attribute to recognise directories.
use constant BPC_FTYPE_DIR => 5;

my $debug = 0;
$| = 1;    # unbuffered STDOUT so progress output shows up immediately

my $start_t = time();

# Single-instance guard: refuse to start while another copy is running and
# replace a pid file that belongs to a different (no longer running) pid.
my $pidfile = File::Pid->new;

if (my $pid = $pidfile->running) {
    die "$0 already running: $pid\n";
}
elsif ($pidfile->pid ne $$) {
    $pidfile->remove;
    $pidfile = File::Pid->new;
}
$pidfile->write;
print STDERR "$0 using pid ",$pidfile->pid," file ",$pidfile->file,"\n";

# strftime() pattern shared by curr_time() and the per-backup log lines
my $t_fmt = '%Y-%m-%d %H:%M:%S';

my $hosts;
my $bpc = BackupPC::Lib->new || die;
my %Conf = $bpc->Conf();
my $TopDir = $bpc->TopDir();
my $beenThere = {};    # per-host cache of file keys already handled (see found_in_db)

my $dsn = $Conf{SearchDSN} || die "Need SearchDSN in config.pl\n";
my $user = $Conf{SearchUser} || '';

# The index location is configured relative to BackupPC's top directory.
my $index_path = $Conf{HyperEstraierIndex};
$index_path = $TopDir . '/' . $index_path;
# Collapse every run of slashes; the former s#//#/#g turned "a///b" into
# "a//b" because it only replaced non-overlapping pairs.
$index_path =~ s#/{2,}#/#g;


# AutoCommit is off: every change below is made permanent by an explicit commit.
my $dbh = DBI->connect($dsn, $user, "", { RaiseError => 1, AutoCommit => 0 });

my %opt;

if ( !getopts("cdm:v:i", \%opt ) ) {
    print STDERR <<EOF;
usage: $0 [-c|-d] [-m num] [-v|-v level] [-i]

Options:
-c create database on first use
-d delete database before import
-m num import just num increments for one host
-v num set verbosity (debug) level (default $debug)
-i update HyperEstraier full text index
EOF
    # Nothing has been changed yet: release the connection and the pid
    # file instead of leaving them behind on a usage error.
    $dbh->disconnect;
    $pidfile->remove;
    exit 1;
}

if ($opt{v}) {
    print "Debug level at $opt{v}\n";
    $debug = $opt{v};
}
70
71 #---- subs ----
72
# Format a duration in seconds as "MM:SS", prefixed with "<hours>h" when it
# lasts an hour or more (3661 -> "1h01:01").
#
# Hours are counted without an upper bound; the previous gmtime()-based
# version wrapped at 24 hours, so a 25 hour duration printed as "1h00:00".
# Fractional seconds are truncated and negative durations are shown as 00:00.
#
# Returns nothing (undef in scalar context) for a false argument
# (0, undef or the empty string).
sub fmt_time {
    my $t = shift || return;

    my $secs = int($t);
    $secs = 0 if ($secs < 0);

    my $hh = int($secs / 3600);
    my $mm = int(($secs % 3600) / 60);
    my $ss = $secs % 60;

    my $out = "";
    $out .= "${hh}h" if ($hh);
    $out .= sprintf("%02d:%02d", $mm, $ss);
    return $out;
}
81
# Current local time rendered with the script-wide $t_fmt pattern.
sub curr_time {
    my @now = localtime();
    return strftime($t_fmt, @now);
}
85
# Placeholder for a cached index handle; only referenced by the disabled
# code inside hest_update().
my $hest_db;

# Render an epoch time via BackupPC::Lib::timeStamp with the first
# whitespace character replaced by "T". Returns nothing for a false argument.
# Not called by the code visible in this file.
sub fmt_date {
    my $t = shift || return;
    my $iso = BackupPC::Lib::timeStamp($t);
    $iso =~ s/\s/T/;
    return $iso;
}

# Add rows of the files table to the HyperEstraier index at $index_path.
#
#   hest_update()                            - consider every file in the database
#   hest_update($host_id, $share_id, $num)   - only files of one host/share/backup
#
# Each file becomes a document with URI "file:///<files.id>"; documents whose
# URI is already known to the index are skipped. Progress and timing are
# printed to STDOUT. Returns nothing.
sub hest_update {

    my ($host_id, $share_id, $num) = @_;

    print curr_time," updating HyperEstraier: select files";

    my $t = time();

    # Build the filter and its bind values together so they can never get
    # out of step. $num is tested with defined(): 0 is a valid backup
    # number, and the former truthiness test dropped the WHERE clause for it
    # while execute() still received three bind values.
    my $where = '';
    my @bind;
    if ($host_id && $share_id && defined($num)) {
        $where = qq{
            WHERE
                hosts.id = ? AND
                shares.id = ? AND
                files.backupnum = ?
        };
        @bind = ($host_id, $share_id, $num);
    }

    my $sth = $dbh->prepare(qq{
        SELECT
            files.id            AS fid,
            hosts.name          AS hname,
            shares.name         AS sname,
            -- shares.share     AS sharename,
            files.backupnum     AS backupnum,
            -- files.name       AS filename,
            files.path          AS filepath,
            files.date          AS date,
            files.type          AS type,
            files.size          AS size,
            files.shareid       AS shareid,
            backups.date        AS backup_date
        FROM files
            INNER JOIN shares   ON files.shareID=shares.ID
            INNER JOIN hosts    ON hosts.ID = shares.hostID
            INNER JOIN backups  ON backups.num = files.backupNum and backups.hostID = hosts.ID AND backups.shareID = shares.ID
        $where
    });

    $sth->execute(@bind);
    # NOTE(review): the row count of a SELECT right after execute() is
    # driver dependent; this relies on the driver in use reporting it.
    my $results = $sth->rows;

    if ($results == 0) {
        print " - no files, skipping\n";
        return;
    }

    # one progress mark for roughly every 15th part of the result set
    my $dot = int($results / 15) || 1;

    print " $results ($dot/#)";

    # number of progress marks still to come; printed as a countdown
    my $max = int($results / $dot);

    print ", opening index $index_path...";
    use HyperEstraier;
    my $db = HyperEstraier::Database->new();

#   unless ($hest_db) {
#       print " open reader";
#       $hest_db = HyperEstraier::Database->new();
#
#   }

    $db->open($index_path, $HyperEstraier::Database::DBWRITER | $HyperEstraier::Database::DBCREAT);

    my $added = 0;

    while (my $row = $sth->fetchrow_hashref()) {

        my $fid = $row->{'fid'} || die "no fid?";
        my $uri = 'file:///' . $fid;

        # -1 means the URI is not in the index yet; everything else is skipped
        my $id = $db->uri_to_id($uri);
        next unless ($id == -1);

        # create a document object
        my $doc = HyperEstraier::Document->new;

        # add attributes to the document object
        $doc->add_attr('@uri', $uri);

        # every selected column with a true value becomes an attribute
        foreach my $c (@{ $sth->{NAME} }) {
            $doc->add_attr($c, $row->{$c}) if ($row->{$c});
        }

        #$doc->add_attr('@cdate', fmt_date($row->{'date'}));

        # add the body text to the document object: the path itself, plus a
        # hidden copy with a space inserted after every character
        my $path = $row->{'filepath'};
        $doc->add_text($path);
        $path =~ s/(.)/$1 /g;
        $doc->add_hidden_text($path);

        print STDERR $doc->dump_draft,"\n" if ($debug > 1);

        # register the document object to the database
        $db->put_doc($doc, $HyperEstraier::Database::PDCLEAN);

        $added++;
        if ($added % $dot == 0) {
            print "$max ";
            $max--;
        }

    }

    print "sync $added new files";
    $db->sync();
    print ", close";
    $db->close();

    # "|| 1" avoids a division by zero for a zero elapsed time
    my $dur = (time() - $t) || 1;
    printf(" [%.2f/s new %.2f/s dur: %s]\n",
        ( $results / $dur ),
        ( $added / $dur ),
        fmt_time($dur)
    );
}
214
215 #---- /subs ----
216
217
## rebuild the whole index on request (-i) or when it does not exist yet ##
## never together with -c, which creates the database itself              ##
if (!$opt{c}) {
    my $index_missing = ($index_path && ! -e $index_path);

    if ($opt{i} || $index_missing) {
        # update all
        my $notice = "force update of HyperEstraier index ";
        $notice .= "importing existing data" unless (-e $index_path);
        $notice .= "by -i flag" if ($opt{i});
        print $notice, "\n";
        hest_update();
    }
}
227
## create tables ##
if ($opt{c}) {

    # Create one index from a spec of the form "<table>_<columns>[_unique]".
    # The index is named after the spec (commas replaced by underscores) and
    # an optional third part is placed between CREATE and INDEX.
    # Returns nothing when called without a spec.
    sub do_index {
        my $index = shift || return;
        my ($table,$col,$unique) = split(/_/, $index);
        $unique ||= '';
        $index =~ s/,/_/g;
        $dbh->do(qq{ create $unique index $index on $table($col) });
    }

    print "creating tables...\n";

    $dbh->do(qq{
        create table hosts (
            ID      SERIAL          PRIMARY KEY,
            name    VARCHAR(30)     NOT NULL,
            IP      VARCHAR(15)
        );
    });

    $dbh->do(qq{
        create table shares (
            ID          SERIAL          PRIMARY KEY,
            hostID      INTEGER         NOT NULL references hosts(id),
            name        VARCHAR(30)     NOT NULL,
            share       VARCHAR(200)    NOT NULL,
            localpath   VARCHAR(200)
        );
    });

    # size holds the byte total of one share in one backup, which does not
    # fit a 32-bit INTEGER once it reaches 2 GiB, hence BIGINT.
    $dbh->do(qq{
        create table backups (
            hostID  INTEGER     NOT NULL references hosts(id),
            num     INTEGER     NOT NULL,
            date    integer     NOT NULL,
            type    CHAR(4)     not null,
            shareID integer     not null references shares(id),
            size    bigint      not null,
            PRIMARY KEY(hostID, num, shareID)
        );
    });

    #do_index('backups_hostid,num_unique');

    $dbh->do(qq{
        create table dvds (
            ID      SERIAL          PRIMARY KEY,
            num     INTEGER         NOT NULL,
            name    VARCHAR(255)    NOT NULL,
            mjesto  VARCHAR(255)
        );
    });

    # size is a single file's byte count; BIGINT for files of 2 GiB and more.
    $dbh->do(qq{
        create table files (
            ID          SERIAL          PRIMARY KEY,
            shareID     INTEGER         NOT NULL references shares(id),
            backupNum   INTEGER         NOT NULL,
            name        VARCHAR(255)    NOT NULL,
            path        VARCHAR(255)    NOT NULL,
            date        integer         NOT NULL,
            type        INTEGER         NOT NULL,
            size        BIGINT          NOT NULL,
            dvdid       INTEGER         references dvds(id)
        );
    });

    print "creating indexes:";

    foreach my $index (qw(
        hosts_name
        backups_hostID
        backups_num
        shares_hostID
        shares_name
        files_shareID
        files_path
        files_name
        files_date
        files_size
    )) {
        print " $index";
        do_index($index);
    }
    print "...\n";

    $dbh->commit;

}
317
## delete data before inserting ##
if ($opt{d}) {
    # Referencing tables come before the tables they reference.
    my @tables = qw(files dvds backups shares hosts);

    print "deleting ";
    for my $table (@tables) {
        print "$table ";
        $dbh->do(qq{ DELETE FROM $table });
    }
    print " done...\n";

    $dbh->commit;
}
329
## insert new values ##

# get hosts
$hosts = $bpc->HostInfoRead();
my $hostID;
my $shareID;

# Prepared statements keyed by name; getShareID() and found_in_db() add
# their own entries on first use.
my $sth;

for my $stmt (
    [ insert_hosts => qq{
        INSERT INTO hosts (name, IP) VALUES (?,?)
    } ],
    [ hosts_by_name => qq{
        SELECT ID FROM hosts WHERE name=?
    } ],
    [ backups_count => qq{
        SELECT COUNT(*)
        FROM backups
        WHERE hostID=? AND num=? AND shareid=?
    } ],
    [ insert_backups => qq{
        INSERT INTO backups (hostID, num, date, type, shareid, size)
        VALUES (?,?,?,?,?,?)
    } ],
    [ insert_files => qq{
        INSERT INTO files
            (shareID, backupNum, name, path, date, type, size)
        VALUES (?,?,?,?,?,?,?)
    } ],
) {
    my ($name, $sql) = @{$stmt};
    $sth->{$name} = $dbh->prepare($sql);
}

HOST: for my $host_key (keys %{$hosts}) {

    my $host = $hosts->{$host_key};
    my $hostname = $host->{'host'} || die "can't find host for $host_key";

    # look the host up, inserting it on first sight
    $sth->{hosts_by_name}->execute($hostname);
    my $known = (($hostID) = $sth->{hosts_by_name}->fetchrow_array());

    if (!$known) {
        $sth->{insert_hosts}->execute($hostname, $host->{'ip'});
        $hostID = $dbh->last_insert_id(undef,undef,'hosts',undef);
    }

    print "host ".$hostname.": ";

    # get backups for a host
    my @backups = $bpc->BackupInfoRead($hostname);
    my $incs = scalar @backups;
    print "$incs increments\n";

    my $inc_nr = 0;
    $beenThere = {};    # the seen-files cache is kept per host

    BACKUP: for my $backup (@backups) {

        $inc_nr++;
        # -m limits how many increments are imported per host
        last BACKUP if ($opt{m} && $inc_nr > $opt{m});

        my $backupNum = $backup->{'num'};
        my @backupShares = ();

        my $started  = strftime($t_fmt,localtime($backup->{startTime}));
        my $duration = fmt_time($backup->{endTime} - $backup->{startTime});

        printf("%-10s %2d/%-2d #%-2d %s %5s/%5s files (date: %s dur: %s)\n",
            $hostname,
            $inc_nr, $incs, $backupNum,
            $backup->{type} || '?',
            $backup->{nFilesNew} || '?', $backup->{nFiles} || '?',
            $started,
            $duration
        );

        my $files = BackupPC::View->new($bpc, $hostname, \@backups, 1);

        SHARE: for my $share ($files->shareList($backupNum)) {

            my $t = time();

            $shareID = getShareID($share, $hostID, $hostname);

            # skip if already in database!
            $sth->{backups_count}->execute($hostID, $backupNum, $shareID);
            my ($count) = $sth->{backups_count}->fetchrow_array();
            next SHARE if ($count > 0);

            # dump some log
            print curr_time," ", $share;

            my ($f, $nf, $d, $nd, $size) = recurseDir($bpc, $hostname, $files, $backupNum, $share, "", $shareID);

            $sth->{insert_backups}->execute(
                $hostID,
                $backupNum,
                $backup->{'endTime'},
                $backup->{'type'},
                $shareID,
                $size,
            );

            # files and the backup row of one share are committed together
            print " commit";
            $dbh->commit();

            my $dur = (time() - $t) || 1;
            printf(" %d/%d files %d/%d dirs %0.2f MB [%.2f/s dur: %s]\n",
                $nf, $f, $nd, $d,
                ($size / 1024 / 1024),
                ( ($f+$d) / $dur ),
                fmt_time($dur)
            );

            hest_update($hostID, $shareID, $backupNum);
        }

    }
}

# release the statement handles before closing the connection
undef $sth;
$dbh->commit();
$dbh->disconnect();

print "total duration: ",fmt_time(time() - $start_t),"\n";

$pidfile->remove;
455
# Return the shares.ID for share $share of host $hostID, inserting a new
# shares row when none exists yet.
#
#   $share    - share name as reported by the backup
#   $hostID   - hosts.ID of the owning host
#   $hostname - host name; used to build the "<hostname>/<share>" value
#               stored in the shares.share column
#
# Both statements are prepared on first use and cached in $sth.
#
# The sub used to be declared with an empty prototype "()", i.e. as taking
# no arguments, although it is always called with three; that only went
# unnoticed because the call precedes the definition in the file.
sub getShareID {

    my ($share, $hostID, $hostname) = @_;

    $sth->{share_id} ||= $dbh->prepare(qq{
        SELECT ID FROM shares WHERE hostID=? AND name=?
    });

    $sth->{share_id}->execute($hostID,$share);

    my ($id) = $sth->{share_id}->fetchrow_array();

    return $id if (defined($id));

    $sth->{insert_share} ||= $dbh->prepare(qq{
        INSERT INTO shares
            (hostID,name,share,localpath)
        VALUES (?,?,?,?)
    });

    # collapse repeated slashes, e.g. when the share name starts with "/"
    my $drop_down = $hostname . '/' . $share;
    $drop_down =~ s#//+#/#g;

    $sth->{insert_share}->execute($hostID,$share, $drop_down ,undef);
    return $dbh->last_insert_id(undef,undef,'shares',undef);
}
482
# Check whether a file is already stored in the files table and insert it
# when it is not.
#
#   found_in_db($key, @data)
#
#   $key  - cache key for $beenThere
#   @data - the seven bind values of the insert_files statement:
#           shareID, backupNum, name, path, date, type, size
#
# Returns the cached $beenThere value when $key was seen before; otherwise
# 1 when a row with the same shareID, path, date and size exists, or 0
# after inserting the file.
sub found_in_db {

    my @data = @_;
    shift @data;    # drop $key, leaving exactly the insert_files bind values

    my ($key, $shareID,undef,undef,$path,$date,undef,$size) = @_;

    return $beenThere->{$key} if (defined($beenThere->{$key}));

    $sth->{file_in_db} ||= $dbh->prepare(qq{
        SELECT 1 FROM files
        WHERE shareID = ? and
            path = ? and
            date = ? and
            size = ?
        LIMIT 1
    });

    my @param = ($shareID,$path,$date,$size);
    $sth->{file_in_db}->execute(@param);

    # Fetch the row instead of trusting $sth->rows: DBI does not guarantee a
    # row count for SELECT statements before the rows have been fetched.
    my ($hit) = $sth->{file_in_db}->fetchrow_array();
    $sth->{file_in_db}->finish;
    my $rows = $hit ? 1 : 0;

    print STDERR "## found_in_db($shareID,$path,$date,$size) ",( $rows ? '+' : '-' ), join(" ",@param), "\n" if ($debug >= 3);

    $beenThere->{$key}++;

    $sth->{'insert_files'}->execute(@data) unless ($rows);
    return $rows;
}
511
####################################################
# recursing through filesystem structure and       #
# and returning flattened files list               #
####################################################
#
#   recurseDir($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID)
#
#   $files - object whose dirAttrib($backupNum, $share, $dir) returns a hash
#            of entries for one directory (a BackupPC::View at the call site)
#   $dir   - directory to list; "" for the top of the share
#
# Every entry of $dir is handed to found_in_db(), which inserts the ones
# not stored yet; sub-directories are then processed recursively.
#
# Returns ($nr_files, $new_files, $nr_dirs, $new_dirs, $size): all files and
# directories seen, how many of them were new, and the summed 'size' of the
# new entries.
#
# The sub used to carry the prototype ($$$$$$$$) (eight scalars) although it
# takes seven arguments; it was never enforced because every call is
# compiled before the definition is complete, so it has been removed.
sub recurseDir {

    my ($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID) = @_;

    print STDERR "\nrecurse($hostname,$backupNum,$share,$dir,$shareID)\n" if ($debug >= 1);

    my ($nr_files, $new_files, $nr_dirs, $new_dirs, $size) = (0,0,0,0,0);

    # sub-directories of $dir, visited once all entries of $dir are handled
    my @stack;

    print STDERR "# dirAttrib($backupNum, $share, $dir)\n" if ($debug >= 2);
    my $filesInBackup = $files->dirAttrib($backupNum, $share, $dir);

    # first, add all the entries in current directory
    foreach my $path_key (keys %{$filesInBackup}) {
        my $attr = $filesInBackup->{$path_key};
        print STDERR "# file ",Dumper($attr),"\n" if ($debug >= 3);

        my $is_dir = ($attr->{'type'} == BPC_FTYPE_DIR);

        # bind values for insert_files, in column order
        my @data = (
            $shareID,
            $backupNum,
            $path_key,
            $attr->{'relPath'},
            $attr->{'mtime'},
            $attr->{'type'},
            $attr->{'size'}
        );

        my $key = join(" ", (
            $shareID,
            $dir,
            $path_key,
            $attr->{'mtime'},
            $attr->{'size'}
        ));

        # new means: not seen during this run and not present in the database
        # (found_in_db inserts the row in that case)
        if (! defined($beenThere->{$key}) && ! found_in_db($key, @data)) {
            print STDERR "# key: $key [", $beenThere->{$key},"]" if ($debug >= 2);

            if ($is_dir) {
                $new_dirs++;
                print STDERR " dir\n" if ($debug >= 2);
            } else {
                $new_files++;
                print STDERR " file\n" if ($debug >= 2);
            }
            $size += $attr->{'size'} || 0;
        }

        if ($is_dir) {
            $nr_dirs++;

            my $full_path = $dir . '/' . $path_key;
            push @stack, $full_path;
            print STDERR "### store to stack: $full_path\n" if ($debug >= 3);
        } else {
            $nr_files++;
        }
    }

    print STDERR "## STACK ",join(", ", @stack),"\n" if ($debug >= 2);

    # then descend into the sub-directories and add up their totals
    while ( my $subdir = shift @stack ) {
        my ($f,$nf,$d,$nd, $s) = recurseDir($bpc, $hostname, $files, $backupNum, $share, $subdir, $shareID);
        print STDERR "# $subdir f: $f nf: $nf d: $d nd: $nd\n" if ($debug >= 1);
        $nr_files += $f;
        $new_files += $nf;
        $nr_dirs += $d;
        $new_dirs += $nd;
        $size += $s;
    }

    return ($nr_files, $new_files, $nr_dirs, $new_dirs, $size);
}
599

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26