/[BackupPC]/trunk/bin/BackupPC_updatedb
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/bin/BackupPC_updatedb

Parent Directory | Revision Log


Revision 152 - (show annotations)
Mon Oct 10 11:43:08 2005 UTC (18 years, 7 months ago) by dpavlin
File size: 15270 byte(s)
cleanup database schema

1 #!/usr/local/bin/perl -w
2
3 use strict;
4 use lib "__INSTALLDIR__/lib";
5
6 use DBI;
7 use BackupPC::Lib;
8 use BackupPC::View;
9 use Data::Dumper;
10 use Getopt::Std;
11 use Time::HiRes qw/time/;
12 use File::Pid;
13 use POSIX qw/strftime/;
14 use BackupPC::SearchLib;
15
16 use constant BPC_FTYPE_DIR => 5;
17 use constant EST_CHUNK => 100000;
18
# Verbosity level; overridden by the -v option once options are parsed.
my $debug = 0;
# Autoflush the selected output handle so progress shows up immediately.
$| = 1;

# Start of the run, used for the "total duration" line printed at exit.
my $start_t = time();

# Single-instance guard.  Direct method-call syntax is used for the
# constructor: the indirect-object form ("new Class") is parsed
# heuristically and is discouraged.
my $pidfile = File::Pid->new;

if (my $pid = $pidfile->running) {
    die "$0 already running: $pid\n";
}
elsif ($pidfile->pid ne $$) {
    # The pid file names some other process that is not running any more
    # (presumably a stale file from an earlier run): replace it.
    $pidfile->remove;
    $pidfile = File::Pid->new;
}
$pidfile->write;
print STDERR "$0 using pid ", $pidfile->pid, " file ", $pidfile->file, "\n";
34
# strftime() pattern shared by every timestamp this script prints
my $t_fmt = '%Y-%m-%d %H:%M:%S';

my $hosts;

# keys of files already handled for the current host (reset per host)
my $beenThere = {};

# BackupPC handle plus the two settings taken from it
my $bpc = BackupPC::Lib->new;
die unless $bpc;
my %Conf   = $bpc->Conf();
my $TopDir = $bpc->TopDir();

# database coordinates: the DSN is mandatory, the user name is optional
my $dsn = $Conf{SearchDSN};
die "Need SearchDSN in config.pl\n" unless $dsn;
my $user = $Conf{SearchUser} ? $Conf{SearchUser} : '';

# full-text index: either a path below $TopDir or a node URL
my $use_hest = $Conf{HyperEstraierIndex};
my ($index_path, $index_node_url) = BackupPC::SearchLib::getHyperEstraier_url($use_hest);

# errors raise exceptions; changes stay pending until an explicit commit
my $dbh = DBI->connect($dsn, $user, "", { RaiseError => 1, AutoCommit => 0 });
50
# Command-line options as parsed by getopts().
my %opt;

# -c, -d, -i and -j are plain flags; -m and -v each require a value, so
# the synopsis below lists -v only in its "-v level" form.
if ( !getopts("cdm:v:ij", \%opt ) ) {
    print STDERR <<EOF;
usage: $0 [-c|-d] [-m num] [-v level] [-i|-j]

Options:
	-c	create database on first use
	-d	delete database before import
	-m num	import just num increments for one host
	-v num	set verbosity (debug) level (default $debug)
	-i	update Hyper Estraier full text index
	-j	update full text, don't check existing files

Option -j is variation on -i. It will allow faster initial creation
of full-text index from existing database.

EOF
    exit 1;
}

# A true -v value replaces the default debug level.
if ($opt{v}) {
    print "Debug level at $opt{v}\n";
    $debug = $opt{v};
}
76
77 #---- subs ----
78
# Format a duration in seconds as "MM:SS", prefixed with "<hours>h" when
# it lasts an hour or more (3725 -> "1h02:05").
#
# Hours are computed from the total number of seconds instead of from a
# broken-down calendar time, so a duration of a day or more keeps its full
# hour count rather than wrapping around at 24.  Fractions of a second are
# dropped.  A zero duration yields "00:00", which keeps argument lists
# intact when the result is passed straight to printf(); a negative value
# is treated as zero.
#
# Returns the formatted string, or nothing (empty list / undef) when the
# argument is undefined or an empty string.
sub fmt_time {
    my $t = shift;
    return unless defined $t && length $t;

    my $secs = int($t);
    $secs = 0 if $secs < 0;

    my $hours = int($secs / 3600);
    my $mins  = int(($secs % 3600) / 60);

    my $out = $hours ? "${hours}h" : "";
    $out .= sprintf("%02d:%02d", $mins, $secs % 60);
    return $out;
}
87
# Current local time, rendered with the script-wide $t_fmt pattern.
sub curr_time {
    my @now = localtime();
    return strftime($t_fmt, @now);
}
91
# Handles to the full-text index, kept at file scope so that both
# hest_update() and the signal handler below can reach them.
my $hest_db;
my $hest_node;

# Signal handler: when an index database handle is set, announce the
# signal, sync and close the database; in every case exit with status 0.
sub signal {
    my $sig = $_[0];

    exit(0) unless $hest_db;

    print "\nCaught a SIG$sig--syncing database and shutting down\n";
    $hest_db->sync();
    $hest_db->close();
    exit(0);
}

# the same handler serves both INT and QUIT
$SIG{$_} = \&signal for qw(INT QUIT);
107
# Feed file records from the SQL database into the HyperEstraier index.
#
# Arguments (all optional): $host_id, $share_id, $num.  When all three are
# true, only files of that host, share and backup number are selected;
# otherwise every row of the files table is walked.
#
# Rows are read in chunks of EST_CHUNK, ordered by files.id so that the
# LIMIT/OFFSET windows of successive chunks neither overlap nor leave gaps.
# Unless -j was given, a file is indexed only when uri_to_id() answers -1
# for its URI (presumably "not in the index yet").
#
# Progress is printed to STDOUT; the return value carries no meaning.
sub hest_update {

    my ($host_id, $share_id, $num) = @_;

    my $skip_check = $opt{j};
    print STDERR "Skipping check for existing files -- this should be used only with initital import\n"
        if $skip_check;

    unless ($use_hest) {
        print STDERR "HyperEstraier support not enabled in configuration\n";
        return;
    }

    print curr_time(), " updating HyperEstraier:";

    my $t = time();

    my $offset = 0;
    my $added = 0;

    print " opening index $use_hest";
    if ($index_path) {
        $hest_db = HyperEstraier::Database->new();
        $hest_db->open($TopDir . $index_path, $HyperEstraier::Database::DBWRITER | $HyperEstraier::Database::DBCREAT);
        print " directly";
    } elsif ($index_node_url) {
        $hest_node ||= HyperEstraier::Node->new($index_node_url);
        # NOTE(review): credentials are hard-coded here -- confirm this is intended
        $hest_node->set_auth('admin', 'admin');
        print " via node URL";
    } else {
        die "don't know how to use HyperEstraier Index $use_hest";
    }
    print " increment is " . EST_CHUNK . " files:";

    # The row filter is identical for every chunk, so build it once.
    my $where = '';
    my @data;
    if ($host_id && $share_id && $num) {
        $where = qq{
        WHERE
            hosts.id = ? AND
            shares.id = ? AND
            files.backupnum = ?
        };
        @data = ( $host_id, $share_id, $num );
    }

    # A genuine loop block: "last" inside do {} while would not leave the
    # do-block but whatever loop happens to enclose the caller (or die when
    # there is none).
    CHUNK: while (1) {

        my $limit = sprintf('LIMIT '.EST_CHUNK.' OFFSET %d', $offset);

        my $sth = $dbh->prepare(qq{
            SELECT
                files.id                        AS fid,
                hosts.name                      AS hname,
                shares.name                     AS sname,
                -- shares.share                 AS sharename,
                files.backupnum                 AS backupnum,
                -- files.name                   AS filename,
                files.path                      AS filepath,
                files.date                      AS date,
                files.type                      AS type,
                files.size                      AS size,
                files.shareid                   AS shareid,
                backups.date                    AS backup_date
            FROM files
                INNER JOIN shares  ON files.shareID=shares.ID
                INNER JOIN hosts   ON hosts.ID = shares.hostID
                INNER JOIN backups ON backups.num = files.backupNum and backups.hostID = hosts.ID AND backups.shareID = shares.ID
            $where
            ORDER BY files.id
            $limit
        });

        $sth->execute(@data);

        # Count rows as they are fetched; DBI does not promise a usable
        # row count for a SELECT before its rows have been read.
        my $results = 0;

        while (my $row = $sth->fetchrow_hashref()) {

            $results++;

            my $fid = $row->{'fid'} || die "no fid?";
            my $uri = 'file:///' . $fid;

            unless ($skip_check) {
                my $id = ($hest_db || $hest_node)->uri_to_id($uri);
                next unless ($id == -1);
            }

            # create a document object
            my $doc = HyperEstraier::Document->new;

            # add attributes to the document object
            $doc->add_attr('@uri', $uri);

            # every selected column with a true value becomes an attribute
            foreach my $c (@{ $sth->{NAME} }) {
                $doc->add_attr($c, $row->{$c}) if ($row->{$c});
            }

            #$doc->add_attr('@cdate', fmt_date($row->{'date'}));

            # add the body text to the document object: the path as is,
            # and as hidden text with a space after every character
            my $path = $row->{'filepath'};
            $doc->add_text($path);
            $path =~ s/(.)/$1 /g;
            $doc->add_hidden_text($path);

            print STDERR $doc->dump_draft,"\n" if ($debug > 1);

            # register the document object to the database
            if ($hest_db) {
                $hest_db->put_doc($doc, $HyperEstraier::Database::PDCLEAN);
            } elsif ($hest_node) {
                $hest_node->put_doc($doc);
            } else {
                die "not supported";
            }
            $added++;
        }

        if ($results == 0) {
            print " - no new files\n";
            last CHUNK;
        }

        print " $added";
        $hest_db->sync() if ($index_path);

        # a short chunk means the result set is exhausted
        last CHUNK unless ($results == EST_CHUNK);

        $offset += EST_CHUNK;
    }

    if ($index_path) {
        print ", close";
        $hest_db->close();
        # forget the closed handle so the signal handler does not sync it
        undef $hest_db;
    }

    my $dur = (time() - $t) || 1;
    printf(" [%.2f/s dur: %s]\n",
        ( $added / $dur ),
        fmt_time($dur)
    );
}

# Turn a time value into the string produced by BackupPC::Lib::timeStamp()
# with its first whitespace character replaced by "T".  Returns nothing for
# a false argument.  Only referenced from a commented-out line in
# hest_update().
# NOTE(review): timeStamp() is called here as a plain function with the
# time as its first argument -- confirm that matches its signature.
sub fmt_date {
    my $t = shift || return;
    my $iso = BackupPC::Lib::timeStamp($t);
    $iso =~ s/\s/T/;
    return $iso;
}
254
255 #---- /subs ----
256
257
## update index ##
{
    # hest_update() opens the index at $TopDir . $index_path, so look for
    # it at that same location.  In node-URL mode there is no path and the
    # index is never considered missing.
    my $index_missing = $index_path && ! -e "$TopDir$index_path";

    # A full re-index is forced by -i or -j, or when the on-disk index
    # does not exist yet -- but never together with -c.
    if (($opt{i} || $opt{j} || $index_missing) && !$opt{c}) {
        # update all
        print "force update of HyperEstraier index ";
        print "importing existing data" if ($index_missing);
        print "by -i flag" if ($opt{i});
        print "by -j flag" if ($opt{j});
        print "\n";
        hest_update();
    }
}
268
## create tables ##
if ($opt{c}) {

    # Create one index described as "table:column" or "table:column:unique".
    # The index name is the description with every run of non-word
    # characters turned into "_".  Does nothing for a false argument.
    sub do_index {
        my $spec = shift || return;
        my ($table, $col, $unique) = split(/:/, $spec);
        $unique ||= '';
        (my $index = $spec) =~ s/\W+/_/g;
        print "$index on $table($col)" . ( $unique ? "u" : "" ) . " ";
        $dbh->do(qq{ create $unique index $index on $table($col) });
    }

    print "creating tables...\n";

    # Table definitions, listed so that every table comes after the
    # tables it references.
    my @ddl = (
        qq{
            create table hosts (
                ID      SERIAL          PRIMARY KEY,
                name    VARCHAR(30)     NOT NULL,
                IP      VARCHAR(15)
            );
        },
        qq{
            create table shares (
                ID      SERIAL          PRIMARY KEY,
                hostID  INTEGER         NOT NULL references hosts(id),
                name    VARCHAR(30)     NOT NULL,
                share   VARCHAR(200)    NOT NULL
            );
        },
        qq{
            create table dvds (
                ID      SERIAL          PRIMARY KEY,
                num     INTEGER         NOT NULL,
                name    VARCHAR(255)    NOT NULL,
                mjesto  VARCHAR(255)
            );
        },
        qq{
            create table backups (
                id      serial,
                hostID  INTEGER         NOT NULL references hosts(id),
                num     INTEGER         NOT NULL,
                date    integer         NOT NULL,
                type    CHAR(4)         not null,
                shareID integer         not null references shares(id),
                size    bigint          not null,
                inc_size bigint         not null default -1,
                inc_deleted boolean     default false,
                PRIMARY KEY(id)
            );
        },
        qq{
            create table files (
                ID              SERIAL,
                shareID         INTEGER NOT NULL references shares(id),
                backupNum       INTEGER NOT NULL,
                name            VARCHAR(255) NOT NULL,
                path            VARCHAR(255) NOT NULL,
                date            integer NOT NULL,
                type            INTEGER NOT NULL,
                size            bigint  NOT NULL,
                primary key(id)
            );
        },
        qq{
            create table archive (
                id              serial,
                dvd_nr          int not null,
                total_size      bigint default -1,
                note            text,
                username        varchar(20) not null,
                date            timestamp default now(),
                primary key(id)
            );
        },
        qq{
            create table archive_backup
            (
                archive_id      int not null references archive(id),
                backup_id       int not null references backups(id),
                primary key(archive_id, backup_id)
            );
        },
    );
    $dbh->do($_) foreach @ddl;

    print "creating indexes:";

    do_index($_) foreach qw(
        hosts:name
        backups:hostID
        backups:num
        backups:shareID
        shares:hostID
        shares:name
        files:shareID
        files:path
        files:name
        files:date
        files:size
        archive:dvd_nr
    );
    print "...\n";

    # tables and indexes become visible to others in one transaction
    $dbh->commit;

}
383
## delete data before inserting ##
if ($opt{d}) {
    # Referencing tables are emptied before the tables they point at.
    # NOTE(review): archive and archive_backup are not emptied here, and
    # archive_backup references backups -- confirm that is intended.
    my @doomed = qw(files dvds backups shares hosts);

    print "deleting ";
    for my $name (@doomed) {
        print $name, " ";
        $dbh->do(qq{ DELETE FROM $name });
    }
    print " done...\n";

    $dbh->commit;
}
395
## insert new values ##

# get hosts
$hosts = $bpc->HostInfoRead();
my $hostID;
my $shareID;

# Prepared statements, keyed by purpose.  The subs further down add
# their own entries to this hash the first time they need them.
my $sth;

$sth->{insert_hosts} = $dbh->prepare(qq{
    INSERT INTO hosts (name, IP) VALUES (?,?)
});

$sth->{hosts_by_name} = $dbh->prepare(qq{
    SELECT ID FROM hosts WHERE name=?
});

$sth->{backups_count} = $dbh->prepare(qq{
    SELECT COUNT(*)
    FROM backups
    WHERE hostID=? AND num=? AND shareid=?
});

$sth->{insert_backups} = $dbh->prepare(qq{
    INSERT INTO backups (hostID, num, date, type, shareid, size)
    VALUES (?,?,?,?,?,?)
});

$sth->{insert_files} = $dbh->prepare(qq{
    INSERT INTO files
        (shareID, backupNum, name, path, date, type, size)
        VALUES (?,?,?,?,?,?,?)
});

for my $host_key (keys %{$hosts}) {

    my $host     = $hosts->{$host_key};
    my $hostname = $host->{'host'} || die "can't find host for $host_key";

    # reuse the existing hosts row, or create one on first sight
    $sth->{hosts_by_name}->execute($hostname);
    my @known = $sth->{hosts_by_name}->fetchrow_array();
    if (@known) {
        ($hostID) = @known;
    }
    else {
        $sth->{insert_hosts}->execute($hostname, $host->{'ip'});
        $hostID = $dbh->last_insert_id(undef,undef,'hosts',undef);
    }

    print "host $hostname: ";

    # get backups for a host
    my @backups = $bpc->BackupInfoRead($hostname);
    my $total   = scalar @backups;
    print "$total increments\n";

    # the per-host cache of handled files starts empty
    $beenThere = {};

    my $seen = 0;
    for my $backup (@backups) {

        $seen++;
        # -m limits how many increments are imported per host
        last if ($opt{m} && $seen > $opt{m});

        my $backupNum = $backup->{'num'};

        printf("%-10s %2d/%-2d #%-2d %s %5s/%5s files (date: %s dur: %s)\n",
            $hostname,
            $seen, $total, $backupNum,
            $backup->{type} || '?',
            $backup->{nFilesNew} || '?', $backup->{nFiles} || '?',
            strftime($t_fmt,localtime($backup->{startTime})),
            fmt_time($backup->{endTime} - $backup->{startTime})
        );

        my $files = BackupPC::View->new($bpc, $hostname, \@backups, 1);
        for my $share ($files->shareList($backupNum)) {

            my $share_start = time();

            $shareID = getShareID($share, $hostID, $hostname);

            # skip if already in database!
            $sth->{backups_count}->execute($hostID, $backupNum, $shareID);
            my ($count) = $sth->{backups_count}->fetchrow_array();
            next if ($count > 0);

            # dump some log
            print curr_time(), " ", $share;

            # walk the share: totals and new counts for files and dirs,
            # plus the summed size of what was new
            my ($f, $nf, $d, $nd, $size) = recurseDir($bpc, $hostname, $files, $backupNum, $share, "", $shareID);

            $sth->{insert_backups}->execute(
                $hostID,
                $backupNum,
                $backup->{'endTime'},
                substr($backup->{'type'},0,4),
                $shareID,
                $size,
            );

            # one transaction per share of a backup
            print " commit";
            $dbh->commit();

            my $elapsed = (time() - $share_start) || 1;
            printf(" %d/%d files %d/%d dirs %0.2f MB [%.2f/s dur: %s]\n",
                $nf, $f, $nd, $d,
                ($size / 1024 / 1024),
                ( ($f+$d) / $elapsed ),
                fmt_time($elapsed)
            );

            # index only when something new was stored
            hest_update($hostID, $shareID, $backupNum) if ($nf + $nd > 0);
        }

    }
}

# drop the statement handles before closing the connection
undef $sth;
$dbh->commit();
$dbh->disconnect();

print "total duration: ",fmt_time(time() - $start_t),"\n";

$pidfile->remove;
521
# Return the shares.ID for share $share of host $hostID, inserting a new
# row into "shares" when there is none yet.
#
#   $share    - share name, stored in shares.name
#   $hostID   - hosts.ID of the host the share belongs to
#   $hostname - host name; only used to build the shares.share value
#
# Both statements are prepared on first use and cached in $sth.
#
# Declared without a prototype on purpose: the sub takes three arguments,
# and an empty "()" prototype would make every such call a compile error
# as soon as the declaration is visible at the call site.
sub getShareID {

    my ($share, $hostID, $hostname) = @_;

    $sth->{share_id} ||= $dbh->prepare(qq{
        SELECT ID FROM shares WHERE hostID=? AND name=?
    });

    $sth->{share_id}->execute($hostID,$share);

    my ($id) = $sth->{share_id}->fetchrow_array();

    return $id if (defined($id));

    $sth->{insert_share} ||= $dbh->prepare(qq{
        INSERT INTO shares
            (hostID,name,share)
        VALUES (?,?,?)
    });

    # "host/share" with every run of slashes collapsed into a single one
    my $drop_down = $hostname . '/' . $share;
    $drop_down =~ s#//+#/#g;

    $sth->{insert_share}->execute($hostID,$share, $drop_down);
    return $dbh->last_insert_id(undef,undef,'shares',undef);
}
548
# Check whether a file is already stored, and store it when it is not.
#
# Arguments: $key followed by the seven values of one files row in the
# order (shareID, backupNum, name, path, date, type, size).
#
# A key already present in $beenThere is answered from that cache.
# Otherwise the files table is searched for a row with the same shareID,
# path, date and size; the key is recorded in $beenThere and, when no such
# row exists, the seven values are inserted through $sth->{insert_files}.
#
# Returns a true value when the file was already known (cached or in the
# database) and 0 when it has just been inserted.
sub found_in_db {

    my ($key, @data) = @_;

    my ($shareID, undef, undef, $path, $date, undef, $size) = @data;

    return $beenThere->{$key} if (defined($beenThere->{$key}));

    $sth->{file_in_db} ||= $dbh->prepare(qq{
        SELECT 1 FROM files
        WHERE shareID = ? and
            path = ? and
            date = ? and
            size = ?
        LIMIT 1
    });

    my @param = ($shareID,$path,$date,$size);
    $sth->{file_in_db}->execute(@param);

    # Fetch the row rather than asking the handle for a row count: DBI
    # does not guarantee a meaningful count for a SELECT before its rows
    # have been read.
    my ($hit) = $sth->{file_in_db}->fetchrow_array();
    $sth->{file_in_db}->finish;
    my $found = defined($hit) ? 1 : 0;

    print STDERR "## found_in_db($shareID,$path,$date,$size) ",( $found ? '+' : '-' ), join(" ",@param), "\n" if ($debug >= 3);

    $beenThere->{$key}++;

    $sth->{'insert_files'}->execute(@data) unless ($found);
    return $found;
}
577
####################################################
# Walk one directory of a backup and everything    #
# below it, storing files not seen before.         #
####################################################
#
# Arguments:
#   $bpc       - BackupPC::Lib handle (only passed on to nested calls)
#   $hostname  - host name (debug output and nested calls)
#   $files     - view object whose dirAttrib() lists a directory
#   $backupNum - backup number being imported
#   $share     - share name
#   $dir       - directory inside the share; "" for the top level
#   $shareID   - shares.ID stored with every file row
#
# Returns ($nr_files, $new_files, $nr_dirs, $new_dirs, $size): totals and
# newly stored counts for files and directories, and the summed size of
# the newly stored entries, for this directory and all below it.
#
# Declared without a prototype: the sub takes seven arguments, which an
# eight-scalar prototype would reject wherever the declaration is visible,
# and under -w a prototype also draws a "called too early to check
# prototype" warning for each call compiled before the definition is
# complete (the recursive one included).
sub recurseDir {

    my ($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID) = @_;

    print STDERR "\nrecurse($hostname,$backupNum,$share,$dir,$shareID)\n" if ($debug >= 1);

    my ($nr_files, $new_files, $nr_dirs, $new_dirs, $size) = (0,0,0,0,0);

    # sub-directories found at this level, descended into afterwards
    my @stack;

    print STDERR "# dirAttrib($backupNum, $share, $dir)\n" if ($debug >= 2);
    my $filesInBackup = $files->dirAttrib($backupNum, $share, $dir);

    # first, add all the entries in current directory
    foreach my $path_key (keys %{$filesInBackup}) {
        my $attr = $filesInBackup->{$path_key};
        print STDERR "# file ",Dumper($attr),"\n" if ($debug >= 3);

        # one files row, in the column order of the insert statement
        my @data = (
            $shareID,
            $backupNum,
            $path_key,
            $attr->{'relPath'},
            $attr->{'mtime'},
            $attr->{'type'},
            $attr->{'size'}
        );

        # identity of the entry within the current host
        my $key = join(" ", (
            $shareID,
            $dir,
            $path_key,
            $attr->{'mtime'},
            $attr->{'size'}
        ));

        my $is_dir = ($attr->{'type'} == BPC_FTYPE_DIR);

        # New means: not handled earlier in this run and not in the
        # database; found_in_db() stores such an entry as a side effect.
        if (! defined($beenThere->{$key}) && ! found_in_db($key, @data)) {
            print STDERR "# key: $key [", $beenThere->{$key},"]" if ($debug >= 2);

            if ($is_dir) {
                $new_dirs++;
                print STDERR " dir\n" if ($debug >= 2);
            } else {
                $new_files++;
                print STDERR " file\n" if ($debug >= 2);
            }
            $size += $attr->{'size'} || 0;
        }

        if ($is_dir) {
            $nr_dirs++;

            my $full_path = $dir . '/' . $path_key;
            push @stack, $full_path;
            print STDERR "### store to stack: $full_path\n" if ($debug >= 3);
        } else {
            $nr_files++;
        }
    }

    print STDERR "## STACK ",join(", ", @stack),"\n" if ($debug >= 2);

    # then descend into every sub-directory and add up its counters
    while ( my $subdir = shift @stack ) {
        my ($f,$nf,$d,$nd, $s) = recurseDir($bpc, $hostname, $files, $backupNum, $share, $subdir, $shareID);
        print STDERR "# $subdir f: $f nf: $nf d: $d nd: $nd\n" if ($debug >= 1);
        $nr_files += $f;
        $new_files += $nf;
        $nr_dirs += $d;
        $new_dirs += $nd;
        $size += $s;
    }

    return ($nr_files, $new_files, $nr_dirs, $new_dirs, $size);
}
665

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26