/[BackupPC]/trunk/bin/BackupPC_updatedb
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/bin/BackupPC_updatedb

Parent Directory Parent Directory | Revision Log Revision Log


Revision 82 - (hide annotations)
Sun Aug 28 09:12:54 2005 UTC (18 years, 9 months ago) by dpavlin
File size: 12528 byte(s)
added HyperEstraierIndex configuration option

1 dpavlin 14 #!/usr/local/bin/perl -w
2 dpavlin 6
3     use strict;
4 dpavlin 48 use lib "__INSTALLDIR__/lib";
5    
6 dpavlin 6 use DBI;
7     use BackupPC::Lib;
8     use BackupPC::View;
9     use Data::Dumper;
10     use Getopt::Std;
11 dpavlin 37 use Time::HiRes qw/time/;
12 dpavlin 38 use File::Pid;
13 dpavlin 37 use POSIX qw/strftime/;
14 dpavlin 48
15 dpavlin 6 use constant BPC_FTYPE_DIR => 5;
16    
# Debug verbosity; overridden later by the -v command-line option.
my $debug = 0;
# Turn on autoflush for the currently selected output handle.
$| = 1;

# Start of the run, used for the "total duration" line at the end.
my $start_t = time();

# Pid-file guard: refuse to start while another instance is running.
my $pidfile = File::Pid->new;

my $running_pid = $pidfile->running;
if ($running_pid) {
    die "$0 already running: $running_pid\n";
}
if ($pidfile->pid ne $$) {
    # The pid file records a pid that is neither running nor ours:
    # discard it and start over with a fresh object.
    $pidfile->remove;
    $pidfile = File::Pid->new;
}
$pidfile->write;
print STDERR "$0 using pid ", $pidfile->pid, " file ", $pidfile->file, "\n";

# strftime() format used for all timestamps in the log output.
my $t_fmt = '%Y-%m-%d %H:%M:%S';

# BackupPC handles and configuration.
my $hosts;
my $bpc = BackupPC::Lib->new() || die;
my %Conf = $bpc->Conf();
my $TopDir = $bpc->TopDir();
# Cache of file keys already looked up; reset per host by the import loop.
my $beenThere = {};

# Search database settings come from config.pl; only the DSN is mandatory.
my $dsn = $Conf{SearchDSN} || die "Need SearchDSN in config.pl\n";
my $user = $Conf{SearchUser} || '';
my $index_path = $Conf{HyperEstraierIndex};

# Errors raise exceptions; changes are committed explicitly.
my %connect_attr = ( RaiseError => 1, AutoCommit => 0 );
my $dbh = DBI->connect($dsn, $user, "", \%connect_attr);
46    
# Command-line options filled in by getopts():
#   -c, -d and -i are plain flags; -m and -v each require a value.
my %opt;

if ( !getopts("cdm:v:i", \%opt ) ) {
    # The usage line used to advertise a bare "-v", which the "v:" spec
    # above rejects; it now shows only the form that is actually accepted.
    print STDERR <<EOF;
usage: $0 [-c|-d] [-m num] [-v level] [-i]

Options:
    -c      create database on first use
    -d      delete database before import
    -m num  import just num increments for one host
    -v num  set verbosity (debug) level (default $debug)
    -i      update HyperEstraier full text index
EOF
    exit 1;
}

# A non-zero -v value replaces the default debug level.
if ($opt{v}) {
    print "Debug level at $opt{v}\n";
    $debug = $opt{v};
}
67    
## update index ##
# With -i, feed every row of the files table (joined with its share, host
# and backup) into the HyperEstraier full-text index, then exit without
# importing anything.
if ($opt{i}) {

    # Same policy as SearchDSN: refuse to run without the setting rather
    # than building the index directly in $TopDir.
    die "Need HyperEstraierIndex in config.pl\n" unless ($index_path);

    print "updating HyperEstraier files ";

    my $sth = $dbh->prepare(qq{
        SELECT
            files.id AS fid,
            hosts.name AS hname,
            shares.name AS sname,
            shares.share AS sharename,
            files.backupNum AS backupNum,
            files.name AS filename,
            files.path AS filepath,
            files.date AS date,
            files.type AS filetype,
            files.size AS size,
            files.shareid AS shareid
        FROM files
            INNER JOIN shares ON files.shareID=shares.ID
            INNER JOIN hosts ON hosts.ID = shares.hostID
            INNER JOIN backups ON backups.num = files.backupNum and backups.hostID = hosts.ID AND backups.shareID = shares.ID
    });

    $sth->execute();

    # NOTE(review): rows() right after executing a SELECT is driver
    # dependent; it is only used for progress output here.
    my $total = $sth->rows;

    # Progress marker step: about 15 markers per run. Clamp to at least 1,
    # otherwise fewer than 15 rows made the division and the modulus below
    # die with "Illegal division by zero".
    my $dot = int($total / 15) || 1;

    print $total, " files ($dot/#) ";

    # Format an epoch time as local ISO-8601 (YYYY-MM-DDTHH:MM:SS).
    # Returns nothing for a false/undefined time.
    # NOTE(review): this used to call BackupPC::Lib::timeStamp($t) as a
    # plain function; that routine appears to expect the object as its first
    # argument - confirm. strftime() gives the intended format directly.
    sub fmt_date {
        my $t = shift || return;
        return strftime('%Y-%m-%dT%H:%M:%S', localtime($t));
    }

    my $i = 0;
    # Countdown value printed at each progress marker.
    my $max = int($total / $dot);

    # The index lives below the BackupPC top directory.
    $index_path = $TopDir . '/' . $index_path;
    $index_path =~ s#//#/#g;

    print "index $index_path...";
    use HyperEstraier;
    my $db = HyperEstraier::Database->new();
    $db->open($index_path, $HyperEstraier::Database::DBWRITER | $HyperEstraier::Database::DBCREAT);

    while (my $row = $sth->fetchrow_hashref()) {

        # create a document object
        my $doc = HyperEstraier::Document->new;

        # add attributes to the document object
        $doc->add_attr('@uri', 'file:///' . $row->{'fid'});

        foreach my $c (qw/fid hname sname sharename backupNum filename filepath shareid/) {
            $doc->add_attr($c, $row->{$c}) if ($row->{$c});
        }

        # Take the date in scalar context so that a missing date cannot
        # drop the value argument from the add_attr() call.
        my $date = fmt_date($row->{'date'});
        $doc->add_attr('date', $date) if (defined($date));

        # add the body text to the document object
        my $path = $row->{'filepath'};
        $doc->add_text($path);
        # hidden text: the same path with a space after every character
        $path =~ s/(.)/$1 /g;
        $doc->add_hidden_text($path);

        print STDERR $doc->dump_draft,"\n" if ($debug > 1);

        # register the document object to the database
        $db->put_doc($doc, $HyperEstraier::Database::PDCLEAN);

        $i++;
        if ($i % $dot == 0) {
            print "$max ";
            $max--;
        }

    }

    print "sync";
    $db->sync();
    print " close\n";
    $db->close();

    # Leave cleanly, like the normal end of the script: nothing was
    # modified in SQL, so roll back the read-only transaction, disconnect
    # and drop the pid file.
    $dbh->rollback();
    $dbh->disconnect();
    $pidfile->remove;

    exit;
}
157    
###################################create tables############################

# With -c, create all tables and their indexes, then commit.
# NOTE(review): the SERIAL keys suggest a PostgreSQL SearchDSN - confirm.
if ($opt{c}) {
    # Create one index described as "table_column[_unique]".
    # The column part may be a comma-separated list; commas become
    # underscores in the generated index name.
    sub do_index {
        my $index = shift || return;
        my ($table,$col,$unique) = split(/_/, $index);
        $unique ||= '';
        $index =~ s/,/_/g;
        $dbh->do(qq{ create $unique index $index on $table($col) });
    }

    print "creating tables...\n";

    $dbh->do(qq{
        create table hosts (
            ID SERIAL PRIMARY KEY,
            name VARCHAR(30) NOT NULL,
            IP VARCHAR(15)
        );
    });

    $dbh->do(qq{
        create table shares (
            ID SERIAL PRIMARY KEY,
            hostID INTEGER NOT NULL references hosts(id),
            name VARCHAR(30) NOT NULL,
            share VARCHAR(200) NOT NULL,
            localpath VARCHAR(200)
        );
    });

    # size is the byte total of the newly imported files of one share in
    # one backup; BIGINT because a 32-bit INTEGER overflows at 2 GiB.
    $dbh->do(qq{
        create table backups (
            hostID INTEGER NOT NULL references hosts(id),
            num INTEGER NOT NULL,
            date integer NOT NULL,
            type CHAR(4) not null,
            shareID integer not null references shares(id),
            size bigint not null,
            PRIMARY KEY(hostID, num, shareID)
        );
    });

    #do_index('backups_hostid,num_unique');

    $dbh->do(qq{
        create table dvds (
            ID SERIAL PRIMARY KEY,
            num INTEGER NOT NULL,
            name VARCHAR(255) NOT NULL,
            mjesto VARCHAR(255)
        );
    });

    # size is a single file's byte size; BIGINT so that files larger than
    # 2 GiB can be stored.
    $dbh->do(qq{
        create table files (
            ID SERIAL PRIMARY KEY,
            shareID INTEGER NOT NULL references shares(id),
            backupNum INTEGER NOT NULL,
            name VARCHAR(255) NOT NULL,
            path VARCHAR(255) NOT NULL,
            date integer NOT NULL,
            type INTEGER NOT NULL,
            size bigint NOT NULL,
            dvdid INTEGER references dvds(id)
        );
    });

    print "creating indexes:";

    foreach my $index (qw(
        hosts_name
        backups_hostID
        backups_num
        shares_hostID
        shares_name
        files_shareID
        files_path
        files_name
        files_date
        files_size
    )) {
        print " $index";
        do_index($index);
    }
    print "...\n";

    $dbh->commit;

}
248    
# With -d, empty every table before the import and commit the result.
# Tables are listed so that referencing tables are emptied before the
# tables they reference.
if ($opt{d}) {
    my @tables = qw(files dvds backups shares hosts);

    print "deleting ";
    for my $name (@tables) {
        print $name, " ";
        $dbh->do(qq{ DELETE FROM $name });
    }
    print " done...\n";

    $dbh->commit;
}
259    
260 dpavlin 6 #################################INSERT VALUES#############################
261    
262     # get hosts
$hosts = $bpc->HostInfoRead();

# IDs of the host and share currently being imported (file-level so the
# helper subs below can see them).
my $hostID;
my $shareID;

# Prepared statements, keyed by purpose. The helper subs add further
# entries to this hash lazily on first use.
my $sth = {};

$sth->{insert_hosts} = $dbh->prepare(qq{
    INSERT INTO hosts (name, IP) VALUES (?,?)
});

$sth->{hosts_by_name} = $dbh->prepare(qq{
    SELECT ID FROM hosts WHERE name=?
});

# Number of backups rows for one (host, backup number, share) triple.
$sth->{backups_count} = $dbh->prepare(qq{
    SELECT COUNT(*)
    FROM backups
    WHERE hostID=? AND num=? AND shareid=?
});

$sth->{insert_backups} = $dbh->prepare(qq{
    INSERT INTO backups (hostID, num, date, type, shareid, size)
    VALUES (?,?,?,?,?,?)
});

$sth->{insert_files} = $dbh->prepare(qq{
    INSERT INTO files
        (shareID, backupNum, name, path, date, type, size)
    VALUES (?,?,?,?,?,?,?)
});
293    
# Format a duration given in seconds as "MM:SS", or "<hours>hMM:SS" once
# it reaches an hour.
#
# Always returns exactly one string: an undefined, zero or negative
# duration yields "00:00". (Returning an empty list for 0 used to shift
# the remaining arguments of the printf() calls this is used in.)
# Hours are not wrapped at 24; fractional seconds are truncated.
sub fmt_time {
    my $t = shift;
    $t = 0 if (!defined($t) || $t < 0);
    $t = int($t);

    my $hh = int($t / 3600);
    my $mm = int(($t % 3600) / 60);
    my $ss = $t % 60;

    my $out = "";
    $out .= "${hh}h" if ($hh);
    $out .= sprintf("%02d:%02d", $mm, $ss);
    return $out;
}
302    
# Import loop: make sure every host known to BackupPC has a row in hosts,
# then walk its backups and import each share that has no backups row yet.
for my $host_key (keys %{$hosts}) {

    my $host_info = $hosts->{$host_key};
    my $hostname = $host_info->{'host'} || die "can't find host for $host_key";

    # Look the host up by name; insert it when the query returns no row.
    $sth->{hosts_by_name}->execute($hostname);
    my $have_row = (($hostID) = $sth->{hosts_by_name}->fetchrow_array());
    if (!$have_row) {
        $sth->{insert_hosts}->execute($hostname, $host_info->{'ip'});
        $hostID = $dbh->last_insert_id(undef,undef,'hosts',undef);
    }

    print "host $hostname: ";

    # get backups for a host
    my @backups = $bpc->BackupInfoRead($hostname);
    my $incs = scalar @backups;
    print "$incs increments\n";

    # The seen-files cache is per host.
    $beenThere = {};

    my $inc_nr = 0;
    for my $backup (@backups) {

        $inc_nr++;
        # -m limits how many increments are imported per host.
        last if ($opt{m} && $inc_nr > $opt{m});

        my $backupNum = $backup->{'num'};

        printf("%-10s %2d/%-2d #%-2d %s %5s/%5s files (date: %s dur: %s)\n",
            $hostname,
            $inc_nr, $incs, $backupNum,
            $backup->{type} || '?',
            $backup->{nFilesNew} || '?', $backup->{nFiles} || '?',
            strftime($t_fmt,localtime($backup->{startTime})),
            fmt_time($backup->{endTime} - $backup->{startTime})
        );

        my $files = BackupPC::View->new($bpc, $hostname, \@backups, 1);
        for my $share ($files->shareList($backupNum)) {

            my $share_start = time();

            $shareID = getShareID($share, $hostID, $hostname);

            # skip if already in database!
            $sth->{backups_count}->execute($hostID, $backupNum, $shareID);
            my ($count) = $sth->{backups_count}->fetchrow_array();
            next if ($count > 0);

            # dump some log
            print strftime($t_fmt,localtime())," ", $share;

            my ($f, $nf, $d, $nd, $size) = recurseDir($bpc, $hostname, $files, $backupNum, $share, "", $shareID);

            # Record the share as imported only after its files went in.
            $sth->{insert_backups}->execute(
                $hostID,
                $backupNum,
                $backup->{'endTime'},
                $backup->{'type'},
                $shareID,
                $size,
            );

            print " commit";
            $dbh->commit();

            # Never let the elapsed time be 0: it is used as a divisor.
            my $dur = (time() - $share_start) || 1;
            printf(" %d/%d files %d/%d dirs %0.2f MB [%.2f/s dur: %s]\n",
                $nf, $f, $nd, $d,
                ($size / 1024 / 1024),
                ( ($f+$d) / $dur ),
                fmt_time($dur)
            );
        }

    }
}

# Release the prepared statements before closing the connection.
undef $sth;
$dbh->commit();
$dbh->disconnect();

print "total duration: ",fmt_time(time() - $start_t),"\n";

$pidfile->remove;
392    
# Return the shares.ID for ($hostID, $share), inserting the row first when
# it does not exist yet.
#
#   $share    - share name, stored in shares.name
#   $hostID   - hosts.ID of the owning host
#   $hostname - host name; only used to build the "hostname/share" value
#               stored in shares.share
#
# The sub used to be declared with an empty prototype "()" although it
# takes three arguments; the prototype is dropped so that a call compiled
# after this definition is not rejected.
sub getShareID {

    my ($share, $hostID, $hostname) = @_;

    # Both statements are prepared on first use and cached in $sth.
    $sth->{share_id} ||= $dbh->prepare(qq{
        SELECT ID FROM shares WHERE hostID=? AND name=?
    });

    $sth->{share_id}->execute($hostID,$share);

    my ($id) = $sth->{share_id}->fetchrow_array();

    return $id if (defined($id));

    $sth->{insert_share} ||= $dbh->prepare(qq{
        INSERT INTO shares
            (hostID,name,share,localpath)
        VALUES (?,?,?,?)
    });

    # "hostname/share" with runs of slashes collapsed into one.
    my $drop_down = $hostname . '/' . $share;
    $drop_down =~ s#//+#/#g;

    # localpath is left NULL.
    $sth->{insert_share}->execute($hostID,$share, $drop_down ,undef);
    return $dbh->last_insert_id(undef,undef,'shares',undef);
}
419    
# Check whether a file is already known and insert it into files if not.
#
#   $key  - cache key for $beenThere
#   @data - the seven insert_files bind values:
#           (shareID, backupNum, name, path, date, type, size)
#
# Returns a true value when the file was already seen during this run
# (the $beenThere counter) or already exists in the files table with the
# same shareID, path, date and size; returns 0 when it was new and has
# just been inserted.
sub found_in_db {

    my ($key, @data) = @_;

    my ($shareID, undef, undef, $path, $date, undef, $size) = @data;

    return $beenThere->{$key} if (defined($beenThere->{$key}));

    $sth->{file_in_db} ||= $dbh->prepare(qq{
        SELECT 1 FROM files
        WHERE shareID = ? and
            path = ? and
            date = ? and
            size = ?
        LIMIT 1
    });

    my @param = ($shareID,$path,$date,$size);
    $sth->{file_in_db}->execute(@param);

    # Fetch the row instead of asking rows(): DBI only guarantees rows()
    # for non-SELECT statements, so its value here depended on the driver.
    my ($hit) = $sth->{file_in_db}->fetchrow_array();
    $sth->{file_in_db}->finish();
    my $rows = $hit ? 1 : 0;

    print STDERR "## found_in_db($shareID,$path,$date,$size) ",( $rows ? '+' : '-' ), join(" ",@param), "\n" if ($debug >= 3);

    # Remember the key so that the database is not queried for it again.
    $beenThere->{$key}++;

    $sth->{'insert_files'}->execute(@data) unless ($rows);
    return $rows;
}
448    
####################################################
# Walk one directory of a backup share, register   #
# every entry that is not yet known, and recurse   #
# into its sub-directories.                        #
####################################################
#
#   $bpc       - BackupPC::Lib object (only passed on to the recursion)
#   $hostname  - host name (debug output and recursion)
#   $files     - view object providing dirAttrib()
#   $backupNum - backup number
#   $share     - share name
#   $dir       - directory inside the share; "" at the top level
#   $shareID   - shares.ID stored with every file row
#
# Returns ($nr_files, $new_files, $nr_dirs, $new_dirs, $size): entries
# seen, entries newly inserted, and the summed size of the new entries,
# including everything below $dir.
#
# The former prototype declared eight scalars for a seven-argument sub; it
# is dropped so that a call compiled after this definition is not rejected.
sub recurseDir {

    my ($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID) = @_;

    print STDERR "\nrecurse($hostname,$backupNum,$share,$dir,$shareID)\n" if ($debug >= 1);

    my ($nr_files, $new_files, $nr_dirs, $new_dirs, $size) = (0,0,0,0,0);

    # Sub-directories found here; visited after the current directory.
    my @stack;

    print STDERR "# dirAttrib($backupNum, $share, $dir)\n" if ($debug >= 2);
    my $filesInBackup = $files->dirAttrib($backupNum, $share, $dir);

    # first, add all the entries in current directory
    foreach my $path_key (keys %{$filesInBackup}) {
        my $attr = $filesInBackup->{$path_key};
        print STDERR "# file ",Dumper($attr),"\n" if ($debug >= 3);

        # Bind values for the files insert, in column order.
        my @data = (
            $shareID,
            $backupNum,
            $path_key,
            $attr->{'relPath'},
            $attr->{'mtime'},
            $attr->{'type'},
            $attr->{'size'}
        );

        # Cache key identifying this entry within the current host.
        my $key = join(" ", (
            $shareID,
            $dir,
            $path_key,
            $attr->{'mtime'},
            $attr->{'size'}
        ));

        my $is_dir = ($attr->{'type'} == BPC_FTYPE_DIR);

        # New entry: not seen in this run and not in the database
        # (found_in_db() inserts it as a side effect).
        if (! defined($beenThere->{$key}) && ! found_in_db($key, @data)) {
            print STDERR "# key: $key [", $beenThere->{$key},"]" if ($debug >= 2);

            if ($is_dir) {
                $new_dirs++;
                print STDERR " dir\n" if ($debug >= 2);
            } else {
                $new_files++;
                print STDERR " file\n" if ($debug >= 2);
            }
            $size += $attr->{'size'} || 0;
        }

        if ($is_dir) {
            $nr_dirs++;

            my $full_path = $dir . '/' . $path_key;
            push @stack, $full_path;
            print STDERR "### store to stack: $full_path\n" if ($debug >= 3);
        } else {
            $nr_files++;
        }
    }

    print STDERR "## STACK ",join(", ", @stack),"\n" if ($debug >= 2);

    # Loop on the stack size, not on the truthiness of the shifted path.
    while (@stack) {
        my $subdir = shift @stack;
        my ($f,$nf,$d,$nd, $s) = recurseDir($bpc, $hostname, $files, $backupNum, $share, $subdir, $shareID);
        print STDERR "# $subdir f: $f nf: $nf d: $d nd: $nd\n" if ($debug >= 1);
        $nr_files += $f;
        $new_files += $nf;
        $nr_dirs += $d;
        $new_dirs += $nd;
        $size += $s;
    }

    return ($nr_files, $new_files, $nr_dirs, $new_dirs, $size);
}
536    

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26