/[BackupPC]/trunk/bin/BackupPC_updatedb
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/bin/BackupPC_updatedb

Parent Directory Parent Directory | Revision Log Revision Log


Revision 86 - (hide annotations)
Sun Aug 28 12:35:59 2005 UTC (18 years, 8 months ago) by dpavlin
File size: 12744 byte(s)
first cut at implementing HyperEstraier search,
shares are now formatted like host:share, fix date display,
initial cut at removing unneeded database columns,
rename all fields to comply with database columns

1 dpavlin 14 #!/usr/local/bin/perl -w
2 dpavlin 6
3     use strict;
4 dpavlin 48 use lib "__INSTALLDIR__/lib";
5    
6 dpavlin 6 use DBI;
7     use BackupPC::Lib;
8     use BackupPC::View;
9     use Data::Dumper;
10     use Getopt::Std;
11 dpavlin 37 use Time::HiRes qw/time/;
12 dpavlin 38 use File::Pid;
13 dpavlin 37 use POSIX qw/strftime/;
14 dpavlin 48
15 dpavlin 6 use constant BPC_FTYPE_DIR => 5;
16    
# Verbosity of the diagnostics written to STDERR; overridden by -v.
my $debug = 0;
# Unbuffered STDOUT so progress output appears as it is printed.
$| = 1;

# Start of the run, reported as "total duration" at the very end.
my $start_t = time();

# Single-instance guard based on a pid file.
my $pidfile = File::Pid->new;

if (my $pid = $pidfile->running) {
    die "$0 already running: $pid\n";
}
elsif ($pidfile->pid ne $$) {
    # Nothing is running, but the recorded pid is not ours:
    # discard the pid file and create a fresh object.
    $pidfile->remove;
    $pidfile = File::Pid->new;
}
$pidfile->write;
print STDERR "$0 using pid ", $pidfile->pid, " file ", $pidfile->file, "\n";

# strftime pattern used for every timestamp this script prints.
my $t_fmt = '%Y-%m-%d %H:%M:%S';

my $hosts;
my $bpc = BackupPC::Lib->new || die;
my %Conf = $bpc->Conf();
my $TopDir = $bpc->TopDir();
# Cache of file keys already handled; see found_in_db() and recurseDir().
my $beenThere = {};

my $dsn = $Conf{SearchDSN} || die "Need SearchDSN in config.pl\n";
my $user = $Conf{SearchUser} || '';
my $index_path = $Conf{HyperEstraierIndex};

# AutoCommit is off: the code below commits explicitly.
my $dbh = DBI->connect($dsn, $user, "", { RaiseError => 1, AutoCommit => 0 });

my %opt;

# The "v:" spec means -v always takes a level, so the usage text says so.
if ( !getopts("cdm:v:i", \%opt) ) {
    print STDERR <<EOF;
usage: $0 [-c|-d] [-m num] [-v level] [-i]

Options:
    -c        create database on first use
    -d        delete database before import
    -m num    import just num increments for one host
    -v num    set verbosity (debug) level (default $debug)
    -i        update HyperEstraier full text index
EOF
    exit 1;
}

if ($opt{v}) {
    print "Debug level at $opt{v}\n";
    $debug = $opt{v};
}
67    
68 dpavlin 86 #---- subs ----
69    
# Format a duration given in seconds as "MM:SS", prefixed with "<hours>h"
# when it lasts an hour or longer (for example "1h02:05").
#
# Fractional seconds are truncated and negative durations are shown as
# "00:00".  Hours are counted without wrapping at 24, so a run of 25 hours
# is reported as "25h00:00".  A zero duration yields "00:00".
#
# Returns nothing (empty list / undef) only when called without a defined
# argument.
sub fmt_time {
    my $t = shift;
    return unless defined $t;

    my $secs = int($t);
    $secs = 0 if $secs < 0;

    my $hh = int($secs / 3600);
    my $mm = int(($secs % 3600) / 60);
    my $ss = $secs % 60;

    my $out = $hh ? "${hh}h" : "";
    $out .= sprintf("%02d:%02d", $mm, $ss);
    return $out;
}
78    
# Current local time rendered with the script-wide $t_fmt pattern.
sub curr_time {
    my @now = localtime();
    return strftime($t_fmt, @now);
}
82    
83     #---- /subs ----
84    
## update index ##
# -i: push every row of the files table (joined with its host, share and
# backup) into the HyperEstraier index as one document, then exit.
if ($opt{i}) {

    # The index path is appended to TopDir below, so it has to be configured.
    die "Need HyperEstraierIndex in config.pl\n" unless $index_path;

    print curr_time, " updating HyperEstraier: files";

    my $t = time();

    my $sth = $dbh->prepare(qq{
        SELECT
            files.id        AS fid,
            hosts.name      AS hname,
            shares.name     AS sname,
            -- shares.share AS sharename,
            files.backupnum AS backupnum,
            -- files.name   AS filename,
            files.path      AS filepath,
            files.date      AS date,
            files.type      AS filetype,
            files.size      AS size,
            files.shareid   AS shareid,
            backups.date    AS backup_date
        FROM files
            INNER JOIN shares  ON files.shareID=shares.ID
            INNER JOIN hosts   ON hosts.ID = shares.hostID
            INNER JOIN backups ON backups.num = files.backupNum and backups.hostID = hosts.ID AND backups.shareID = shares.ID
    });

    $sth->execute();
    # NOTE(review): DBI does not guarantee rows() for a SELECT before all
    # rows are fetched; the value is used for the progress display only.
    my $results = $sth->rows;

    # One progress marker every $dot documents.  Never let it become 0:
    # with fewer than 15 rows it would otherwise be used as a zero divisor
    # below.
    my $dot = int($results / 15) || 1;

    print " $results ($dot/#)";

    # Turn an epoch time into a timestamp with the first whitespace
    # replaced by "T".  Only referenced by the commented-out '@cdate'
    # attribute below.
    # NOTE(review): timeStamp is called here as a plain function - confirm
    # it is not meant to be invoked as a method on $bpc.
    sub fmt_date {
        my $t = shift || return;
        my $iso = BackupPC::Lib::timeStamp($t);
        $iso =~ s/\s/T/;
        return $iso;
    }

    my $i = 0;
    # Countdown value printed with each progress marker.
    my $max = int($results / $dot);

    $index_path = $TopDir . '/' . $index_path;
    $index_path =~ s#//#/#g;

    print " index $index_path...";
    # "use" is resolved at compile time regardless of where it is written.
    use HyperEstraier;
    my $db = HyperEstraier::Database->new();
    $db->open($index_path, $HyperEstraier::Database::DBWRITER | $HyperEstraier::Database::DBCREAT);

    while (my $row = $sth->fetchrow_hashref()) {

        # create a document object
        my $doc = HyperEstraier::Document->new;

        # add attributes to the document object
        $doc->add_attr('@uri', 'file:///' . $row->{'fid'});

        foreach my $c (@{ $sth->{NAME} }) {
            my $value = $row->{$c};
            # Skip NULL and empty values only; a plain truth test would
            # also drop legitimate zeros such as size 0 or backup number 0.
            next unless defined $value && length $value;
            $doc->add_attr($c, $value);
        }

        #$doc->add_attr('@cdate', fmt_date($row->{'date'}));

        # add the body text to the document object
        my $path = $row->{'filepath'};
        $doc->add_text($path);
        # hidden text: the same path with a space after every character
        $path =~ s/(.)/$1 /g;
        $doc->add_hidden_text($path);

        print STDERR $doc->dump_draft, "\n" if ($debug > 1);

        # register the document object to the database
        $db->put_doc($doc, $HyperEstraier::Database::PDCLEAN);

        $i++;
        if ($i % $dot == 0) {
            print "$max ";
            $max--;
        }

    }

    print "sync";
    $db->sync();
    print " close";
    $db->close();

    # Rate is computed from the number of documents actually written.
    my $dur = (time() - $t) || 1;
    printf(" [%.2f/s dur: %s]\n",
        ( $i / $dur ),
        fmt_time($dur)
    );

    exit;
}
184    
185 dpavlin 6 ###################################create tables############################3
186    
# -c: create the tables and indexes, then commit.
if ($opt{c}) {

    # Create one index from a spec of the form "table_column[_unique]".
    # The spec itself (with commas turned into underscores) becomes the
    # index name; an optional third part is placed before the "index"
    # keyword.
    sub do_index {
        my $index = shift || return;
        my ($table, $col, $unique) = split(/_/, $index);
        $unique ||= '';
        $index =~ s/,/_/g;
        $dbh->do(qq{ create $unique index $index on $table($col) });
    }

    print "creating tables...\n";

    $dbh->do(qq{
        create table hosts (
            ID      SERIAL          PRIMARY KEY,
            name    VARCHAR(30)     NOT NULL,
            IP      VARCHAR(15)
        );
    });

    $dbh->do(qq{
        create table shares (
            ID          SERIAL          PRIMARY KEY,
            hostID      INTEGER         NOT NULL references hosts(id),
            name        VARCHAR(30)     NOT NULL,
            share       VARCHAR(200)    NOT NULL,
            localpath   VARCHAR(200)
        );
    });

    # size holds the byte total accumulated by recurseDir() for one
    # backup/share, which can exceed a 32-bit integer, hence bigint.
    $dbh->do(qq{
        create table backups (
            hostID  INTEGER     NOT NULL references hosts(id),
            num     INTEGER     NOT NULL,
            date    integer     NOT NULL,
            type    CHAR(4)     not null,
            shareID integer     not null references shares(id),
            size    bigint      not null,
            PRIMARY KEY(hostID, num, shareID)
        );
    });

    #do_index('backups_hostid,num_unique');

    $dbh->do(qq{
        create table dvds (
            ID      SERIAL          PRIMARY KEY,
            num     INTEGER         NOT NULL,
            name    VARCHAR(255)    NOT NULL,
            mjesto  VARCHAR(255)
        );
    });

    # size is a per-file byte count; bigint so files over 2 GB fit.
    $dbh->do(qq{
        create table files (
            ID          SERIAL          PRIMARY KEY,
            shareID     INTEGER         NOT NULL references shares(id),
            backupNum   INTEGER         NOT NULL,
            name        VARCHAR(255)    NOT NULL,
            path        VARCHAR(255)    NOT NULL,
            date        integer         NOT NULL,
            type        INTEGER         NOT NULL,
            size        bigint          NOT NULL,
            dvdid       INTEGER         references dvds(id)
        );
    });

    print "creating indexes:";

    foreach my $index (qw(
        hosts_name
        backups_hostID
        backups_num
        shares_hostID
        shares_name
        files_shareID
        files_path
        files_name
        files_date
        files_size
    )) {
        print " $index";
        do_index($index);
    }
    print "...\n";

    $dbh->commit;

}
275    
# -d: empty all tables (rows only, the schema stays) and commit.
# Tables are listed so that referencing tables are emptied before the
# tables they reference.
if ($opt{d}) {
    my @tables = qw(files dvds backups shares hosts);

    print "deleting ";
    for my $table (@tables) {
        print "$table ";
        my $sql = qq{ DELETE FROM $table };
        $dbh->do($sql);
    }
    print " done...\n";

    $dbh->commit;
}
286    
287 dpavlin 6 #################################INSERT VALUES#############################
288    
# get hosts
$hosts = $bpc->HostInfoRead();
my $hostID;
my $shareID;

# Prepared statement handles keyed by name; getShareID() and found_in_db()
# add their own entries to the same hash on first use.
my $sth;

$sth->{insert_hosts} = $dbh->prepare(qq{
    INSERT INTO hosts (name, IP) VALUES (?,?)
});

$sth->{hosts_by_name} = $dbh->prepare(qq{
    SELECT ID FROM hosts WHERE name=?
});

$sth->{backups_count} = $dbh->prepare(qq{
    SELECT COUNT(*)
    FROM backups
    WHERE hostID=? AND num=? AND shareid=?
});

$sth->{insert_backups} = $dbh->prepare(qq{
    INSERT INTO backups (hostID, num, date, type, shareid, size)
    VALUES (?,?,?,?,?,?)
});

$sth->{insert_files} = $dbh->prepare(qq{
    INSERT INTO files
        (shareID, backupNum, name, path, date, type, size)
        VALUES (?,?,?,?,?,?,?)
});

foreach my $host_key (keys %{$hosts}) {

    my $hostname = $hosts->{$host_key}->{'host'} || die "can't find host for $host_key";

    # Reuse the existing hosts row, or insert one and take its new ID.
    $sth->{hosts_by_name}->execute($hostname);

    unless (($hostID) = $sth->{hosts_by_name}->fetchrow_array()) {
        $sth->{insert_hosts}->execute(
            $hostname,
            $hosts->{$host_key}->{'ip'}
        );

        $hostID = $dbh->last_insert_id(undef, undef, 'hosts', undef);
    }

    print "host " . $hostname . ": ";

    # get backups for a host
    my @backups = $bpc->BackupInfoRead($hostname);
    my $incs = scalar @backups;
    print "$incs increments\n";

    my $inc_nr = 0;
    # the seen-files cache is kept per host
    $beenThere = {};

    foreach my $backup (@backups) {

        $inc_nr++;
        # -m limits how many increments are imported per host
        last if ($opt{m} && $inc_nr > $opt{m});

        my $backupNum = $backup->{'num'};

        # Take the duration in scalar context first: fmt_time() can return
        # an empty list, which inside the printf argument list would leave
        # the last %s without a value.
        my $backup_dur = fmt_time($backup->{endTime} - $backup->{startTime});
        $backup_dur = '?' unless defined $backup_dur;

        printf("%-10s %2d/%-2d #%-2d %s %5s/%5s files (date: %s dur: %s)\n",
            $hostname,
            $inc_nr, $incs, $backupNum,
            $backup->{type} || '?',
            $backup->{nFilesNew} || '?', $backup->{nFiles} || '?',
            strftime($t_fmt, localtime($backup->{startTime})),
            $backup_dur
        );

        my $files = BackupPC::View->new($bpc, $hostname, \@backups, 1);
        foreach my $share ($files->shareList($backupNum)) {

            my $t = time();

            $shareID = getShareID($share, $hostID, $hostname);

            $sth->{backups_count}->execute($hostID, $backupNum, $shareID);
            my ($count) = $sth->{backups_count}->fetchrow_array();
            # skip if already in database!
            next if ($count > 0);

            # dump some log
            print curr_time, " ", $share;

            my ($f, $nf, $d, $nd, $size) = recurseDir($bpc, $hostname, $files, $backupNum, $share, "", $shareID);

            # The backups row is written after its files, in the same
            # transaction, and is what the "already in database" check
            # above looks for.
            $sth->{insert_backups}->execute(
                $hostID,
                $backupNum,
                $backup->{'endTime'},
                $backup->{'type'},
                $shareID,
                $size,
            );

            print " commit";
            $dbh->commit();

            my $dur = (time() - $t) || 1;
            printf(" %d/%d files %d/%d dirs %0.2f MB [%.2f/s dur: %s]\n",
                $nf, $f, $nd, $d,
                ($size / 1024 / 1024),
                ( ($f + $d) / $dur ),
                fmt_time($dur)
            );
        }

    }
}
# drop all statement handles before closing the connection
undef $sth;
$dbh->commit();
$dbh->disconnect();

print "total duration: ", fmt_time(time() - $start_t), "\n";

$pidfile->remove;
410    
# Return the shares.ID for share $share of host $hostID, inserting a new
# shares row when none exists yet.
#
#   $share    - share name, stored in shares.name
#   $hostID   - hosts.ID the share belongs to
#   $hostname - host name, used to build the "hostname/share" value stored
#               in shares.share (runs of slashes collapsed to one)
#
# The statement handles are prepared on first use and cached in the
# file-level $sth hash.  No prototype is declared: the sub takes three
# arguments, which an empty "()" prototype would contradict.
sub getShareID {

    my ($share, $hostID, $hostname) = @_;

    $sth->{share_id} ||= $dbh->prepare(qq{
        SELECT ID FROM shares WHERE hostID=? AND name=?
    });

    $sth->{share_id}->execute($hostID, $share);

    my ($id) = $sth->{share_id}->fetchrow_array();

    return $id if (defined($id));

    $sth->{insert_share} ||= $dbh->prepare(qq{
        INSERT INTO shares
            (hostID,name,share,localpath)
        VALUES (?,?,?,?)
    });

    my $drop_down = $hostname . '/' . $share;
    $drop_down =~ s#//+#/#g;

    # localpath is left NULL
    $sth->{insert_share}->execute($hostID, $share, $drop_down, undef);
    return $dbh->last_insert_id(undef, undef, 'shares', undef);
}
437    
# Check whether a file is already recorded in the files table and insert
# it when it is not.
#
# Arguments: ($key, @data) where $key is the caller's cache key and @data
# is the insert_files bind list:
#   (shareID, backupNum, name, path, date, type, size)
#
# A file counts as present when a row with the same shareID, path, date
# and size exists.  Every key is remembered in $beenThere; a later call
# with the same key returns the cached counter without touching the
# database.
#
# Returns 1 when the row already existed, 0 when it was just inserted.
sub found_in_db {

    my ($key, @data) = @_;
    my ($shareID, undef, undef, $path, $date, undef, $size) = @data;

    return $beenThere->{$key} if (defined($beenThere->{$key}));

    $sth->{file_in_db} ||= $dbh->prepare(qq{
        SELECT 1 FROM files
        WHERE shareID = ? and
            path = ? and
            date = ? and
            size = ?
        LIMIT 1
    });

    my @param = ($shareID, $path, $date, $size);
    $sth->{file_in_db}->execute(@param);

    # Fetch the row instead of trusting rows(): DBI does not guarantee a
    # row count for SELECT statements before the rows have been fetched.
    my ($hit) = $sth->{file_in_db}->fetchrow_array();
    $sth->{file_in_db}->finish;
    my $rows = $hit ? 1 : 0;

    print STDERR "## found_in_db($shareID,$path,$date,$size) ",( $rows ? '+' : '-' ), join(" ",@param), "\n" if ($debug >= 3);

    $beenThere->{$key}++;

    $sth->{'insert_files'}->execute(@data) unless ($rows);
    return $rows;
}
466    
####################################################
# recursing through filesystem structure and       #
# returning counters for the files and dirs found  #
####################################################
# Walk directory $dir of share $share in backup $backupNum and record
# every entry through found_in_db().
#
#   $bpc       - BackupPC::Lib object (only passed on to recursive calls)
#   $hostname  - host name (diagnostics and recursion only)
#   $files     - view object providing dirAttrib()
#   $backupNum - backup number
#   $share     - share name
#   $dir       - directory inside the share, "" for the top level
#   $shareID   - shares.ID stored with every file row
#
# Returns ($nr_files, $new_files, $nr_dirs, $new_dirs, $size): all files
# and directories seen, how many of them were newly inserted, and the
# summed size of the newly inserted entries.
#
# No prototype is declared: the previous one listed eight scalars while
# every call passes seven arguments.
sub recurseDir {

    my ($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID) = @_;

    print STDERR "\nrecurse($hostname,$backupNum,$share,$dir,$shareID)\n" if ($debug >= 1);

    my ($nr_files, $new_files, $nr_dirs, $new_dirs, $size) = (0,0,0,0,0);

    # sub-directories of $dir, visited after all entries of $dir itself
    my @stack;

    print STDERR "# dirAttrib($backupNum, $share, $dir)\n" if ($debug >= 2);
    my $filesInBackup = $files->dirAttrib($backupNum, $share, $dir);

    # first, add all the entries in current directory
    foreach my $path_key (keys %{$filesInBackup}) {
        my $entry = $filesInBackup->{$path_key};
        print STDERR "# file ",Dumper($entry),"\n" if ($debug >= 3);

        my $is_dir = ($entry->{'type'} == BPC_FTYPE_DIR);

        # bind values for insert_files, in column order
        my @data = (
            $shareID,
            $backupNum,
            $path_key,
            $entry->{'relPath'},
            $entry->{'mtime'},
            $entry->{'type'},
            $entry->{'size'}
        );

        # cache key for $beenThere
        my $key = join(" ", (
            $shareID,
            $dir,
            $path_key,
            $entry->{'mtime'},
            $entry->{'size'}
        ));

        # Entered only for entries that are neither cached nor already in
        # the database, i.e. the ones found_in_db() has just inserted.
        if (! defined($beenThere->{$key}) && ! found_in_db($key, @data)) {
            print STDERR "# key: $key [", $beenThere->{$key},"]" if ($debug >= 2);

            if ($is_dir) {
                $new_dirs++;
                print STDERR " dir\n" if ($debug >= 2);
            } else {
                $new_files++;
                print STDERR " file\n" if ($debug >= 2);
            }
            $size += $entry->{'size'} || 0;
        }

        if ($is_dir) {
            $nr_dirs++;

            my $full_path = $dir . '/' . $path_key;
            push @stack, $full_path;
            print STDERR "### store to stack: $full_path\n" if ($debug >= 3);
        } else {
            $nr_files++;
        }
    }

    print STDERR "## STACK ",join(", ", @stack),"\n" if ($debug >= 2);

    # then descend into each sub-directory and add up its counters
    while (defined(my $subdir = shift @stack)) {
        my ($f, $nf, $d, $nd, $s) = recurseDir($bpc, $hostname, $files, $backupNum, $share, $subdir, $shareID);
        print STDERR "# $subdir f: $f nf: $nf d: $d nd: $nd\n" if ($debug >= 1);
        $nr_files  += $f;
        $new_files += $nf;
        $nr_dirs   += $d;
        $new_dirs  += $nd;
        $size      += $s;
    }

    return ($nr_files, $new_files, $nr_dirs, $new_dirs, $size);
}
554    

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26