/[BackupPC]/trunk/bin/BackupPC_incPartsUpdate
This is a repository of my old source code, which is no longer updated. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/bin/BackupPC_incPartsUpdate

Parent Directory Parent Directory | Revision Log Revision Log


Revision 289 - (hide annotations)
Wed Jan 18 15:16:31 2006 UTC (18 years, 4 months ago) by dpavlin
File size: 9428 byte(s)
 r9051@llin:  dpavlin | 2006-01-18 16:16:23 +0100
 added File::Pid to prevent two copies running at same time

1 dpavlin 157 #!/usr/local/bin/perl -w
2    
3     use strict;
4     use lib "__INSTALLDIR__/lib";
5    
6     use DBI;
7     use BackupPC::Lib;
8     use BackupPC::View;
9 dpavlin 214 use BackupPC::Attrib qw/:all/;
10 dpavlin 157 use Data::Dumper;
11     use Time::HiRes qw/time/;
12     use POSIX qw/strftime/;
13     use Cwd qw/abs_path/;
14 dpavlin 201 use File::Which;
15 dpavlin 215 use Archive::Tar::Streamed;
16 dpavlin 214 use Algorithm::Diff;
17     use Getopt::Std;
18 dpavlin 264 use File::Slurp;
19 dpavlin 289 use File::Pid;
20 dpavlin 157
#
# Single-instance lock: the pid file name is derived from the absolute path
# of this script (every run of non-word characters becomes "_") and is
# kept in /tmp.
#
my $pid_path = abs_path($0);
$pid_path =~ s/\W+/_/g;
my $pid_file_name = "/tmp/$pid_path";

my $pidfile = File::Pid->new({
    file => $pid_file_name,
});

if (my $pid = $pidfile->running) {
    die "$0 already running: $pid\n";
}
elsif ($pidfile->pid ne $$) {
    # stale entry: drop it and start over with the SAME file name, so the
    # next invocation looks for the lock where this one wrote it
    $pidfile->remove;
    $pidfile = File::Pid->new({
        file => $pid_file_name,
    });
}

print STDERR "$0 using pid ",$pidfile->pid," file ",$pidfile->file,"\n";
$pidfile->write;

# BackupPC configuration, also handed over to the search library
my $bpc = BackupPC::Lib->new || die "can't create BackupPC::Lib";
my %Conf = $bpc->Conf();

use BackupPC::SearchLib;
%BackupPC::SearchLib::Conf = %Conf;

# BackupPC_tarIncCreate is expected in the same directory as this script
my $path = abs_path($0);
$path =~ s#/[^/]+$#/#;
my $tarIncCreate = $path . 'BackupPC_tarIncCreate';

die "can't find $tarIncCreate: $!\n" unless (-x $tarIncCreate);

# absolute paths of the external tools this script needs
my $bin;
foreach my $c (qw/gzip md5sum/) {
    $bin->{$c} = which($c) || die "$0 needs $c, install it\n";
}

# command line options:
#   -c  force a check of already existing tar archives
#   -d  debug output
my %opt;
getopts("cd", \%opt );

my $debug = $opt{d};
my $check = $opt{c};
print STDERR "NOTICE: tar archive check forced\n" if ($check);

# unbuffered output, so progress messages show up immediately
$|=1;

my $start_t = time();

# strftime pattern used by curr_time()
my $t_fmt = '%Y-%m-%d %H:%M:%S';

my $dsn = $Conf{SearchDSN} || die "Need SearchDSN in config.pl\n";
my $user = $Conf{SearchUser} || '';

# RaiseError: DBI calls die on failure; AutoCommit off: commits are explicit
my $dbh = DBI->connect($dsn, $user, "", { RaiseError => 1, AutoCommit => 0 });

# directory holding the gzip-ed tar archives
my $tar_dir = $Conf{InstallDir}.'/'.$Conf{GzipTempDir};

die "problem with $tar_dir, check GzipTempDir in configuration\n" unless (-d $tar_dir && -w $tar_dir);
75    
76     #---- subs ----
77    
# Format a duration as "MM:SS", prefixed with "<hours>h" when it is at
# least one hour long (e.g. 3661 -> "1h01:01").
#
# Parameter: duration in seconds (a fractional part is truncated).
# Returns:   the formatted string, or nothing (empty list / undef) when the
#            duration is missing or zero.
#
# Hours are computed arithmetically instead of through gmtime(), so
# durations of a day or more show as e.g. "25h00:00" rather than wrapping
# around to "1h00:00".
sub fmt_time {
    my $t = shift || return;

    my $total = int($t);
    my $hh    = int($total / 3600);
    my $mm    = int(($total % 3600) / 60);
    my $ss    = $total % 60;

    my $out = "";
    $out .= "${hh}h" if ($hh);
    $out .= sprintf("%02d:%02d", $mm, $ss);
    return $out;
}
86    
# Current local time rendered with the file-level $t_fmt strftime pattern.
sub curr_time {
    my @now = localtime();
    return strftime($t_fmt, @now);
}
90    
# cache of backups.id values, keyed by "host share num"
my $hsn_cache;

# Look up the backups.id of one backup.
#
# Parameters: host name, share name, backup number.
# Returns:    the id, or undef when no matching row exists.
#
# Ids that were found are cached in $hsn_cache; a failed lookup is retried
# on the next call because only true values are served from the cache.
sub get_backup_id($$$) {
    my ($host, $share, $num) = @_;

    my $key = "$host $share $num";
    return $hsn_cache->{$key} if ($hsn_cache->{$key});

    my $sth = $dbh->prepare(qq{
        SELECT
            backups.id
        FROM backups
        INNER JOIN shares ON backups.shareID=shares.ID
        INNER JOIN hosts ON backups.hostID = hosts.ID
        where hosts.name = ? and shares.name = ? and backups.num = ?
    });
    $sth->execute($host, $share, $num);
    my ($id) = $sth->fetchrow_array;
    # only the first row is read, so release the handle explicitly
    $sth->finish;

    $hsn_cache->{$key} = $id;

    # $id is undef when nothing matched; don't trip an "uninitialized" warning
    print STDERR "# $host $share $num == ", (defined $id ? $id : ''), "\n" if ($opt{d});

    return $id;
}
116    
117    
# Read archive sizes from `gzip -l`.
#
# Depending on the calling context this is used like:
#   my $uncompress_size = get_gzip_size('/full/path/to.gz');
#   my ($compress_size, $uncompress_size) = get_gzip_size('/path.gz');
#
# Dies when the file is not readable, gzip can't be started or its output
# doesn't contain a size line.
sub get_gzip_size($) {
    my $filename = shift;
    die "file $filename problem: $!" unless (-r $filename);

    # list-form pipe open: the file name never passes through a shell
    open(my $gzip, '-|', $bin->{gzip}, '-l', $filename) || die "can't gzip -l $filename: $!";
    my $line = <$gzip>;
    # skip the column header, if gzip printed one
    $line = <$gzip> if (defined $line && $line =~ /^\s+compressed/);
    close($gzip);

    die "can't find size in line: no output from gzip -l $filename" unless (defined $line);
    chomp($line);

    # columns: compressed size, uncompressed size, ratio (may be negative)
    if ($line =~ m/^\s+(\d+)\s+(\d+)\s+-?\d+\.\d+/) {
        return wantarray ? ($1, $2) : $2;
    }

    die "can't find size in line: $line";
}

# Make sure the backup_parts row of one archive part matches what was found
# on disk.
#
# Parameters: host, share, backup number, part number, uncompressed tar size,
#             size of the .tar.gz file, md5 sum, number of items in the tar.
#
# An existing row is kept when its size, md5 and items are equal and its
# tar_size is not smaller than the measured one; any other existing row is
# deleted.  When no valid row remains, a new one is inserted and committed.
sub check_part {
    my ($host, $share, $num, $part_nr, $tar_size, $size, $md5, $items) = @_;
    my $backup_id = get_backup_id($host, $share, $num);
    my $sth_md5 = $dbh->prepare(qq{
        select
            id, tar_size, size, md5, items
        from backup_parts
        where backup_id = ? and part_nr = ?
    });

    $sth_md5->execute($backup_id, $part_nr);

    if (my $row = $sth_md5->fetchrow_hashref) {
        # only the first row is examined, release the handle
        $sth_md5->finish;
        return if (
            $row->{tar_size} >= $tar_size &&
            $row->{size} == $size &&
            $row->{md5} eq $md5 &&
            $row->{items} == $items
        );
        print ", deleting invalid backup_parts $row->{id}";
        $dbh->do(qq{ delete from backup_parts where id = ? }, undef, $row->{id});
    }
    print ", inserting new";
    my $sth_insert = $dbh->prepare(qq{
        insert into backup_parts (
            backup_id,
            part_nr,
            tar_size,
            size,
            md5,
            items
        ) values (?,?,?,?,?,?)
    });

    $sth_insert->execute($backup_id, $part_nr, $tar_size, $size, $md5, $items);
    $dbh->commit;
}

# Verify the tar archive(s) of one backup against the database.
#
# Parameters: host, share, backup number and the archive base name: either
#             a directory below $tar_dir holding the parts, or the name of
#             a single "<name>.tar.gz" file.
# Returns:    true when the archive is usable, false when it has to be
#             re-created.
#
# For every part this checks the size limits from the configuration,
# creates a missing .md5 file and syncs the backup_parts table.  An empty
# archive marks the backup as inc_deleted and counts as success.  Finally
# the file names found in the tar(s) are compared with the files table.
sub tar_check($$$$) {
    my ($host,$share,$num,$filename) = @_;

    my $t = time();
    print curr_time(), " check $host:$share#$num -> $filename";

    my @tar_parts;

    if (-d "$tar_dir/$filename") {
        print ", multi-part";
        opendir(my $dir, "$tar_dir/$filename") || die "can't readdir $tar_dir/$filename: $!";
        # plain files only; skip dot files and the md5 sums
        @tar_parts = map { "${filename}/$_" }
            grep { !/^\./ && !/md5/ && -f "$tar_dir/$filename/$_" }
            readdir($dir);
        closedir($dir);
    } else {
        push @tar_parts, "${filename}.tar.gz";
    }

    print " [parts: ",join(", ", @tar_parts),"]" if ($opt{d});

    my @tar_files;

    print " reading" if ($opt{d});

    foreach my $tarfilename (@tar_parts) {

        print "\n\t- $tarfilename";

        my $path = "$tar_dir/$tarfilename";

        my $size = (stat( $path ))[7] || die "can't stat $path: $!";

        if ($size > $Conf{MaxArchiveSize}) {
            print ", part bigger than media $size > $Conf{MaxArchiveSize}\n";
            return 0;
        }

        print ", $size bytes";

        # same gzip binary as get_gzip_size(), started without a shell
        open(my $fh, '-|', $bin->{gzip}, '-cd', $path) or die "can't open $path: $!";
        binmode($fh);
        my $tar = Archive::Tar::Streamed->new($fh);

        my $tar_size_inarc = 0;
        my $items = 0;

        while(my $entry = $tar->next) {
            push @tar_files, $entry->name;
            $items++;
            $tar_size_inarc += $entry->size;

            if ($tar_size_inarc > $Conf{MaxArchiveFileSize}) {
                print ", part $tarfilename is too big $tar_size_inarc > $Conf{MaxArchiveFileSize}\n";
                close($fh);
                return 0;
            }

        }

        close($fh);

        print ", $items items";

        if ($tar_size_inarc == 0 && $items == 0) {
            print ", EMPTY tar\n";

            my $backup_id = get_backup_id($host, $share, $num);

            my $sth_inc_deleted = $dbh->prepare(qq{
                update backups set
                    inc_deleted = true
                where id = ?
            });
            $sth_inc_deleted->execute($backup_id);

            $dbh->commit;

            return 1;
        }

        my $tar_size = get_gzip_size( $path );

        # real tar size is bigger because of padding
        # NOTE(review): gzip stores the uncompressed size modulo 2^32, so this
        # check can misfire for tars of 4 GB or more - confirm whether
        # MaxArchiveFileSize always stays below that
        if ($tar_size_inarc > $tar_size) {
            print ", size of files in tar ($tar_size_inarc) bigger than whole tar ($tar_size)!\n";
            return 0;
        }

        #
        # check if md5 exists, and if not, create one
        #

        my $md5_path = $path;
        $md5_path =~ s/\.tar\.gz$/.md5/ || die "can't create md5 filename from $md5_path";
        if (! -e $md5_path || -z $md5_path) {
            print ", creating md5";
            # run md5sum without a shell and store its output ourselves
            open(my $md5_out, '-|', $bin->{md5sum}, $path) or die "can't create md5 $path: $!";
            my $sum = do { local $/; <$md5_out> };
            close($md5_out) or die "can't create md5 $path: exit status $?";
            die "can't create md5 $path: no output" unless (defined $sum && length $sum);
            write_file( $md5_path, $sum );
        } else {
            ## FIXME check if existing md5 is valid
        }

        my $md5 = read_file( $md5_path ) || die "can't read md5sum file $md5_path: $!";
        # keep only the checksum
        # NOTE(review): without /s the trailing newline of the md5sum line
        # survives this substitution; left as is because rows already stored
        # in backup_parts are compared against exactly this value
        $md5 =~ s#\s.*$##;

        # extract part number from filename
        my $part_nr = 1;
        $part_nr = $1 if ($tarfilename =~ m#/(\d+)\.tar\.gz#);

        #
        # finally, check if backup_parts table in database is valid
        #

        check_part($host, $share, $num, $part_nr, $tar_size, $size, $md5, $items);
    }

    my $same = 1;

    @tar_files = sort @tar_files;
    print "\n\t",scalar(@tar_files), " tar files";

    my $sth = $dbh->prepare(qq{
        SELECT path,type
        FROM files
        JOIN shares on shares.id = shareid
        JOIN hosts on hosts.id = shares.hostid
        WHERE hosts.name = ? and shares.name = ? and backupnum = ?
    });
    $sth->execute($host, $share, $num);
    my @db_files;
    while( my $row = $sth->fetchrow_hashref ) {

        # bring database paths into the form used above for tar entries:
        # leading "./", and a trailing "/" on directories
        my $path = $row->{'path'} || die "no path?";
        $path =~ s#^/#./#;
        $path .= '/' if ($row->{'type'} == BPC_FTYPE_DIR);
        push @db_files, $path;
    }

    print " ",scalar(@db_files), " database files, diff";

    @db_files = sort @db_files;

    if (@tar_files != @db_files) {
        $same = 0;
        print " NUMBER";
    } else {
        my $diff = Algorithm::Diff->new(\@tar_files, \@db_files);
        while ( $diff->Next() ) {
            next if $diff->Same();
            $same = 0;
            print "< $_\n" for $diff->Items(1);
            print "> $_\n" for $diff->Items(2);
        }
    }

    print " ",($same ? 'ok' : 'DIFFERENT'),
        ", dur: ",fmt_time(time() - $t), "\n";

    return $same;
}
342    
343    
344 dpavlin 157 #----- main
345    
# all backups whose incremental archive is not marked as deleted, oldest first
my $sth = $dbh->prepare( qq{

    select
        backups.id as backup_id,
        hosts.name as host,
        shares.name as share,
        backups.num as num,
        inc_size,
        parts
    from backups
    join shares on backups.hostid = shares.hostid
        and shares.id = backups.shareid
    join hosts on shares.hostid = hosts.id
    where not inc_deleted
    order by backups.date

} );

$sth->execute();
# NOTE(review): DBI doesn't guarantee a row count for SELECT before all rows
# are fetched; this relies on the driver in use - confirm
my $num_backups = $sth->rows;
# incremented at the top of the loop, so the first backup reports as 1/N
my $curr_backup = 0;

while (my $row = $sth->fetchrow_hashref) {

    $curr_backup++;

    my $tar_file = BackupPC::SearchLib::getGzipName($row->{'host'}, $row->{'share'}, $row->{'num'});

    # this will return -1 if file doesn't exist
    my $size = BackupPC::SearchLib::get_tgz_size_by_name($tar_file);

    print "# size: $size backup.size: ", $row->{inc_size},"\n" if ($opt{d});

    if ( $row->{'inc_size'} != -1 && $size != -1 && $row->{'inc_size'} >= $size) {
        # archive looks complete: skip it, unless a forced check (-c) fails
        next if (! $check || tar_check($row->{'host'}, $row->{'share'}, $row->{'num'}, $tar_file));
    }

    print curr_time(), " creating $curr_backup/$num_backups ", $row->{'host'}, ":", $row->{'share'}, " #", $row->{'num'}, " -> $tar_file";

    my $t = time();

    # re-create archive?
    # the string is only shown in debug output; the command itself is run in
    # list form, so host and share names never pass through a shell
    my $cmd = qq{ $tarIncCreate -h "$row->{'host'}" -s "$row->{'share'}" -n $row->{'num'} -f };
    print STDERR "## $cmd\n" if ($debug);

    my @cmd = (
        $tarIncCreate,
        '-h', $row->{'host'},
        '-s', $row->{'share'},
        '-n', $row->{'num'},
        '-f',
    );

    if (system(@cmd) != 0) {
        print STDERR " FAILED";
    }

    print ", dur: ",fmt_time(time() - $t), "\n";

    $dbh->commit;

}

undef $sth;
$dbh->disconnect;

# release the lock, so a later run can't mistake a recycled pid for this one
$pidfile->remove;

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26