--- trunk/bin/BackupPC_incPartsUpdate 2005/10/10 14:05:09 167
+++ trunk/bin/BackupPC_incPartsUpdate 2005/12/12 20:59:53 256
@@ -6,12 +6,21 @@
 use DBI;
 use BackupPC::Lib;
 use BackupPC::View;
+use BackupPC::Attrib qw/:all/;
 use Data::Dumper;
 use Time::HiRes qw/time/;
-use File::Pid;
 use POSIX qw/strftime/;
-use BackupPC::SearchLib;
 use Cwd qw/abs_path/;
+use File::Which;
+use Archive::Tar::Streamed;
+use Algorithm::Diff;
+use Getopt::Std;
+
+my $bpc = BackupPC::Lib->new || die "can't create BackupPC::Lib";
+my %Conf = $bpc->Conf();
+
+use BackupPC::SearchLib;
+%BackupPC::SearchLib::Conf = %Conf;
 
 my $path = abs_path($0);
 $path =~ s#/[^/]+$#/#;
@@ -19,19 +28,23 @@
 
 die "can't find $tarIncCreate: $!\n" unless (-x $tarIncCreate);
 
-my $debug = 0;
+my $bin;
+foreach my $c (qw/gzip md5sum/) {
+	$bin->{$c} = which($c) || die "$0 needs $c, install it\n";
+}
+
+my %opt;
+getopts("cd", \%opt );
+
+my $debug = $opt{d};
+my $check = $opt{c} && print STDERR "NOTICE: tar archive check forced\n";
+
 $|=1;
 
 my $start_t = time();
 
 my $t_fmt = '%Y-%m-%d %H:%M:%S';
 
-my $hosts;
-my $bpc = BackupPC::Lib->new || die;
-my %Conf = $bpc->Conf();
-my $TopDir = $bpc->TopDir();
-my $beenThere = {};
-
 my $dsn = $Conf{SearchDSN} || die "Need SearchDSN in config.pl\n";
 my $user = $Conf{SearchUser} || '';
 
@@ -56,6 +69,148 @@
 	return strftime($t_fmt,localtime());
 }
 
+# tar_check($host, $share, $num, $filename)
+#
+# Check the archive of one backup against the search database.
+#
+#   $host, $share, $num - host name, share name and backup number; used in
+#                         messages and as bind values of the files query
+#   $filename           - archive name below $tar_dir: a directory holding
+#                         the parts of a multi-part archive, otherwise the
+#                         base name of "$filename.tar.gz"
+#
+# Creates a missing .md5 file for every part it reads.  Returns 1 when the
+# member sizes of no part add up to more than $Conf{MaxArchiveFileSize} or
+# $Conf{MaxArchiveSize} and the sorted member names of the part(s) equal
+# the sorted paths stored in the files table for this backup, 0 otherwise.
+# Dies when a part can't be opened or its md5 can't be created.
+sub tar_check($$$$) {
+	my ($host,$share,$num,$filename) = @_;
+
+	if ($debug) {
+		print STDERR " {{ CHECK: ${host}:${share}#${num} and $filename";
+	} else {
+		print " check";
+	}
+
+	my @tar_parts;
+
+	if (-d "$tar_dir/$filename") {
+		print STDERR " multi-part" if ($debug);
+		opendir(my $dir, "$tar_dir/$filename") || die "can't readdir $tar_dir/$filename: $!";
+		@tar_parts = map { "${filename}/$_" }
+			grep { !/^\./ && !/md5/ && -f "$tar_dir/$filename/$_" } readdir($dir);
+		closedir($dir);
+	} else {
+		push @tar_parts, "${filename}.tar.gz";
+	}
+
+	# debug output goes to STDERR like the rest of the trace
+	print STDERR " [parts: ",join(", ", @tar_parts),"]" if ($debug);
+
+	my $same = 1;
+	my @tar_files;
+
+	print " reading";
+
+	foreach my $tarfilename (@tar_parts) {
+
+		print STDERR " $tarfilename" if ($debug);
+
+		# named $part_path so it doesn't shadow the file-level $path
+		my $part_path = "$tar_dir/$tarfilename";
+		my $md5 = $part_path;
+		$md5 =~ s/\.tar\.gz$/.md5/ || die "can't create md5 filename from $md5";
+		if (! -e $md5) {
+			print ", creating md5";
+			# run md5sum without a shell and write the .md5 only after it
+			# succeeded: a shell redirect creates the file even when md5sum
+			# fails, and the -e test above would then never recreate it
+			open(my $sum_fh, '-|', $bin->{md5sum}, $part_path) or die "can't create md5 $part_path: $!";
+			my @sum = <$sum_fh>;
+			close($sum_fh) or die "can't create md5 $part_path: md5sum exit status $?";
+			open(my $md5_fh, '>', $md5) or die "can't write $md5: $!";
+			print $md5_fh @sum;
+			close($md5_fh) or die "can't write $md5: $!";
+		}
+
+		# use the gzip located at startup; the list form of open hands the
+		# file name to gzip unchanged instead of passing it through a shell
+		open(my $fh, '-|', $bin->{gzip}, '-cd', $part_path) or die "can't open $part_path: $!";
+		binmode($fh);
+		my $tar = Archive::Tar::Streamed->new($fh);
+
+		# sum of the member sizes of this part only
+		my $total_size = 0;
+
+		while(my $entry = $tar->next) {
+			push @tar_files, $entry->name;
+			$total_size += $entry->size;
+		}
+
+		if ($total_size > $Conf{MaxArchiveFileSize}) {
+			print STDERR " part too big $total_size > $Conf{MaxArchiveFileSize} }}" if ($debug);
+			$same = 0;
+			last;
+		} elsif ($total_size > $Conf{MaxArchiveSize}) {
+			print STDERR " part bigger than media $total_size > $Conf{MaxArchiveSize} }}" if ($debug);
+			$same = 0;
+			last;
+		}
+	}
+
+	# short-cut and exit;
+	return $same unless($same);
+
+	@tar_files = sort @tar_files;
+	print STDERR " ",($#tar_files + 1), " files" if ($debug);
+
+	print STDERR ", database" if ($debug);
+
+	my $sth = $dbh->prepare(qq{
+		SELECT path,type
+		FROM files
+			JOIN shares on shares.id = shareid
+			JOIN hosts on hosts.id = shares.hostid
+		WHERE hosts.name = ? and shares.name = ? and backupnum = ?
+	});
+	$sth->execute($host, $share, $num);
+	my @db_files;
+	while( my $row = $sth->fetchrow_hashref ) {
+
+		# only a missing path is an error; "0" is a legal name
+		my $db_path = $row->{'path'};
+		die "no path?" unless (defined($db_path) && $db_path ne '');
+		# "/x" becomes "./x" and directories get a trailing "/" for the comparison
+		$db_path =~ s#^/#./#;
+		$db_path .= '/' if ($row->{'type'} == BPC_FTYPE_DIR);
+		push @db_files, $db_path;
+	}
+
+	print STDERR " ",($#db_files + 1), " files, diff" if ($debug);
+
+	@db_files = sort @db_files;
+
+	if ($#tar_files != $#db_files) {
+		$same = 0;
+		print STDERR " NUMBER" if ($debug);
+	} else {
+		my $diff = Algorithm::Diff->new(\@tar_files, \@db_files);
+		while ( $diff->Next() ) {
+			next if $diff->Same();
+			$same = 0;
+			print "< $_\n" for $diff->Items(1);
+			print "> $_\n" for $diff->Items(2);
+		}
+	}
+
+	print " ",($same ? 'ok' : 'DIFFERENT');
+	print STDERR " }} " if ($debug);
+
+	return $same;
+}
+
+
 #----- main
 
 my $sth = $dbh->prepare( qq{
@@ -64,46 +219,60 @@
 	backups.id as backup_id,
 	hosts.name as host,
 	shares.name as share,
-	backups.num as num
+	backups.num as num,
+	inc_size,
+	parts
 from backups
 	join shares on backups.hostid = shares.hostid
 	and shares.id = backups.shareid
 	join hosts on shares.hostid = hosts.id
-where inc_size < 0 and not inc_deleted
+where not inc_deleted
 order by backups.date
 
 } );
 
 $sth->execute();
-
-my $sth_inc_size = $dbh->prepare(qq{ update backups set inc_size = ? where id = ? });
-my $sth_inc_deleted = $dbh->prepare(qq{ update backups set inc_deleted = ? where id = ? });
-
-%BackupPC::SearchLib::Conf = %Conf;
+# NOTE: DBI documents rows() as reliable only for non-SELECT statements,
+# so this total depends on the database driver
+my $num_backups = $sth->rows;
+# position of the current row in the result, incremented for every row
+my $curr_backup = 0;
 
 while (my $row = $sth->fetchrow_hashref) {
 	my $tar_file = BackupPC::SearchLib::getGzipName($row->{'host'}, $row->{'share'}, $row->{'num'});
 
-	print curr_time, sprintf(" %s:%s %-3d ", $row->{'host'}, $row->{'share'}, $row->{'num'}), " -> $tar_file ";
-	my $t = time();
+	# this will return -1 if file doesn't exist
+	my $size = BackupPC::SearchLib::get_tgz_size_by_name($tar_file);
 
-	my $cmd = qq{$tarIncCreate -h "$row->{'host'}" -s "$row->{'share'}" -n $row->{'num'} | gzip -9 > $tar_dir/$tar_file};
-	print STDERR "## $cmd\n" if ($debug);
+	print "# size: $size backup.size: ", $row->{inc_size},"\n" if ($opt{d});
 
-	system($cmd) == 0 or die "failed: $?";
+	# count skipped backups too, so that $curr_backup/$num_backups below
+	# is a real position in the list
+	$curr_backup++;
+
+	# skip when the size in the database and the size of the file are
+	# equal and neither is -1 (unless a check was forced and it fails)
+	if ( $row->{'inc_size'} != -1 && $size != -1 && $row->{'inc_size'} == $size) {
+		if ($check) {
+			tar_check($row->{'host'}, $row->{'share'}, $row->{'num'}, $tar_file) && next;
+		} else {
+			next;
+		}
+	}
 
-	my $size = (stat( "$tar_dir/$tar_file" ))[7];
+	print curr_time, " $curr_backup/$num_backups ", $row->{'host'}, ":", $row->{'share'}, " #", $row->{'num'}, " -> $tar_file";
 
-	print " dur: ",fmt_time(time() - $t)," $size bytes";
+	my $t = time();
 
-	if ($size > 45) {
-		$sth_inc_size->execute($size, $row->{'backup_id'});
-	} else {
-		$sth_inc_deleted->execute(1, $row->{'backup_id'});
-		unlink "$tar_dir/$tar_file" || die "can't delete $tar_dir/$tar_file: $!\n";
-		print " EMPTY";
+	# re-create archive?  list form of system() passes host and share
+	# names to the helper as they are, without shell interpretation
+	my @cmd = ($tarIncCreate, '-h', $row->{'host'}, '-s', $row->{'share'}, '-n', $row->{'num'}, '-f');
+	print STDERR "## @cmd\n" if ($debug);
+
+	if (system(@cmd) != 0) {
+		print STDERR " FAILED";
 	}
-	print "\n";
+
+	print ", dur: ",fmt_time(time() - $t), "\n";
 
 	$dbh->commit;
 