--- trunk/bin/BackupPC_tarIncCreate	2005/11/04 15:40:51	233
+++ trunk/bin/BackupPC_tarIncCreate	2005/11/08 20:24:45	234
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/perl -w
 #============================================================= -*-perl-*-
 #
 # BackupPC_tarIncCreate: create a tar archive of an existing incremental dump
@@ -72,17 +72,20 @@
 use BackupPC::SearchLib;
 use Time::HiRes qw/time/;
 use POSIX qw/strftime/;
+use File::Which;
+use File::Path;
 use Data::Dumper;	### FIXME
 
 die("BackupPC::Lib->new failed\n") if ( !(my $bpc = BackupPC::Lib->new) );
 my $TopDir = $bpc->TopDir();
 my $BinDir = $bpc->BinDir();
 my %Conf   = $bpc->Conf();
+%BackupPC::SearchLib::Conf = %Conf;
 my %opts;
 my $in_backup_increment;
 
-if ( !getopts("th:n:p:r:s:b:w:v", \%opts) ) {
+if ( !getopts("th:n:p:r:s:b:w:vd", \%opts) ) {
     print STDERR <<EOF;
 ...
 EOF
     exit(1);
 }
+
+my $bin;
+foreach my $c (qw/gzip md5sum tee/) {
+    $bin->{$c} = which($c) || die "$0 needs $c, install it\n";
+}
+
 my @Backups = $bpc->BackupInfoRead($Host);
 my $FileCnt = 0;
 my $ByteCnt = 0;
 my $DirCnt = 0;
 my $SpecialCnt = 0;
 my $ErrorCnt = 0;
+my $current_tar_size = 0;
 my $i;
 
 $Num = $Backups[@Backups + $Num]{num} if ( -@Backups <= $Num && $Num < 0 );
@@ -163,14 +173,77 @@
 #
 # Write out all the requested files/directories
 #
-binmode(STDOUT);
-my $fh = *STDOUT;
+
+my $max_file_size = $Conf{'MaxArchiveFileSize'} || die "problem with MaxArchiveFileSize parameter";
+$max_file_size *= 1024;
+
+my $tar_dir = $Conf{InstallDir}.'/'.$Conf{GzipTempDir};
+die "problem with $tar_dir, check GzipTempDir in configuration\n" unless (-d $tar_dir && -w $tar_dir);
+
+my $tar_file = BackupPC::SearchLib::getGzipName($Host, $ShareName, $Num) || die "can't getGzipName($Host, $ShareName, $Num)";
+
+my $tar_path = $tar_dir . '/' . $tar_file . '.tmp';
+$tar_path =~ s#//#/#g;
+
+print STDERR "working dir: $tar_dir, max uncompressed size $max_file_size bytes, tar $tar_file\n" if ($opts{d});
+
+my $fh;
+my $part = 0;
+my $no_files = 0;
+
+sub new_tar_part {
+    if ($fh) {
+        return if ($current_tar_size == 0);
+
+        print STDERR "# closing part $part\n" if ($opts{d});
+
+        # finish the tar archive with two null blocks
+        my $data = "\0" x ($tar_header_length * 2);
+        TarWrite($fh, \$data);
+        TarWrite($fh, undef);
+
+        close($fh) || die "can't close archive part $part: $!";
+    }
+
+    $part++;
+
+    # if this is the first part, create the output directory
+    if ($part == 1) {
+        if (-d $tar_path) {
+            print STDERR "# deleting existing $tar_path\n" if ($opts{d});
+            rmtree($tar_path);
+        }
+        mkdir($tar_path) || die "can't create directory $tar_path: $!";
+    }
+
+    my $file = $tar_path . '/' . $part;
+
+    #
+    # create a complex pipe which will pass output through gzip
+    # for compression, write the compressed file to disk using tee,
+    # and pipe the same output to md5sum to create a checksum
+    #
+    my $cmd = '| ' . $bin->{'gzip'}   . ' ' . $Conf{GzipLevel} . ' ' .
+              '| ' . $bin->{'tee'}    . ' ' . $file . '.tar.gz' . ' ' .
+              '| ' . $bin->{'md5sum'} . ' - > ' . $file . '.md5';
+
+    print STDERR "## $cmd\n" if ($opts{d});
+
+    open($fh, $cmd) or die "can't open $cmd: $!";
+    binmode($fh);
+    $current_tar_size = 0;
+}
+
+new_tar_part();
 
 if (seedCache($Host, $ShareName, $Num)) {
     archiveWrite($fh, '/');
     archiveWriteHardLinks($fh);
 } else {
     print STDERR "NOTE: no files found for $Host:$ShareName, increment $Num\n" if ($opts{v});
+    $no_files = 1;
 }
 
 #
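The gzip/tee/md5sum pipeline built in new_tar_part() above uses Perl's pipe-open: a filehandle opened on a command string starting with '|' feeds that command's standard input. A minimal standalone sketch of the same pattern, with a hypothetical output prefix and compression level rather than the script's real configuration values:

    #!/usr/bin/perl -w
    use strict;

    # Everything printed to $fh is compressed by gzip, written to disk
    # by tee, and the same compressed stream is checksummed by md5sum
    # -- the same shape as the pipe new_tar_part() opens.
    my $file = '/tmp/part-1';                  # hypothetical output prefix
    my $cmd  = '| gzip -9 '                    # hypothetical GzipLevel
             . "| tee $file.tar.gz "
             . "| md5sum - > $file.md5";

    open(my $fh, $cmd) or die "can't open $cmd: $!";
    binmode($fh);
    print $fh "tar blocks would go here\n";    # stand-in for TarWrite() output
    close($fh) or die "can't close pipeline: $!";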
@@ -181,6 +254,18 @@
 TarWrite($fh, \$data);
 TarWrite($fh, undef);
 
+if (! close($fh)) {
+    rmtree($tar_path);
+    die "can't close archive\n";
+}
+
+# remove temporary files if no files were found
+if ($no_files) {
+    rmtree($tar_path);
+} elsif ($part == 1) {
+    warn "FIXME: if there is only one part move to parent directory and rename";
+}
+
 #
 # print out totals if requested
 #
@@ -193,8 +278,10 @@
     #
     # Got errors, with no files or directories; exit with non-zero
     # status
     #
+    cleanup();
     exit(1);
 }
+
 exit(0);
 
 ###########################################################################
@@ -260,6 +347,7 @@
 {
     my($fh, $dataRef) = @_;
+
     if ( !defined($dataRef) ) {
         #
         # do flush by padding to a full $WriteBufSz
@@ -267,6 +355,10 @@
         my $data = "\0" x ($WriteBufSz - length($WriteBuf));
         $dataRef = \$data;
     }
+
+    # poor man's tell :-)
+    $current_tar_size += length($$dataRef);
+
     if ( length($WriteBuf) + length($$dataRef) < $WriteBufSz ) {
         #
         # just buffer and return
@@ -394,7 +486,7 @@
     print STDERR curr_time(), "getting files for $host:$share increment $dumpNo..." if ($opts{v});
     my $sql = q{
-        SELECT path
+        SELECT path,size
         FROM files
             JOIN shares on shares.id = shareid
             JOIN hosts on hosts.id = shares.hostid
@@ -408,7 +500,7 @@
     print STDERR " found $count items\n" if ($opts{v});
     while (my $row = $sth->fetchrow_arrayref) {
 #print STDERR "+ ", $row->[0],"\n";
-        $in_backup_increment->{ $row->[0] }++;
+        $in_backup_increment->{ $row->[0] } = $row->[1];
     }
 
     $sth->finish();
@@ -417,6 +509,24 @@
     return $count;
 }
 
+#
+# calculate overhead for one file in tar
+#
+sub tar_overhead($) {
+    my $name = shift || '';
+
+    # a header block, padding of the file data to a full block,
+    # and two null blocks at the end of the archive
+    my $len = 4 * $tar_header_length;
+
+    # if the filename is longer than 99 chars, add blocks for
+    # the long filename
+    if ( length($name) > 99 ) {
+        $len += int( ( length($name) + $tar_header_length ) / $tar_header_length ) * $tar_header_length;
+    }
+
+    return $len;
+}
+
 my $Attr;
 my $AttrDir;
 
@@ -429,9 +539,18 @@
 
     $tarPath =~ s{//+}{/}g;
 
-#print STDERR "? $tarPath\n";
-    return unless ($in_backup_increment->{$tarPath});
-#print STDERR "A $tarPath\n";
+    #print STDERR "? $tarPath\n" if ($opts{d});
+    my $size = $in_backup_increment->{$tarPath};
+    return unless (defined($size));
+
+    # would adding this file overflow MaxArchiveFileSize?
+    # if so, close this part and start a new one
+    my $overhead = tar_overhead($tarPath);
+    if ( ($current_tar_size + $overhead + $size) > $max_file_size ) {
+        print STDERR "# tar file $current_tar_size + $overhead + $size > $max_file_size, splitting\n" if ($opts{d});
+        new_tar_part();
+    }
+
+    print STDERR "A $tarPath [$size] tell: $current_tar_size\n" if ($opts{d});
 
     if ( defined($PathRemove)
             && substr($tarPath, 0, length($PathRemove)) eq $PathRemove ) {
@@ -445,8 +564,6 @@
         #
         # Directory: just write the header
        #
-
-        $hdr->{name} .= "/" if ( $hdr->{name} !~ m{/$} );
         TarWriteFileInfo($fh, $hdr);
         $DirCnt++;
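To see what tar_overhead() above charges per file, here is the same arithmetic as a standalone snippet; the 512-byte block size matches $tar_header_length in the script, and the file names are made up:

    use strict;
    use warnings;

    my $tar_header_length = 512;

    # Same accounting as the patch: one header block, up to one block of
    # padding, two null blocks at the end of the archive, plus extra
    # blocks when the name exceeds the 99-character header name field.
    sub tar_overhead {
        my $name = shift || '';
        my $len = 4 * $tar_header_length;
        if ( length($name) > 99 ) {
            $len += int( ( length($name) + $tar_header_length )
                         / $tar_header_length ) * $tar_header_length;
        }
        return $len;
    }

    printf "short name: %d bytes\n", tar_overhead('etc/passwd'); # 2048
    printf "long name:  %d bytes\n", tar_overhead('x' x 120);    # 2048 + 512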
@@ -460,24 +577,66 @@
             $ErrorCnt++;
             return;
         }
-        TarWriteFileInfo($fh, $hdr);
-        my($data, $size);
-        while ( $f->read(\$data, $BufSize) > 0 ) {
-            TarWrite($fh, \$data);
-            $size += length($data);
-        }
-        $f->close;
-        TarWritePad($fh, $size);
+        # do we need to split the file?
+        if ($hdr->{size} < $max_file_size) {
+            TarWriteFileInfo($fh, $hdr);
+            my($data, $size);
+            while ( $f->read(\$data, $BufSize) > 0 ) {
+                TarWrite($fh, \$data);
+                $size += length($data);
+            }
+            $f->close;
+            TarWritePad($fh, $size);
             $FileCnt++;
             $ByteCnt += $size;
+        } else {
+            my $full_size = $hdr->{size};
+            my $orig_name = $hdr->{name};
+            my $max_part_size = $max_file_size - tar_overhead($hdr->{name});
+
+            my $parts = int(($full_size + $max_part_size - 1) / $max_part_size);
+            print STDERR "# splitting $orig_name [$full_size bytes] into $parts parts\n" if ($opts{d});
+            foreach my $subpart ( 1 .. $parts ) {
+                new_tar_part();
+                if ($subpart < $parts) {
+                    $hdr->{size} = $max_part_size;
+                } else {
+                    # the last part gets the remainder; subtracting the
+                    # full parts avoids a zero-size part when $full_size
+                    # is an exact multiple of $max_part_size
+                    $hdr->{size} = $full_size - ($parts - 1) * $max_part_size;
+                }
+                $hdr->{name} = $orig_name . '/' . $subpart;
+                print STDERR "## creating part $subpart ", $hdr->{name}, " [", $hdr->{size}, " bytes]\n" if ($opts{d});
+
+                TarWriteFileInfo($fh, $hdr);
+                my($data, $size);
+                for ( 1 .. int($hdr->{size} / $BufSize) ) {
+                    my $r_size = $f->read(\$data, $BufSize);
+                    die "expected $BufSize bytes read, got $r_size bytes!" if ($r_size != $BufSize);
+                    TarWrite($fh, \$data);
+                    $size += length($data);
+                }
+
+                my $size_left = $hdr->{size} % $BufSize;
+                if ($size_left > 0) {
+                    my $r_size = $f->read(\$data, $size_left);
+                    die "expected $size_left bytes last read, got $r_size bytes!" if ($r_size != $size_left);
+                    TarWrite($fh, \$data);
+                    $size += length($data);
+                }
+
+                TarWritePad($fh, $size);
+            }
+            $f->close;
+            $FileCnt++;
+            $ByteCnt += $full_size;
+            new_tar_part();
+        }
     } elsif ( $hdr->{type} == BPC_FTYPE_HARDLINK ) {
         #
         # Hardlink file: either write a hardlink or the complete file
-	# depending upon whether the linked-to file will be written
-	# to the archive.
+        # depending upon whether the linked-to file will be written
+        # to the archive.
         #
-	# Start by reading the contents of the link.
-	#
+        # Start by reading the contents of the link.
+        #
         my $f = BackupPC::FileZIO->open($hdr->{fullPath}, 0, $hdr->{compress});
         if ( !defined($f) ) {
             print(STDERR "Unable to open file $hdr->{fullPath}\n");
@@ -488,26 +647,26 @@
         while ( $f->read(\$data, $BufSize) > 0 ) {
             $hdr->{linkname} .= $data;
         }
-	$f->close;
-	my $done = 0;
-	my $name = $hdr->{linkname};
-	$name =~ s{^\./}{/};
-	if ( $HardLinkExtraFiles{$name} ) {
-	    #
-	    # Target file will be or was written, so just remember
-	    # the hardlink so we can dump it later.
-	    #
-	    push(@HardLinks, $hdr);
-	    $SpecialCnt++;
-	} else {
-	    #
-	    # Have to dump the original file.  Just call the top-level
-	    # routine, so that we save the hassle of dealing with
-	    # mangling, merging and attributes.
-	    #
-	    $HardLinkExtraFiles{$hdr->{linkname}} = 1;
-	    archiveWrite($fh, $hdr->{linkname}, $hdr->{name});
-	}
+        $f->close;
+        my $done = 0;
+        my $name = $hdr->{linkname};
+        $name =~ s{^\./}{/};
+        if ( $HardLinkExtraFiles{$name} ) {
+            #
+            # Target file will be or was written, so just remember
+            # the hardlink so we can dump it later.
+            #
+            push(@HardLinks, $hdr);
+            $SpecialCnt++;
+        } else {
+            #
+            # Have to dump the original file.  Just call the top-level
+            # routine, so that we save the hassle of dealing with
+            # mangling, merging and attributes.
+            #
+            $HardLinkExtraFiles{$hdr->{linkname}} = 1;
+            archiveWrite($fh, $hdr->{linkname}, $hdr->{name});
+        }
     } elsif ( $hdr->{type} == BPC_FTYPE_SYMLINK ) {
         #
         # Symbolic link: read the symbolic link contents into the header
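For reference, the part-count arithmetic used by the splitting branch above (every part except the last gets $max_part_size bytes, the last gets the remainder) can be checked in isolation; the sizes here are made up:

    use strict;
    use warnings;

    # Ceiling division: how many parts does a $full_size-byte file need
    # when at most $max_part_size bytes fit into one tar part?
    my $full_size     = 2_500_000;
    my $max_part_size = 1_000_000;

    my $parts = int( ($full_size + $max_part_size - 1) / $max_part_size ); # 3

    foreach my $subpart ( 1 .. $parts ) {
        my $size = $subpart < $parts
                 ? $max_part_size
                 : $full_size - ($parts - 1) * $max_part_size;             # 500000
        printf "part %d/%d: %d bytes (stored as name/%d)\n",
               $subpart, $parts, $size, $subpart;
    }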