/[BackupPC]/trunk/lib/BackupPC/Xfer/RsyncDigest.pm
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/BackupPC/Xfer/RsyncDigest.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1 - (hide annotations)
Wed Jun 22 19:12:04 2005 UTC (19 years ago) by dpavlin
Original Path: upstream/2.1.0/lib/BackupPC/Xfer/RsyncDigest.pm
File size: 13573 byte(s)
import of version 2.1.0

1 dpavlin 1 #============================================================= -*-perl-*-
2     #
3     # BackupPC::Xfer::RsyncDigest package
4     #
5     # DESCRIPTION
6     #
7     # This library defines a BackupPC::Xfer::RsyncDigest class for computing
8     # and caching rsync checksums.
9     #
10     # AUTHOR
11     # Craig Barratt <cbarratt@users.sourceforge.net>
12     #
13     # COPYRIGHT
14     # Copyright (C) 2001-2003 Craig Barratt
15     #
16     # This program is free software; you can redistribute it and/or modify
17     # it under the terms of the GNU General Public License as published by
18     # the Free Software Foundation; either version 2 of the License, or
19     # (at your option) any later version.
20     #
21     # This program is distributed in the hope that it will be useful,
22     # but WITHOUT ANY WARRANTY; without even the implied warranty of
23     # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24     # GNU General Public License for more details.
25     #
26     # You should have received a copy of the GNU General Public License
27     # along with this program; if not, write to the Free Software
28     # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
29     #
30     #========================================================================
31     #
32     # Version 2.1.0, released 20 Jun 2004.
33     #
34     # See http://backuppc.sourceforge.net.
35     #
36     #========================================================================
37    
38     package BackupPC::Xfer::RsyncDigest;
39    
40     use strict;
41     use BackupPC::FileZIO;
42    
43     use vars qw( $RsyncLibOK );
44     use Carp;
45     require Exporter;
46     use vars qw( @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS );
47    
48     my $Log = \&logHandler;
49    
50     #
51     # Magic value for checksum seed. We only cache block and file digests
52     # when the checksum seed matches this value.
53     #
54     use constant RSYNC_CSUMSEED_CACHE => 32761;
55    
56     @ISA = qw(Exporter);
57    
58     @EXPORT = qw( );
59    
60     @EXPORT_OK = qw(
61     RSYNC_CSUMSEED_CACHE
62     );
63    
64     %EXPORT_TAGS = (
65     'all' => [ @EXPORT_OK ],
66     );
67    
#
# Probe for File::RsyncP at compile time.  $RsyncLibOK is set to 1
# if the module loaded and to 0 if it did not; the digest routines
# check this flag and return an error code rather than calling into
# a library that is not installed.
#
BEGIN {
    $RsyncLibOK = eval { require File::RsyncP; File::RsyncP->import(); 1 }
                ? 1 : 0;
}
80    
#
# Return the rsync block size based on the file size: one
# ten-thousandth of the file size, raised to at least $defaultBlkSize
# and then capped at 16384.
#
# We also make sure the block size plus 4 (ie: checksumSeed)
# is not a multiple of 64 - otherwise the cached checksums
# will not be the same for protocol versions <= 26 and > 26.
# When that would happen the block size is bumped by 4.
#
sub blockSize
{
    my($class, $fileSize, $defaultBlkSize) = @_;

    my $size = int($fileSize / 10000);
    if ( $size < $defaultBlkSize ) {
        $size = $defaultBlkSize;
    }
    if ( $size > 16384 ) {
        $size = 16384;
    }
    return ((($size + 4) % 64) == 0) ? $size + 4 : $size;
}
97    
#
# Check whether the given file is flagged as having cached checksums,
# by looking at its first byte.
#
# Returns 1 if the first byte is 0xd6 and 0 if it is anything else.
# Returns -1 if the file cannot be opened and -2 if a single byte
# cannot be read from it (eg: the file is empty).
#
sub fileDigestIsCached
{
    my($class, $file) = @_;
    my $firstByte;

    open(my $fh, "<", $file) or return -1;
    binmode($fh);
    my $nRead = sysread($fh, $firstByte, 1);
    return -2 if ( $nRead != 1 );
    close($fh);
    return 1 if ( $firstByte eq chr(0xd6) );
    return 0;
}
109    
#
# Compute and add rsync block and file digests to the given file.
#
# Empty files don't get cached checksums.
#
# If verify is set then existing cached checksums are checked, and
# rewritten if they don't match.
#
# The data written at the end of the file is:
#
#   - one byte 0xb3
#   - the block digests (20 bytes per block)
#   - the file digest
#   - four 32-bit little-endian words: block size, checksum seed,
#     number of blocks, and the magic number 0x5fe3c289
#
# Finally the first byte of the file is rewritten to 0xd6 to flag
# that the file has cached checksums.
#
# Returns 0 on success.  Returns 1 on good verify and 2 on bad verify
# (in which case the checksums have been rewritten).
#
# Returns a negative value on error:
#
#   -100  checksumSeed is not RSYNC_CSUMSEED_CACHE, or file is empty
#   -101  File::RsyncP is not available
#   -102  BackupPC::FileZIO could not open the file
#   -103  file could not be opened for read/write
#   -104  first byte of the file could not be read
#   -105  first byte is neither 0x78 nor 0xd6
#   -106  seek to the append position failed
#   -107  verify requested but the file is not flagged 0xd6
#   -108  read of the existing cached checksums failed
#   -109  seek back to the append position failed
#   -110  write of the checksum data failed or was short
#   -111  seek to end of file failed
#   -112  truncate of extraneous trailing data failed
#   -113  seek to start of file failed
#   -114  write of the 0xd6 flag byte failed
#
sub digestAdd
{
    my($class, $file, $blockSize, $checksumSeed, $verify) = @_;
    my $retValue = 0;

    #
    # Don't cache checksums if the checksumSeed is not RSYNC_CSUMSEED_CACHE
    # or if the file is empty.
    #
    return -100 if ( $checksumSeed != RSYNC_CSUMSEED_CACHE || !-s $file );

    if ( $blockSize == 0 ) {
        $Log->("digestAdd: bad blockSize ($file, $blockSize, $checksumSeed)");
        $blockSize = 2048;
    }
    #
    # Read a whole number of blocks, a little over 1MB, per iteration.
    #
    my $nBlks = int(65536 * 16 / $blockSize) + 1;
    my($data, $blockDigest, $fileDigest);

    return -101 if ( !$RsyncLibOK );

    my $digest = File::RsyncP::Digest->new;
    $digest->add(pack("V", $checksumSeed)) if ( $checksumSeed );

    return -102 if ( !defined(my $fh = BackupPC::FileZIO->open($file, 0, 1)) );

    while ( 1 ) {
        $fh->read(\$data, $nBlks * $blockSize);
        last if ( $data eq "" );
        $blockDigest .= $digest->blockDigest($data, $blockSize, 16,
                                             $checksumSeed);
        $digest->add($data);
    }
    $fileDigest = $digest->digest2;
    #
    # The position of the underlying file handle after reading all
    # the data is where the checksum data gets written.
    # NOTE(review): presumably this is the end of the compressed
    # data - confirm against BackupPC::FileZIO.
    #
    my $eofPosn = sysseek($fh->{fh}, 0, 1);
    $fh->close;
    my $rsyncData = $blockDigest . $fileDigest;
    my $metaData  = pack("VVVV", $blockSize,
                                 $checksumSeed,
                                 length($blockDigest) / 20,
                                 0x5fe3c289,               # magic number
                        );
    my $data2 = chr(0xb3) . $rsyncData . $metaData;
#    printf("appending %d+%d bytes to %s at offset %d\n",
#                                            length($rsyncData),
#                                            length($metaData),
#                                            $file,
#                                            $eofPosn);
    open(my $fh2, "+<", $file) || return -103;
    binmode($fh2);
    return -104 if ( sysread($fh2, $data, 1) != 1 );
    if ( $data ne chr(0x78) && $data ne chr(0xd6) ) {
        $Log->(sprintf("digestAdd: $file has unexpected first char 0x%x",
                       ord($data)));
        return -105;
    }
    return -106 if ( sysseek($fh2, $eofPosn, 0) != $eofPosn );
    if ( $verify ) {
        my $data3;

        #
        # Verify the cached checksums.  We read one byte more than
        # we expect so that extra trailing data shows up as a
        # mismatch.
        #
        return -107 if ( $data ne chr(0xd6) );
        #
        # sysread returns undef (never a negative number) on error.
        #
        return -108
            if ( !defined(sysread($fh2, $data3, length($data2) + 1)) );
        if ( $data2 eq $data3 ) {
            return 1;
        }
        #
        # Checksums don't agree - fall through so we rewrite the data
        #
        $Log->("digestAdd: $file verify failed; redoing checksums");
        return -109 if ( sysseek($fh2, $eofPosn, 0) != $eofPosn );
        $retValue = 2;
    }
    return -110 if ( syswrite($fh2, $data2) != length($data2) );
    if ( $verify ) {
        #
        # Make sure there is no extraneous data on the end of
        # the file.  Seek to the end and truncate if it doesn't
        # match our expected length.
        #
        return -111 if ( !defined(sysseek($fh2, 0, 2)) );
        if ( sysseek($fh2, 0, 1) != $eofPosn + length($data2) ) {
            if ( !truncate($fh2, $eofPosn + length($data2)) ) {
                $Log->(sprintf("digestAdd: $file truncate from %d to %d failed",
                               sysseek($fh2, 0, 1), $eofPosn + length($data2)));
                return -112;
            } else {
                $Log->(sprintf("digestAdd: $file truncated from %d to %d",
                               sysseek($fh2, 0, 1), $eofPosn + length($data2)));
            }
        }
    }
    #
    # Only flag the file as cached once the checksum data is in place.
    #
    return -113 if ( !defined(sysseek($fh2, 0, 0)) );
    return -114 if ( syswrite($fh2, chr(0xd6)) != 1 );
    close($fh2);
    return $retValue;
}
218    
#
# Return rsync checksums for the given file.  We read the cached checksums
# if they exist and the block size and checksum seed match.  Otherwise
# we compute the checksums from the file contents.
#
# The doCache flag can take three ranges:
#
#  - doCache <  0: don't generate/use cached checksums
#  - doCache == 0: don't generate, but do use cached checksums if available
#  - doCache >  0: generate (if necessary) and use cached checksums
#
# Note: caching is only enabled when compression is on and the
# checksum seed is RSYNC_CSUMSEED_CACHE (32761).
#
# On success returns the three element list (undef, $dg, $blockSize),
# where $dg is the new digest object and $blockSize is the block size
# actually in use.
#
# On error returns a single negative value:
#
#   -1  File::RsyncP is not available
#   -2  file could not be opened
#   -3  first byte of the file could not be read
#   -4  file could not be re-opened after adding checksums
#   -5  first byte could not be re-read after adding checksums
#   -6  seek to the trailing 48 bytes failed
#   -7  read of the trailing 48 bytes failed or was short
#   -8  seek to the start of the cached block checksums failed
#   -9  BackupPC::FileZIO could not open the file
#
sub digestStart
{
    my($class, $fileName, $fileSize, $blockSize, $defBlkSize,
       $checksumSeed, $needMD4, $compress, $doCache) = @_;

    return -1 if ( !$RsyncLibOK );

    my $data;

    my $dg = bless {
        name    => $fileName,
        needMD4 => $needMD4,
        digest  => File::RsyncP::Digest->new,
    }, $class;

    if ( $fileSize > 0 && $compress && $doCache >= 0 ) {
        open(my $fh, "<", $fileName) || return -2;
        binmode($fh);
        return -3 if ( read($fh, $data, 1) != 1 );
        #
        # $ret stays 0 unless digestAdd is called below and fails.
        #
        my $ret = 0;

        if ( $data eq chr(0x78) && $doCache > 0
                     && $checksumSeed == RSYNC_CSUMSEED_CACHE ) {
            #
            # RSYNC_CSUMSEED_CACHE (32761) is the magic number that
            # rsync uses for checksumSeed with the --fixed-csum option.
            #
            # We now add the cached checksum data to the file.  There
            # is a possible race condition here since two BackupPC_dump
            # processes might call this function at the same time
            # on the same file.  But this should be ok since both
            # processes will write the same data, and the order
            # in which they write it doesn't matter.
            #
            close($fh);
            $ret = $dg->digestAdd($fileName,
                            $blockSize
                                || BackupPC::Xfer::RsyncDigest->blockSize(
                                                    $fileSize, $defBlkSize),
                            $checksumSeed);
            if ( $ret < 0 ) {
                $Log->("digestAdd($fileName) failed ($ret)");
            }
            #
            # now re-open the file and re-read the first byte
            #
            open($fh, "<", $fileName) || return -4;
            binmode($fh);
            return -5 if ( read($fh, $data, 1) != 1 );
        }
        if ( $ret >= 0 && $data eq chr(0xd6) ) {
            #
            # Looks like this file has cached checksums
            # Read the last 48 bytes: that's 2 file MD4s (32 bytes)
            # plus 4 words of meta data
            #
            # Note: seek returns a defined false value on failure,
            # so test its truth rather than definedness.
            #
            return -6 if ( !seek($fh, -48, 2) );
            return -7 if ( read($fh, $data, 48) != 48 );
            ($dg->{md4DigestOld},
             $dg->{md4Digest},
             $dg->{blockSize},
             $dg->{checksumSeed},
             $dg->{nBlocks},
             $dg->{magic}) = unpack("a16 a16 V V V V", $data);
            if ( $dg->{magic} == 0x5fe3c289
                    && $dg->{checksumSeed} == $checksumSeed
                    && ($blockSize == 0 || $dg->{blockSize} == $blockSize) ) {
                $dg->{fh}     = $fh;
                $dg->{cached} = 1;
                #
                # position the file at the start of the rsync block checksums
                # (4 (adler) + 16 (md4) bytes each)
                #
                return -8
                    if ( !seek($fh, -$dg->{nBlocks} * 20 - 48, 2) );
            } else {
                #
                # cached checksums are not valid, so we close the
                # file and treat it as uncached.
                #
                $dg->{cachedInvalid} = 1;
                close($fh);
            }
        }
    }
    if ( !$dg->{cached} ) {
        #
        # This file doesn't have cached checksums, or the checksumSeed
        # or blocksize doesn't match.  Open the file and prepare to
        # compute the checksums.
        #
        $blockSize
            = BackupPC::Xfer::RsyncDigest->blockSize($fileSize, $defBlkSize)
                                    if ( $blockSize == 0 );
        $dg->{checksumSeed} = $checksumSeed;
        $dg->{blockSize}    = $blockSize;
        $dg->{fh} = BackupPC::FileZIO->open($fileName, 0, $compress);
        return -9 if ( !defined($dg->{fh}) );
        if ( $needMD4 ) {
            #
            # The file MD4 digest is seeded with the checksum seed
            # and then fed the file data as it is read.
            #
            $dg->{csumDigest} = File::RsyncP::Digest->new;
            $dg->{csumDigest}->add(pack("V", $dg->{checksumSeed}));
        }
    }
    return (undef, $dg, $dg->{blockSize});
}
340    
#
# Return the block digests for the next $num blocks of the file,
# with $csumLen passed through to the digest library.
#
# For a file with cached checksums the stored 20-byte digests are
# read from the file and passed to blockDigestExtract.  Otherwise the
# file data itself is read and passed to blockDigest, and is also
# added to the file MD4 digest if needMD4 is set.
#
# If there is an unexpected shortfall of data then the data is padded
# with zero bytes, unless $noPad is set.
#
sub digestGet
{
    my($dg, $num, $csumLen, $noPad) = @_;
    my $fileData;

    if ( !$dg->{cached} ) {
        my $blockSize = $dg->{blockSize};
        my $want      = $blockSize * $num;
        my $got       = $dg->{fh}->read(\$fileData, $want);

        if ( $got <= 0 && !$noPad ) {
            #
            # nothing was read: substitute a full run of zero bytes
            #
            $fileData = "\0" x $want;
        }
        $dg->{csumDigest}->add($fileData) if ( $dg->{needMD4} );
        return $dg->{digest}->blockDigest($fileData, $blockSize,
                                          $csumLen, $dg->{checksumSeed});
    }

    #
    # Cached case: never read more digests than remain in the file.
    #
    my $avail   = $dg->{nBlocks};
    my $thisNum = $num > $avail ? $avail : $num;

    read($dg->{fh}, $fileData, 20 * $thisNum);
    $dg->{nBlocks} -= $thisNum;
    if ( $thisNum < $num && !$noPad ) {
        #
        # fewer digests than requested: pad with zero digests
        #
        $fileData .= "\0" x (20 * ($num - $thisNum));
    }
    return $dg->{digest}->blockDigestExtract($fileData, $csumLen);
}
371    
#
# Finish with a digest object: close its file and, if needMD4 is set,
# return the file MD4 digest.
#
# For a file with cached checksums the stored digest (md4DigestOld)
# is returned.  Otherwise any remaining file data is read and added
# to the running file digest first, unless $skipMD4 is set.
#
sub digestEnd
{
    my($dg, $skipMD4) = @_;

    if ( !$dg->{cached} ) {
        #
        # make sure we read the entire file for the file MD4 digest
        #
        if ( $dg->{needMD4} && !$skipMD4 ) {
            my $chunk;
            $dg->{csumDigest}->add($chunk)
                        while ( $dg->{fh}->read(\$chunk, 65536) > 0 );
        }
        $dg->{fh}->close();
        return $dg->{csumDigest}->digest if ( $dg->{needMD4} );
    } else {
        close($dg->{fh});
        return $dg->{md4DigestOld} if ( $dg->{needMD4} );
    }
}
394    
#
# Report whether this digest object is using cached checksums.
#
# In list context returns (cached, cachedInvalid); otherwise returns
# just the cached flag.
#
sub isCached
{
    my($dg) = @_;

    return $dg->{cached} if ( !wantarray );
    return ($dg->{cached}, $dg->{cachedInvalid});
}
401    
#
# Return the block size stored in this digest object.
#
sub blockSizeCurr
{
    my $dg = shift;

    return $dg->{blockSize};
}
408    
#
# Default log handler: write the message, followed by a newline,
# to STDERR.
#
sub logHandler
{
    my $msg = shift;

    print {*STDERR} $msg, "\n";
}
418    
#
# Set log handler to a new subroutine.
#
# NOTE(review): the handler is taken from the first argument, so this
# only works when called as a plain function; a method-style call
# would store the invocant instead.  Confirm against callers.
#
sub logHandlerSet
{
    my $handler = shift;

    $Log = $handler;
}
428    
429     1;

  ViewVC Help
Powered by ViewVC 1.1.26