/[BackupPC]/upstream/2.1.0/lib/BackupPC/Xfer/RsyncDigest.pm
This is a repository of my old source code, which is no longer updated. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /upstream/2.1.0/lib/BackupPC/Xfer/RsyncDigest.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1 - (show annotations)
Wed Jun 22 19:12:04 2005 UTC (18 years, 10 months ago) by dpavlin
File size: 13573 byte(s)
import of version 2.1.0

1 #============================================================= -*-perl-*-
2 #
3 # BackupPC::Xfer::RsyncDigest package
4 #
5 # DESCRIPTION
6 #
7 # This library defines a BackupPC::Xfer::RsyncDigest class for computing
8 # and caching rsync checksums.
9 #
10 # AUTHOR
11 # Craig Barratt <cbarratt@users.sourceforge.net>
12 #
13 # COPYRIGHT
14 # Copyright (C) 2001-2003 Craig Barratt
15 #
16 # This program is free software; you can redistribute it and/or modify
17 # it under the terms of the GNU General Public License as published by
18 # the Free Software Foundation; either version 2 of the License, or
19 # (at your option) any later version.
20 #
21 # This program is distributed in the hope that it will be useful,
22 # but WITHOUT ANY WARRANTY; without even the implied warranty of
23 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24 # GNU General Public License for more details.
25 #
26 # You should have received a copy of the GNU General Public License
27 # along with this program; if not, write to the Free Software
28 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
29 #
30 #========================================================================
31 #
32 # Version 2.1.0, released 20 Jun 2004.
33 #
34 # See http://backuppc.sourceforge.net.
35 #
36 #========================================================================
37
38 package BackupPC::Xfer::RsyncDigest;
39
40 use strict;
41 use BackupPC::FileZIO;
42
43 use vars qw( $RsyncLibOK );
44 use Carp;
45 require Exporter;
46 use vars qw( @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS );
47
#
# Current log callback.  It starts out as logHandler() and can be
# replaced with logHandlerSet().
#
my $Log = \&logHandler;

#
# Magic checksum seed.  Block and file digests are only cached when
# the checksum seed in use equals this value.
#
use constant RSYNC_CSUMSEED_CACHE => 32761;

#
# Exporter configuration: nothing is exported by default; the seed
# constant is available on request, individually or via the ':all' tag.
#
@ISA         = ('Exporter');
@EXPORT      = ();
@EXPORT_OK   = ('RSYNC_CSUMSEED_CACHE');
%EXPORT_TAGS = ( 'all' => [ @EXPORT_OK ] );
67
BEGIN {
    #
    # File::RsyncP is optional.  Try to load it at compile time and
    # record the outcome in $RsyncLibOK (1 = loaded, 0 = not available)
    # so the digest routines can return an error instead of dying.
    #
    my $loaded = eval {
        require File::RsyncP;
        File::RsyncP->import();
        1;
    };
    $RsyncLibOK = $loaded ? 1 : 0;
};
80
#
# Return the rsync block size based on the file size.
#
# The size starts at fileSize / 10000 (truncated), is raised to
# defaultBlkSize if it is smaller, and is capped at 16384.
# We also make sure the block size plus 4 (ie: checksumSeed)
# is not a multiple of 64 - otherwise the cached checksums
# will not be the same for protocol versions <= 26 and > 26.
#
sub blockSize
{
    my($class, $fileSize, $defaultBlkSize) = @_;
    my $maxBlkSize = 16384;

    my $size = int($fileSize / 10000);
    if ( $size < $defaultBlkSize ) {
        $size = $defaultBlkSize;
    }
    if ( $size > $maxBlkSize ) {
        $size = $maxBlkSize;
    }
    return ($size + 4) % 64 ? $size : $size + 4;
}
97
#
# Check whether the first byte of the given file is 0xd6, the value
# digestAdd writes there once digests have been appended.
#
# Returns 1 if the first byte is 0xd6 and 0 if it is anything else.
# Returns -1 if the file cannot be opened and -2 if the first byte
# cannot be read.
#
sub fileDigestIsCached
{
    my($class, $file) = @_;
    my $firstByte;

    my $opened = open(my $in, "<", $file);
    return -1 if ( !$opened );
    binmode($in);
    my $got = sysread($in, $firstByte, 1);
    return -2 if ( $got != 1 );
    close($in);
    return 1 if ( $firstByte eq chr(0xd6) );
    return 0;
}
109
#
# Compute and add rsync block and file digests to the given file.
#
# Empty files don't get cached checksums.
#
# If verify is set then existing cached checksums are checked, and
# rewritten (with any trailing extra data truncated) if they don't match.
#
# The data appended after the existing file contents is:
#
#   - one byte 0xb3
#   - the block digests (the block count is recorded as length / 20)
#   - the file digest returned by File::RsyncP::Digest::digest2
#   - four 32-bit little-endian words: blockSize, checksumSeed,
#     number of blocks, and the magic number 0x5fe3c289
#
# Finally the first byte of the file is overwritten with 0xd6.
#
# Returns 0 on success.  Returns 1 on good verify and 2 on bad verify
# (checksums rewritten).  Returns a variety of negative values on error:
#
#   -100  checksumSeed is not RSYNC_CSUMSEED_CACHE, or file is empty
#   -101  File::RsyncP is not available
#   -102  BackupPC::FileZIO open failed
#   -103  open for update failed
#   -104  can't read the first byte
#   -105  first byte is neither 0x78 nor 0xd6
#   -106  seek to the end of the file data failed
#   -107  verify requested but the file isn't marked as cached (0xd6)
#   -108  read of the existing cached data failed
#   -109  seek back to the end of the file data failed
#   -110  write of the digest data failed
#   -111  seek to end of file failed
#   -112  truncate failed
#   -113  seek to start of file failed
#   -114  write of the first byte failed
#
sub digestAdd
{
    my($class, $file, $blockSize, $checksumSeed, $verify) = @_;
    my $retValue = 0;

    #
    # Don't cache checksums if the checksumSeed is not RSYNC_CSUMSEED_CACHE
    # or if the file is empty.
    #
    return -100 if ( $checksumSeed != RSYNC_CSUMSEED_CACHE || !-s $file );

    if ( $blockSize == 0 ) {
        $Log->("digestAdd: bad blockSize ($file, $blockSize, $checksumSeed)");
        $blockSize = 2048;
    }

    #
    # Read in chunks that are a whole number of blocks, so block
    # boundaries never straddle two reads.
    #
    my $nBlks = int(65536 * 16 / $blockSize) + 1;
    my($data, $blockDigest, $fileDigest);

    return -101 if ( !$RsyncLibOK );

    my $digest = File::RsyncP::Digest->new;
    $digest->add(pack("V", $checksumSeed)) if ( $checksumSeed );

    my $fh = BackupPC::FileZIO->open($file, 0, 1);
    return -102 if ( !defined($fh) );

    while ( 1 ) {
        $fh->read(\$data, $nBlks * $blockSize);
        last if ( $data eq "" );
        $blockDigest .= $digest->blockDigest($data, $blockSize, 16,
                                             $checksumSeed);
        $digest->add($data);
    }
    $fileDigest = $digest->digest2;

    #
    # Position of the underlying file handle once FileZIO has read
    # everything; the digest data is written starting here.
    # NOTE(review): presumably this is the end of the compressed
    # stream - confirm against BackupPC::FileZIO.
    #
    my $eofPosn = sysseek($fh->{fh}, 0, 1);
    $fh->close;

    my $rsyncData = $blockDigest . $fileDigest;
    my $metaData  = pack("VVVV", $blockSize,
                                 $checksumSeed,
                                 length($blockDigest) / 20,
                                 0x5fe3c289,      # magic number
                        );
    my $data2 = chr(0xb3) . $rsyncData . $metaData;

    open(my $fh2, "+<", $file) || return -103;
    binmode($fh2);
    return -104 if ( sysread($fh2, $data, 1) != 1 );
    if ( $data ne chr(0x78) && $data ne chr(0xd6) ) {
        #
        # The file name is passed as an argument rather than being
        # interpolated into the format, so a '%' in the name can't
        # corrupt the message.
        #
        $Log->(sprintf("digestAdd: %s has unexpected first char 0x%x",
                       $file, ord($data)));
        return -105;
    }
    return -106 if ( sysseek($fh2, $eofPosn, 0) != $eofPosn );
    if ( $verify ) {
        my $data3;

        #
        # Verify the cached checksums.  We read one byte more than
        # expected so that trailing extra data makes the compare fail.
        #
        return -107 if ( $data ne chr(0xd6) );
        #
        # sysread returns undef (never a negative number) on error.
        #
        return -108
            if ( !defined(sysread($fh2, $data3, length($data2) + 1)) );
        if ( $data2 eq $data3 ) {
            return 1;
        }
        #
        # Checksums don't agree - fall through so we rewrite the data
        #
        $Log->("digestAdd: $file verify failed; redoing checksums");
        return -109 if ( sysseek($fh2, $eofPosn, 0) != $eofPosn );
        $retValue = 2;
    }
    return -110 if ( syswrite($fh2, $data2) != length($data2) );
    if ( $verify ) {
        #
        # Make sure there is no extraneous data on the end of
        # the file.  Seek to the end and truncate if it doesn't
        # match our expected length.
        #
        my $wantLen = $eofPosn + length($data2);
        my $endPosn = sysseek($fh2, 0, 2);
        return -111 if ( !defined($endPosn) );
        if ( $endPosn != $wantLen ) {
            if ( !truncate($fh2, $wantLen) ) {
                $Log->(sprintf("digestAdd: %s truncate from %d to %d failed",
                               $file, $endPosn, $wantLen));
                return -112;
            } else {
                $Log->(sprintf("digestAdd: %s truncated from %d to %d",
                               $file, $endPosn, $wantLen));
            }
        }
    }

    #
    # Flag the file as having cached checksums.
    #
    return -113 if ( !defined(sysseek($fh2, 0, 0)) );
    return -114 if ( syswrite($fh2, chr(0xd6)) != 1 );
    close($fh2);
    return $retValue;
}
218
#
# Start getting rsync checksums for the given file.  We read the cached
# checksums if they exist and the block size and checksum seed match.
# Otherwise we compute the checksums from the file contents.
#
# The doCache flag can take three ranges:
#
#  - doCache <  0: don't generate/use cached checksums
#  - doCache == 0: don't generate, but do use cached checksums if available
#  - doCache >  0: generate (if necessary) and use cached checksums
#
# Note: caching is only enabled when compression is on and the
# checksum seed is RSYNC_CSUMSEED_CACHE (32761).
#
# On success returns the list (undef, $dg, $blockSize), where $dg is
# the digest object to pass to digestGet/digestEnd and $blockSize is
# the block size actually in use.  On error returns a single negative
# value:
#
#   -1      File::RsyncP is not available
#   -2, -4  can't open the file
#   -3, -5  can't read the first byte
#   -6, -8  seek within the cached checksum data failed
#   -7      can't read the trailing 48 bytes of cached data
#   -9      BackupPC::FileZIO open failed
#
sub digestStart
{
    my($class, $fileName, $fileSize, $blockSize, $defBlkSize,
       $checksumSeed, $needMD4, $compress, $doCache) = @_;

    return -1 if ( !$RsyncLibOK );

    my $data;

    my $dg = bless {
        name    => $fileName,
        needMD4 => $needMD4,
        digest  => File::RsyncP::Digest->new,
    }, $class;

    if ( $fileSize > 0 && $compress && $doCache >= 0 ) {
        open(my $fh, "<", $fileName) || return -2;
        binmode($fh);
        return -3 if ( read($fh, $data, 1) != 1 );
        #
        # Result of digestAdd; stays 0 when digestAdd isn't called.
        #
        my $ret = 0;

        if ( $data eq chr(0x78) && $doCache > 0
                 && $checksumSeed == RSYNC_CSUMSEED_CACHE ) {
            #
            # RSYNC_CSUMSEED_CACHE (32761) is the magic number that
            # rsync uses for checksumSeed with the --fixed-csum option.
            #
            # We now add the cached checksum data to the file.  There
            # is a possible race condition here since two BackupPC_dump
            # processes might call this function at the same time
            # on the same file.  But this should be ok since both
            # processes will write the same data, and the order
            # in which they write it doesn't matter.
            #
            close($fh);
            $ret = $dg->digestAdd($fileName,
                            $blockSize
                                || BackupPC::Xfer::RsyncDigest->blockSize(
                                                $fileSize, $defBlkSize),
                            $checksumSeed);
            if ( $ret < 0 ) {
                $Log->("digestAdd($fileName) failed ($ret)");
            }
            #
            # now re-open the file and re-read the first byte
            #
            open($fh, "<", $fileName) || return -4;
            binmode($fh);
            return -5 if ( read($fh, $data, 1) != 1 );
        }
        if ( $ret >= 0 && $data eq chr(0xd6) ) {
            #
            # Looks like this file has cached checksums
            # Read the last 48 bytes: that's 2 file MD4s (32 bytes)
            # plus 4 words of meta data
            #
            # Note: seek returns a defined false value on failure, so
            # its result is tested for truth rather than definedness.
            #
            return -6 if ( !seek($fh, -48, 2) );
            return -7 if ( read($fh, $data, 48) != 48 );
            ($dg->{md4DigestOld},
             $dg->{md4Digest},
             $dg->{blockSize},
             $dg->{checksumSeed},
             $dg->{nBlocks},
             $dg->{magic}) = unpack("a16 a16 V V V V", $data);
            if ( $dg->{magic} == 0x5fe3c289
                    && $dg->{checksumSeed} == $checksumSeed
                    && ($blockSize == 0 || $dg->{blockSize} == $blockSize) ) {
                $dg->{fh}     = $fh;
                $dg->{cached} = 1;
                #
                # position the file at the start of the rsync block checksums
                # (4 (adler) + 16 (md4) bytes each)
                #
                return -8
                    if ( !seek($fh, -$dg->{nBlocks} * 20 - 48, 2) );
            } else {
                #
                # cached checksums are not valid, so we close the
                # file and treat it as uncached.
                #
                $dg->{cachedInvalid} = 1;
                close($fh);
            }
        }
    }
    if ( !$dg->{cached} ) {
        #
        # This file doesn't have cached checksums, or the checksumSeed
        # or blocksize doesn't match.  Open the file and prepare to
        # compute the checksums.
        #
        $blockSize
            = BackupPC::Xfer::RsyncDigest->blockSize($fileSize, $defBlkSize)
                                    if ( $blockSize == 0 );
        $dg->{checksumSeed} = $checksumSeed;
        $dg->{blockSize}    = $blockSize;
        $dg->{fh} = BackupPC::FileZIO->open($fileName, 0, $compress);
        return -9 if ( !defined($dg->{fh}) );
        if ( $needMD4 ) {
            $dg->{csumDigest} = File::RsyncP::Digest->new;
            $dg->{csumDigest}->add(pack("V", $dg->{checksumSeed}));
        }
    }
    return (undef, $dg, $dg->{blockSize});
}
340
#
# Return the block digests for the next $num blocks of the file.
#
# For a cached file, up to $num 20-byte entries (but no more than the
# remaining nBlocks) are read from the file and passed to
# blockDigestExtract with $csumLen.  Otherwise $num blocks of file
# data are read, added to the file digest if needMD4 is set, and
# passed to blockDigest.
#
# If less data is available than requested, the shortfall is padded
# with zero bytes unless $noPad is set.  (In the uncached case padding
# only happens when the read returns nothing at all.)
#
sub digestGet
{
    my($dg, $num, $csumLen, $noPad) = @_;

    if ( !$dg->{cached} ) {
        my $blkSize = $dg->{blockSize};
        my $want    = $blkSize * $num;
        my $buf;

        my $got = $dg->{fh}->read(\$buf, $want);
        if ( $got <= 0 && !$noPad ) {
            #
            # unexpected shortfall of data; pad with zeros
            #
            $buf = "\0" x $want;
        }
        $dg->{csumDigest}->add($buf) if ( $dg->{needMD4} );
        return $dg->{digest}->blockDigest($buf, $blkSize,
                                          $csumLen, $dg->{checksumSeed});
    }

    #
    # Cached: never read past the number of blocks still remaining.
    #
    my $avail = $num > $dg->{nBlocks} ? $dg->{nBlocks} : $num;
    my $buf;

    read($dg->{fh}, $buf, 20 * $avail);
    $dg->{nBlocks} -= $avail;
    if ( $avail < $num && !$noPad ) {
        #
        # unexpected shortfall of data; pad with zero digest
        #
        $buf .= "\0" x (20 * ($num - $avail));
    }
    return $dg->{digest}->blockDigestExtract($buf, $csumLen);
}
371
#
# Finish with a digest object: close its file and, if needMD4 was
# requested, return the file digest.
#
# For a cached file the stored md4DigestOld value is returned.
# Otherwise the rest of the file is read into the running digest
# first (unless $skipMD4 is set) and the computed digest is returned.
#
sub digestEnd
{
    my($dg, $skipMD4) = @_;

    if ( !$dg->{cached} ) {
        #
        # make sure we read the entire file for the file MD4 digest
        #
        if ( $dg->{needMD4} && !$skipMD4 ) {
            my $chunk;
            $dg->{csumDigest}->add($chunk)
                        while ( $dg->{fh}->read(\$chunk, 65536) > 0 );
        }
        $dg->{fh}->close();
        return $dg->{csumDigest}->digest if ( $dg->{needMD4} );
    } else {
        close($dg->{fh});
        return $dg->{md4DigestOld} if ( $dg->{needMD4} );
    }
}
394
#
# Report whether this digest object is using cached checksums.
#
# In list context returns (cached, cachedInvalid); otherwise returns
# just the cached flag.
#
sub isCached
{
    my $dg = shift;

    return $dg->{cached} if ( !wantarray );
    return ($dg->{cached}, $dg->{cachedInvalid});
}
401
#
# Return the block size stored in this digest object.
#
sub blockSizeCurr
{
    my $dg = shift;
    my $size = $dg->{blockSize};

    return $size;
}
408
#
# Default log handler: write the message, followed by a newline,
# to STDERR.
#
sub logHandler
{
    my $message = shift;

    print STDERR $message, "\n";
}
418
#
# Set log handler to a new subroutine.
#
# The handler is taken from the last argument, so both call styles
# work:
#
#     logHandlerSet($sub);
#     BackupPC::Xfer::RsyncDigest->logHandlerSet($sub);
#
# (Taking the first argument would store the class name instead of
# the handler when called as a class method.)
#
sub logHandlerSet
{
    my $sub = $_[-1];

    $Log = $sub;
}
428
429 1;

  ViewVC Help
Powered by ViewVC 1.1.26