1 |
#============================================================= -*-perl-*- |
2 |
# |
3 |
# BackupPC::FileZIO package |
4 |
# |
5 |
# DESCRIPTION |
6 |
# |
7 |
# This library defines a BackupPC::FileZIO class for doing |
8 |
# compressed or normal file I/O. |
9 |
# |
10 |
# AUTHOR |
11 |
# Craig Barratt <cbarratt@users.sourceforge.net> |
12 |
# |
13 |
# COPYRIGHT |
14 |
# Copyright (C) 2001-2003 Craig Barratt |
15 |
# |
16 |
# This program is free software; you can redistribute it and/or modify |
17 |
# it under the terms of the GNU General Public License as published by |
18 |
# the Free Software Foundation; either version 2 of the License, or |
19 |
# (at your option) any later version. |
20 |
# |
21 |
# This program is distributed in the hope that it will be useful, |
22 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
23 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
24 |
# GNU General Public License for more details. |
25 |
# |
26 |
# You should have received a copy of the GNU General Public License |
27 |
# along with this program; if not, write to the Free Software |
28 |
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
29 |
# |
30 |
#======================================================================== |
31 |
# |
32 |
# Version 2.1.0, released 20 Jun 2004. |
33 |
# |
34 |
# See http://backuppc.sourceforge.net. |
35 |
# |
36 |
#======================================================================== |
37 |
|
38 |
package BackupPC::FileZIO; |
39 |
|
40 |
use strict; |
41 |
|
42 |
use vars qw( $CompZlibOK ); |
43 |
use Carp; |
44 |
use File::Path; |
45 |
use File::Copy; |
46 |
|
47 |
# |
48 |
# For compressed files we have a to careful about running out of memory |
49 |
# when we inflate a deflated file. For example, if a 500MB file of all |
50 |
# zero-bytes is compressed, it will only occupy a few tens of kbytes. If |
51 |
# we read the compressed file in decent-size chunks, a single inflate |
52 |
# will try to allocate 500MB. Not a good idea. |
53 |
# |
54 |
# Instead, we compress the file in chunks of $CompMaxWrite. If a |
55 |
# deflated chunk produces less than $CompMaxRead bytes, then we flush |
56 |
# and continue. This adds a few bytes to the compressed output file, but |
57 |
# only in extreme cases where the compression ratio is very close to |
58 |
# 100%. The result is that, provided we read the compressed file in |
59 |
# chunks of $CompMaxRead or less, the biggest inflated data will be |
60 |
# $CompMaxWrite. |
61 |
# |
62 |
my $CompMaxRead = 131072; # 128K |
63 |
my $CompMaxWrite = 6291456; # 6MB |
64 |
|
65 |
# |
66 |
# We maintain a write buffer for small writes for both compressed and |
67 |
# uncompressed files. This is the size of the write buffer. |
68 |
# |
69 |
my $WriteBufSize = 65536; |
70 |
|
71 |
BEGIN { |
72 |
eval "use Compress::Zlib;"; |
73 |
if ( $@ ) { |
74 |
# |
75 |
# Compress::Zlib doesn't exist. Define some dummy constant |
76 |
# subs so that the code below doesn't barf. |
77 |
# |
78 |
eval { |
79 |
sub Z_OK { return 0; } |
80 |
sub Z_STREAM_END { return 1; } |
81 |
}; |
82 |
$CompZlibOK = 0; |
83 |
} else { |
84 |
$CompZlibOK = 1; |
85 |
} |
86 |
}; |
87 |
|
88 |
sub open |
89 |
{ |
90 |
my($class, $fileName, $write, $compLevel) = @_; |
91 |
local(*FH); |
92 |
my($fh); |
93 |
|
94 |
if ( ref(\$fileName) eq "GLOB" ) { |
95 |
$fh = $fileName; |
96 |
} else { |
97 |
if ( $write ) { |
98 |
open(FH, ">", $fileName) || return; |
99 |
} else { |
100 |
open(FH, "<", $fileName) || return; |
101 |
} |
102 |
binmode(FH); |
103 |
$fh = *FH; |
104 |
} |
105 |
$compLevel = 0 if ( !$CompZlibOK ); |
106 |
my $self = bless { |
107 |
fh => $fh, |
108 |
name => $fileName, |
109 |
write => $write, |
110 |
writeZeroCnt => 0, |
111 |
compress => $compLevel, |
112 |
}, $class; |
113 |
if ( $compLevel ) { |
114 |
if ( $write ) { |
115 |
$self->{deflate} = $self->myDeflateInit; |
116 |
} else { |
117 |
$self->{inflate} = $self->myInflateInit; |
118 |
$self->{inflateStart} = 1; |
119 |
} |
120 |
} |
121 |
return $self; |
122 |
} |
123 |
|
124 |
sub compOk |
125 |
{ |
126 |
return $CompZlibOK; |
127 |
} |
128 |
|
129 |
sub myDeflateInit |
130 |
{ |
131 |
my $self = shift; |
132 |
|
133 |
return deflateInit( |
134 |
-Bufsize => 65536, |
135 |
-Level => $self->{compress}, |
136 |
); |
137 |
} |
138 |
|
139 |
sub myInflateInit |
140 |
{ |
141 |
my $self = shift; |
142 |
|
143 |
return inflateInit( |
144 |
-Bufsize => 65536, |
145 |
); |
146 |
} |
147 |
|
148 |
sub read |
149 |
{ |
150 |
my($self, $dataRef, $nRead) = @_; |
151 |
my($n); |
152 |
|
153 |
return if ( $self->{write} ); |
154 |
return sysread($self->{fh}, $$dataRef, $nRead) if ( !$self->{compress} ); |
155 |
while ( !$self->{eof} && $nRead > length($self->{dataOut}) ) { |
156 |
if ( !length($self->{dataIn}) ) { |
157 |
$n = sysread($self->{fh}, $self->{dataIn}, $CompMaxRead); |
158 |
return $n if ( $n < 0 ); |
159 |
$self->{eof} = 1 if ( $n == 0 ); |
160 |
} |
161 |
if ( $self->{inflateStart} && $self->{dataIn} ne "" ) { |
162 |
my $chr = substr($self->{dataIn}, 0, 1); |
163 |
|
164 |
$self->{inflateStart} = 0; |
165 |
if ( $chr eq chr(0xd6) ) { |
166 |
# |
167 |
# Flag 0xd6 means this is a compressed file with |
168 |
# appended md4 block checksums for rsync. Change |
169 |
# the first byte back to 0x78 and proceed. |
170 |
# |
171 |
##print("Got 0xd6 block: normal\n"); |
172 |
substr($self->{dataIn}, 0, 1) = chr(0x78); |
173 |
} elsif ( $chr eq chr(0xb3) ) { |
174 |
# |
175 |
# Flag 0xb3 means this is the start of the rsync |
176 |
# block checksums, so consider this as EOF for |
177 |
# the compressed file. Also seek the file so |
178 |
# it is positioned at the 0xb3. |
179 |
# |
180 |
sysseek($self->{fh}, -length($self->{dataIn}), 1); |
181 |
$self->{eof} = 1; |
182 |
$self->{dataIn} = ""; |
183 |
##print("Got 0xb3 block: considering eof\n"); |
184 |
last; |
185 |
} else { |
186 |
# |
187 |
# normal case: nothing to do |
188 |
# |
189 |
} |
190 |
} |
191 |
my($data, $err) = $self->{inflate}->inflate($self->{dataIn}); |
192 |
$self->{dataOut} .= $data; |
193 |
if ( $err == Z_STREAM_END ) { |
194 |
#print("R"); |
195 |
$self->{inflate} = $self->myInflateInit; |
196 |
$self->{inflateStart} = 1; |
197 |
} elsif ( $err != Z_OK ) { |
198 |
$$dataRef = ""; |
199 |
return -1; |
200 |
} |
201 |
} |
202 |
if ( $nRead >= length($self->{dataOut}) ) { |
203 |
$n = length($self->{dataOut}); |
204 |
$$dataRef = $self->{dataOut}; |
205 |
$self->{dataOut} = ''; |
206 |
return $n; |
207 |
} else { |
208 |
$$dataRef = substr($self->{dataOut}, 0, $nRead); |
209 |
$self->{dataOut} = substr($self->{dataOut}, $nRead); |
210 |
return $nRead; |
211 |
} |
212 |
} |
213 |
|
214 |
# |
215 |
# Provide a line-at-a-time interface. This splits and buffers the |
216 |
# lines, you cannot mix calls to read() and readLine(). |
217 |
# |
218 |
sub readLine |
219 |
{ |
220 |
my($self) = @_; |
221 |
my $str; |
222 |
|
223 |
$self->{readLineBuf} = [] if ( !defined($self->{readLineBuf}) ); |
224 |
while ( !@{$self->{readLineBuf}} ) { |
225 |
$self->read(\$str, $CompMaxRead); |
226 |
if ( $str eq "" ) { |
227 |
$str = $self->{readLineFrag}; |
228 |
$self->{readLineFrag} = ""; |
229 |
return $str; |
230 |
} |
231 |
@{$self->{readLineBuf}} = split(/\n/, $self->{readLineFrag} . $str); |
232 |
if ( substr($str, -1, 1) ne "\n" ) { |
233 |
$self->{readLineFrag} = pop(@{$self->{readLineBuf}}); |
234 |
} else { |
235 |
$self->{readLineFrag} = ""; |
236 |
} |
237 |
} |
238 |
return shift(@{$self->{readLineBuf}}) . "\n"; |
239 |
} |
240 |
|
241 |
sub rewind |
242 |
{ |
243 |
my($self) = @_; |
244 |
|
245 |
return if ( $self->{write} ); |
246 |
return sysseek($self->{fh}, 0, 0) if ( !$self->{compress} ); |
247 |
$self->{dataOut} = ''; |
248 |
$self->{dataIn} = ''; |
249 |
$self->{eof} = 0; |
250 |
$self->{inflate} = $self->myInflateInit; |
251 |
$self->{inflateStart} = 1; |
252 |
return sysseek($self->{fh}, 0, 0); |
253 |
} |
254 |
|
255 |
sub writeBuffered |
256 |
{ |
257 |
my $self = shift; |
258 |
my($data, $force) = @_; |
259 |
|
260 |
# |
261 |
# Buffer small writes using a buffer size of up to $WriteBufSize. |
262 |
# |
263 |
if ( $force || length($self->{writeBuf}) + length($data) > $WriteBufSize ) { |
264 |
if ( length($self->{writeBuf}) ) { |
265 |
my $wrData = $self->{writeBuf} . $data; |
266 |
return -1 if ( syswrite($self->{fh}, $wrData) != length($wrData) ); |
267 |
$self->{writeBuf} = undef; |
268 |
} else { |
269 |
return if ( length($data) == 0 ); |
270 |
return -1 if ( syswrite($self->{fh}, $data) != length($data) ); |
271 |
} |
272 |
} else { |
273 |
$self->{writeBuf} .= $data; |
274 |
} |
275 |
return 0; |
276 |
} |
277 |
|
278 |
sub write |
279 |
{ |
280 |
my($self, $dataRef) = @_; |
281 |
my $n = length($$dataRef); |
282 |
|
283 |
return if ( !$self->{write} ); |
284 |
print(STDERR $$dataRef) if ( $self->{writeTeeStderr} ); |
285 |
return 0 if ( $n == 0 ); |
286 |
if ( !$self->{compress} ) { |
287 |
# |
288 |
# If smbclient gets a read error on the client (due to a file lock) |
289 |
# it will write a dummy file of zeros. We detect this so we can |
290 |
# store the file efficiently as a sparse file. writeZeroCnt is |
291 |
# the number of consecutive 0 bytes at the start of the file. |
292 |
# |
293 |
my $skip = 0; |
294 |
if ( $self->{writeZeroCnt} >= 0 && $$dataRef =~ /^(\0+)/s ) { |
295 |
$skip = length($1); |
296 |
$self->{writeZeroCnt} += $skip; |
297 |
return $n if ( $skip == $n ); |
298 |
} |
299 |
# |
300 |
# We now have some non-zero bytes, so time to seek to the right |
301 |
# place and turn off zero-byte detection. |
302 |
# |
303 |
if ( $self->{writeZeroCnt} > 0 ) { |
304 |
sysseek($self->{fh}, $self->{writeZeroCnt}, 0); |
305 |
$self->{writeZeroCnt} = -1; |
306 |
} elsif ( $self->{writeZeroCnt} == 0 ) { |
307 |
$self->{writeZeroCnt} = -1; |
308 |
} |
309 |
return -1 if ( $self->writeBuffered(substr($$dataRef, $skip)) < 0 ); |
310 |
return $n; |
311 |
} |
312 |
for ( my $i = 0 ; $i < $n ; $i += $CompMaxWrite ) { |
313 |
my $dataIn = substr($$dataRef, $i, $CompMaxWrite); |
314 |
my $dataOut = $self->{deflate}->deflate($dataIn); |
315 |
return -1 if ( $self->writeBuffered($dataOut) < 0 ); |
316 |
$self->{deflateIn} += length($dataIn); |
317 |
$self->{deflateOut} += length($dataOut); |
318 |
if ( $self->{deflateIn} >= $CompMaxWrite ) { |
319 |
if ( $self->{deflateOut} < $CompMaxRead ) { |
320 |
# |
321 |
# Compression is too high: to avoid huge memory requirements |
322 |
# on read we need to flush(). |
323 |
# |
324 |
$dataOut = $self->{deflate}->flush(); |
325 |
#print("F"); |
326 |
$self->{deflate} = $self->myDeflateInit; |
327 |
return -1 if ( $self->writeBuffered($dataOut) < 0 ); |
328 |
} |
329 |
$self->{deflateIn} = $self->{deflateOut} = 0; |
330 |
} |
331 |
} |
332 |
return $n; |
333 |
} |
334 |
|
335 |
sub name |
336 |
{ |
337 |
my($self) = @_; |
338 |
|
339 |
return $self->{name}; |
340 |
} |
341 |
|
342 |
sub writeTeeStderr |
343 |
{ |
344 |
my($self, $param) = @_; |
345 |
|
346 |
$self->{writeTeeStderr} = $param if ( defined($param) ); |
347 |
return $self->{writeTeeStderr}; |
348 |
} |
349 |
|
350 |
sub close |
351 |
{ |
352 |
my($self) = @_; |
353 |
my $err = 0; |
354 |
|
355 |
if ( $self->{write} && $self->{compress} ) { |
356 |
my $data = $self->{deflate}->flush(); |
357 |
$err = 1 if ( $self->writeBuffered($data) < 0 ); |
358 |
} elsif ( $self->{write} && !$self->{compress} ) { |
359 |
if ( $self->{writeZeroCnt} > 0 ) { |
360 |
# |
361 |
# We got a file of all zero bytes. Write a single zero byte |
362 |
# at the end of the file. On most file systems this is an |
363 |
# efficient way to store the file. |
364 |
# |
365 |
$err = 1 if ( sysseek($self->{fh}, $self->{writeZeroCnt} - 1, 0) |
366 |
!= $self->{writeZeroCnt} - 1 |
367 |
|| syswrite($self->{fh}, "\0") != 1 ); |
368 |
} |
369 |
} |
370 |
$self->writeBuffered(undef, 1); |
371 |
close($self->{fh}); |
372 |
return $err ? -1 : 0; |
373 |
} |
374 |
|
375 |
# |
376 |
# If $compress is >0, copy and compress $srcFile putting the output |
377 |
# in $destFileZ. Otherwise, copy the file to $destFileNoZ, or do |
378 |
# nothing if $destFileNoZ is undef. Finally, if rename is set, then |
379 |
# the source file is removed. |
380 |
# |
381 |
sub compressCopy |
382 |
{ |
383 |
my($class, $srcFile, $destFileZ, $destFileNoZ, $compress, $rmSrc) = @_; |
384 |
my(@s) = stat($srcFile); |
385 |
my $atime = $s[8] =~ /(.*)/ && $1; |
386 |
my $mtime = $s[9] =~ /(.*)/ && $1; |
387 |
if ( $CompZlibOK && $compress > 0 ) { |
388 |
my $fh = BackupPC::FileZIO->open($destFileZ, 1, $compress); |
389 |
my $data; |
390 |
if ( defined($fh) && open(LOG, "<", $srcFile) ) { |
391 |
binmode(LOG); |
392 |
while ( sysread(LOG, $data, 65536) > 0 ) { |
393 |
$fh->write(\$data); |
394 |
} |
395 |
close(LOG); |
396 |
$fh->close(); |
397 |
unlink($srcFile) if ( $rmSrc ); |
398 |
utime($atime, $mtime, $destFileZ); |
399 |
return 1; |
400 |
} else { |
401 |
$fh->close() if ( defined($fh) ); |
402 |
return 0; |
403 |
} |
404 |
} |
405 |
return 0 if ( !defined($destFileNoZ) ); |
406 |
if ( $rmSrc ) { |
407 |
return rename($srcFile, $destFileNoZ); |
408 |
} else { |
409 |
return 0 if ( !copy($srcFile, $destFileNoZ) ); |
410 |
utime($atime, $mtime, $destFileNoZ); |
411 |
} |
412 |
} |
413 |
|
414 |
1; |