/[BackupPC]/upstream/2.1.0/bin/BackupPC_compressPool
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /upstream/2.1.0/bin/BackupPC_compressPool

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1 - (show annotations)
Wed Jun 22 19:12:04 2005 UTC (18 years, 10 months ago) by dpavlin
File size: 19905 byte(s)
import of version 2.1.0

1 #!/bin/perl
2 #============================================================= -*-perl-*-
3 #
4 # BackupPC_compressPool: Compress existing pool
5 #
6 # DESCRIPTION
7 #
8 # Usage: BackupPC_compressPool [-t] [-r] <host>
9 #
10 # Flags:
11 # -t test mode: do everything except actually replace the pool files.
12 # Useful for estimating total run time without making any real
13 # changes.
14 # -r read check: re-read the compressed file and compare it against
15 # the original uncompressed file. Can only be used in test mode.
16 # -c # number of children to fork. BackupPC_compressPool can take
17 # a long time to run, so to speed things up it spawns four children,
18 # each working on a different part of the pool. You can change
19 # the number of children with the -c option.
20 #
21 # BackupPC_compressPool is used to convert an uncompressed pool to
22 # a compressed pool. If BackupPC compression is enabled after
23 # uncompressed backups already exist, BackupPC_compressPool can
24 # be used to compress all the old uncompressed backups.
25 #
26 # It is important that BackupPC not run while BackupPC_compressPool
27 # runs. Also, BackupPC_compressPool must run to completion before
28 # BackupPC is restarted.
29 #
30 # AUTHOR
31 # Craig Barratt <cbarratt@users.sourceforge.net>
32 #
33 # COPYRIGHT
34 # Copyright (C) 2001-2003 Craig Barratt
35 #
36 # This program is free software; you can redistribute it and/or modify
37 # it under the terms of the GNU General Public License as published by
38 # the Free Software Foundation; either version 2 of the License, or
39 # (at your option) any later version.
40 #
41 # This program is distributed in the hope that it will be useful,
42 # but WITHOUT ANY WARRANTY; without even the implied warranty of
43 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
44 # GNU General Public License for more details.
45 #
46 # You should have received a copy of the GNU General Public License
47 # along with this program; if not, write to the Free Software
48 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
49 #
50 #========================================================================
51 #
52 # Version 2.1.0, released 20 Jun 2004.
53 #
54 # See http://backuppc.sourceforge.net.
55 #
56 #========================================================================
57
58 use strict;
59 no utf8;
60
61 use File::Find;
62 use File::Path;
63 use Compress::Zlib;
64 use Getopt::Std;
65 use lib "__INSTALLDIR__/lib";
66 use BackupPC::Lib;
67 use BackupPC::FileZIO;
68
69 die("BackupPC::Lib->new failed\n") if ( !(my $bpc = BackupPC::Lib->new) );
70 $bpc->ChildInit();
71 my $TopDir = $bpc->TopDir();
72 my $BinDir = $bpc->BinDir();
73 my %Conf = $bpc->Conf();
74 my $PoolDir = "$TopDir/pool";
75 my $CPoolDir = "$TopDir/cpool";
76 my $Compress = $Conf{CompressLevel};
77 my %opts;
78 my $SigName = "";
79
80 #
81 # Catch various signals
82 #
83 foreach my $sig ( qw(INT BUS SEGV PIPE TERM ALRM HUP) ) {
84 $SIG{$sig} = \&catch_signal;
85 }
86
87 $| = 1;
88
89 my $CompMaxRead = 131072; # 128K
90 my $CompMaxWrite = 6291456; # 6MB
91
92 if ( !getopts("trc:", \%opts) || @ARGV != 0 ) {
93 print("usage: $0 [-c nChild] [-r] [-t]\n");
94 exit(1);
95 }
96 my $TestMode = $opts{t};
97 my $ReadCheck = $opts{r};
98 my $nChild = $opts{c} || 4;
99 if ( $ReadCheck && !$TestMode ) {
100 print(STDERR "$0: -r (read check) option must have -t (test)\n");
101 exit(1);
102 }
103 if ( $nChild < 1 || $nChild >= 16 ) {
104 print(STDERR "$0: number of children (-c option) must be from 1 to 16\n");
105 exit(1);
106 }
107 if ( !BackupPC::FileZIO->compOk ) {
108 print STDERR <<EOF;
109 $0: Compress::Zlib is not installed. You need to install it
110 before running this script.
111 EOF
112 exit(1);
113 }
114 if ( $Compress <= 0 ) {
115 print STDERR <<EOF;
116 $0: compression is not enabled. \%Conf{CompressLevel} needs
117 to be set to a value from 1 to 9. Please edit the config.pl file and
118 re-start $0.
119 EOF
120 exit(1);
121 }
122
123 my $Errors = 0;
124 my $SubDirDone = 0;
125 my $SubDirCnt = 0;
126 my $SubDirCurr = 0;
127 my $FileCnt = 0;
128 my $FileOrigSz = 0;
129 my $FileCompressSz = 0;
130
131 my $err = $bpc->ServerConnect($Conf{ServerHost}, $Conf{ServerPort});
132 if ( $err eq "" ) {
133 print <<EOF;
134 BackupPC is running on $Conf{ServerHost}. You need to stop BackupPC
135 before you can upgrade the code. Depending upon your installation,
136 you could run "/etc/init.d/backuppc stop".
137 EOF
138 exit(1);
139 }
140
141 umask($Conf{UmaskMode});
142
143 sub cpoolFileName
144 {
145 my($new) = @_;
146 if ( $new !~ m{/(\w/\w/\w)/(\w{32})(_\d+)?$} ) {
147 print("Error: Can't parse filename from $new\n");
148 $Errors++;
149 return;
150 }
151 my $dir = "$CPoolDir/$1";
152 $new = "$dir/$2";
153 mkpath($dir, 0, 0777) if ( !-d $dir );
154 return $new if ( !-f $new );
155 for ( my $i = 0 ; ; $i++ ) {
156 return "${new}_$i" if ( !-f "${new}_$i" );
157 }
158 }
159
160 sub doCompress
161 {
162 my $file = ($File::Find::name =~ /(.*)/ && $1);
163 local(*FH, *OUT);
164 my(@s) = stat($file);
165 my($n, $dataIn, $dataOut, $flush, $copy);
166
167 if ( $SigName ) {
168 print("Child got signal $SigName; quitting\n");
169 reportStats();
170 exit(0);
171 }
172 return if ( !-f $file );
173 my $defl = deflateInit(
174 -Bufsize => 65536,
175 -Level => $Compress,
176 );
177 if ( !open(FH, $TestMode ? "<" : "+<", $file) ) {
178 print("Error: Can't open $file for read/write\n");
179 $Errors++;
180 return;
181 }
182 binmode(FH);
183 while ( sysread(FH, $dataIn, $CompMaxWrite) > 0 ) {
184 $flush = 0;
185 $FileOrigSz += length($dataIn);
186 my $fragOut = $defl->deflate($dataIn);
187 if ( length($fragOut) < $CompMaxRead ) {
188 #
189 # Compression is too high: to avoid huge memory requirements
190 # on read we need to flush().
191 #
192 $fragOut .= $defl->flush();
193 $flush = 1;
194 $defl = deflateInit(
195 -Bufsize => 65536,
196 -Level => $Compress,
197 );
198 }
199 $dataOut .= $fragOut;
200 if ( !$copy && length($dataOut) > $CompMaxWrite ) {
201 if ( !open(OUT, "+>", "$file.__z") ) {
202 print("Error: Can't open $file.__z for write\n");
203 $Errors++;
204 close(FH);
205 return;
206 }
207 binmode(OUT);
208 $copy = 1;
209 }
210 if ( $copy && $dataOut ne "" ) {
211 if ( syswrite(OUT, $dataOut) != length($dataOut) ) {
212 printf("Error: Can't write %d bytes to %s\n",
213 length($dataOut), "$file.__z");
214 $Errors++;
215 close(OUT);
216 close(FH);
217 unlink("$file.__z");
218 return;
219 }
220 $FileCompressSz += length($dataOut);
221 $dataOut = undef;
222 }
223 }
224 if ( !$flush ) {
225 $dataOut .= $defl->flush();
226 if ( $copy && $dataOut ne "" ) {
227 if ( syswrite(OUT, $dataOut) != length($dataOut) ) {
228 printf("Error: Can't write %d bytes to %s\n",
229 length($dataOut), "$file.__z");
230 $Errors++;
231 close(OUT);
232 close(FH);
233 unlink("$file.__z");
234 return;
235 }
236 $FileCompressSz += length($dataOut);
237 $dataOut = undef;
238 }
239 }
240 my $newFile = cpoolFileName($file);
241 if ( $TestMode ) {
242 close(FH);
243 if ( !open(FH, ">", $newFile) ) {
244 print("Error: Can't open $newFile for write\n");
245 $Errors++;
246 close(FH);
247 unlink("$file.__z");
248 return;
249 }
250 binmode(FH);
251 }
252 if ( $copy ) {
253 if ( !sysseek(OUT, 0, 0) ) {
254 print("Error: Can't seek $file.__z to 0\n");
255 $Errors++;
256 }
257 if ( !sysseek(FH, 0, 0) ) {
258 print("Error: Can't seek $newFile to 0\n");
259 $Errors++;
260 }
261 while ( sysread(OUT, $dataIn, $CompMaxWrite) > 0 ) {
262 if ( syswrite(FH, $dataIn) != length($dataIn) ) {
263 printf("Error: Can't write %d bytes to %s\n",
264 length($dataIn), $file);
265 $Errors++;
266 }
267 }
268 if ( !truncate(FH, sysseek(OUT, 0, 1)) ) {
269 printf("Error: Can't truncate %s to %d\n",
270 $file, sysseek(OUT, 0, 1));
271 $Errors++;
272 }
273 close(OUT);
274 close(FH);
275 unlink("$file.__z");
276 } else {
277 if ( !sysseek(FH, 0, 0) ) {
278 print("Error: Can't seek $file to 0\n");
279 $Errors++;
280 }
281 if ( syswrite(FH, $dataOut) != length($dataOut) ) {
282 printf("Error: Can't write %d bytes to %s\n",
283 length($dataOut), $file);
284 $Errors++;
285 }
286 $FileCompressSz += length($dataOut);
287 if ( !truncate(FH, length($dataOut)) ) {
288 printf("Error: Can't truncate %s to %d\n", $file, length($dataOut));
289 $Errors++;
290 }
291 close(FH);
292 }
293 if ( $TestMode ) {
294 if ( $ReadCheck ) {
295 checkRead($file, $newFile);
296 }
297 unlink($newFile);
298 } else {
299 rename($file, $newFile);
300 my $atime = $s[8] =~ /(.*)/ && $1;
301 my $mtime = $s[9] =~ /(.*)/ && $1;
302 utime($atime, $mtime, $newFile);
303 }
304 (my $dir = $file) =~ s{/[^/]*$}{};
305 $FileCnt++;
306 if ( $SubDirCurr ne "" && $SubDirCurr ne $dir ) {
307 $SubDirDone++;
308 $SubDirCurr = $dir;
309 reportStats();
310 } elsif ( $SubDirCurr eq "" ) {
311 $SubDirCurr = $dir;
312 }
313 }
314
315 sub reportStats
316 {
317 print("stats: $SubDirDone $SubDirCnt $FileCnt $FileOrigSz"
318 . " $FileCompressSz $Errors\n");
319 }
320
321 sub checkRead
322 {
323 my($file, $cfile) = @_;
324 return if ( !-f $file || !-f $cfile );
325 my $f = BackupPC::FileZIO->open($cfile, 0, $Compress)
326 || die("can't open $cfile for read\n");
327 my($n, $nd, $r, $d, $d0);
328 local(*FH);
329
330 if ( !open(FH, "<", $file) ) {
331 print("can't open $file for check\n");
332 $Errors++;
333 $f->close();
334 return;
335 }
336 binmode(FH);
337 #print("comparing $file to $cfile\n");
338 while ( 1 ) {
339 $n = 1 + int(rand($CompMaxRead) + rand(100));
340 $r = $f->read(\$d, $n);
341 sysread(FH, $d0, $n);
342 if ( $d ne $d0 ) {
343 print("Botch read data on $cfile\n");
344 }
345 last if ( length($d) == 0 );
346 }
347 if ( ($r = $f->read(\$d, 100)) != 0 || ($r = $f->read(\$d, 100)) != 0 ) {
348 printf("Botch at EOF on $cfile got $r (%d,%d)\n",
349 sysseek(FH, 0, 1), $n);
350 $Errors++;
351 }
352 $f->close;
353 close(FH);
354 }
355
356 sub checkReadLine
357 {
358 my($file, $cfile) = @_;
359 return if ( !-f $file || !-f $cfile );
360 my $f = BackupPC::FileZIO->open($cfile, 0, $Compress)
361 || die("can't open $cfile for read\n");
362 my($n, $nd, $r, $d, $d0);
363 local(*FH);
364
365 if ( !open(FH, "<", $file) ) {
366 print("can't open $file for check\n");
367 $Errors++;
368 $f->close();
369 return;
370 }
371 binmode(FH);
372 while ( 1 ) {
373 $d0 = <FH>;
374 $d = $f->readLine();
375 if ( $d ne $d0 ) {
376 print("Botch read data on $cfile\n");
377 }
378 last if ( length($d) == 0 );
379 }
380 if ( ($r = $f->read(\$d, 100)) != 0 || ($r = $f->read(\$d, 100)) != 0 ) {
381 printf("Botch at EOF on $cfile got $r (%d,%d)\n",
382 sysseek(FH, 0, 1), $n);
383 $Errors++;
384 }
385 $f->close;
386 close(FH);
387 }
388
389 sub catch_signal
390 {
391 $SigName = shift;
392 }
393
394 sub compressHostFiles
395 {
396 my($host) = @_;
397 my(@Files, @Backups, $fh, $data);
398 local(*FH);
399
400 if ( !defined($host) ) {
401 for ( my $i = 0 ; ; $i++ ) {
402 last if ( !-f "$TopDir/log/LOG.$i" );
403 push(@Files, "$TopDir/log/LOG.$i");
404 }
405 } else {
406 @Backups = $bpc->BackupInfoRead($host);
407 for ( my $i = 0 ; $i < @Backups ; $i++ ) {
408 next if ( $Backups[$i]{compress} );
409 push(@Files, "$TopDir/pc/$host/SmbLOG.$Backups[$i]{num}");
410 push(@Files, "$TopDir/pc/$host/XferLOG.$Backups[$i]{num}");
411 }
412 push(@Files, "$TopDir/pc/$host/SmbLOG.bad");
413 push(@Files, "$TopDir/pc/$host/XferLOG.bad");
414 for ( my $i = 0 ; ; $i++ ) {
415 last if ( !-f "$TopDir/pc/$host/LOG.$i" );
416 push(@Files, "$TopDir/pc/$host/LOG.$i");
417 }
418 }
419 foreach my $file ( @Files ) {
420 if ( $SigName ) {
421 print("Child got signal $SigName; quitting\n");
422 reportStats();
423 exit(0);
424 }
425 next if ( !-f $file );
426 if ( !BackupPC::FileZIO->compressCopy($file, "$file.z", undef,
427 $Compress, !$TestMode) ) {
428 print("compressCopy($file, $file.z, $Compress, !$TestMode)"
429 . " failed\n");
430 $Errors++;
431 } elsif ( $TestMode ) {
432 checkReadLine($file, "$file.z") if ( $ReadCheck );
433 unlink("$file.z");
434 }
435 }
436 }
437
438 sub updateHostBackupInfo
439 {
440 my($host) = @_;
441 if ( !$TestMode ) {
442 my @Backups = $bpc->BackupInfoRead($host);
443 for ( my $i = 0 ; $i < @Backups ; $i++ ) {
444 $Backups[$i]{compress} = $Compress;
445 }
446 $bpc->BackupInfoWrite($host, @Backups);
447 }
448 }
449
450 my @Dirs = split(//, "0123456789abcdef");
451 my @Hosts = sort(keys(%{$bpc->HostInfoRead()}));
452 my $FDread;
453 my @Jobs;
454
455 #
456 # First make sure there are no existing compressed backups
457 #
458 my(%compHosts, $compCnt);
459 for ( my $j = 0 ; $j < @Hosts ; $j++ ) {
460 my $host = $Hosts[$j];
461 my @Backups = $bpc->BackupInfoRead($host);
462 for ( my $i = 0 ; $i < @Backups ; $i++ ) {
463 next if ( !$Backups[$i]{compress} );
464 $compHosts{$host}++;
465 $compCnt++;
466 }
467 }
468 if ( $compCnt ) {
469 my $compHostStr = join("\n + ", sort(keys(%compHosts)));
470 print STDERR <<EOF;
471 BackupPC_compressPool: there are $compCnt compressed backups.
472 BackupPC_compressPool can only be run when there are no existing
473 compressed backups. The following hosts have compressed backups:
474
475 + $compHostStr
476
477 If you really want to run BackupPC_compressPool you will need to remove
478 all the existing compressed backups (and /home/pcbackup/data/cpool).
479 Think carefully before you do this. Otherwise, you can just let new
480 compressed backups run and the old uncompressed backups and pool will
481 steadily expire.
482 EOF
483 exit(0);
484 }
485
486 #
487 # Next spawn $nChild children that actually do all the work.
488 #
489 for ( my $i = 0 ; $i < $nChild ; $i++ ) {
490 local(*CHILD);
491 my $pid;
492 if ( !defined($pid = open(CHILD, "-|")) ) {
493 print("Can't fork\n");
494 next;
495 }
496 my $nDirs = @Dirs / ($nChild - $i);
497 my $nHosts = @Hosts / ($nChild - $i);
498 if ( !$pid ) {
499 #
500 # This is the child.
501 # First process each of the hosts (compress per-pc log files etc).
502 #
503 for ( my $j = 0 ; $j < $nHosts ; $j++ ) {
504 compressHostFiles($Hosts[$j]);
505 }
506 #
507 # Count the total number of directories so we can estimate the
508 # completion time. We ignore empty directories by reading each
509 # directory and making sure it has at least 3 entries (ie, ".",
510 # ".." and a file).
511 #
512 for ( my $j = 0 ; $j < $nDirs ; $j++ ) {
513 my $thisDir = $Dirs[$j];
514 next if ( !-d "$PoolDir/$thisDir" );
515 foreach my $dir ( <$PoolDir/$thisDir/*/*> ) {
516 next if ( !opendir(DIR, $dir) );
517 my @files = readdir(DIR);
518 closedir(DIR);
519 $SubDirCnt++ if ( @files > 2 );
520 }
521 }
522 #
523 # Now process each of the directories
524 #
525 for ( my $j = 0 ; $j < $nDirs ; $j++ ) {
526 my $thisDir = shift(@Dirs);
527 next if ( !-d "$PoolDir/$thisDir" );
528 find({wanted => sub { doCompress($File::Find::name); },
529 no_chdir => 1}, "$PoolDir/$thisDir");
530 }
531 #
532 # Last, update the backup info file for each of the hosts
533 #
534 for ( my $j = 0 ; $j < $nHosts ; $j++ ) {
535 updateHostBackupInfo($Hosts[$j]);
536 }
537 $SubDirDone = $SubDirCnt;
538 reportStats();
539 exit(0);
540 }
541 #
542 # This is the parent. Peel off $nDirs directories, $nHosts hosts,
543 # and continue
544 #
545 $Jobs[$i]{fh} = *CHILD;
546 $Jobs[$i]{pid} = $pid;
547 vec($FDread, fileno($Jobs[$i]{fh}), 1) = 1;
548 splice(@Dirs, 0, $nDirs);
549 splice(@Hosts, 0, $nHosts);
550 }
551
552 #
553 # compress the main log files (in the parents)
554 #
555 compressHostFiles(undef);
556
557 #
558 # Now wait for all the children to report results and finish up
559 #
560 my $TimeStart = time;
561 my $DonePct = 0;
562 my $GotSignal = "";
563 while ( $FDread !~ /^\0*$/ ) {
564 my $ein = $FDread;
565 select(my $rout = $FDread, undef, $ein, undef);
566 if ( $SigName ne $GotSignal ) {
567 print("Got signal $SigName; waiting for $nChild children to cleanup\n");
568 $GotSignal = $SigName;
569 }
570 for ( my $i = 0 ; $i < $nChild ; $i++ ) {
571 next if ( !vec($rout, fileno($Jobs[$i]{fh}), 1) );
572 my $data;
573 if ( sysread($Jobs[$i]{fh}, $data, 1024) <= 0 ) {
574 vec($FDread, fileno($Jobs[$i]{fh}), 1) = 0;
575 close($Jobs[$i]{fh});
576 next;
577 }
578 $Jobs[$i]{mesg} .= $data;
579 while ( $Jobs[$i]{mesg} =~ /(.*?)[\n\r]+(.*)/s ) {
580 my $mesg = $1;
581 $Jobs[$i]{mesg} = $2;
582 if ( $mesg =~ /^stats: (\d+) (\d+) (\d+) (\d+) (\d+) (\d+)/ ) {
583 $Jobs[$i]{SubDirDone} = $1;
584 $Jobs[$i]{SubDirCnt} = $2;
585 $Jobs[$i]{FileCnt} = $3;
586 $Jobs[$i]{FileOrigSz} = $4;
587 $Jobs[$i]{FileCompressSz} = $5;
588 $Jobs[$i]{Errors} = $6;
589 $SubDirDone = $SubDirCnt = $FileCnt = $FileOrigSz = 0;
590 $FileCompressSz = $Errors = 0;
591 my $numReports = 0;
592 for ( my $j = 0 ; $j < $nChild ; $j++ ) {
593 next if ( !defined($Jobs[$j]{SubDirDone}) );
594 $SubDirDone += $Jobs[$j]{SubDirDone};
595 $SubDirCnt += $Jobs[$j]{SubDirCnt};
596 $FileCnt += $Jobs[$j]{FileCnt};
597 $FileOrigSz += $Jobs[$j]{FileOrigSz};
598 $FileCompressSz += $Jobs[$j]{FileCompressSz};
599 $Errors += $Jobs[$j]{Errors};
600 $numReports++;
601 }
602 $SubDirCnt ||= 1;
603 $FileOrigSz ||= 1;
604 my $pctDone = 100 * $SubDirDone / $SubDirCnt;
605 if ( $numReports == $nChild && $pctDone >= $DonePct + 1 ) {
606 $DonePct = int($pctDone);
607 my $estSecLeft = 1.2 * (time - $TimeStart)
608 * (100 / $pctDone - 1);
609 my $timeStamp = $bpc->timeStamp;
610 printf("%sDone %2.0f%% (%d of %d dirs, %d files,"
611 . " %.2fGB raw, %.1f%% reduce, %d errors)\n",
612 $timeStamp,
613 $pctDone, $SubDirDone, $SubDirCnt, $FileCnt,
614 $FileOrigSz / (1024 * 1024 * 1000),
615 100 * (1 - $FileCompressSz / $FileOrigSz));
616 printf("%s Est complete in %.1f hours (around %s)\n",
617 $timeStamp, $estSecLeft / 3600,
618 $bpc->timeStamp(time + $estSecLeft, 1))
619 if ( $DonePct < 100 );
620 }
621 } else {
622 print($mesg, "\n");
623 }
624 }
625 }
626 }
627 if ( $Errors ) {
628 print("Finished with $Errors errors!!!!\n");
629 exit(1);
630 }

Properties

Name Value
svn:executable

  ViewVC Help
Powered by ViewVC 1.1.26