/[BackupPC]/upstream/2.1.0/bin/BackupPC_compressPool
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /upstream/2.1.0/bin/BackupPC_compressPool

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1 - (hide annotations)
Wed Jun 22 19:12:04 2005 UTC (18 years, 9 months ago) by dpavlin
File size: 19905 byte(s)
import of version 2.1.0

1 dpavlin 1 #!/bin/perl
2     #============================================================= -*-perl-*-
3     #
4     # BackupPC_compressPool: Compress existing pool
5     #
6     # DESCRIPTION
7     #
8     # Usage: BackupPC_compressPool [-t] [-r] <host>
9     #
10     # Flags:
11     # -t test mode: do everything except actually replace the pool files.
12     # Useful for estimating total run time without making any real
13     # changes.
14     # -r read check: re-read the compressed file and compare it against
15     # the original uncompressed file. Can only be used in test mode.
16     # -c # number of children to fork. BackupPC_compressPool can take
17     # a long time to run, so to speed things up it spawns four children,
18     # each working on a different part of the pool. You can change
19     # the number of children with the -c option.
20     #
21     # BackupPC_compressPool is used to convert an uncompressed pool to
22     # a compressed pool. If BackupPC compression is enabled after
23     # uncompressed backups already exist, BackupPC_compressPool can
24     # be used to compress all the old uncompressed backups.
25     #
26     # It is important that BackupPC not run while BackupPC_compressPool
27     # runs. Also, BackupPC_compressPool must run to completion before
28     # BackupPC is restarted.
29     #
30     # AUTHOR
31     # Craig Barratt <cbarratt@users.sourceforge.net>
32     #
33     # COPYRIGHT
34     # Copyright (C) 2001-2003 Craig Barratt
35     #
36     # This program is free software; you can redistribute it and/or modify
37     # it under the terms of the GNU General Public License as published by
38     # the Free Software Foundation; either version 2 of the License, or
39     # (at your option) any later version.
40     #
41     # This program is distributed in the hope that it will be useful,
42     # but WITHOUT ANY WARRANTY; without even the implied warranty of
43     # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
44     # GNU General Public License for more details.
45     #
46     # You should have received a copy of the GNU General Public License
47     # along with this program; if not, write to the Free Software
48     # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
49     #
50     #========================================================================
51     #
52     # Version 2.1.0, released 20 Jun 2004.
53     #
54     # See http://backuppc.sourceforge.net.
55     #
56     #========================================================================
57    
58     use strict;
59     no utf8;
60    
61     use File::Find;
62     use File::Path;
63     use Compress::Zlib;
64     use Getopt::Std;
65     use lib "__INSTALLDIR__/lib";
66     use BackupPC::Lib;
67     use BackupPC::FileZIO;
68    
69     die("BackupPC::Lib->new failed\n") if ( !(my $bpc = BackupPC::Lib->new) );
70     $bpc->ChildInit();
71     my $TopDir = $bpc->TopDir();
72     my $BinDir = $bpc->BinDir();
73     my %Conf = $bpc->Conf();
74     my $PoolDir = "$TopDir/pool";
75     my $CPoolDir = "$TopDir/cpool";
76     my $Compress = $Conf{CompressLevel};
77     my %opts;
78     my $SigName = "";
79    
80     #
81     # Catch various signals
82     #
83     foreach my $sig ( qw(INT BUS SEGV PIPE TERM ALRM HUP) ) {
84     $SIG{$sig} = \&catch_signal;
85     }
86    
87     $| = 1;
88    
89     my $CompMaxRead = 131072; # 128K
90     my $CompMaxWrite = 6291456; # 6MB
91    
92     if ( !getopts("trc:", \%opts) || @ARGV != 0 ) {
93     print("usage: $0 [-c nChild] [-r] [-t]\n");
94     exit(1);
95     }
96     my $TestMode = $opts{t};
97     my $ReadCheck = $opts{r};
98     my $nChild = $opts{c} || 4;
99     if ( $ReadCheck && !$TestMode ) {
100     print(STDERR "$0: -r (read check) option must have -t (test)\n");
101     exit(1);
102     }
103     if ( $nChild < 1 || $nChild >= 16 ) {
104     print(STDERR "$0: number of children (-c option) must be from 1 to 16\n");
105     exit(1);
106     }
107     if ( !BackupPC::FileZIO->compOk ) {
108     print STDERR <<EOF;
109     $0: Compress::Zlib is not installed. You need to install it
110     before running this script.
111     EOF
112     exit(1);
113     }
114     if ( $Compress <= 0 ) {
115     print STDERR <<EOF;
116     $0: compression is not enabled. \%Conf{CompressLevel} needs
117     to be set to a value from 1 to 9. Please edit the config.pl file and
118     re-start $0.
119     EOF
120     exit(1);
121     }
122    
123     my $Errors = 0;
124     my $SubDirDone = 0;
125     my $SubDirCnt = 0;
126     my $SubDirCurr = 0;
127     my $FileCnt = 0;
128     my $FileOrigSz = 0;
129     my $FileCompressSz = 0;
130    
131     my $err = $bpc->ServerConnect($Conf{ServerHost}, $Conf{ServerPort});
132     if ( $err eq "" ) {
133     print <<EOF;
134     BackupPC is running on $Conf{ServerHost}. You need to stop BackupPC
135     before you can upgrade the code. Depending upon your installation,
136     you could run "/etc/init.d/backuppc stop".
137     EOF
138     exit(1);
139     }
140    
141     umask($Conf{UmaskMode});
142    
143     sub cpoolFileName
144     {
145     my($new) = @_;
146     if ( $new !~ m{/(\w/\w/\w)/(\w{32})(_\d+)?$} ) {
147     print("Error: Can't parse filename from $new\n");
148     $Errors++;
149     return;
150     }
151     my $dir = "$CPoolDir/$1";
152     $new = "$dir/$2";
153     mkpath($dir, 0, 0777) if ( !-d $dir );
154     return $new if ( !-f $new );
155     for ( my $i = 0 ; ; $i++ ) {
156     return "${new}_$i" if ( !-f "${new}_$i" );
157     }
158     }
159    
160     sub doCompress
161     {
162     my $file = ($File::Find::name =~ /(.*)/ && $1);
163     local(*FH, *OUT);
164     my(@s) = stat($file);
165     my($n, $dataIn, $dataOut, $flush, $copy);
166    
167     if ( $SigName ) {
168     print("Child got signal $SigName; quitting\n");
169     reportStats();
170     exit(0);
171     }
172     return if ( !-f $file );
173     my $defl = deflateInit(
174     -Bufsize => 65536,
175     -Level => $Compress,
176     );
177     if ( !open(FH, $TestMode ? "<" : "+<", $file) ) {
178     print("Error: Can't open $file for read/write\n");
179     $Errors++;
180     return;
181     }
182     binmode(FH);
183     while ( sysread(FH, $dataIn, $CompMaxWrite) > 0 ) {
184     $flush = 0;
185     $FileOrigSz += length($dataIn);
186     my $fragOut = $defl->deflate($dataIn);
187     if ( length($fragOut) < $CompMaxRead ) {
188     #
189     # Compression is too high: to avoid huge memory requirements
190     # on read we need to flush().
191     #
192     $fragOut .= $defl->flush();
193     $flush = 1;
194     $defl = deflateInit(
195     -Bufsize => 65536,
196     -Level => $Compress,
197     );
198     }
199     $dataOut .= $fragOut;
200     if ( !$copy && length($dataOut) > $CompMaxWrite ) {
201     if ( !open(OUT, "+>", "$file.__z") ) {
202     print("Error: Can't open $file.__z for write\n");
203     $Errors++;
204     close(FH);
205     return;
206     }
207     binmode(OUT);
208     $copy = 1;
209     }
210     if ( $copy && $dataOut ne "" ) {
211     if ( syswrite(OUT, $dataOut) != length($dataOut) ) {
212     printf("Error: Can't write %d bytes to %s\n",
213     length($dataOut), "$file.__z");
214     $Errors++;
215     close(OUT);
216     close(FH);
217     unlink("$file.__z");
218     return;
219     }
220     $FileCompressSz += length($dataOut);
221     $dataOut = undef;
222     }
223     }
224     if ( !$flush ) {
225     $dataOut .= $defl->flush();
226     if ( $copy && $dataOut ne "" ) {
227     if ( syswrite(OUT, $dataOut) != length($dataOut) ) {
228     printf("Error: Can't write %d bytes to %s\n",
229     length($dataOut), "$file.__z");
230     $Errors++;
231     close(OUT);
232     close(FH);
233     unlink("$file.__z");
234     return;
235     }
236     $FileCompressSz += length($dataOut);
237     $dataOut = undef;
238     }
239     }
240     my $newFile = cpoolFileName($file);
241     if ( $TestMode ) {
242     close(FH);
243     if ( !open(FH, ">", $newFile) ) {
244     print("Error: Can't open $newFile for write\n");
245     $Errors++;
246     close(FH);
247     unlink("$file.__z");
248     return;
249     }
250     binmode(FH);
251     }
252     if ( $copy ) {
253     if ( !sysseek(OUT, 0, 0) ) {
254     print("Error: Can't seek $file.__z to 0\n");
255     $Errors++;
256     }
257     if ( !sysseek(FH, 0, 0) ) {
258     print("Error: Can't seek $newFile to 0\n");
259     $Errors++;
260     }
261     while ( sysread(OUT, $dataIn, $CompMaxWrite) > 0 ) {
262     if ( syswrite(FH, $dataIn) != length($dataIn) ) {
263     printf("Error: Can't write %d bytes to %s\n",
264     length($dataIn), $file);
265     $Errors++;
266     }
267     }
268     if ( !truncate(FH, sysseek(OUT, 0, 1)) ) {
269     printf("Error: Can't truncate %s to %d\n",
270     $file, sysseek(OUT, 0, 1));
271     $Errors++;
272     }
273     close(OUT);
274     close(FH);
275     unlink("$file.__z");
276     } else {
277     if ( !sysseek(FH, 0, 0) ) {
278     print("Error: Can't seek $file to 0\n");
279     $Errors++;
280     }
281     if ( syswrite(FH, $dataOut) != length($dataOut) ) {
282     printf("Error: Can't write %d bytes to %s\n",
283     length($dataOut), $file);
284     $Errors++;
285     }
286     $FileCompressSz += length($dataOut);
287     if ( !truncate(FH, length($dataOut)) ) {
288     printf("Error: Can't truncate %s to %d\n", $file, length($dataOut));
289     $Errors++;
290     }
291     close(FH);
292     }
293     if ( $TestMode ) {
294     if ( $ReadCheck ) {
295     checkRead($file, $newFile);
296     }
297     unlink($newFile);
298     } else {
299     rename($file, $newFile);
300     my $atime = $s[8] =~ /(.*)/ && $1;
301     my $mtime = $s[9] =~ /(.*)/ && $1;
302     utime($atime, $mtime, $newFile);
303     }
304     (my $dir = $file) =~ s{/[^/]*$}{};
305     $FileCnt++;
306     if ( $SubDirCurr ne "" && $SubDirCurr ne $dir ) {
307     $SubDirDone++;
308     $SubDirCurr = $dir;
309     reportStats();
310     } elsif ( $SubDirCurr eq "" ) {
311     $SubDirCurr = $dir;
312     }
313     }
314    
315     sub reportStats
316     {
317     print("stats: $SubDirDone $SubDirCnt $FileCnt $FileOrigSz"
318     . " $FileCompressSz $Errors\n");
319     }
320    
321     sub checkRead
322     {
323     my($file, $cfile) = @_;
324     return if ( !-f $file || !-f $cfile );
325     my $f = BackupPC::FileZIO->open($cfile, 0, $Compress)
326     || die("can't open $cfile for read\n");
327     my($n, $nd, $r, $d, $d0);
328     local(*FH);
329    
330     if ( !open(FH, "<", $file) ) {
331     print("can't open $file for check\n");
332     $Errors++;
333     $f->close();
334     return;
335     }
336     binmode(FH);
337     #print("comparing $file to $cfile\n");
338     while ( 1 ) {
339     $n = 1 + int(rand($CompMaxRead) + rand(100));
340     $r = $f->read(\$d, $n);
341     sysread(FH, $d0, $n);
342     if ( $d ne $d0 ) {
343     print("Botch read data on $cfile\n");
344     }
345     last if ( length($d) == 0 );
346     }
347     if ( ($r = $f->read(\$d, 100)) != 0 || ($r = $f->read(\$d, 100)) != 0 ) {
348     printf("Botch at EOF on $cfile got $r (%d,%d)\n",
349     sysseek(FH, 0, 1), $n);
350     $Errors++;
351     }
352     $f->close;
353     close(FH);
354     }
355    
356     sub checkReadLine
357     {
358     my($file, $cfile) = @_;
359     return if ( !-f $file || !-f $cfile );
360     my $f = BackupPC::FileZIO->open($cfile, 0, $Compress)
361     || die("can't open $cfile for read\n");
362     my($n, $nd, $r, $d, $d0);
363     local(*FH);
364    
365     if ( !open(FH, "<", $file) ) {
366     print("can't open $file for check\n");
367     $Errors++;
368     $f->close();
369     return;
370     }
371     binmode(FH);
372     while ( 1 ) {
373     $d0 = <FH>;
374     $d = $f->readLine();
375     if ( $d ne $d0 ) {
376     print("Botch read data on $cfile\n");
377     }
378     last if ( length($d) == 0 );
379     }
380     if ( ($r = $f->read(\$d, 100)) != 0 || ($r = $f->read(\$d, 100)) != 0 ) {
381     printf("Botch at EOF on $cfile got $r (%d,%d)\n",
382     sysseek(FH, 0, 1), $n);
383     $Errors++;
384     }
385     $f->close;
386     close(FH);
387     }
388    
389     sub catch_signal
390     {
391     $SigName = shift;
392     }
393    
394     sub compressHostFiles
395     {
396     my($host) = @_;
397     my(@Files, @Backups, $fh, $data);
398     local(*FH);
399    
400     if ( !defined($host) ) {
401     for ( my $i = 0 ; ; $i++ ) {
402     last if ( !-f "$TopDir/log/LOG.$i" );
403     push(@Files, "$TopDir/log/LOG.$i");
404     }
405     } else {
406     @Backups = $bpc->BackupInfoRead($host);
407     for ( my $i = 0 ; $i < @Backups ; $i++ ) {
408     next if ( $Backups[$i]{compress} );
409     push(@Files, "$TopDir/pc/$host/SmbLOG.$Backups[$i]{num}");
410     push(@Files, "$TopDir/pc/$host/XferLOG.$Backups[$i]{num}");
411     }
412     push(@Files, "$TopDir/pc/$host/SmbLOG.bad");
413     push(@Files, "$TopDir/pc/$host/XferLOG.bad");
414     for ( my $i = 0 ; ; $i++ ) {
415     last if ( !-f "$TopDir/pc/$host/LOG.$i" );
416     push(@Files, "$TopDir/pc/$host/LOG.$i");
417     }
418     }
419     foreach my $file ( @Files ) {
420     if ( $SigName ) {
421     print("Child got signal $SigName; quitting\n");
422     reportStats();
423     exit(0);
424     }
425     next if ( !-f $file );
426     if ( !BackupPC::FileZIO->compressCopy($file, "$file.z", undef,
427     $Compress, !$TestMode) ) {
428     print("compressCopy($file, $file.z, $Compress, !$TestMode)"
429     . " failed\n");
430     $Errors++;
431     } elsif ( $TestMode ) {
432     checkReadLine($file, "$file.z") if ( $ReadCheck );
433     unlink("$file.z");
434     }
435     }
436     }
437    
438     sub updateHostBackupInfo
439     {
440     my($host) = @_;
441     if ( !$TestMode ) {
442     my @Backups = $bpc->BackupInfoRead($host);
443     for ( my $i = 0 ; $i < @Backups ; $i++ ) {
444     $Backups[$i]{compress} = $Compress;
445     }
446     $bpc->BackupInfoWrite($host, @Backups);
447     }
448     }
449    
450     my @Dirs = split(//, "0123456789abcdef");
451     my @Hosts = sort(keys(%{$bpc->HostInfoRead()}));
452     my $FDread;
453     my @Jobs;
454    
455     #
456     # First make sure there are no existing compressed backups
457     #
458     my(%compHosts, $compCnt);
459     for ( my $j = 0 ; $j < @Hosts ; $j++ ) {
460     my $host = $Hosts[$j];
461     my @Backups = $bpc->BackupInfoRead($host);
462     for ( my $i = 0 ; $i < @Backups ; $i++ ) {
463     next if ( !$Backups[$i]{compress} );
464     $compHosts{$host}++;
465     $compCnt++;
466     }
467     }
468     if ( $compCnt ) {
469     my $compHostStr = join("\n + ", sort(keys(%compHosts)));
470     print STDERR <<EOF;
471     BackupPC_compressPool: there are $compCnt compressed backups.
472     BackupPC_compressPool can only be run when there are no existing
473     compressed backups. The following hosts have compressed backups:
474    
475     + $compHostStr
476    
477     If you really want to run BackupPC_compressPool you will need to remove
478     all the existing compressed backups (and /home/pcbackup/data/cpool).
479     Think carefully before you do this. Otherwise, you can just let new
480     compressed backups run and the old uncompressed backups and pool will
481     steadily expire.
482     EOF
483     exit(0);
484     }
485    
486     #
487     # Next spawn $nChild children that actually do all the work.
488     #
489     for ( my $i = 0 ; $i < $nChild ; $i++ ) {
490     local(*CHILD);
491     my $pid;
492     if ( !defined($pid = open(CHILD, "-|")) ) {
493     print("Can't fork\n");
494     next;
495     }
496     my $nDirs = @Dirs / ($nChild - $i);
497     my $nHosts = @Hosts / ($nChild - $i);
498     if ( !$pid ) {
499     #
500     # This is the child.
501     # First process each of the hosts (compress per-pc log files etc).
502     #
503     for ( my $j = 0 ; $j < $nHosts ; $j++ ) {
504     compressHostFiles($Hosts[$j]);
505     }
506     #
507     # Count the total number of directories so we can estimate the
508     # completion time. We ignore empty directories by reading each
509     # directory and making sure it has at least 3 entries (ie, ".",
510     # ".." and a file).
511     #
512     for ( my $j = 0 ; $j < $nDirs ; $j++ ) {
513     my $thisDir = $Dirs[$j];
514     next if ( !-d "$PoolDir/$thisDir" );
515     foreach my $dir ( <$PoolDir/$thisDir/*/*> ) {
516     next if ( !opendir(DIR, $dir) );
517     my @files = readdir(DIR);
518     closedir(DIR);
519     $SubDirCnt++ if ( @files > 2 );
520     }
521     }
522     #
523     # Now process each of the directories
524     #
525     for ( my $j = 0 ; $j < $nDirs ; $j++ ) {
526     my $thisDir = shift(@Dirs);
527     next if ( !-d "$PoolDir/$thisDir" );
528     find({wanted => sub { doCompress($File::Find::name); },
529     no_chdir => 1}, "$PoolDir/$thisDir");
530     }
531     #
532     # Last, update the backup info file for each of the hosts
533     #
534     for ( my $j = 0 ; $j < $nHosts ; $j++ ) {
535     updateHostBackupInfo($Hosts[$j]);
536     }
537     $SubDirDone = $SubDirCnt;
538     reportStats();
539     exit(0);
540     }
541     #
542     # This is the parent. Peel off $nDirs directories, $nHosts hosts,
543     # and continue
544     #
545     $Jobs[$i]{fh} = *CHILD;
546     $Jobs[$i]{pid} = $pid;
547     vec($FDread, fileno($Jobs[$i]{fh}), 1) = 1;
548     splice(@Dirs, 0, $nDirs);
549     splice(@Hosts, 0, $nHosts);
550     }
551    
552     #
553     # compress the main log files (in the parents)
554     #
555     compressHostFiles(undef);
556    
557     #
558     # Now wait for all the children to report results and finish up
559     #
560     my $TimeStart = time;
561     my $DonePct = 0;
562     my $GotSignal = "";
563     while ( $FDread !~ /^\0*$/ ) {
564     my $ein = $FDread;
565     select(my $rout = $FDread, undef, $ein, undef);
566     if ( $SigName ne $GotSignal ) {
567     print("Got signal $SigName; waiting for $nChild children to cleanup\n");
568     $GotSignal = $SigName;
569     }
570     for ( my $i = 0 ; $i < $nChild ; $i++ ) {
571     next if ( !vec($rout, fileno($Jobs[$i]{fh}), 1) );
572     my $data;
573     if ( sysread($Jobs[$i]{fh}, $data, 1024) <= 0 ) {
574     vec($FDread, fileno($Jobs[$i]{fh}), 1) = 0;
575     close($Jobs[$i]{fh});
576     next;
577     }
578     $Jobs[$i]{mesg} .= $data;
579     while ( $Jobs[$i]{mesg} =~ /(.*?)[\n\r]+(.*)/s ) {
580     my $mesg = $1;
581     $Jobs[$i]{mesg} = $2;
582     if ( $mesg =~ /^stats: (\d+) (\d+) (\d+) (\d+) (\d+) (\d+)/ ) {
583     $Jobs[$i]{SubDirDone} = $1;
584     $Jobs[$i]{SubDirCnt} = $2;
585     $Jobs[$i]{FileCnt} = $3;
586     $Jobs[$i]{FileOrigSz} = $4;
587     $Jobs[$i]{FileCompressSz} = $5;
588     $Jobs[$i]{Errors} = $6;
589     $SubDirDone = $SubDirCnt = $FileCnt = $FileOrigSz = 0;
590     $FileCompressSz = $Errors = 0;
591     my $numReports = 0;
592     for ( my $j = 0 ; $j < $nChild ; $j++ ) {
593     next if ( !defined($Jobs[$j]{SubDirDone}) );
594     $SubDirDone += $Jobs[$j]{SubDirDone};
595     $SubDirCnt += $Jobs[$j]{SubDirCnt};
596     $FileCnt += $Jobs[$j]{FileCnt};
597     $FileOrigSz += $Jobs[$j]{FileOrigSz};
598     $FileCompressSz += $Jobs[$j]{FileCompressSz};
599     $Errors += $Jobs[$j]{Errors};
600     $numReports++;
601     }
602     $SubDirCnt ||= 1;
603     $FileOrigSz ||= 1;
604     my $pctDone = 100 * $SubDirDone / $SubDirCnt;
605     if ( $numReports == $nChild && $pctDone >= $DonePct + 1 ) {
606     $DonePct = int($pctDone);
607     my $estSecLeft = 1.2 * (time - $TimeStart)
608     * (100 / $pctDone - 1);
609     my $timeStamp = $bpc->timeStamp;
610     printf("%sDone %2.0f%% (%d of %d dirs, %d files,"
611     . " %.2fGB raw, %.1f%% reduce, %d errors)\n",
612     $timeStamp,
613     $pctDone, $SubDirDone, $SubDirCnt, $FileCnt,
614     $FileOrigSz / (1024 * 1024 * 1000),
615     100 * (1 - $FileCompressSz / $FileOrigSz));
616     printf("%s Est complete in %.1f hours (around %s)\n",
617     $timeStamp, $estSecLeft / 3600,
618     $bpc->timeStamp(time + $estSecLeft, 1))
619     if ( $DonePct < 100 );
620     }
621     } else {
622     print($mesg, "\n");
623     }
624     }
625     }
626     }
627     if ( $Errors ) {
628     print("Finished with $Errors errors!!!!\n");
629     exit(1);
630     }

Properties

Name Value
svn:executable

  ViewVC Help
Powered by ViewVC 1.1.26