1 |
dpavlin |
1 |
#!/bin/perl |
2 |
|
|
#============================================================= -*-perl-*- |
3 |
|
|
# |
4 |
|
|
# BackupPC_nightly: Nightly cleanup & statistics script. |
5 |
|
|
# |
6 |
|
|
# DESCRIPTION |
7 |
|
|
# |
8 |
|
|
# BackupPC_nightly performs several administrative tasks: |
9 |
|
|
# |
10 |
|
|
# - monthly aging of per-PC log files (only with -m option) |
11 |
|
|
# |
12 |
|
|
# - pruning files from pool no longer used (ie: those with only one |
13 |
|
|
# hard link). |
14 |
|
|
# |
15 |
|
|
# - sending email to users and administrators (only with -m option) |
16 |
|
|
# |
17 |
|
|
# Usage: BackupPC_nightly [-m] poolRangeStart poolRangeEnd |
18 |
|
|
# |
19 |
|
|
# Flags: |
20 |
|
|
# |
21 |
|
|
# -m Do monthly aging of per-PC log files and sending of email. |
22 |
|
|
# Otherwise, BackupPC_nightly just does pool pruning. |
23 |
|
|
# |
24 |
|
|
# The poolRangeStart and poolRangeEnd arguments are integers from 0 to 255. |
25 |
|
|
# These specify which parts of the pool to process. There are 256 2nd-level |
26 |
|
|
# directories in the pool (0/0, 0/1, ..., f/e, f/f). BackupPC_nightly |
27 |
|
|
# processes the given subset of this list (0 means 0/0, 255 means f/f). |
28 |
|
|
# Therefore, arguments of 0 255 process the entire pool, 0 127 does |
29 |
|
|
# the first half (ie: 0/0 through 7/f), 128 255 does the other half |
30 |
|
|
# (ie: 8/0 through f/f) and 0 15 does just the first 1/16 of the pool |
31 |
|
|
# (ie: 0/0 through 0/f). |
32 |
|
|
# |
33 |
|
|
# AUTHOR |
34 |
|
|
# Craig Barratt <cbarratt@users.sourceforge.net> |
35 |
|
|
# |
36 |
|
|
# COPYRIGHT |
37 |
|
|
# Copyright (C) 2001-2004 Craig Barratt |
38 |
|
|
# |
39 |
|
|
# This program is free software; you can redistribute it and/or modify |
40 |
|
|
# it under the terms of the GNU General Public License as published by |
41 |
|
|
# the Free Software Foundation; either version 2 of the License, or |
42 |
|
|
# (at your option) any later version. |
43 |
|
|
# |
44 |
|
|
# This program is distributed in the hope that it will be useful, |
45 |
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
46 |
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
47 |
|
|
# GNU General Public License for more details. |
48 |
|
|
# |
49 |
|
|
# You should have received a copy of the GNU General Public License |
50 |
|
|
# along with this program; if not, write to the Free Software |
51 |
|
|
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
52 |
|
|
# |
53 |
|
|
#======================================================================== |
54 |
|
|
# |
55 |
|
|
# Version 2.1.0, released 20 Jun 2004. |
56 |
|
|
# |
57 |
|
|
# See http://backuppc.sourceforge.net. |
58 |
|
|
# |
59 |
|
|
#======================================================================== |
60 |
|
|
|
61 |
|
|
use strict; |
62 |
|
|
no utf8; |
63 |
|
|
use lib "__INSTALLDIR__/lib"; |
64 |
|
|
use BackupPC::Lib; |
65 |
|
|
use BackupPC::FileZIO; |
66 |
|
|
use Getopt::Std; |
67 |
|
|
|
68 |
|
|
use File::Find; |
69 |
|
|
use File::Path; |
70 |
|
|
use Data::Dumper; |
71 |
|
|
|
72 |
|
|
#
# Create the BackupPC library object and fetch the settings used by the
# rest of the script.  These file-level lexicals are also read by the
# subroutines at the bottom of the file.
#
my $bpc = BackupPC::Lib->new;
die("BackupPC::Lib->new failed\n") if ( !$bpc );
my $TopDir = $bpc->TopDir();
my $BinDir = $bpc->BinDir();
my %Conf   = $bpc->Conf();
my(%Status, %Info, %Jobs, @BgQueue, @UserQueue, @CmdQueue);

$bpc->ChildInit();

#
# Command line: an optional -m flag followed by exactly two arguments.
#
my %opts;
my $optsOk = getopts("m", \%opts);
if ( !$optsOk || @ARGV != 2 ) {
    print("usage: $0 [-m] poolRangeStart poolRangeEnd\n");
    exit(1);
}

#
# Both range arguments must be plain decimal integers no larger than 255.
# They are checked in order, and the first bad one ends the program.
#
my @poolRange;
foreach my $argSpec ( [0, "poolRangeStart"], [1, "poolRangeEnd"] ) {
    my($idx, $label) = @$argSpec;
    if ( $ARGV[$idx] =~ /^(\d+)$/ && $1 <= 255 ) {
        push(@poolRange, $1);
        next;
    }
    print("$0: bad $label '$ARGV[$idx]'\n");
    exit(1);
}
my($poolRangeStart, $poolRangeEnd) = @poolRange;

#
# With -m, ask the server for the host status and evaluate the reply as
# Perl code in this scope.
# NOTE(review): the reply presumably assigns to %Status (and possibly the
# other hashes/arrays declared above) -- confirm against the server side.
# The result of the eval ($@) is not checked.
#
if ( $opts{m} ) {
    my $err = $bpc->ServerConnect($Conf{ServerHost}, $Conf{ServerPort});
    if ( $err ) {
        print("Can't connect to server ($err)\n");
        exit(1);
    }
    my $reply = $bpc->ServerMesg("status hosts");
    # Copy the whole reply through a capture group before the eval
    # (looks like the usual untaint idiom).
    if ( $reply =~ /(.*)/s ) {
        $reply = $1;
    }
    eval($reply);
}

###########################################################################
# When BackupPC_nightly starts, BackupPC will not run any simultaneous
# BackupPC_dump commands.  We first do things that contend with
# BackupPC_dump, eg: aging per-PC log files etc.
###########################################################################
if ( $opts{m} ) {
    doPerPCLogFileAging();
}
113 |
|
|
|
114 |
|
|
########################################################################### |
115 |
|
|
# Get statistics on the pool, and remove files that have only one link. |
116 |
|
|
########################################################################### |
117 |
|
|
|
118 |
|
|
#
# Per-directory counters.  They are reset for every pool sub-directory,
# updated by GetPoolStats() during the find() walk, and then reported.
#
my $fileCnt;       # total number of files
my $dirCnt;        # total number of directories
my $blkCnt;        # total block size of files
my $fileCntRm;     # total number of removed files
my $blkCntRm;      # total block size of removed files
my $blkCnt2;       # total block size of files with just 2 links
                   # (ie: files that only occur once among all backups)
my $fileCntRep;    # total number of file names containing "_", ie: files
                   # that have repeated md5 checksums
my $fileRepMax;    # worst case number of files that have repeated checksums
                   # (ie: max(nnn+1) for all names xxxxxxxxxxxxxxxx_nnn)
my $fileLinkMax;   # maximum number of hardlinks on a pool file
my $fileCntRename; # number of renamed files (to keep file numbering
                   # contiguous)
my %FixList;       # list of paths that need to be renamed to avoid
                   # new holes
my @hexChars = qw(0 1 2 3 4 5 6 7 8 9 a b c d e f);

for my $pool ( qw(pool cpool) ) {
    for ( my $i = $poolRangeStart ; $i <= $poolRangeEnd ; $i++ ) {
        # Map the index 0..255 onto the two-level directory name x/y.
        my $dir = "$hexChars[int($i / 16)]/$hexChars[$i % 16]";
        # print("Doing $pool/$dir\n") if ( ($i % 16) == 0 );
        $fileCnt       = 0;
        $dirCnt        = 0;
        $blkCnt        = 0;
        $fileCntRm     = 0;
        $blkCntRm      = 0;
        $blkCnt2       = 0;
        $fileCntRep    = 0;
        $fileRepMax    = 0;
        $fileLinkMax   = 0;
        $fileCntRename = 0;
        %FixList       = ();
        find({wanted => \&GetPoolStats}, "$TopDir/$pool/$dir");
        # Block counts are halved to report kB (assumes 512-byte blocks).
        my $kb   = $blkCnt / 2;
        my $kbRm = $blkCntRm / 2;
        my $kb2  = $blkCnt2 / 2;

        #
        # Main BackupPC_nightly counts the top-level directory
        #
        $dirCnt++ if ( $opts{m} && -d "$TopDir/$pool" && $i == 0 );

        #
        # Also count the next level directories
        #
        $dirCnt++ if ( ($i % 16) == 0
                       && -d "$TopDir/$pool/$hexChars[int($i / 16)]" );

        #
        # Now make sure that files with repeated checksums are still
        # sequentially numbered
        #
        foreach my $name ( sort(keys(%FixList)) ) {
            my $rmCnt = $FixList{$name} + 1;
            my $new   = -1;
            # $old walks the existing chain name, name_0, name_1, ...;
            # $new is the next free slot in the compacted chain.
            for ( my $old = -1 ; ; $old++ ) {
                my $oldName = $name;
                $oldName .= "_$old" if ( $old >= 0 );
                if ( !-f $oldName ) {
                    #
                    # We know we are done when we have missed at least
                    # the number of files that were removed from this
                    # base name, plus a couple just to be sure
                    #
                    last if ( $rmCnt-- <= 0 );
                    next;
                }
                my $newName = $name;
                $newName .= "_$new" if ( $new >= 0 );
                $new++;
                next if ( $oldName eq $newName );
                #
                # Only count renames that actually happened, so the
                # reported statistic is not inflated by failures.
                #
                $fileCntRename++ if ( rename($oldName, $newName) );
            }
        }
        print("BackupPC_stats $i = $pool,$fileCnt,$dirCnt,$kb,$kb2,$kbRm,"
              . "$fileCntRm,$fileCntRep,$fileRepMax,"
              . "$fileCntRename,$fileLinkMax\n");
    }
}
199 |
|
|
|
200 |
|
|
########################################################################### |
201 |
|
|
# Tell BackupPC that it is now ok to start running BackupPC_dump |
202 |
|
|
# commands. We are guaranteed that no BackupPC_link commands will |
203 |
|
|
# run since only a single CmdQueue command runs at a time, and |
204 |
|
|
# that means we are safe. |
205 |
|
|
########################################################################### |
206 |
|
|
print("BackupPC_nightly lock_off\n");

###########################################################################
# Send email
###########################################################################
if ( $opts{m} ) {
    # Announce the step on stdout, then run the mailer and wait for it.
    print("log BackupPC_nightly now running BackupPC_sendEmail\n");
    system("$BinDir/BackupPC_sendEmail");
}
215 |
|
|
|
216 |
|
|
#
# Do per-PC log file aging.
#
# Only acts on the first day of the month (local time); on any other day
# it returns without touching anything.  For every host in %Status the
# logs under $TopDir/pc/$host are rotated:
#
#   - the oldest log, LOG.<MaxOldPerPCLogFiles - 1> (plain or .z), is
#     deleted;
#   - each remaining LOG.<i> (or LOG.<i>.z) is renamed to LOG.<i+1>;
#   - the current LOG is handed to BackupPC::FileZIO->compressCopy and
#     an empty LOG is then created in its place.
#
# Takes no arguments.  The caller does not use the return value.
#
sub doPerPCLogFileAging
{
    my $mday = (localtime(time))[3];
    return if ( $mday != 1 );

    # Index of the oldest log that is kept; the same for every host.
    my $lastLog = $Conf{MaxOldPerPCLogFiles} - 1;

    foreach my $host ( keys(%Status) ) {
        my $logBase = "$TopDir/pc/$host/LOG";

        unlink("$logBase.$lastLog")   if ( -f "$logBase.$lastLog" );
        unlink("$logBase.$lastLog.z") if ( -f "$logBase.$lastLog.z" );

        # Shift from the oldest down so no file is overwritten.
        for ( my $i = $lastLog - 1 ; $i >= 0 ; $i-- ) {
            my $j = $i + 1;
            if ( -f "$logBase.$i" ) {
                rename("$logBase.$i", "$logBase.$j");
            } elsif ( -f "$logBase.$i.z" ) {
                rename("$logBase.$i.z", "$logBase.$j.z");
            }
        }
        #
        # Compress the log file LOG -> LOG.0.z (if enabled).
        # Otherwise, just rename LOG -> LOG.0.
        #
        # NOTE(review): the result of compressCopy is not checked; if it
        # can fail while leaving LOG in place, the truncation below would
        # discard the log -- confirm against BackupPC::FileZIO.
        #
        BackupPC::FileZIO->compressCopy($logBase,
                                        "$logBase.0.z",
                                        "$logBase.0",
                                        $Conf{CompressLevel}, 1);
        # Leave an empty LOG behind.  A lexical handle is used so the
        # global LOG filehandle is not touched.
        open(my $fh, ">", $logBase) && close($fh);
    }
}
251 |
|
|
|
252 |
|
|
#
# File::Find "wanted" callback for the pool walk.
#
# Called with $_ set to the current entry and $File::Find::name set to
# its path.  Directories only bump $dirCnt; anything that is neither a
# directory nor a plain file is ignored.  A plain file with a single
# hard link is removed and recorded in %FixList so that the caller can
# renumber the xxxx, xxxx_0, xxxx_1, ... chain afterwards.  Every other
# plain file is added to the file/block statistics.
#
# Updates the file-level counters ($dirCnt, $fileCnt, $blkCnt, $blkCnt2,
# $fileCntRm, $blkCntRm, $fileCntRep, $fileRepMax, $fileLinkMax) and
# %FixList.  The return value is not meaningful.
#
sub GetPoolStats
{
    # Fields 3 and 12 of lstat are the link count and the block count.
    my($nlinks, $nblocks) = (lstat($_))[3, 12];

    if ( -d _ ) {
        $dirCnt++;
        return;
    }
    return if ( !-f _ );

    #
    # Only account for the removal when unlink() really succeeded.  If
    # it fails the file is still in the pool, so it falls through and is
    # counted with the remaining files below.
    #
    if ( $nlinks == 1 && unlink($_) ) {
        $blkCntRm += $nblocks;
        $fileCntRm++;
        #
        # We must keep repeated files numbered sequential (ie: files
        # that have the same checksum are appended with _0, _1 etc).
        # There are two cases: we remove the base file xxxx, but xxxx_0
        # exists, or we remove any file of the form xxxx_nnn.  We remember
        # the base name and fix it up later (not in the middle of find).
        #
        my $baseName = $File::Find::name;
        $baseName =~ s/_\d+$//;
        $FixList{$baseName}++;
        return;
    }

    # A trailing _nnn marks a file whose checksum is shared by others.
    if ( /_(\d+)$/ ) {
        $fileRepMax = $1 + 1 if ( $fileRepMax <= $1 );
        $fileCntRep++;
    }
    $fileCnt    += 1;
    $blkCnt     += $nblocks;
    $blkCnt2    += $nblocks if ( $nlinks == 2 );
    $fileLinkMax = $nlinks  if ( $fileLinkMax < $nlinks );
}