/[psinib]/psinib.pl
This is a repository of my old source code, which is no longer updated. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /psinib.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.10 - (show annotations)
Tue Jul 15 17:41:45 2003 UTC (20 years, 8 months ago) by dpavlin
Branch: MAIN
Changes since 1.9: +1 -1 lines
File MIME type: text/plain
removed all debugging output

1 #!/usr/bin/perl -w
2 #
3 # psinib - Perl Snapshot Is Not Incremental Backup
4 #
5 # written by Dobrica Pavlinusic <dpavlin@rot13.org> 2003-01-03
6 # released under GPL v2 or later.
7 #
8 # Backup SMB directories using file produced by LinNeighbourhood (or some
9 # other program [vi :-)] which produces file in format:
10 #
11 # smbmount service mountpoint options
12 #
13 #
14 # usage:
15 # $ psinib.pl mountscript
16
17 use strict 'vars';
18 use Data::Dumper;
19 use Net::Ping;
20 use POSIX qw(strftime);
21 use List::Compare;
22 use Filesys::SmbClient;
23 #use Taint;
24 use Fcntl qw(LOCK_EX LOCK_NB);
25 use Digest::MD5;
26 use File::Basename;
27
# configuration
my $LOG_TIME_FMT = '%Y-%m-%d %H:%M:%S';    # strftime format for logfile
my $DIR_TIME_FMT = '%Y%m%d';               # strftime format for backup dir

my $LOG = '/var/log/backup.log';           # add path here...
#$LOG = '/tmp/backup.log';

# store backups in which directory
my $BACKUP_DEST = '/backup/isis_backup';

# files to ignore in backup
my @ignore = ('.md5sum', '.backupignore', 'backupignore.txt');

# open log for appending; the bareword handle L is what xlog() writes to.
# Three-argument open keeps the mode separate from the file name, so a
# path edited above can never be interpreted as an open mode.
open(L, '>>', $LOG) || die "can't open log $LOG: $!";
select((select(L), $|=1)[0]);              # flush output

# make a lock on logfile
#
# The exclusive, non-blocking lock doubles as a "single instance" guard:
# retry once per second and give up after 10 failed attempts.
my $c = 0;
until (flock(L, LOCK_EX | LOCK_NB)) {
    sleep 1;
    next if ++$c < 10;
    # no response for 10 sec, bail out
    print STDERR "can't take lock on $LOG -- another $0 running?\n";
    exit 1;
}
56
# taint path: nmblookup should be there!
$ENV{'PATH'} = "/usr/bin:/bin";

# file with "smbmount service mountpoint options" lines
my $mounts = shift @ARGV ||
    'mountscript';
#   die "usage: $0 mountscript";


my @in_backup;    # shares which are backeduped this run

# ping will try tcp connect to netbios-ssn (139)
my $p = Net::Ping->new("tcp", 2);
$p->{port_num} = getservbyname("netbios-ssn", "tcp") || 139;

my $backup_ok = 0;

# state shared with snap_share() and smb_copy()
my $smb;
my %smb_atime;
my %smb_mtime;
my %file_md5;

open(M, '<', $mounts) || die "can't open $mounts: $!";
while (my $line = <M>) {
    chomp $line;
    next if $line !~ /^\s*smbmount\s/;

    # split(' ') skips leading whitespace, so an indented line does not
    # shift every field one position to the right
    my (undef,$share,undef,$opt) = split(' ',$line,4);
    next if !defined $share;
    $opt = '' if !defined $opt;

    my ($user,$passwd,$workgroup);

    foreach my $o (split(/,/,$opt)) {
        my ($n,$v) = split(/=/,$o,2);
        next if !defined $v;
        if ($n =~ m/username/i) {
            if ($v =~ m#^(.+)/(.+)%(.+)$#) {
                # user/workgroup%password -- same field order as the
                # two shorter forms below
                ($user,$workgroup,$passwd) = ($1,$2,$3);
            } elsif ($v =~ m#^(.+)/(.+)$#) {
                ($user,$workgroup) = ($1,$2);
            } elsif ($v =~ m#^(.+)%(.+)$#) {
                ($user,$passwd) = ($1,$2);
            } else {
                $user = $v;
            }
        } elsif ($n =~ m#workgroup#i) {
            $workgroup = $v;
        }
    }

    push @in_backup,$share;

    my ($host,$dir,$date_dir) = share2host_dir($share);
    # share2host_dir already printed why the share is unusable
    next if !defined $host;

    my $bl = "$BACKUP_DEST/$host/$dir/latest";       # latest backup
    my $bc = "$BACKUP_DEST/$host/$dir/$date_dir";    # current one
    if (-l $bl) {
        my $real_bl = readlink($bl) || die "can't read link $bl: $!";
        $real_bl = "$BACKUP_DEST/$host/$dir/$real_bl" if (substr($real_bl,0,1) ne "/");
        # "latest" already points at today's directory: nothing to do.
        # ($bc is a plain directory, so it must be tested with -d; it is
        # the "latest" entry that is the symlink.)
        if ($real_bl eq $bc && -d $bc) {
            print "$share allready backuped...\n";
            $backup_ok++;
            next;
        }
    }

    print "working on $share\n";

    my $ip = get_ip($share);

    if ($ip) {
        xlog($share,"IP is $ip");
        if ($p->ping($ip)) {
            snap_share($share,$user,$passwd,$workgroup);
            $backup_ok++;
        }
    }
}
close(M);
$p->close();

# summary; avoid dividing by zero when the file had no smbmount lines
my $total   = scalar(@in_backup);
my $percent = $total ? int($backup_ok*100/$total) : 0;
xlog("","$backup_ok backups completed of total ".$total." this time (".$percent." %)");

1;
140
141 #-------------------------------------------------------------------------
142
143
# get IP number from share
#
# Takes a share name of the form //host/dir..., asks nmblookup for the
# address of "host" and returns the dotted-quad IP found on the output
# line that mentions the host. Returns nothing (undef in scalar context)
# if the share can't be parsed, nmblookup can't be run or no address is
# reported.
sub get_ip {
    my $share = shift;

    return if !defined $share;
    my ($host) = $share =~ m#//([^/]+)/#;
    return if !defined $host;

    # list-form pipe open runs nmblookup directly, without a shell, so
    # odd characters in the host name can't be interpreted as shell syntax
    my $nmb;
    if (! open($nmb, '-|', 'nmblookup', $host)) {
        warn "can't run nmblookup $host: $!\n";
        return;
    }
    my $out = do { local $/; <$nmb> };
    close($nmb);
    return if !defined $out;

    # \Q..\E: the host name is literal text, not a pattern
    if ($out =~ m/(\d+\.\d+\.\d+\.\d+)\s\Q$host\E/i) {
        return $1;
    }
    return;
}
155
156
# write entry to screen and log
#
# xlog(share, message): prints the message on STDERR and appends
# "timestamp share<TAB>message" to the log handle L. An empty or missing
# message is replaced by a placeholder.
sub xlog {
    my ($share, $message) = @_;
    $message ||= '[no log entry]';
    my $stamp = strftime($LOG_TIME_FMT, localtime);
    print STDERR $message, "\n";
    print L "$stamp $share\t$message\n";
}
165
# dump warn and dies into log
#
# Both hooks first record the message through xlog() and then re-raise
# it with the original builtin, so normal warn/die behaviour is kept.
BEGIN {
    $SIG{'__WARN__'} = sub {
        my $msg = $_[0];
        xlog('WARN', $msg);
        warn $msg;
    };
    $SIG{'__DIE__'} = sub {
        my $msg = $_[0];
        xlog('DIE', $msg);
        die $msg;
    };
}
169
170
# split share name to host, dir and currnet date dir
#
# For "//host/some/dir" returns (host, dir, date_dir) where every
# non-word character of dir is replaced by "_" (leading and trailing
# underscores removed) and date_dir is the current local time formatted
# with $DIR_TIME_FMT. Prints a message and returns nothing if the share
# doesn't look like //host/dir.
sub share2host_dir {
    my ($share) = @_;

    unless ($share =~ m#//([^/]+)/(.+)$#) {
        print "Can't parse share $share into host and directory!\n";
        return;
    }

    my ($host, $dir) = ($1, $2);
    for ($dir) {
        s/\W/_/g;
        s/^_+//;
        s/_+$//;
    }

    my $date_dir = strftime($DIR_TIME_FMT, localtime);
    return ($host, $dir, $date_dir);
}
186
187
# make a snapshot of a share
#
# snap_share(share, username, password, workgroup)
#
# Creates $BACKUP_DEST/host/dir/<date> as a snapshot of the SMB share:
#  - the previous snapshot ("latest" symlink, or the newest directory if
#    the link is missing) is scanned for files with size, atime and mtime
#  - the share is scanned the same way through Filesys::SmbClient
#  - files which are new, or whose size/atime/mtime differ, are copied
#    over the network; unchanged files are hard-linked from the previous
#    snapshot
#  - per-directory .md5sum files are written and "latest" is re-pointed
#    to the new snapshot
#
# Uses the file-level $smb, %smb_atime, %smb_mtime and %file_md5, which
# are reset here so nothing leaks from a previously processed share.
# Dies if $BACKUP_DEST is missing or a backup directory can't be created.
sub snap_share {

    my ($share, $user, $passwd, $workgroup) = @_;

    my %param = ( debug => 0 );

    # pass on only credentials we really have ("shift || warn" would
    # store warn's return value, 1, as the credential)
    if ($user) {
        $param{username} = $user;
    } else {
        warn "can't find username for share $share";
    }
    if ($passwd) {
        $param{password} = $passwd;
    } else {
        warn "can't find passwod for share $share";
    }
    if ($workgroup) {
        $param{workgroup} = $workgroup;
    } else {
        warn "can't find workgroup for share $share";
    }

    my ($host,$dir,$date_dir) = share2host_dir($share);
    # share2host_dir already printed why the share is unusable
    return if (!defined $host);

    my $base = "$BACKUP_DEST/$host/$dir";
    # latest backup directory
    my $bl = "$base/latest";
    # current backup directory
    my $bc = "$base/$date_dir";

    my $real_bl;
    if (-l $bl) {
        $real_bl = readlink($bl) || die "can't read link $bl: $!";
        $real_bl = "$base/$real_bl" if (substr($real_bl,0,1) ne "/");

        # "latest" is the symlink ($bc is a real directory): if it
        # already points to today's snapshot there is nothing to do
        if ($real_bl eq $bc && -d $bc) {
            print "$share allready backuped...\n";
            return;
        }
    } else {
        print "no old backup, trying to find last backup, ";
        my @bl_dirs;
        if (opendir(BL_DIR, $base)) {
            @bl_dirs = sort grep { !/^\./ && -d "$base/$_" } readdir(BL_DIR);
            closedir(BL_DIR);
        }
        if (@bl_dirs) {
            $real_bl = pop @bl_dirs;
            print "using $real_bl as latest...\n";
            $real_bl = "$base/$real_bl";
            if ($real_bl eq $bc) {
                # today's directory without "latest" link: leftover of
                # an interrupted run, keep it as source for hard links
                xlog($share,"latest from today (possible partial backup)");
                rename($real_bl, $real_bl.".partial")
                    or warn "can't reaname partial backup: $!";
                $real_bl .= ".partial";
            }
        } else {
            # no usable directory at all; $real_bl stays undefined so
            # the local scan below is skipped
            print "this is first run...\n";
        }
    }

    die "You should really create BACKUP_DEST [$BACKUP_DEST] by hand! " if (!-e $BACKUP_DEST);

    if (! -e "$BACKUP_DEST/$host") {
        mkdir("$BACKUP_DEST/$host")
            or die "can't make dir for host $host, $BACKUP_DEST/$host: $!";
        print "created host directory $BACKUP_DEST/$host...\n";
    }

    if (! -e $base) {
        mkdir($base)
            or die "can't make dir for share $share, $BACKUP_DEST/$host/$dir $!";
        print "created dir for share $share, $BACKUP_DEST/$host/$dir...\n";
    }

    # an existing directory (earlier partial run) is re-used
    if (! -d $bc) {
        mkdir($bc) or die "can't make dir for current backup $bc: $!";
    }

    my @dirs = ( "/" );
    my @smb_dirs = ( "/" );

    my $transfer = 0;    # bytes transfered over network

    # this will store all available files and sizes
    my @files;
    my %file_size;
    my %file_atime;
    my %file_mtime;

    my @smb_files;
    my %smb_size;

    # per-share state: start clean for every share
    %file_md5  = ();
    %smb_atime = ();
    %smb_mtime = ();
    my @skip = @ignore;    # global ignores + this share's .backupignore

    # read local filesystem
    my $di = 0;
    while ($real_bl && $di <= $#dirs) {
        my $d = $dirs[$di++];

        my $dh;
        if (! opendir($dh, "$real_bl/$d")) {
            warn "opendir($real_bl/$d): $!\n";
            next;
        }
        my @clutter = readdir($dh);
        closedir($dh);

        # read .backupignore if exists
        if (-f "$real_bl/$d/.backupignore") {
            if (open(my $ign, '<', "$real_bl/$d/.backupignore")) {
                while (my $entry = <$ign>) {
                    chomp $entry;
                    push @skip, norm_dir("$d/$entry");
                }
                close($ign);
            } else {
                warn "can't read $real_bl/$d/.backupignore: $!\n";
            }
            # carry the ignore file over to the new snapshot; only
            # possible where the target directory exists already
            if (-d "$bc/$d" && ! -e "$bc/$d/.backupignore") {
                link("$real_bl/$d/.backupignore", "$bc/$d/.backupignore")
                    or warn "can't copy $real_bl/$d/.backupignore to current backup dir: $!\n";
            }
        }

        # read .md5sum if exists
        if (-f "$real_bl/$d/.md5sum") {
            if (open(my $sums, '<', "$real_bl/$d/.md5sum")) {
                while (my $entry = <$sums>) {
                    chomp $entry;
                    my ($md5,$f) = split(/\s+/,$entry,2);
                    next if (!defined $f || $f eq '');
                    # .md5sum stores bare file names; key by the path
                    # relative to the snapshot, like smb_copy does
                    $file_md5{ norm_dir("$d/$f") } = $md5;
                }
                close($sums);
            } else {
                warn "can't read $real_bl/$d/.md5sum: $!\n";
            }
        }

        foreach my $f (@clutter) {
            next if ($f eq '.' || $f eq '..');
            my $pr = norm_dir("$d/$f");                 # path relative
            my $pf = norm_dir("$d/$f","$real_bl/");     # path full
            if (grep(/^\Q$pr\E$/,@skip)) {
                print STDERR "ignored: $pr\n";
                next;
            }
            if (-f $pf) {
                my @st = stat($pf);
                push @files,$pr;
                ($file_size{$pr},$file_atime{$pr},$file_mtime{$pr}) = @st[7,8,9];
            } elsif (-d $pf) {
                push @dirs,$pr;
            } else {
                print STDERR "unknown type: $pf\n";
            }
        }
    }

    xlog($share,scalar(@files)." files and ".scalar(@dirs)." dirs on local disk before backup");

    # read smb filesystem

    xlog($share,"smb to $share as "
        .(exists $param{username}  ? $param{username}  : '')."/"
        .(exists $param{workgroup} ? $param{workgroup} : ''));

    # FIX: how to aviod creation of ~/.smb/smb.conf ?
    $smb = Filesys::SmbClient->new(%param) or die "SmbClient :$!\n";

    $di = 0;
    while ($di <= $#smb_dirs) {
        my $d = $smb_dirs[$di];
        my $smb_dir = norm_dir($d,"smb:$share/");    # path full
        my $D = $smb->opendir($smb_dir);
        if (! $D) {
            xlog($share,"FATAL: $share: $!");
            # remove failing dir; splice really removes the slot (delete
            # would leave an undef entry which is then opened as root)
            splice(@smb_dirs, $di, 1);
            next;
        }
        $di++;

        my @clutter = $smb->readdir_struct($D);
        $smb->closedir($D);

        foreach my $item (@clutter) {
            my $f = $item->[1];
            next if ($f eq '.' || $f eq '..');
            my $pr = norm_dir("$d/$f");                  # path relative
            my $pf = norm_dir("$d/$f","smb:$share/");    # path full
            if (grep(/^\Q$pr\E$/,@skip)) {
                print STDERR "smb ignored: $pr\n";
                next;
            }
            if ($item->[0] == main::SMBC_FILE) {
                # one stat over the network instead of three
                my @st = $smb->stat($pf);
                push @smb_files,$pr;
                ($smb_size{$pr},$smb_atime{$pr},$smb_mtime{$pr}) = @st[7,10,11];
            } elsif ($item->[0] == main::SMBC_DIR) {
                push @smb_dirs,$pr;
            } else {
                print STDERR "unknown type: $pf\n";
            }
        }
    }

    xlog($share,scalar(@smb_files)." files and ".scalar(@smb_dirs)." dirs on remote share");

    # sync dirs
    my $lc = List::Compare->new(\@dirs, \@smb_dirs);

    my @dirs2erase = $lc->get_Lonly;
    my @dirs2create = $lc->get_Ronly;
    xlog($share,scalar(@dirs2erase)." dirs to erase and ".scalar(@dirs2create)." dirs to create");

    # create new dirs (sorted, so parents come before children)
    foreach my $d (sort @smb_dirs) {
        next if (-d "$bc/$d");
        mkdir("$bc/$d") or warn "mkdir $d: $!\n";
    }

    # sync files
    $lc = List::Compare->new(\@files, \@smb_files);

    my @files2erase = $lc->get_Lonly;
    my @files2create = $lc->get_Ronly;
    xlog($share,scalar(@files2erase)." files to erase and ".scalar(@files2create)." files to create");

    # copy new files
    $transfer += smb_copy($smb,"smb:$share",$bc,@files2create);

    my $size_sync = 0;
    my $atime_sync = 0;
    my $mtime_sync = 0;
    my @sync_files;
    my @ln_files;

    # files present on both sides: any difference means re-transfer
    foreach my $f ($lc->get_intersection) {
        my $changed = 0;
        if ($file_size{$f} != $smb_size{$f}) {
            $changed = 1;
            $size_sync++;
        }
        if ($file_atime{$f} != $smb_atime{$f}) {
            $changed = 1;
            $atime_sync++;
        }
        if ($file_mtime{$f} != $smb_mtime{$f}) {
            $changed = 1;
            $mtime_sync++;
        }

        if ($changed) {
            push @sync_files, $f;
        } else {
            push @ln_files, $f;
        }
    }

    xlog($share,scalar(@sync_files)." files will be updated (diff: $size_sync size, $atime_sync atime, $mtime_sync mtime), ".scalar(@ln_files)." will be linked.");

    $transfer += smb_copy($smb,"smb:$share",$bc,@sync_files);

    xlog($share,"$transfer bytes transfered...");

    foreach my $f (@ln_files) {
        next if (-e "$bc/$f");    # already there from a partial run
        link("$real_bl/$f","$bc/$f") or warn "link $real_bl/$f -> $bc/$f: $!\n";
    }

    # remove files (they exist in $bc only when it was re-used)
    foreach my $f (sort @files2erase) {
        next if (! -e "$bc/$f");
        unlink("$bc/$f") or warn "unlink $f: $!\n";
    }

    # remove not needed dirs (after files)
    foreach my $d (sort @dirs2erase) {
        next if (! -d "$bc/$d");
        rmdir("$bc/$d") or warn "rmdir $d: $!\n";
    }

    # remove old .md5sum
    foreach my $d (@dirs, @smb_dirs) {
        unlink("$bc/$d/.md5sum") if (-e "$bc/$d/.md5sum");
    }

    # create .md5sum -- one per directory, only for files which really
    # are in the new snapshot
    my %md5_in_dir;
    foreach my $f (keys %file_md5) {
        next if (! -f "$bc/$f");
        push @{ $md5_in_dir{ dirname($f) } }, $f;
    }
    foreach my $d (sort keys %md5_in_dir) {
        my $out;
        if (! open($out, '>>', "$bc/$d/.md5sum")) {
            warn "can't create $bc/$d/.md5sum: $!";
            next;
        }
        foreach my $f (sort @{ $md5_in_dir{$d} }) {
            print $out $file_md5{$f}," ",basename($f),"\n";
        }
        close($out) or warn "can't write $bc/$d/.md5sum: $!";
    }

    # create leatest link
    if (-l $bl) {
        unlink($bl) or warn "can't remove old latest symlink $bl: $!\n";
    }
    symlink($bc,$bl) or warn "can't create latest symlink $bl -> $bc: $!\n";

    # FIX: sanity check -- remove for speedup
    xlog($share,"failed to create latest symlink $bl -> $bc...") if (! -l $bl || readlink($bl) ne $bc);

    xlog($share,"backup completed...");
}

# normalize path: squeeze repeated slashes, strip leading and trailing
# ones and prepend the optional prefix
#
# norm_dir("//a//b/") gives "a/b", norm_dir("/", "smb://h/s/") gives
# "smb://h/s/"
sub norm_dir {
    my $foo = shift;
    my $prefix = shift;
    $foo =~ s#//+#/#g;
    $foo =~ s#/+$##g;
    $foo =~ s#^/+##g;
    return $prefix.$foo if ($prefix);
    return $foo;
}

# copy files from smb share to local directory
#
# smb_copy(smb, from, to, file, ...): for each relative file name copies
# "from/file" (through the Filesys::SmbClient object smb) to "to/file",
# stores its MD5 in %file_md5 and sets atime/mtime from the remote file.
# Files which can't be opened are reported on STDERR and skipped.
# Returns number of bytes transfered.
sub smb_copy {
    my $smb = shift;

    my $from = shift;
    my $to = shift;

    my $l = 0;

    foreach my $f (@_) {
        # open the remote side first, so a failure doesn't leave an
        # empty local file behind
        my $fd = $smb->open("$from/$f");
        if (! $fd) {
            print STDERR "can't open smb file $from/$f: $!\n";
            next;
        }

        # never write through an existing name: it could be a hard link
        # shared with an older snapshot
        unlink("$to/$f") if (-e "$to/$f" || -l "$to/$f");

        my $out;
        if (! open($out, '>', "$to/$f")) {
            print STDERR "can't open new file $to/$f: $!\n";
            $smb->close($fd);
            next;
        }
        binmode($out);

        my $md5 = Digest::MD5->new;

        while (defined(my $buf = $smb->read($fd,4096))) {
            # an empty read is treated as end of file, so the loop can't
            # spin if read() reports EOF as an empty string
            last if (!length($buf));
            print $out $buf;
            $l += length($buf);
            $md5->add($buf);
        }

        $smb->close($fd);
        close($out) or warn "can't write $to/$f: $!\n";

        $file_md5{$f} = $md5->hexdigest;

        # FIX: this fails with -T
        my ($atime,$mtime) = ($smb->stat("$from/$f"))[10,11];
        if (defined $atime && defined $mtime) {
            utime($atime, $mtime, "$to/$f")
                or warn "can't update utime on $to/$f: $!\n";
        } else {
            warn "can't update utime on $to/$f: $!\n";
        }
    }
    return $l;
}
529
530 __END__
531 #-------------------------------------------------------------------------
532
533
534 =head1 NAME
535
536 psinib - Perl Snapshot Is Not Incremental Backup
537
538 =head1 SYNOPSIS
539
540 ./psinib.pl
541
542 =head1 DESCRIPTION
543
544 This script in its current version supports only backup of Samba (or Micro$oft
545 Winblowz) shares to central disk space. Central disk space is organized in
546 multiple directories named after:
547
548 =over 4
549
550 =item *
551 server which is sharing files to be backed up
552
553 =item *
554 name of share on server
555
556 =item *
557 dated directory named like standard ISO date format (YYYYMMDD).
558
559 =back
560
561 In each dated directory you will find I<snapshot> of all files on
562 exported share on that particular date.
563
564 You can also use symlink I<latest> which will lead you to
565 last completed backup. After that you can use some other backup
566 software to transfer I<snapshot> to tape, CD-ROM or some other media.
567
568 =head2 Design considerations
569
570 Since taking of share snapshot every day requires a lot of disk space and
571 network bandwidth, B<psinib> uses several techniques to keep disk usage and
572 network traffic at acceptable level:
573
574 =over 3
575
576 =item - usage of hard-links to provide same files in each snapshot (as opposed
577 to have multiple copies of same file)
578
579 =item - usage of file size, atime and mtime to find changes of files without
580 transferring whole file over network (just share browsing is transfered
581 over network)
582
583 =item - usage of C<.md5sum> files (compatible with command-line utility
584 C<md5sum>) to keep file between snapshots hard-linked
585
586 =back
587
588 =head1 CONFIGURATION
589
590 This section is not yet written.
591
592 =head1 HACKS, TRICKS, BUGS and LIMITATIONS
593
594 This chapter will have all content that doesn't fit anywhere else.
595
596 =head2 Can snapshots be more frequent than daily?
597
598 There is no real reason why you can't take snapshots more often than
599 once a day. Actually, if you are using B<psinib> to backup Windows
600 workstations you already know that they tend to come-and-go during the day
601 (reboots probably ;-), so running B<psinib> several times a day increases
602 your chance of having up-to-date backup (B<psinib> will not make multiple
603 snapshots for same day, nor will it update snapshot for current day if
604 it already exists).
605
606 However, changing B<psinib> to produce snapshots which are, for example, hourly
607 is a simple change of C<$DIR_TIME_FMT> which is currently set to
608 C<'%Y%m%d'> (see I<strftime> documentation for explanation of that
609 format). If you change that to C<'%Y%m%d-%H'> you can have hourly snapshots
610 (if your network is fast enough, that is...). Also, some of messages in
611 program will sound strange, but other than that it should work.
612 I<You have been warned>.
613
614 =head2 Do I really need to share every directory which I want to snapshot?
615
616 Actually, no. Due to usage of C<Filesys::SmbClient> module, you can also
617 specify sub-directory inside your share that you want to backup. This feature
618 is most useful if you want to use administrative shares (but, have in mind
619 that you have to enter your Win administrator password in unencrypted file on
620 disk to do that) like this:
621
622 smbmount //server/c$/WinNT/fonts /mnt -o username=administrator%win
623
624 After that you will get directories with snapshots like:
625
626 server/c_WinNT_fonts/yyyymmdd/....
627
628 =head2 Won't I run out of disk space?
629
630 Of course you will... Snapshots and logfiles will eventually fill-up your disk.
631 However, you can do two things to stop that:
632
633 =head3 Clean snapshots older than x days
634
635 You can add following command to your C<root> crontab:
636
637 find /backup/isis_backup -type d -mindepth 3 -maxdepth 3 -mtime +11 -exec rm -Rf {} \;
638
639 I assume that C</backup/isis_backup> is directory in which are your snapshots
640 and that you don't want to keep snapshots older than 11 days (that's
641 C<-mtime +11> part of command).
642
643 =head3 Rotate your logs
644
645 I will leave that to you. I rely on GNU/Debian's C<logrotate> to do it for me.
646
647 =head2 What are I<YYYYMMDD.partial> directories?
648
649 If there isn't a I<latest> symlink in the snapshot directory, it's pretty safe to
650 assume that previous backup from that day failed. So, that directory will
651 be renamed to I<YYYYMMDD.partial> and snapshot will be performed again,
652 linking same files (other alternative would be to erase that dir and find
653 second-oldest directory, but this seemed like more correct approach).
654
655 =head1 AUTHOR
656
657 Dobrica Pavlinusic <dpavlin@rot13.org>
658
659 L<http://www.rot13.org/~dpavlin/>
660
661 =head1 LICENSE
662
663 This product is licensed under GNU Public License (GPL) v2 or later.
664
665 =cut

  ViewVC Help
Powered by ViewVC 1.1.26