--- psinib.pl	2003/01/04 11:42:56	1.1.1.1
+++ psinib.pl	2003/07/15 17:40:32	1.9
@@ -12,7 +12,7 @@
#
#
# usage:
-#	$ backup.pl mountscript
+#	$ psinib.pl mountscript

use strict 'vars';
use Data::Dumper;
@@ -21,24 +21,39 @@
use List::Compare;
use Filesys::SmbClient;
#use Taint;
+use Fcntl qw(LOCK_EX LOCK_NB);
+use Digest::MD5;
+use File::Basename;

# configuration
my $LOG_TIME_FMT = '%Y-%m-%d %H:%M:%S';	# strftime format for logfile
my $DIR_TIME_FMT = '%Y%m%d';		# strftime format for backup dir
my $LOG = '/var/log/backup.log';	# add path here...
-$LOG = '/tmp/backup.log';
+#$LOG = '/tmp/backup.log';

# store backups in which directory
-my $BACKUP_DEST = '/data/isis_backup';
+my $BACKUP_DEST = '/backup/isis_backup';

# files to ignore in backup
my @ignore = ('.md5sum', '.backupignore', 'backupignore.txt');

# open log
-open(L, "> $LOG") || die "can't open log $LOG: $!";
+open(L, ">> $LOG") || die "can't open log $LOG: $!";
select((select(L), $|=1)[0]);	# flush output

+# make a lock on logfile
+
+my $c = 0;
+{
+	flock L, LOCK_EX | LOCK_NB and last;
+	sleep 1;
+	redo if ++$c < 10;
+	# no response for 10 sec, bail out
+	print STDERR "can't take lock on $LOG -- another $0 running?\n";
+	exit 1;
+}
+
# taint path: nmblookup should be there!
$ENV{'PATH'} = "/usr/bin:/bin";
@@ -49,13 +64,16 @@
my @in_backup;	# shares which are backeduped this run

-my $p = new Net::Ping->new();
+my $p = new Net::Ping->new("tcp", 2);
+# ping will try tcp connect to netbios-ssn (139)
+$p->{port_num} = getservbyname("netbios-ssn", "tcp");

my $backup_ok = 0;

my $smb;
my %smb_atime;
my %smb_mtime;
+my %file_md5;

open(M, $mounts) || die "can't open $mounts: $!";
while(<M>) {
@@ -84,8 +102,26 @@
	push @in_backup,$share;
+
+	my ($host,$dir,$date_dir) = share2host_dir($share);
+	my $bl = "$BACKUP_DEST/$host/$dir/latest";	# latest backup
+	my $bc = "$BACKUP_DEST/$host/$dir/$date_dir";	# current one
+	my $real_bl;
+	if (-l $bl) {
+		$real_bl=readlink($bl) || die "can't read link $bl: $!";
+		$real_bl="$BACKUP_DEST/$host/$dir/$real_bl" if (substr($real_bl,0,1) ne "/");
+		if (-l $bc && $real_bl eq $bc) {
+			print "$share allready backuped...\n";
+			$backup_ok++;
+			next;
+		}
+
+	}
+
+	print "working on $share\n";
+
	my $ip = get_ip($share);
	if ($ip) {
@@ -104,6 +140,7 @@

#-------------------------------------------------------------------------

+# get IP number from share
sub get_ip {
	my $share = shift;
@@ -116,6 +153,8 @@
	}
}

+
+# write entry to screen and log
sub xlog {
	my $share = shift;
	my $t = strftime $LOG_TIME_FMT, localtime;
@@ -124,16 +163,14 @@
	print L "$t $share\t$m\n";
}

-sub snap_share {
+# dump warn and dies into log
+BEGIN { $SIG{'__WARN__'} = sub { xlog('WARN',$_[0]) ; warn $_[0] } }
+BEGIN { $SIG{'__DIE__'} = sub { xlog('DIE',$_[0]) ; die $_[0] } }

-	my $share = shift;
-
-	my %param = ( debug => 0 );
-
-	$param{username} = shift;
-	$param{password} = shift;
-	$param{workgroup} = shift;
+# split share name to host, dir and currnet date dir
+sub share2host_dir {
+	my $share = shift;
	my ($host,$dir);
	if ($share =~ m#//([^/]+)/(.+)$#) {
		($host,$dir) = ($1,$2);
@@ -144,8 +181,22 @@
		print "Can't parse share $share into host and directory!\n";
		return;
	}
+	return ($host,$dir,strftime $DIR_TIME_FMT, localtime);
+}

-	my $date_dir = strftime $DIR_TIME_FMT, localtime;
+
+# make a snapshot of a share
+sub snap_share {
+
+	my $share = shift;
+
+	my %param = ( debug => 0 );
+
+	$param{username} = shift || warn "can't find username for share $share";
+	$param{password} = shift || warn "can't find passwod for share $share";
+	$param{workgroup} = shift || warn "can't find workgroup for share $share";
+
+	my ($host,$dir,$date_dir) = share2host_dir($share);

	# latest backup directory
	my $bl = "$BACKUP_DEST/$host/$dir/latest";
@@ -153,14 +204,28 @@
	my $bc = "$BACKUP_DEST/$host/$dir/$date_dir";

	my $real_bl;
-	if (-e $bl) {
+	if (-l $bl) {
		$real_bl=readlink($bl) || die "can't read link $bl: $!";
		$real_bl="$BACKUP_DEST/$host/$dir/$real_bl" if (substr($real_bl,0,1) ne "/");
	} else {
-		print "no old backup, this is first run...\n";
+		print "no old backup, trying to find last backup, ";
+		if (opendir(BL_DIR, "$BACKUP_DEST/$host/$dir")) {
+			my @bl_dirs = sort grep { !/^\./ && -d "$BACKUP_DEST/$host/$dir/$_" } readdir(BL_DIR);
+			closedir(BL_DIR);
+			$real_bl=pop @bl_dirs;
+			print "using $real_bl as latest...\n";
+			$real_bl="$BACKUP_DEST/$host/$dir/$real_bl" if (substr($real_bl,0,1) ne "/");
+			if ($real_bl eq $bc) {
+				xlog($share,"latest from today (possible partial backup)");
+				rename $real_bl,$real_bl.".partial" || warn "can't reaname partial backup: $!";
+				$real_bl .= ".partial";
+			}
+		} else {
+			print "this is first run...\n";
+		}
	}

-	if (-e $bc && $real_bl && $real_bl eq $bc) {
+	if (-l $bc && $real_bl && $real_bl eq $bc) {
		print "$share allready backuped...\n";
		return;
	}
@@ -189,14 +254,12 @@
	my %file_size;
	my %file_atime;
	my %file_mtime;
-	my %file_md5;
+	#my %file_md5;

	my @smb_files;
	my %smb_size;
	#my %smb_atime;
	#my %smb_mtime;
-	my %smb_md5;
-
	sub norm_dir {
		my $foo = shift;
@@ -212,24 +275,24 @@
	my $di = 0;
	while ($di <= $#dirs && $real_bl) {
		my $d=$dirs[$di++];
-		opendir(DIR,"$bl/$d") || warn "opendir($bl/$d): $!\n";
+		opendir(DIR,"$real_bl/$d") || warn "opendir($real_bl/$d): $!\n";

		# read .backupignore if exists
-		if (-f "$bl/$d/.backupignore") {
-			open(I,"$bl/$d/.backupignore");
+		if (-f "$real_bl/$d/.backupignore") {
+			open(I,"$real_bl/$d/.backupignore");
			while(<I>) {
				chomp;
				push @ignore,norm_dir("$d/$_");
			}
			close(I);
-print STDERR "ignore: ",join("|",@ignore),"\n";
-			link "$bl/$d/.backupignore","$bc/$d/.backupignore" ||
-				warn "can't copy $bl/$d/.backupignore to current backup dir: $!\n";
+#print STDERR "ignore: ",join("|",@ignore),"\n";
+			link "$real_bl/$d/.backupignore","$bc/$d/.backupignore" ||
+				warn "can't copy $real_bl/$d/.backupignore to current backup dir: $!\n";
		}

		# read .md5sum if exists
-		if (-f "$bl/$d/.md5sum") {
-			open(I,"$bl/$d/.md5sum");
+		if (-f "$real_bl/$d/.md5sum") {
+			open(I,"$real_bl/$d/.md5sum");
			while(<I>) {
				chomp;
				my ($md5,$f) = split(/\s+/,$_,2);
@@ -243,7 +306,7 @@
			next if ($f eq '.');
			next if ($f eq '..');
			my $pr = norm_dir("$d/$f");		# path relative
-			my $pf = norm_dir("$d/$f","$bl/");	# path full
+			my $pf = norm_dir("$d/$f","$real_bl/");	# path full
			if (grep(/^\Q$pr\E$/,@ignore) == 0) {
				if (-f $pf) {
					push @files,$pr;
@@ -272,9 +335,16 @@
	$di = 0;
	while ($di <= $#smb_dirs) {
-		my $d=$smb_dirs[$di++];
+		my $d=$smb_dirs[$di];
		my $pf = norm_dir($d,"smb:$share/");	# path full
-		my $D = $smb->opendir($pf) || warn "smb->opendir($pf): $!\n";
+		my $D = $smb->opendir($pf);
+		if (! $D) {
+			xlog($share,"FATAL: $share: $!");
+			# remove failing dir
+			delete $smb_dirs[$di];
+			next;
+		}
+		$di++;

		my @clutter = $smb->readdir_struct($D);
		foreach my $item (@clutter) {
@@ -337,6 +407,8 @@
			next;
		}

+		my $md5 = Digest::MD5->new;
+
		my $fd = $smb->open("$from/$f");
		if (! $fd) {
			print STDERR "can't open smb file $from/$f: $!\n";
@@ -346,11 +418,14 @@
		while (defined(my $b=$smb->read($fd,4096))) {
			print F $b;
			$l += length($b);
+			$md5->add($b);
		}
		$smb->close($fd);
		close(F);

+		$file_md5{$f} = $md5->hexdigest;
+
		# FIX: this fails with -T
		my ($a,$m) = ($smb->stat("$from/$f"))[10,11];
		utime $a, $m, "$to/$f" ||
@@ -404,7 +479,7 @@
	xlog($share,"$transfer bytes transfered...");

	foreach (@ln_files) {
-		link "$bl/$_","$bc/$_" || warn "link $bl/$_ -> $bc/$_: $!\n";
+		link "$real_bl/$_","$bc/$_" || warn "link $real_bl/$_ -> $bc/$_: $!\n";
	}

	# remove files
@@ -417,14 +492,174 @@
		rmdir "$bc/$_" || warn "rmdir $_: $!\n";
	}

-
-	# FIX: create .md5sum
+	# remove old .md5sum
+	foreach (sort @dirs) {
+		unlink "$bc/$_/.md5sum" if (-e "$bc/$_/.md5sum");
+	}
+
+	# create .md5sum
+	my $last_dir = '';
+	my $md5;
+	foreach my $f (sort { $file_md5{$a} cmp $file_md5{$b} } keys %file_md5) {
+		my $dir = dirname($f);
+		my $file = basename($f);
+print "$f -- $dir / $file<--\n";
+		if ($dir ne $last_dir) {
+			close($md5) if ($md5);
+			open($md5, ">> $bc/$dir/.md5sum") || warn "can't create $bc/$dir/.md5sum: $!";
+			$last_dir = $dir;
+#print STDERR "writing $last_dir/.md5sum\n";
+		}
+		print $md5 $file_md5{$f}," $file\n";
+	}
+	close($md5);

	# create leatest link
+#print "ln -s $bc $real_bl\n";
+	if (-l $bl) {
+		unlink $bl || warn "can't remove old latest symlink $bl: $!\n";
+	}
	symlink $bc,$bl || warn "can't create latest symlink $bl -> $bc: $!\n";
+	# FIX: sanity check -- remove for speedup
+	xlog($share,"failed to create latest symlink $bl -> $bc...") if (readlink($bl) ne $bc || ! -l $bl);
+
	xlog($share,"backup completed...");
}

__END__
#-------------------------------------------------------------------------

=head1 NAME

psinib - Perl Snapshot Is Not Incremental Backup

=head1 SYNOPSIS

./psinib.pl

=head1 DESCRIPTION

This script, in its current version, supports just backup of Samba (or
Micro$oft Winblowz) shares to central disk space. Central disk space is
organized in multiple directories named after:

=over 4

=item *
server which is sharing files to be backed up

=item *
name of share on server

=item *
dated directory named using the standard ISO date format (YYYYMMDD).

=back

In each dated directory you will find a I<snapshot> of all files on the
exported share on that particular date.

You can also use the symlink I<latest>, which will lead you to the last
completed backup. After that you can use some other backup software to
transfer I<latest> to tape, CD-ROM or some other media.

=head2 Design considerations

Since taking a snapshot of a share every day requires a lot of disk space
and network bandwidth, B<psinib> uses several techniques to keep disk usage
and network traffic at an acceptable level (a sketch of the hard-linking
decision follows the list):

=over 3

=item - usage of hard-links to provide the same files in each snapshot (as
opposed to having multiple copies of the same file)

=item - usage of file size, atime and mtime to find changed files without
transferring the whole file over the network (just share browsing is
transferred over the network)

=item - usage of C<.md5sum> files (compatible with the command-line utility
C<md5sum>) to keep files hard-linked between snapshots

=back
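
Roughly, a file that already exists in the previous snapshot and looks
unchanged is hard-linked into today's directory instead of being copied
again. A minimal sketch of that decision, using hypothetical variable and
helper names rather than the script's own, could look like this:

	# %old_size/%old_atime/%old_mtime describe a file in the previous
	# snapshot; $new_* come from stat()-ing the file on the share.
	# $prev_dir, $curr_dir and copy_from_share() are illustrative only.
	if (defined $old_size{$f}
		&& $old_size{$f}  == $new_size
		&& $old_atime{$f} == $new_atime
		&& $old_mtime{$f} == $new_mtime) {
		# unchanged: reuse it via a hard-link, no network transfer
		link "$prev_dir/$f", "$curr_dir/$f" or warn "link $f: $!";
	} else {
		# new or changed: copy it from the share (and record its MD5)
		copy_from_share($f);
	}

Because hard links point at the same inode, a file that never changes costs
disk space only once, no matter how many dated directories reference it.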

=head1 CONFIGURATION

This section is not yet written.

=head1 HACKS, TRICKS, BUGS and LIMITATIONS

This chapter will have all content that doesn't fit anywhere else.

=head2 Can snapshots be more frequent than daily?

There is no real reason why you can't take snapshots more often than once
a day. Actually, if you are using B<psinib> to back up Windows
workstations, you already know that they tend to come-and-go during the
day (reboots, probably ;-), so running B<psinib> several times a day
increases your chance of having an up-to-date backup (B<psinib> will not
make multiple snapshots for the same day, nor will it update the snapshot
for the current day if it already exists).

However, changing B<psinib> to produce snapshots which are, for example,
hourly is a simple change of C<$DIR_TIME_FMT>, which is currently set to
C<'%Y%m%d'> (see the I<strftime> documentation for an explanation of that
format). If you change that to C<'%Y%m%d-%H'> you can have hourly
snapshots (if your network is fast enough, that is...). Also, some of the
messages in the program will sound strange, but other than that it should
work.
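
For hourly snapshots, the change would look something like this (the
example directory name in the comment is simply what that I<strftime>
format produces):

	my $DIR_TIME_FMT = '%Y%m%d-%H';	# e.g. 20030715-14 instead of 20030715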

=head2 Do I really need to share every directory which I want to snapshot?

Actually, no. Due to usage of the C<Filesys::SmbClient> module, you can
also specify a sub-directory inside your share that you want to back up.
This feature is most useful if you want to use administrative shares (but
have in mind that you have to enter your Win administrator password in an
unencrypted file on disk to do that) like this:

	smbmount //server/c$/WinNT/fonts /mnt -o username=administrator%win

After that you will get directories with snapshots like:

	server/c_WinNT_fonts/yyyymmdd/....

=head2 Won't I run out of disk space?

Of course you will... Snapshots and logfiles will eventually fill up your
disk. However, you can do two things to stop that:

=head3 Clean snapshots older than x days

You can add the following command to your crontab:

	find /backup/isis_backup -type d -mindepth 3 -maxdepth 3 -mtime +11 -exec rm -Rf {} \;

I assume that C</backup/isis_backup> is the directory in which your
snapshots are stored and that you don't want to keep snapshots older than
11 days (that's the C<-mtime +11> part of the command).

=head3 Rotate your logs

I will leave that to you. I rely on GNU/Debian's C<logrotate> to do it
for me.

=head2 What are I<.partial> directories?

If there isn't a I<latest> symlink pointing to a snapshot directory, it's
pretty safe to assume that the previous backup from that day failed. So,
that directory will be renamed to I<yyyymmdd.partial> and the snapshot
will be performed again, hard-linking the same files (the other
alternative would be to erase that dir and find the second-oldest
directory, but this seemed like the more correct approach).

=head1 AUTHOR

Dobrica Pavlinusic

=head1 LICENSE

This product is licensed under GNU Public License (GPL) v2 or later.

=cut