--- fuse-comp.pl 2007/07/08 13:46:33 4 +++ fuse-comp.pl 2007/09/02 12:03:52 39 @@ -4,23 +4,38 @@ use threads::shared; use Fuse; -use IO::File; use POSIX qw(ENOENT ENOSYS EEXIST EPERM O_RDONLY O_RDWR O_APPEND O_CREAT); use Fcntl qw(S_ISBLK S_ISCHR S_ISFIFO SEEK_SET); require 'syscall.ph'; # for SYS_mknod and SYS_lchown use PerlIO::gzip; use File::Path; use Data::Dump qw/dump/; +use Carp qw/confess cluck/; +use IO::File; +use Getopt::Long; + +my $debug = 0; +my $fuse_debug = 0; +my $stats = 1; + +GetOptions( + 'debug+' => \$debug, + 'fuse-debug+' => \$fuse_debug, + 'stats!' => \$stats, +); my $mount = { - from => '/tmp/comp', - to => '/tmp/no-comp', - tmp => '/dev/shm/comp', + from => shift @ARGV || '/tmp/comp', + to => shift @ARGV || '/tmp/no-comp', + tmp => shift @ARGV || '/dev/shm/comp', }; -my $debug = 1; +warn "mount $mount->{from} to $mount->{to} using $mount->{tmp} as cache\n"; + +my $skip_extensions_regex = qr/gz|gz%|\.(?:sw[a-z]|gif|png|jpeg|jpg|avi|rar|zip|bz2|tgz|avi|mpeg|mpg|tmp|temp)$/i; -my $skip_extensions_regex = qr/\.(?:sw[a-z]|gif|png|jpeg|jpg|avi|rar|zip|bz2|gz|tgz|avi|mpeg|mpg|tmp|temp)$/i; +# don't compress files smaller than this +my $min_compress_size = 512; foreach my $dir ( keys %$mount ) { if ( ! -e $mount->{$dir} ) { @@ -31,18 +46,28 @@ my $pending; +sub real_name { + my ( $dir, $name ) = @_; + if ( -e "$dir/${name}.gz" ) { + cluck "ASSERT: unexpected $dir/$name exists" if -e "$dir/$name"; + return "${name}.gz"; + } + if ( $name =~ m/\.gz$/ ) { + return $name . '%'; # protect (mingle) compressed files + } else { + return $name; + } +} + sub fixup { my ( $path ) = @_; - my $full = $mount->{from} . '/' . $path; - if ( -e $full . '.gz' ) { - return $full . '.gz'; - } - return $full; + return $mount->{from} . '/' . real_name( $mount->{from}, $path ); } sub original_name { my $p = shift; $p =~ s/\.gz$//; + $p =~ s/\.gz%$/.gz/; # demungle compressed .gz files return $p; }; @@ -85,99 +110,321 @@ sub file_copy { my ( $s_opt, $s_path, $d_opt, $d_path ) = @_; - warn "## file_copy( $s_opt $s_path $d_opt $d_path )\n"; - open(my $s, $s_opt, $s_path ) || die "can't open $s_path: $!"; - open(my $d, $d_opt, $d_path ) || die "can't open $d_path: $!"; + warn "## file_copy( $s_opt $s_path [",-s $s_path,"] $d_opt $d_path [",-e $d_path ? -s $d_path : 'new',"])\n" if $debug; + open(my $s, $s_opt, $s_path ) || confess "can't open $s_path: $!\npending = ", dump( $pending ); + open(my $d, $d_opt, $d_path ) || confess "can't open $d_path: $!"; my $buff; while( read( $s, $buff, 65535 ) ) { - print $d $buff || die "can't write into $d_path: $!"; - warn ">> ", length($buff), " bytes, offset ", tell($s), " -> ", tell($d), "\n" if $debug; + print $d $buff || confess "can't write into $d_path: $!"; + warn ">> [", length($buff), "] offset ", tell($s), " -> ", tell($d), "\n" if $debug; } close($d) || warn "can't close $d_path: $!"; close($s) || warn "can't close $s_path: $!"; - warn "-- $s_path [", -s $s_path, "]\n >>> $d_path [", -s $d_path, "]\n" if $debug; + warn "-- $s_path [", -s $s_path, "] >>> $d_path [", -s $d_path, "]\n" if $debug; + my ($mode,$uid,$gid,$atime,$mtime) = (stat $s_path)[2,4,5,8,9]; + + chmod $mode, $d_path || warn "chmod( $mode $d_path ) failed: $!\n"; + chown $uid,$gid,$d_path || warn "chown( $uid $gid $d_path ) failed: $!\n"; + utime $atime,$mtime,$d_path || warn "utime( $atime $mtime $d_path ) failed: $!\n"; + + undef $d; + undef $s; } -sub x_open { - my ($file) = shift; - my ($mode) = shift; - $pending->{$file}->{open}++; - my $fh; - if ( $pending->{$file}->{open} == 1 ) { - warn "# open( $file, $mode )\n"; - my $path = fixup($file); +sub tmp_path { + my $file = shift; + + my $path = fixup( $file ); + + my $op = 'UNKNOWN'; + + if (defined( $pending->{$file} )) { + $path = $pending->{$file}->{path} || confess "no path for $file in ",dump( $pending ); + $op = 'opened'; + } else { my $tmp = $mount->{tmp} . '/' . $file; if ( -e $tmp ) { $path = $tmp; + $op = 'existing'; } elsif ( $path =~ m/\.gz$/ ) { my $dest_path = $tmp; $dest_path =~ s!/[^/]+$!!; #!vim-fix mkpath $dest_path unless -e $dest_path; - file_copy( '<:gzip', $path, '>', $tmp ); + if ( -s $path ) { + file_copy( '<:gzip', $path, '>', $tmp ) + } else { + confess "ASSERT: filesystem corruption, $path is zero size in ",dump( $pending ); + } $path = $tmp; + $op = 'created'; } - return -$! unless sysopen($fh , $path, $mode); - $pending->{$file}->{fh} = $fh; + confess "ASSERT: path shouldn't exist for $file in ", dump( $pending ) if defined( $pending->{$file}->{path} ); + confess "ASSERT: open shouldn't exist for $file in ", dump( $pending ) if defined( $pending->{$file}->{open} ); $pending->{$file}->{path} = $path; - } elsif ( ! defined( $pending->{$file}->{fh} ) ) { - die "can't find fh for $file ", dump($pending); + $pending->{$file}->{open} = 0; # not really opened, just uncompressed + warn "## tmp_file( $file ) $op $path [", -s $path, "]\n" if $debug; } - return 0; + return $path; +} + +sub compress_file2path { + my ( $file, $path ) = @_; + + my $dest = fixup( $file ); + + if ( defined($pending->{$file}) ) { + my $pending_path = $pending->{$file}->{path} || confess "no path for $file in ",dump( $pending ); + + if ( $pending->{$file}->{open} > 1 ) { + warn "$file used ", $pending->{$file}->{open}, " times, delaying compression\n"; + return; + } elsif ( ! $path ) { + $path = $pending_path; + } elsif ( $pending_path ne $path ) { + confess "ASSERT: compressing into $path instead of $pending_path"; + } + } + + confess "need path" unless $path; + + # cleanup old compressed copy + if ( $dest =~ /\.gz$/ ) { + warn "## remove old $dest\n"; + unlink $dest || confess "can't remove $dest: $!"; + $dest =~ s/\.gz$//; + confess "ASSERT: uncompressed $dest shouldn't exist!" if -e $dest; + } + + if ( $path =~ $skip_extensions_regex ) { + warn "$path [",-s $path,"] skipped compression\n"; + file_copy( '<', $path, '>', $dest ) if ( $path ne $dest ); + } elsif ( -s $path < $min_compress_size ) { + warn "$path [",-s $path,"] uncompressed, too small\n"; + file_copy( '<', $path, '>', $dest ) if ( $path ne $dest ); + } else { + warn "$path [",-s $path,"] compressing\n"; + + my $comp = $dest . '.gz'; + file_copy( '<', $path, '>:gzip', $comp ); + + my ( $size_path, $size_comp ) = ( -s $path, -s $comp ); + + if ( $size_path <= $size_comp ) { + warn ">>> $size_path <= $size_comp leaving uncompressed $dest\n"; + unlink $comp || confess "can't remove: $comp: $!"; + file_copy( '<', $path, '>', $dest ) if ( $path ne $dest ); + } else { + warn ">>> compressed $size_path -> $size_comp ",int(($size_comp * 100) / $size_path),"% $comp\n"; + + # FIXME add timeout to remove uncompressed version? + unlink $path || confess "can't remove $path: $!"; + + if ( -e $dest ) { + warn "## cleanup uncompressed $dest\n" if $debug; + unlink $dest || confess "can't remove $dest: $!"; + } + } + + } +} + +sub x_open { + my ($file) = shift; + my ($mode) = shift; + + if ( $file eq '/.debug' ) { + my $path = $mount->{from} . '/.debug'; + open( my $debug, '>', $path ) || die "can't open $path: $!"; + my $dump = dump( $pending ); + print $debug "pending = $dump\n"; + close($debug); + $pending->{'/.debug'}->{path} = $path; + warn "## created dump $path $dump\n"; + return 0; + } + + my $mode_desc = { + rdonly => $mode && O_RDONLY, + rdwr => $mode && O_RDWR, + append => $mode && O_APPEND, + create => $mode && O_CREAT, + trunc => $mode && O_TRUNC, + }; + + my $path = tmp_path( $file ); + + warn "## open( $file, $mode ) pending: ", $pending->{$file}->{open}, " mode $mode: ", dump( $mode_desc )," $path [", -s $path, "]\n" if $debug; + + my $fh; + my $rv = 0; + + if ( ! -w $path ) { + my $old_mode = (stat $path)[2]; + my $new_mode = $old_mode | 0600; + chmod $new_mode, $path || confess "can't chmod $new_mode $path"; + warn "### modify mode $old_mode -> $new_mode for $path\n"; + $pending->{$file}->{mode} = $old_mode; + } + + if ( sysopen($fh , $path, $mode) ) { + close($fh) || confess "can't close $path: $!"; + warn "<<< sysopen $path [", -e $path ? -s $path : 'new' , "]\n"; + $pending->{$file}->{open}++; + } else { + warn "ERROR: can't open $path -- $!"; + $rv = -$!; + } + + return $rv; + } sub x_read { my ($file,$bufsize,$off) = @_; my ($rv) = -ENOSYS(); my $path = fixup( $file ); + + confess "no pending file $file ", dump( $pending ) unless defined( $pending->{$file} ); + return -ENOENT() unless -e $path; - my ($fsize) = -s $path; - my $fh = $pending->{$file}->{fh} || die "no fh? ", dump( $pending ); + + my $fh = new IO::File; + return -ENOSYS() unless open($fh,$pending->{$file}->{path}); + if(seek($fh,$off,SEEK_SET)) { read($fh,$rv,$bufsize); + $pending->{$file}->{read} += length($rv) if $stats; } + return $rv; } sub x_write { my ($file,$buf,$off) = @_; - $pending->{$file}->{write}++; - my ($rv); + + my $rv; my $path = fixup($file); + + confess "no pending file $file ", dump( $pending ) unless defined( $pending->{$file} ); + return -ENOENT() unless -e $path; - my ($fsize) = -s $path; - my $fh = $pending->{$file}->{fh}; - return -ENOSYS() unless $fh; + + $path = $pending->{$file}->{path} || confess "no path for $file in ", dump( $pending ); + confess "write into non-existant $path for $file" unless -e $path; + + my $fh = new IO::File; + return -ENOSYS() unless open($fh,'+<',$path); if($rv = seek( $fh ,$off,SEEK_SET)) { $rv = print( $fh $buf ); + my $size = length($buf); + warn "## write $path offset $off [$size]\n" if $debug; + $pending->{$file}->{write} += $size; } $rv = -ENOSYS() unless $rv; + close($fh) || warn "can't close $path: $!"; return length($buf); } sub err { return (-shift || -$!) } -sub x_readlink { return readlink(fixup(shift)); } -sub x_unlink { return unlink(fixup(shift)) ? 0 : -$!; } +sub x_readlink { return readlink(fixup(shift)); } + +sub x_unlink { + my $file = shift; + my $path = fixup( $file ); + + if ( $file =~ m#\Q/.fuse_hidden\E# ) { + return unlink $path ? 0 : -$1; + } + + warn "# unlink( $file )\n"; + + unlink $path || return 0; + + my $tmp = $mount->{tmp} . '/' . $file; + unlink $tmp if ( -e $tmp ); + + delete( $pending->{$file} ); + return 0; +} + +sub x_symlink { + my ($from,$to) = @_; + + my $from_path = $from; #fixup( $from ); + my $to_path = fixup( $to ); + + my $rv = symlink( $from_path, $to_path ) ? 0 : -$!; + warn "# symlink( $from_path -> $to_path ) = $rv\n" if $debug; + + my $tmp = $mount->{tmp} . '/' . $from; + my $tmp_to = $mount->{tmp} . '/' . $to; + if ( $rv == 0 && -e $tmp_to ) { + symlink( $tmp, $tmp_to ) || confess "can't symlink $tmp -> $tmp_to: $!"; + } + return $rv; +} + +sub x_link { + my ($from,$to) = @_; + + my $from_path = fixup($from); + my $to_path = fixup($to); + $to_path .= '.gz' if ( $from_path =~ m/\.gz$/ && $to_path !~ m/\.gz$/ ); -sub x_symlink { return symlink(shift,fixup(shift)) ? 0 : -$!; } + my $rv = link( $from_path, $to_path ) ? 0 : -$!; + + warn "# link( $from_path -> $to_path ) = $rv\n" if $debug; + + return $rv; +} sub x_rename { - my ($old) = fixup(shift); - my ($new) = fixup(shift); - my ($err) = rename($old,$new) ? 0 : -ENOENT(); + my ($old,$new) = @_; + my $old_path = fixup($old); + my $new_path = fixup($new); + $new_path .= '.gz' if ( $old_path =~ m/\.gz$/ && $new_path !~ m/\.gz$/ ); + + my $err = rename($old_path,$new_path) ? 0 : -ENOENT(); + warn "## rename( $old_path => $new_path ) = $err\n"; + + my $tmp = $mount->{tmp} . '/' . $old; + if ( -e $tmp ) { + if ( $new =~ m#\Q/.fuse_hidden\E# ) { + unlink $tmp || confess "can't unlink $tmp for $new"; + } else { + my $new_tmp = $mount->{tmp} . '/' . $new; + rename $tmp, $new_tmp || confess "can't rename $tmp -> $new_tmp : $!"; + } + } + + if (defined( $pending->{$old} )) { + $pending->{$new} = $pending->{$old}; + + my $path = $pending->{$old}->{path}; + $path =~ s/\Q$old\E/$new/; + $pending->{$new}->{path} = $path; + + delete( $pending->{$old} ); + warn "## tweaking pending to ", dump( $pending ) if $debug; + } + return $err; } -sub x_link { return link(fixup(shift),fixup(shift)) ? 0 : -$! } sub x_chown { - my ($path) = fixup(shift); + my ($file,$uid,$gid) = @_; + my $path = fixup($file); print "nonexistent $path\n" unless -e $path; - my ($uid,$gid) = @_; # perl's chown() does not chown symlinks, it chowns the symlink's # target. it fails when the link's target doesn't exist, because # the stat64() syscall fails. # this causes error messages when unpacking symlinks in tarballs. my ($err) = syscall(&SYS_lchown,$path,$uid,$gid,$path) ? -$! : 0; + + my $tmp = $mount->{tmp} . '/' . $file; + syscall(&SYS_lchown,$file,$uid,$gid,$path) if -e $tmp; + return $err; } @@ -188,7 +435,19 @@ return $err; } -sub x_truncate { return truncate(fixup(shift),shift) ? 0 : -$! ; } +sub x_truncate { + my ( $file,$size ) = @_; + + #confess "no pending file $file to truncate in ", dump( $pending ) unless defined( $pending->{$file} ); + + my $path = tmp_path( $file ); + my $rv = truncate( $path, $size ) ? 0 : -$! ; + warn "## truncate( $file $size ) $path [", -s $path, "] = $rv\n" if $debug; + compress_file2path( $file, $path ); + + return $rv; +} + sub x_utime { return utime($_[1],$_[2],fixup($_[0])) ? 0:-$!; } sub x_mkdir { my ($name, $perm) = @_; return 0 if mkdir(fixup($name),$perm); return -$!; } @@ -206,38 +465,33 @@ sub x_release { my ( $file, $mode ) = @_; + if ( ! defined( $pending->{$file} ) ) { warn "release $file, NO PENDING DATA\n"; return 0; } elsif ( ! defined( $pending->{$file}->{write} ) ) { warn "release $file, not written into\n"; - } elsif ( defined( $pending->{$file}->{open} ) && $pending->{$file}->{open} == 1 ) { - close( $pending->{$file}->{fh} ) || warn "can't close $file: $!"; - if ( $file =~ $skip_extensions_regex ) { - warn "release $file $mode -- uncompressed\n"; - } else { - warn "release $file $mode -- compressing\n"; + } elsif ( $file =~ m#\Q/.fuse_hidden\E# ) { + warn "release internal $file\n" if $debug; + } else { + compress_file2path( $file ); + } - my $path = $pending->{$file}->{path} || die "no path for $file ? ", dump( $pending ); - my $dest = fixup( $file ); + $pending->{$file}->{open}--; + if ( $pending->{$file}->{open} == 0 ) { - if ( $dest =~ /\.gz$/ ) { - warn "## remove old $dest\n"; - unlink $dest || die "can't remove $dest: $!"; - $dest =~ s/\.gz$//; - } + my $path = fixup( $file ); - file_copy( '<', $path, '>:gzip', $dest . '.gz' ); + if ( my $old_mode = $pending->{$file}->{mode} ) { + chmod $old_mode, $path || confess "can't chmod $old_mode $path"; + warn "### restored mode $old_mode $path\n"; - # FIXME add timeout to remove uncompressed version? - unlink $path || warn "can't remove $path: $!"; } - } else { - warn "release $file, but still used ", $pending->{$file}->{open} , " times, delaying compression\n"; - $pending->{$file}->{open}--; - return 0; + + warn "## cleanup pending $file [", -s $path, "]\n" if $debug; + delete( $pending->{$file} ); } - delete( $pending->{$file} ); + return 0; } @@ -265,5 +519,5 @@ statfs =>"main::x_statfs", release =>"main::x_release", # threaded=>1, -# debug => 1, + debug => $fuse_debug, );