--- trunk/PlusPlus.pm 2004/12/05 14:35:54 13 +++ trunk/PlusPlus.pm 2004/12/05 21:06:48 16 @@ -4,10 +4,11 @@ use strict; use warnings; -our $VERSION = '0.06'; +our $VERSION = '0.10'; use Carp; use File::Temp qw/ tempdir /; +use BerkeleyDB; #use YAML; =head1 NAME @@ -92,8 +93,11 @@ my $index_dir = $self->{'index_dir'}; + my $cwd; + chomp($cwd = `pwd`); + $self->{'cwd'} = $cwd || carp "can't get cwd!"; + if ($index_dir !~ m#^/#) { - chomp(my $cwd = `pwd`); $index_dir = "$cwd/$index_dir"; print STDERR "## full path to index_dir: $index_dir\n" if ($self->{'debug'}); $self->{'index_dir'} = $index_dir; @@ -143,6 +147,7 @@ confess $self->{'search'}," binary is not SWISH++" unless ($s =~ m/^SWISH\+\+/); if ($i eq $s) { + $i =~ s/^SWISH\+\+\s+// || confess "can't strip SWISH++ from version"; $self->{'version'} = $i; return 1; } else { @@ -217,22 +222,28 @@ my $query = shift || return; - $self->_close_index; + $self->finish_update; + $self->_tie_meta_db(DB_RDONLY); my @results; # escape double quotes in query for shell $query =~ s/"/\\"/g; - my $open_cmd = $self->{'search'}." -i ".$self->{'index_dir'}.'/index "'.$query.'" |'; - print STDERR "## search $open_cmd\n" if ($self->{'debug'}); + my $open_cmd = $self->{'search'} . + ' -i ' . $self->{'index_dir'}.'/index' . + ' "' . $query . '"'. + ' |'; + print STDERR "## search: $open_cmd\n" if ($self->{'debug'}); open(SEARCH, $open_cmd) || confess "can't start $open_cmd: $!"; - while() { - next if (/^#/); - chomp; - print STDERR "## $_\n" if ($self->{'debug'}); - my ($rank,$path,$size,$title) = split(/ /,$_,4); + my $l; + while($l = ) { + next if ($l =~ /^#/); + chomp($l); + print STDERR "## $l\n" if ($self->{'debug'}); + my ($rank,$path,$size,$title) = split(/ /,$l,4); + $path =~ s#^\./##; # strip from path push @results, { rank => $rank, path => $path, @@ -248,6 +259,30 @@ return @results; } +=head2 property + +Return stored meta property from result or result path. + + print $i->property('path', 'title'); + print $i->property($res->{'path'}, 'title'); + +=cut + +sub property { + my $self = shift; + + my ($path,$meta) = @_; + + if ($path =~ m/^HASH/) { + $path = $path->{'path'} || confess "can't find path in input data"; + } + + my $val = $self->{'meta_db'}->{"$path-$meta"}; + + print STDERR "## property $path-$meta: ",($val || 'undef'),"\n" if ($self->{'debug'}); + return $val; +} + =head2 finish_update This method will close index. @@ -261,7 +296,9 @@ sub finish_update { my $self = shift; - $self->_close_index; + print STDERR "## finish_update\n" if ($self->{'debug'}); + + $self->_close_index && $self->_untie_meta_db; } sub DESTROY { @@ -289,9 +326,14 @@ sub _init_indexer { my $self = shift; - $self->{'tmp_dir'} = tempdir( CLEANUP => 1 ) || confess "can't create temporary directory: $!"; + return if ($self->{'_index_fh'}); + + my $tmp_dir = tempdir( CLEANUP => 1 ) || confess "can't create temporary directory: $!"; + $self->{'tmp_dir'} = $tmp_dir; + + chdir $tmp_dir || confess "can't chdir to ".$tmp_dir.": $!"; - chdir $self->{'tmp_dir'} || confess "can't chdir to ".$self->{'tmp_dir'}.": $!"; + print STDERR "## tmp_dir: $tmp_dir" if ($self->{'debug'}); my $opt = "-v " . ($self->{'debug'} || '0'); @@ -302,14 +344,73 @@ $opt .= " -s _stopwords_"; } - my $open_cmd = '| '.$self->{'index'}.' '.$opt.' -e "html:*" -i '.$self->{'index_dir'}.'/index -'; + my $index_dir = $self->{'index_dir'} || confess "no index_dir?"; - print STDERR "## open index $open_cmd\n" if ($self->{'index'}); + my $open_cmd = '| '.$self->{'index'}.' '.$opt.' -e "html:*" -i '.$index_dir.'/index -'; - open($self->{'index_fh'}, $open_cmd) || confess "can't start index with $open_cmd: $!"; + print STDERR "## init_indexer: $open_cmd\n" if ($self->{'debug'}); + open($self->{'_index_fh'}, $open_cmd) || confess "can't start index with $open_cmd: $!"; - return $self->{'index_fh'}; + chdir $self->{'cwd'} || confess "can't chdir to ".$self->{'cwd'}.": $!"; + + $self->_tie_meta_db(DB_CREATE); + + return $self->{'_index_fh'}; +} + +=head2 _tie_meta_db + +Open BerkeleyDB database with meta properties. + + $i->_tie_meta_db(DB_CREATE); + $i->_tie_meta_db(DB_RDONLY); + +} + +=cut + +sub _tie_meta_db { + my $self = shift; + + my $flags = shift || confess "need DB_CREATE or DB_RDONLY"; + + return if ($self->{'_meta_db_flags'} && $self->{'_meta_db_flags'} == $flags); + + print STDERR "## _tie_meta_db($flags)\n" if ($self->{'debug'}); + + $self->_untie_meta_db; + $self->{'_meta_db_flags'} = $flags; + + my $file = $self->{'index_dir'}.'/meta.db'; + + tie %{$self->{'meta_db'}}, "BerkeleyDB::Hash", + -Filename => $file, + -Flags => $flags + or confess "cannot open $file: $! $BerkeleyDB::Error\n" ; + + return 1; +} + +=head2 _untie_meta_db + +Close BerkeleyDB database with meta properties. + + $i->_untie_meta_db + +=cut + +sub _untie_meta_db { + my $self = shift; + + return unless ($self->{'meta_db'}); + + print STDERR "## _untie_meta_db\n" if ($self->{'debug'}); + untie %{$self->{'meta_db'}} || confess "can't untie!"; + undef $self->{'meta_db'}; + undef $self->{'_meta_db_flags'}; + + return 1; } =head2 _create_doc @@ -336,11 +437,15 @@ my $arg = {@_}; # open indexer if needed - $self->{'index_fh'} ||= $self->_init_indexer; + $self->_init_indexer; my $path = $self->{'tmp_dir'} || confess "no tmp_dir?"; + my $id = $arg->{'path'} || confess "no path?"; + $path .= "/$id"; - open(TMP, '>', $arg->{'path'}) || die "can't create temp file ".$arg->{'path'}.": $!"; + print STDERR "## _create_doc: $path\n" if ($self->{'debug'}); + + open(TMP, '>', $path) || die "can't create temp file $path: $!"; print TMP ''; @@ -351,19 +456,22 @@ my $content = $arg->{'meta'}->{$name}; print TMP qq{}; $arg->{'body'} .= " $content" if ($self->{'meta_in_body'}); + $self->{'meta_db'}->{"$id-$name"} = $content; } } - if (defined($arg->{'title'})) { - print TMP '' . ($arg->{'title'} || '') . ''; - $arg->{'body'} .= " ".$arg->{'title'} if ($self->{'meta_in_body'}); + my $title = $arg->{'title'}; + if (defined($title)) { + print TMP "$title"; + $arg->{'body'} .= " $title" if ($self->{'meta_in_body'}); + $self->{'meta_db'}->{"$id-title"} = $title; } print TMP '' . $arg->{'body'} . ''; close(TMP) || confess "can't close tmp file ".$arg->{'path'}.": $!"; - print { $self->{'index_fh'} } $arg->{'path'}."\n"; + print { $self->{'_index_fh'} } "$id\n"; } =head2 _close_index @@ -379,12 +487,14 @@ sub _close_index { my $self = shift; - return unless ($self->{'index_fh'}); + return unless ($self->{'_index_fh'}); print STDERR "## close index\n" if ($self->{'debug'}); - close($self->{'index_fh'}); - undef $self->{'index_fh'}; + close($self->{'_index_fh'}) || confess "can't close index: $!"; + undef $self->{'_index_fh'}; + + return 1; } 1;