--- trunk/PlusPlus.pm 2004/12/04 17:49:20 8 +++ trunk/PlusPlus.pm 2004/12/05 00:59:50 9 @@ -4,7 +4,7 @@ use strict; use warnings; -our $VERSION = '0.02'; +our $VERSION = '0.03'; use Carp; use File::Temp qw/ tempdir /; @@ -40,6 +40,8 @@ index => 'index++', search => 'search++', debug => 1, + meta_in_body => 1, + use_stopwords => 1, ); Options to open are following: @@ -65,6 +67,16 @@ This option (off by default) will produce a lot of debugging output on C prefixed by C<##>. +=item C<meta_in_body> + +This option (off by default) enables searching the content of meta fields +without specifying them (as if they were in the body of the document). This will +somewhat increase index size. + +=item C<use_stopwords> + +Use built-in SWISH++ stop words. By default, they are disabled. + =back =cut @@ -155,6 +167,32 @@ return 1; } +=head2 add + +Add document with metadata to index. + + $i->add( + path => 'path/to/document', + title => 'this is result title', + meta => { + description => 'this is description meta tag', + date => '2004-11-04', + author => 'Dobrica Pavlinusic', + }, + body => 'this is text without meta data', + ); + +This is a thin wrapper around L<_create_doc>. + +=cut + +sub add { + my $self = shift; + + $self->_create_doc(@_); + + return 1; +} =head2 search Search your index. @@ -207,28 +245,39 @@ swish++. You should have no need to call them directly, and they are here just to have documentation. -=head2 _init_index +=head2 _init_indexer Create temporary directory in which files for indexing will be created and start index process. - my $i->_init_index || die "can't start indexer"; + $i->_init_indexer || die "can't start indexer"; + +It will also create an empty file C<_stopwords_> to disable stop words. 
=cut -sub _init_index { +sub _init_indexer { my $self = shift; $self->{'tmp_dir'} = tempdir( CLEANUP => 1 ) || confess "can't create temporary directory: $!"; + chdir $self->{'tmp_dir'} || confess "can't chdir to ".$self->{'tmp_dir'}.": $!"; + my $opt = "-v 4"; + unless ($self->{'use_stopwords'}) { + CORE::open(STOP, '>', "_stopwords_") || carp "can't create empty stopword file, skipping\n"; + print STOP " "; + close(STOP); + $opt .= " -s _stopwords_"; + } + my $open_cmd = '| '.$self->{'index'}.' '.$opt.' -e "html:*" -i '.$self->{'index_dir'}.'/index -'; - chdir $self->{'tmp_dir'} || confess "can't chdir to ".$self->{'tmp_dir'}.": $!"; CORE::open($self->{'index_fh'}, $open_cmd) || confess "can't start index with $open_cmd: $!"; + return $self->{'index_fh'}; } @@ -238,6 +287,7 @@ $i->_create_doc( path => 'path/to/store/in/index', + title => 'this is title in results', body => 'data to story in body tag', meta => { 'meta name' => 'data for this meta', @@ -255,19 +305,26 @@ my $arg = {@_}; # open indexer if needed - $self->{'index_fh'} ||= $self->_init_index; + $self->{'index_fh'} ||= $self->_init_indexer; my $path = $self->{'tmp_dir'} || confess "no tmp_dir?"; CORE::open(TMP, '>', $arg->{'path'}) || die "can't create temp file ".$arg->{'path'}.": $!"; - print TMP ''; + print TMP ''; + + $arg->{'body'} ||= ''; if ($arg->{'meta'}) { confess "not yet implemented"; } - - print TMP '' . ($arg->{'body'} || '') . ''; + + if (defined($arg->{'title'})) { + print TMP '' . ($arg->{'title'} || '') . ''; + $arg->{'body'} .= " ".$arg->{'title'} if ($self->{'meta_in_body'}); + } + + print TMP '' . $arg->{'body'} . ''; close(TMP) || confess "can't close tmp file ".$arg->{'path'}.": $!";