/[SWISH-PlusPlus]/trunk/PlusPlus.pm
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/PlusPlus.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 8 by dpavlin, Sat Dec 4 17:49:20 2004 UTC revision 9 by dpavlin, Sun Dec 5 00:59:50 2004 UTC
# Line 4  use 5.008004; Line 4  use 5.008004;
4  use strict;  use strict;
5  use warnings;  use warnings;
6    
7  our $VERSION = '0.02';  our $VERSION = '0.03';
8    
9  use Carp;  use Carp;
10  use File::Temp qw/ tempdir /;  use File::Temp qw/ tempdir /;
# Line 40  Create new indexing object. Line 40  Create new indexing object.
40          index => 'index++',          index => 'index++',
41          search => 'search++',          search => 'search++',
42          debug => 1,          debug => 1,
43            meta_in_body => 1,
44            use_stopwords => 1,
45    );    );
46    
47  Options to open are following:  Options to open are following:
# Line 65  Full or partial path to SWISH++ search e Line 67  Full or partial path to SWISH++ search e
67  This option (off by default) will produce a lot of debugging output on  This option (off by default) will produce a lot of debugging output on
68  C<STDERR> prefixed by C<##>.  C<STDERR> prefixed by C<##>.
69    
70    =item C<meta_in_body>
71    
72    This option (off by default) enables searching the content of meta fields
73    without specifying them (as if they were in the body of the document). This will
74    somewhat increase index size.
75    
76    =item C<use_stopwords>
77    
78    Use built-in SWISH++ stop words. By default, they are disabled.
79    
80  =back  =back
81    
82  =cut  =cut
# Line 155  sub index_document { Line 167  sub index_document {
167          return 1;          return 1;
168  }  }
169    
170    =head2 add
171    
172    Add document with metadata to index.
173    
174      $i->add(
175            path => 'path/to/document',
176            title => 'this is result title',
177            meta => {
178                    description => 'this is description meta tag',
179                    date => '2004-11-04',
180                    author => 'Dobrica Pavlinusic',
181            },
182            body => 'this is text without meta data',
183      );
184    
185    This is a thin wrapper around L<_create_doc>.
186    
187    =cut
188    
189    sub add {
190            my $self = shift;
191    
192            $self->_create_doc(@_);
193    
194            return 1;
195    }
196  =head2 search  =head2 search
197    
198  Search your index.  Search your index.
# Line 207  Private methods implement internals for Line 245  Private methods implement internals for
245  swish++. You should have no need to call them directly, and they are here  swish++. You should have no need to call them directly, and they are here
246  just to have documentation.  just to have documentation.
247    
248  =head2 _init_index  =head2 _init_indexer
249    
250  Create temporary directory in which files for indexing will be created and  Create temporary directory in which files for indexing will be created and
251  start index process.  start index process.
252    
253    my $i->_init_index || die "can't start indexer";    my $i->_init_indexer || die "can't start indexer";
254    
255    It will also create an empty file C<_stopwords_> to disable stop words.
256    
257  =cut  =cut
258    
259  sub _init_index {  sub _init_indexer {
260          my $self = shift;          my $self = shift;
261    
262          $self->{'tmp_dir'} = tempdir( CLEANUP => 1 ) || confess "can't create temporary directory: $!";          $self->{'tmp_dir'} = tempdir( CLEANUP => 1 ) || confess "can't create temporary directory: $!";
263    
264            chdir $self->{'tmp_dir'} || confess "can't chdir to ".$self->{'tmp_dir'}.": $!";
265    
266          my $opt = "-v 4";          my $opt = "-v 4";
267    
268            unless ($self->{'use_stopwrods'}) {
269                    CORE::open(STOP, '>', "_stopwords_") || carp "can't create empty stopword file, skipping\n";
270                    print STOP "  ";
271                    close(STOP);
272                    $opt .= " -s _stopwords_";
273            }
274    
275          my $open_cmd = '| '.$self->{'index'}.' '.$opt.' -e "html:*" -i '.$self->{'index_dir'}.'/index -';          my $open_cmd = '| '.$self->{'index'}.' '.$opt.' -e "html:*" -i '.$self->{'index_dir'}.'/index -';
276    
         chdir $self->{'tmp_dir'} || confess "can't chdir to ".$self->{'tmp_dir'}.": $!";  
277    
278          CORE::open($self->{'index_fh'}, $open_cmd) || confess "can't start index with $open_cmd: $!";          CORE::open($self->{'index_fh'}, $open_cmd) || confess "can't start index with $open_cmd: $!";
279    
280    
281          return $self->{'index_fh'};          return $self->{'index_fh'};
282  }  }
283    
# Line 238  Create temporary file and pass it's name Line 287  Create temporary file and pass it's name
287    
288    $i->_create_doc(    $i->_create_doc(
289          path => 'path/to/store/in/index',          path => 'path/to/store/in/index',
290            title => 'this is title in results',
291          body => 'data to store in body tag',          body => 'data to store in body tag',
292          meta => {          meta => {
293                  'meta name' => 'data for this meta',                  'meta name' => 'data for this meta',
# Line 255  sub _create_doc { Line 305  sub _create_doc {
305          my $arg = {@_};          my $arg = {@_};
306    
307          # open indexer if needed          # open indexer if needed
308          $self->{'index_fh'} ||= $self->_init_index;          $self->{'index_fh'} ||= $self->_init_indexer;
309    
310          my $path = $self->{'tmp_dir'} || confess "no tmp_dir?";          my $path = $self->{'tmp_dir'} || confess "no tmp_dir?";
311    
312          CORE::open(TMP, '>', $arg->{'path'}) || die "can't create temp file ".$arg->{'path'}.": $!";          CORE::open(TMP, '>', $arg->{'path'}) || die "can't create temp file ".$arg->{'path'}.": $!";
313    
314          print TMP '<html>';          print TMP '<html><head>';
315    
316            $arg->{'body'} ||= '';
317    
318          if ($arg->{'meta'}) {          if ($arg->{'meta'}) {
319                  confess "not yet implemented";                  confess "not yet implemented";
320          }          }
321            
322          print TMP '<body>' . ($arg->{'body'} || '') . '</body></html>';          if (defined($arg->{'title'})) {
323                    print TMP '<title>' . ($arg->{'title'} || '') . '</title>';
324                    $arg->{'body'} .= " ".$arg->{'title'} if ($self->{'meta_in_body'});
325            }
326    
327            print TMP '</head><body>' . $arg->{'body'} . '</body></html>';
328                    
329          close(TMP) || confess "can't close tmp file ".$arg->{'path'}.": $!";          close(TMP) || confess "can't close tmp file ".$arg->{'path'}.": $!";
330    

Legend:
Removed from v.8  
changed lines
  Added in v.9

  ViewVC Help
Powered by ViewVC 1.1.26