/[SWISH-PlusPlus]/trunk/PlusPlus.pm
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/PlusPlus.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 3 by dpavlin, Fri Dec 3 15:23:23 2004 UTC revision 5 by dpavlin, Fri Dec 3 21:48:15 2004 UTC
# Line 7  use warnings; Line 7  use warnings;
7  our $VERSION = '0.02';  our $VERSION = '0.02';
8    
9  use Carp;  use Carp;
10    use File::Temp qw/ tempdir /;
11    
12  =head1 NAME  =head1 NAME
13    
# Line 121  sub check_bin { Line 122  sub check_bin {
122    
123  }  }
124    
125    =head2 index_document
126    
127    Quick way to add simple data to the index.
128    
129      $i->index_document($key, $data);
130      $i->index_document( 42 => 'meaning of life' );
131    
132    =cut
133    
134    sub index_document {
135            my $self = shift;
136    
137            my %doc = @_;
138    
139            foreach my $id (keys %doc) {
140                    $self->_create_doc(
141                            path => $id,
142                            body => $doc{$id},
143                    );
144            }
145    
146            return 1;
147    }
148    
149    =head1 PRIVATE METHODS
150    
151    Private methods implement the internals for creating the temporary files
152    needed by swish++. You should have no need to call them directly; they are
153    listed here only so that they are documented.
154    
155    =head2 _init_index
156    
157    Create a temporary directory in which files for indexing will be created
158    and start the index process.
159    
160      $i->_init_index || die "can't start indexer";
161    
162    =cut
163    
164    sub _init_index {
165            my $self = shift;
166    
167            $self->{'tmp_dir'} = tempdir( CLEANUP => 1 ) || confess "can't create temporary directory: $!";
168    
169            my $opt = "-v 4";
170    
171            my $open_cmd = '| index '.$opt.' -e "html:*" -i '.$self->{'index_dir'}.'/index -';
172    
173            chdir $self->{'tmp_dir'} || confess "can't chdir to ".$self->{'tmp_dir'}.": $!";
174    
175            CORE::open($self->{'index_fh'}, $open_cmd) || confess "can't start index with $open_cmd: $!";
176    
177            return $self->{'index_fh'};
178    }
179    
180    =head2 _create_doc
181    
182    Create a temporary file and pass its name to swish++
183    
184      $i->_create_doc(
185            path => 'path/to/store/in/index',
186            body => 'data to store in body tag',
187            meta => {
188                    'meta name' => 'data for this meta',
189                    'another' => 'again more data',
190            }
191      );
192    
193    =cut
194    
195    sub _create_doc {
196            my $self = shift;
197    
198            my $arg = {@_};
199    
200            # open indexer if needed
201            $self->{'index_fh'} ||= $self->_init_index;
202    
203            my $path = $self->{'tmp_dir'} || confess "no tmp_dir?";
204    
205            CORE::open(TMP, '>', $arg->{'path'}) || die "can't create temp file ".$arg->{'path'}.": $!";
206    
207            print TMP '<html>';
208    
209            if ($arg->{'meta'}) {
210                    confess "not yet implemented";
211            }
212            
213            print TMP '<body>' . ($arg->{'body'} || '') . '</body></html>';
214            
215            close(TMP) || confess "can't close tmp file ".$arg->{'path'}.": $!";
216    
217            print { $self->{'index_fh'} } $arg->{'path'}."\n";
218    }
219    
220  1;  1;
221  __END__  __END__
222    
# Line 138  uses executable names B<index> and B<sea Line 234  uses executable names B<index> and B<sea
234  instead of one from Debian package. See L<open> how to specify Debian  instead of one from Debian package. See L<open> how to specify Debian
235  default binaries B<index++> and B<search++>.  default binaries B<index++> and B<search++>.
236    
237  =head1 SEE ALSO  =head2 SWISH++
238    
239  Mention other useful documentation such as the documentation of  Aside from being a very good rewrite in C++, SWISH++ is faster because it has
240  related modules or operating system documentation (such as man pages  clever heuristics about which data in input files are words to index and
241  in UNIX), or any relevant external documentation such as RFCs or  which are not. It's based on the English language and might be the best choice if
242  standards.  you plan to index a large number of long text documents.
243    
244    However, if you plan to index all data from structured storage (e.g. RDBMS)
245    you might want B<all> words from data to end up in index as opposed to just
246    those which look like English words. This is especially important if you
247    don't plan to index English texts with this module.
248    
249    With distribution-built versions of SWISH++ you might have problems with
250    disappearing words. To overcome this problem, you will have to compile and
251    configure SWISH++ yourself (because language characteristics are a
252    compile-time option).
253    
254    Compilation of SWISH++ is an easy process, well described on the project's web
255    pages. To see my very relaxed sample configuration, take a look at the C<swish++>
256    directory included in the distribution.
257    
258  If you have a mailing list set up for your module, mention it here.  =head1 SEE ALSO
259    
260  If you have a web site set up for your module, mention it here.  C<swish++> web site L<http://homepage.mac.com/pauljlucas/software/swish/>
261    
262  =head1 AUTHOR  =head1 AUTHOR
263    
264  Dobrica Pavlinusic, E<lt>dpavlin@E<gt>  Dobrica Pavlinusic, E<lt>dpavlin@rot13.orgE<gt>
265    
266  =head1 COPYRIGHT AND LICENSE  =head1 COPYRIGHT AND LICENSE
267    

Legend:
Removed from v.3  
changed lines
  Added in v.5

  ViewVC Help
Powered by ViewVC 1.1.26