/[webpac]/trunk2/lib/WebPAC/jsFind.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk2/lib/WebPAC/jsFind.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 491 - (hide annotations)
Sat Oct 9 21:47:42 2004 UTC (16 years, 7 months ago) by dpavlin
File size: 3970 byte(s)
lowercase terms before insertion into jsFind index, refactored code a bit

1 dpavlin 390 package WebPAC::jsFind;
2    
3     use warnings;
4     use strict;
5    
6     use Carp;
7 dpavlin 406 use jsFind 0.04;
8 dpavlin 390 use Log::Log4perl qw(get_logger :levels);
9    
10     =head1 NAME
11    
12     WebPAC::jsFind - create jsFind index instead of swish-e
13    
14     =head1 DESCRIPTION
15    
16     This module creates a jsFind index, which is a static B-Tree index
17     searchable by JavaScript. It's very useful if you want to build a
18     CD-ROM with static content and a search engine.
19    
20     =head1 METHODS
21    
=head2 new

Construct a new index object.

    my $index = WebPAC::jsFind->new(
        index_path => '/path/to/jsFind/index',
        keys => 10,
        log => 'log4perl.conf',
    );

C<index_path> is the directory under which the jsFind index will be
written. It is mandatory; the constructor dies with a stack trace
(C<confess>) when it is missing or false.

C<keys> is an optional parameter giving the number of keys in each node
(which has to be an even number). Default is 10.

C<log> is an optional parameter naming the L<Log::Log4perl> configuration
file. Default is C<log.conf>.

=cut

sub new {
    my ($class, @options) = @_;

    # All named options are stored verbatim as object fields.
    my $self = bless { @options }, $class;

    unless ($self->{'index_path'}) {
        confess "need index_path argument!";
    }

    # Logging is (re)initialised from the config file on every construction.
    Log::Log4perl->init($self->{'log'} || "log.conf");

    return $self;
}
54    
=head2 tree

Create or retrieve the jsFind tree object for a named index

    my $tree = $index->tree('index_name');

The tree is created on first use, with C<keys> keys per node (10 when
C<keys> was not given to the constructor), and is cached in the object,
so later calls with the same name return the same tree.

Dies with a stack trace (C<confess>) if no index name is given.

=cut

sub tree {
    my $self = shift;

    my $index_name = shift || confess "need index name!";

    if (! $self->{'tree'}->{$index_name}) {
        # Explicit class-method call: the indirect form "new jsFind(...)"
        # relies on parser heuristics and is fragile in a package that
        # defines its own new().
        $self->{'tree'}->{$index_name} = jsFind->new(B => $self->{keys} || 10);

        my $log = $self->_get_logger();
        $log->debug("tree object $index_name created");
    }

    return $self->{'tree'}->{$index_name};
}
78    
=head2 insert

Insert data into index

    $index->insert(
        index_name => 'index_name',
        path => 'path',
        headline => 'headline text',
        words => 'words to insert into index'
    );

C<index_name> and C<path> are mandatory; the method dies with a stack
trace (C<confess>) without them. A missing C<headline> produces a warning
and is replaced by C<no headline: path>. If C<words> is undefined nothing
is inserted.

Words are lowercased and split on whitespace. Each distinct word is
inserted once, carrying the headline (C<t>) and the number of times the
word occurred in C<words> (C<f>).

=cut

sub insert {
    my $self = shift;

    my $args = {@_};

    my $log = $self->_get_logger();

    confess "need index name" unless ($args->{'index_name'});
    confess "need path" unless ($args->{'path'});
    if (! $args->{'headline'}) {
        carp "no headline for ",$args->{'path'}," ?";
        $args->{'headline'} = "no headline: ".$args->{'path'};
    }
    return unless (defined($args->{'words'}));

    my $words = lc($args->{'words'});

    # chop leading and trailing spaces
    $words =~ s/^\s+//;
    $words =~ s/\s+$//;

    # Count occurrences and remember each distinct word in first-seen
    # order, so a repeated word is sent to the index once instead of once
    # per occurrence.
    my %usage;
    my @unique_words;
    foreach my $word (split(/\s+/, $words)) {
        push @unique_words, $word unless $usage{$word}++;
    }

    $log->debug("inserting '$words'",
        " into index ",$args->{'index_name'},
        " headline: ",$args->{'headline'},
        " path: ",$args->{'path'}
    );

    # Do not touch (and thereby create) the tree when there is nothing
    # to insert.
    return unless (@unique_words);

    my $tree = $self->tree($args->{'index_name'});

    foreach my $word (@unique_words) {
        $tree->B_search(
            Key => $word,
            Data => { $args->{'path'} => {
                    t => $args->{'headline'},
                    f => $usage{$word},
                },
            },
            Insert => 1,
            Append => 1,
        );
    }

    return;
}
140    
=head2 close

This method will dump indexes to disk.

    $index->close;

For every tree created so far, the index is stored as jsFind xml files
under C<index_path/index_name>.

Turning on debugging for this function by inserting

    log4perl.logger.WebPAC.jsFind.close=DEBUG

into C<log.conf> will also result in creation of GraphViz C<.dot> files
for each index in the current directory. Failure to open, write or close
a C<.dot> file is fatal (C<logdie>).

=cut

sub close {
    my $self = shift;

    my $log = $self->_get_logger();

    foreach my $index_name (keys %{$self->{'tree'}}) {
        my $path = $self->{'index_path'}."/".$index_name;

        $log->debug("saving index '$index_name' xml files to '$path'");

        $self->tree($index_name)->to_jsfind($path,'ISO-8859-2','UTF-8');

        # GraphViz dump is produced only when debugging is enabled.
        next unless ($log->is_debug());

        my $dot_file = $index_name.".dot";

        $log->debug("saving graphviz file for '$index_name' to '$dot_file'");

        # Lexical handle instead of the global bareword DOT.
        open(my $dot_fh, ">", $dot_file) || $log->logdie("can't open '$dot_file': $!");
        print {$dot_fh} $self->tree($index_name)->to_dot
            or $log->logdie("can't write '$dot_file': $!");

        # CORE:: is spelled out because this package defines its own
        # close(); buffered write errors surface here, so check it.
        CORE::close($dot_fh) || $log->logdie("can't close '$dot_file': $!");
    }

    return;
}
183    
184     #
185    
186     =head1 INTERNAL METHODS
187    
188     You shouldn't call these methods directly.
189    
=head2 _get_logger

Return a C<Log::Log4perl> logger whose category is the fully qualified
name of the subroutine that called this method, so logging can be
configured separately for each method. When there is no calling
subroutine, the caller's package name is used instead.

    my $log = $webpac->_get_logger();

=cut

sub _get_logger {
    my ($self) = @_;

    # Element 3 of the frame one level up is the calling sub's name.
    my $category = (caller(1))[3];
    $category ||= caller;

    return get_logger($category);
}
205     1;

  ViewVC Help
Powered by ViewVC 1.1.26