/[webpac]/trunk2/lib/WebPAC/jsFind.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk2/lib/WebPAC/jsFind.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 706 - (show annotations)
Wed Jul 13 23:35:59 2005 UTC (18 years, 8 months ago) by dpavlin
File size: 4107 byte(s)
small doc fix

1 package WebPAC::jsFind;
2
3 use warnings;
4 use strict;
5
6 use Carp;
7 use jsFind 0.06;
8 use Log::Log4perl qw(get_logger :levels);
9
10 =head1 NAME
11
12 WebPAC::jsFind - create jsFind index instead of swish-e
13
14 =head1 DESCRIPTION
15
16 This module creates a jsFind index, which is a static B-Tree index
17 searchable by JavaScript. It's very useful if you want to build a
18 CD-ROM with static content and a search engine.
19
20 =head1 METHODS
21
22 =head2 new
23
24 Create new index object
25
26 my $index = new WebPAC::jsFind(
27 index_path => '/path/to/jsFind/index',
28 keys => 10,
29 log => 'log4perl.conf',
30 );
31
32 C<index_path> is path to location where jsFind index should be created.
33
34 C<keys> is an optional parameter which specifies the number of keys in each node
35 (which has to be an even number). Default is 10.
36
37 C<log> is an optional parameter which specifies the filename of the L<Log::Log4perl>
38 config file. Default is C<log.conf>.
39
40 =cut
41
# Constructor: build the index object from key/value arguments and
# initialise Log::Log4perl.
#
# Arguments (named): index_path (required), keys, log (both optional).
# Dies (confess) when index_path is missing or false.
# Returns the blessed object.
sub new {
    my ($class, @options) = @_;

    # All constructor arguments become object fields as-is.
    my $self = bless {@options}, $class;

    unless ($self->{'index_path'}) {
        confess "need index_path argument!";
    }

    # Fall back to log.conf when no logger configuration file was given.
    Log::Log4perl->init($self->{'log'} || "log.conf");

    return $self;
}
54
55 =head2 tree
56
57 Create or retrieve jsFind tree object
58
59 $index->tree('index_name');
60
61 =cut
62
# Return the jsFind tree object for the given index name, creating it on
# first use and caching it in $self->{'tree'}.
#
# Arguments: index name (required; confesses with "need index name!" when
#            it is missing or false).
# Returns:   the cached tree object for that index name.
sub tree {
    my $self = shift;

    my $index_name = shift || confess "need index name!";

    if (! $self->{'tree'}->{$index_name}) {
        # Explicit class-method call instead of indirect object syntax
        # ("new jsFind(...)"): this package defines its own new(), and the
        # indirect form relies on parser heuristics to pick the right one.
        # Node size comes from the 'keys' constructor argument, default 10.
        $self->{'tree'}->{$index_name} = jsFind->new(B => $self->{keys} || 10);

        my $log = $self->_get_logger();
        $log->debug("tree object $index_name created");
    }

    return $self->{'tree'}->{$index_name};
}
78
79 =head2 insert
80
81 Insert data into index
82
83 $index->insert(
84 index_name => 'index_name',
85 path => 'path',
86 headline => 'headline text',
87 words => 'words to insert into index'
88 );
89
90 =cut
91
# Insert the words of one document into a named index.
#
# Arguments (named):
#   index_name - name of the index to insert into (required)
#   path       - document path, used as the key of the stored data (required)
#   headline   - document title; when missing a warning is issued and
#                "no headline: <path>" is used instead
#   words      - whitespace-separated words; when undefined a warning is
#                logged and nothing is inserted
#
# Dies (confess) when index_name or path is missing.
# Returns nothing.
sub insert {
    my $self = shift;

    my $args = {@_};

    my $log = $self->_get_logger();

    confess "need index name" unless ($args->{'index_name'});
    confess "need path" unless ($args->{'path'});
    if (! $args->{'headline'}) {
        carp "no headline for ",$args->{'path'}," ?";
        $args->{'headline'} = "no headline: ".$args->{'path'};
    }
    if (! defined($args->{'words'})) {
        $log->warn("no words to insert for headline ",$args->{'headline'});
        return;
    }

    # Words are indexed in lower case.
    my $words = lc($args->{'words'});

    # chop leading and trailing spaces
    $words =~ s/^\s+//;
    $words =~ s/\s+$//;

    my @words = split(/\s+/,$words);

    # Count the occurrences of each word and, in the same pass, keep only
    # the first occurrence of each so that every distinct word is inserted
    # once (in original order) rather than once per occurrence.
    my %usage;
    my @unique_words = grep { !$usage{$_}++ } @words;

    $log->debug("inserting '$words'",
        " into index ",$args->{'index_name'},
        " headline: ",$args->{'headline'},
        " path: ",$args->{'path'}
    );

    # Nothing to index: return before the tree lookup so that no empty
    # tree gets created for this index name.
    return unless @unique_words;

    # The tree is the same for every word, so look it up only once.
    my $tree = $self->tree($args->{'index_name'});

    foreach my $word (@unique_words) {
        $tree->B_search(
            Key => $word,
            Data => {
                $args->{'path'} => {
                    t => $args->{'headline'},   # title
                    f => $usage{$word},         # frequency of word in this document
                },
            },
            Insert => 1,
            Append => 1,
        );
    }

    return;
}
143
144 =head2 close
145
146 This method will dump indexes to disk.
147
148 $index->close;
149
150 This method will create directories if needed and store tree xml files
151 for all indexes.
152
153 Turning on debugging for this method by inserting
154
155 log4perl.logger.WebPAC.jsFind.close=DEBUG
156
157 into C<log.conf> will also result in creation of GraphViz C<.dot> files
158 for each index in current directory.
159
160 =cut
161
# Dump every tree held in $self->{'tree'} to disk.
#
# For each index name the tree is written with to_jsfind() into
# "<index_path>/<index_name>". When the logger for this method is at debug
# level, a GraphViz file "<index_name>.dot" is also written to the current
# directory.
#
# Dies (via the logger's logdie) when the .dot file cannot be opened,
# written or closed.
# Returns nothing.
sub close {
    my $self = shift;

    my $log = $self->_get_logger();

    foreach my $index_name (keys %{$self->{'tree'}}) {
        my $path = $self->{'index_path'}."/".$index_name;
        my $tree = $self->tree($index_name);

        $log->debug("saving index '$index_name' xml files to '$path'");

        $tree->to_jsfind(
            dir => $path,
            data_codepage => 'ISO-8859-2',
            index_codepage => 'UTF-8'
        );

        if ($log->is_debug()) {
            my $dot_file = $index_name.".dot";

            $log->debug("saving graphviz file for '$index_name' to '$dot_file'");

            # Lexical handle instead of the package-global bareword DOT.
            open(my $dot_fh, ">", $dot_file)
                || $log->logdie("can't open '$dot_file': $!");

            print {$dot_fh} $tree->to_dot
                or $log->logdie("can't write '$dot_file': $!");

            # CORE:: is spelled out because this package defines its own
            # close(); buffered write errors surface here, so check it.
            CORE::close($dot_fh)
                or $log->logdie("can't close '$dot_file': $!");
        }
    }

    return;
}
190
191 #
192
193 =head1 INTERNAL METHODS
194
195 You shouldn't call these methods directly.
196
197 =head2 _get_logger
198
199 Get C<Log::Log4perl> object with a twist: domains are defined for each
200 method
201
202 my $log = $webpac->_get_logger();
203
204 =cut
205
# Return a logger from get_logger() whose category is the fully qualified
# name of the subroutine that called this method, falling back to the
# calling package name when there is no enclosing subroutine.
sub _get_logger {
    my ($self) = @_;

    # Element 3 of caller(1) is the name of the sub that called us.
    my $category = (caller(1))[3];

    # Not called from inside a sub: use the caller's package instead.
    $category ||= caller;

    return get_logger($category);
}
212 1;

  ViewVC Help
Powered by ViewVC 1.1.26