1 |
package WebPAC::jsFind; |
2 |
|
3 |
use warnings; |
4 |
use strict; |
5 |
|
6 |
use Carp; |
7 |
use jsFind 0.04; |
8 |
use Log::Log4perl qw(get_logger :levels); |
9 |
|
10 |
=head1 NAME |
11 |
|
12 |
WebPAC::jsFind - create jsFind index instead of swish-e |
13 |
|
14 |
=head1 DESCRIPTION |
15 |
|
16 |
This module will create jsFind index, which is static B-Tree index |
17 |
searchable by JavaScript. It's very useful if you want to build |
18 |
CD-ROM with static content and search engine. |
19 |
|
20 |
=head1 METHODS |
21 |
|
22 |
=head2 new |
23 |
|
24 |
Create new index object |
25 |
|
26 |
my $index = new WebPAC::jsFind( |
27 |
index_path => '/path/to/jsFind/index', |
28 |
keys => 10, |
29 |
); |
30 |
|
31 |
C<index> is path to location where jsFind index should be created. |
32 |
|
33 |
C<keys> is optional parametar which specify number of keys in each node |
34 |
(which has to be even number). Default is 10. |
35 |
|
36 |
=cut |
37 |
|
38 |
sub new { |
39 |
my $class = shift; |
40 |
my $self = {@_}; |
41 |
bless($self, $class); |
42 |
|
43 |
confess "need index_path argument!" unless ($self->{'index_path'}); |
44 |
|
45 |
my $log_file = $self->{'log'} || "log.conf"; |
46 |
Log::Log4perl->init($log_file); |
47 |
|
48 |
return $self; |
49 |
} |
50 |
|
51 |
=head2 tree |
52 |
|
53 |
Create or retreive jsFind tree object |
54 |
|
55 |
$index->tree('index_name'); |
56 |
|
57 |
=cut |
58 |
|
59 |
sub tree { |
60 |
my $self = shift; |
61 |
|
62 |
my $index_name = shift || confess "need index name!"; |
63 |
|
64 |
if (! $self->{'tree'}->{$index_name}) { |
65 |
$self->{'tree'}->{$index_name} = new jsFind(B => $self->{keys} || 10); |
66 |
my $log = $self->_get_logger(); |
67 |
$log->debug("tree object $index_name created"); |
68 |
|
69 |
} |
70 |
|
71 |
return $self->{'tree'}->{$index_name}; |
72 |
|
73 |
} |
74 |
|
75 |
=head2 insert |
76 |
|
77 |
Insert data into index |
78 |
|
79 |
$index->insert( |
80 |
index_name => 'index_name', |
81 |
path => 'path', |
82 |
headline => 'headline text', |
83 |
words => 'words to insert into index' |
84 |
); |
85 |
|
86 |
=cut |
87 |
|
88 |
sub insert { |
89 |
my $self = shift; |
90 |
|
91 |
my $args = {@_}; |
92 |
|
93 |
my $log = $self->_get_logger(); |
94 |
|
95 |
confess "need index name" unless ($args->{'index_name'}); |
96 |
confess "need path" unless ($args->{'path'}); |
97 |
if (! $args->{'headline'}) { |
98 |
carp "no headline for ",$args->{'path'}," ?"; |
99 |
$args->{'headline'} = "no headline: ".$args->{'path'}; |
100 |
} |
101 |
return unless (defined($args->{'words'})); |
102 |
|
103 |
my $words = $args->{'words'}; |
104 |
|
105 |
# chop leading and trailing spaces |
106 |
$words =~ s/^\s+//; |
107 |
$words =~ s/\s+$//; |
108 |
|
109 |
my %usage; |
110 |
foreach (split(/\s+/,$words)) { |
111 |
$usage{$_}++; |
112 |
} |
113 |
|
114 |
$log->debug("inserting '$words'", |
115 |
" into index ",$args->{'index_name'}, |
116 |
" headline: ",$args->{'headline'}, |
117 |
" path: ",$args->{'path'} |
118 |
); |
119 |
|
120 |
foreach my $word (keys %usage) { |
121 |
|
122 |
$self->tree($args->{'index_name'})->B_search( |
123 |
Key => $word, |
124 |
Data => { $args->{'path'} => { |
125 |
t => $args->{'headline'}, |
126 |
f => $usage{$word}, |
127 |
}, |
128 |
}, |
129 |
Insert => 1, |
130 |
Append => 1, |
131 |
); |
132 |
} |
133 |
} |
134 |
|
135 |
=head2 close |
136 |
|
137 |
This method will dump indexes to disk. |
138 |
|
139 |
$index->close; |
140 |
|
141 |
This method will create directories if needed and store tree xml files |
142 |
for all indexes. |
143 |
|
144 |
|
145 |
=cut |
146 |
|
147 |
sub close { |
148 |
my $self = shift; |
149 |
|
150 |
my $log = $self->_get_logger(); |
151 |
|
152 |
foreach my $index_name (keys %{$self->{'tree'}}) { |
153 |
my $path = $self->{'index_path'}."/".$index_name; |
154 |
|
155 |
$log->debug("saving index '$index_name' xml files to '$path'"); |
156 |
|
157 |
$self->tree($index_name)->to_jsfind($path,'ISO-8859-2','UTF-8'); |
158 |
} |
159 |
|
160 |
} |
161 |
|
162 |
# |
163 |
|
164 |
=head1 INTERNAL METHODS |
165 |
|
166 |
You shouldn't call this methods directly. |
167 |
|
168 |
=head2 _get_logger |
169 |
|
170 |
Get C<Log::Log4perl> object with a twist: domains are defined for each |
171 |
method |
172 |
|
173 |
my $log = $webpac->_get_logger(); |
174 |
|
175 |
=cut |
176 |
|
177 |
sub _get_logger { |
178 |
my $self = shift; |
179 |
|
180 |
my $name = (caller(1))[3] || caller; |
181 |
return get_logger($name); |
182 |
} |
183 |
1; |