/[webpac]/trunk2/lib/WebPAC/jsFind.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk2/lib/WebPAC/jsFind.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 706 - (hide annotations)
Wed Jul 13 23:35:59 2005 UTC (18 years, 9 months ago) by dpavlin
File size: 4107 byte(s)
small doc fix

1 dpavlin 390 package WebPAC::jsFind;
2    
3     use warnings;
4     use strict;
5    
6     use Carp;
7 dpavlin 540 use jsFind 0.06;
8 dpavlin 390 use Log::Log4perl qw(get_logger :levels);
9    
10     =head1 NAME
11    
12     WebPAC::jsFind - create jsFind index instead of swish-e
13    
14     =head1 DESCRIPTION
15    
16     This module will create jsFind index, which is static B-Tree index
17     searchable by JavaScript. It's very useful if you want to build
18     CD-ROM with static content and search engine.
19    
20     =head1 METHODS
21    
22     =head2 new
23    
24     Create new index object
25    
26     my $index = new WebPAC::jsFind(
27     index_path => '/path/to/jsFind/index',
28     keys => 10,
29 dpavlin 409 log => 'log4perl.conf',
30 dpavlin 390 );
31    
32 dpavlin 706 C<index_path> is path to location where jsFind index should be created.
33 dpavlin 390
34     C<keys> is optional parametar which specify number of keys in each node
35     (which has to be even number). Default is 10.
36    
37 dpavlin 409 C<log> is optional parametar which specify filename of L<Log::Log4Perl>
38     config file. Default is C<log.conf>.
39    
40 dpavlin 390 =cut
41    
42     sub new {
43     my $class = shift;
44     my $self = {@_};
45     bless($self, $class);
46    
47     confess "need index_path argument!" unless ($self->{'index_path'});
48    
49     my $log_file = $self->{'log'} || "log.conf";
50     Log::Log4perl->init($log_file);
51    
52     return $self;
53     }
54    
55     =head2 tree
56    
57     Create or retreive jsFind tree object
58    
59     $index->tree('index_name');
60    
61     =cut
62    
63     sub tree {
64     my $self = shift;
65    
66     my $index_name = shift || confess "need index name!";
67    
68     if (! $self->{'tree'}->{$index_name}) {
69     $self->{'tree'}->{$index_name} = new jsFind(B => $self->{keys} || 10);
70     my $log = $self->_get_logger();
71     $log->debug("tree object $index_name created");
72    
73     }
74    
75     return $self->{'tree'}->{$index_name};
76    
77     }
78    
79     =head2 insert
80    
81     Insert data into index
82    
83     $index->insert(
84     index_name => 'index_name',
85     path => 'path',
86     headline => 'headline text',
87     words => 'words to insert into index'
88     );
89    
90     =cut
91    
92     sub insert {
93     my $self = shift;
94    
95     my $args = {@_};
96    
97     my $log = $self->_get_logger();
98    
99     confess "need index name" unless ($args->{'index_name'});
100     confess "need path" unless ($args->{'path'});
101     if (! $args->{'headline'}) {
102     carp "no headline for ",$args->{'path'}," ?";
103     $args->{'headline'} = "no headline: ".$args->{'path'};
104     }
105 dpavlin 588 if (! defined($args->{'words'})) {
106     $log->warn("no words to insert for headline ",$args->{'headline'});
107     return;
108     }
109 dpavlin 390
110 dpavlin 491 my $words = lc($args->{'words'});
111 dpavlin 390
112     # chop leading and trailing spaces
113     $words =~ s/^\s+//;
114     $words =~ s/\s+$//;
115    
116 dpavlin 491 my @words = split(/\s+/,$words);
117    
118 dpavlin 390 my %usage;
119 dpavlin 491 foreach (@words) {
120 dpavlin 390 $usage{$_}++;
121     }
122    
123     $log->debug("inserting '$words'",
124     " into index ",$args->{'index_name'},
125     " headline: ",$args->{'headline'},
126     " path: ",$args->{'path'}
127     );
128    
129 dpavlin 491 foreach my $word (@words) {
130 dpavlin 390
131     $self->tree($args->{'index_name'})->B_search(
132     Key => $word,
133     Data => { $args->{'path'} => {
134     t => $args->{'headline'},
135     f => $usage{$word},
136     },
137     },
138     Insert => 1,
139     Append => 1,
140     );
141     }
142     }
143    
144     =head2 close
145    
146     This method will dump indexes to disk.
147    
148     $index->close;
149    
150     This method will create directories if needed and store tree xml files
151     for all indexes.
152    
153 dpavlin 445 Turning debugging for this function by inserting
154 dpavlin 390
155 dpavlin 445 log4perl.logger.WebPAC.jsFind.close=DEBUG
156    
157     into C<log.conf> will also result in creation of GraphViz C<.dot> files
158     for each index in current directory.
159    
160 dpavlin 390 =cut
161    
162     sub close {
163     my $self = shift;
164    
165     my $log = $self->_get_logger();
166    
167     foreach my $index_name (keys %{$self->{'tree'}}) {
168     my $path = $self->{'index_path'}."/".$index_name;
169    
170     $log->debug("saving index '$index_name' xml files to '$path'");
171    
172 dpavlin 540 $self->tree($index_name)->to_jsfind(
173     dir => $path,
174     data_codepage => 'ISO-8859-2',
175     index_codepage => 'UTF-8'
176     );
177 dpavlin 445
178     if ($log->is_debug()) {
179     my $dot_file = $index_name.".dot";
180    
181     $log->debug("saving graphviz file for '$index_name' to '$dot_file'");
182    
183     open(DOT, ">", $dot_file) || $log->logdie("can't open '$dot_file': $!");
184     print DOT $self->tree($index_name)->to_dot;
185     close(DOT);
186     }
187 dpavlin 390 }
188    
189     }
190    
191     #
192    
193     =head1 INTERNAL METHODS
194    
195     You shouldn't call this methods directly.
196    
197     =head2 _get_logger
198    
199     Get C<Log::Log4perl> object with a twist: domains are defined for each
200     method
201    
202     my $log = $webpac->_get_logger();
203    
204     =cut
205    
206     sub _get_logger {
207     my $self = shift;
208    
209     my $name = (caller(1))[3] || caller;
210     return get_logger($name);
211     }
212     1;

  ViewVC Help
Powered by ViewVC 1.1.26