/[webpac]/trunk2/lib/WebPAC/jsFind.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk2/lib/WebPAC/jsFind.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 540 - (hide annotations)
Sun Oct 24 14:51:53 2004 UTC (19 years, 6 months ago) by dpavlin
File size: 4027 byte(s)
jsFind 0.06 API

1 dpavlin 390 package WebPAC::jsFind;
2    
3     use warnings;
4     use strict;
5    
6     use Carp;
7 dpavlin 540 use jsFind 0.06;
8 dpavlin 390 use Log::Log4perl qw(get_logger :levels);
9    
10     =head1 NAME
11    
12     WebPAC::jsFind - create jsFind index instead of swish-e
13    
14     =head1 DESCRIPTION
15    
16     This module will create jsFind index, which is static B-Tree index
17     searchable by JavaScript. It's very useful if you want to build
18     CD-ROM with static content and search engine.
19    
20     =head1 METHODS
21    
22     =head2 new
23    
24     Create new index object
25    
26     my $index = new WebPAC::jsFind(
27     index_path => '/path/to/jsFind/index',
28     keys => 10,
29 dpavlin 409 log => 'log4perl.conf',
30 dpavlin 390 );
31    
32     C<index_path> is the path to the location where the jsFind index should be created.
33    
34     C<keys> is an optional parameter which specifies the number of keys in each node
35     (which has to be an even number). Default is 10.
36    
37 dpavlin 409 C<log> is an optional parameter which specifies the filename of the L<Log::Log4perl>
38     config file. Default is C<log.conf>.
39    
40 dpavlin 390 =cut
41    
# Constructor.
#
# Accepts a flat key/value list which is stored verbatim as the object's
# attributes. 'index_path' is mandatory; 'log' names the Log::Log4perl
# configuration file and falls back to "log.conf" when absent or false.
# Dies (with a stack trace) when 'index_path' is missing.
sub new {
    my ($class, @options) = @_;

    my $self = bless { @options }, $class;

    if (!$self->{'index_path'}) {
        confess "need index_path argument!";
    }

    # pick the logging configuration, then hand it to Log4perl
    my $log_config = "log.conf";
    $log_config = $self->{'log'} if ($self->{'log'});
    Log::Log4perl->init($log_config);

    return $self;
}
54    
55     =head2 tree
56    
57     Create or retrieve jsFind tree object
58    
59     $index->tree('index_name');
60    
61     =cut
62    
# Return the jsFind tree for the given index name, creating and caching it
# in $self->{'tree'} on first use.
#
# Argument: index name (required; dies with a stack trace when missing).
# The B parameter passed to jsFind is $self->{'keys'}, or 10 when that is
# unset or false.
sub tree {
    my $self = shift;

    my $index_name = shift || confess "need index name!";

    if (! $self->{'tree'}->{$index_name}) {
        # Use an explicit class-method call: this package defines its own
        # new(), and the indirect-object form "new jsFind(...)" relies on
        # parser heuristics to decide what "new" refers to.
        $self->{'tree'}->{$index_name} = jsFind->new(B => $self->{'keys'} || 10);

        # _get_logger() derives the logger name from its caller, so it has
        # to be invoked directly from this sub.
        my $log = $self->_get_logger();
        $log->debug("tree object $index_name created");
    }

    return $self->{'tree'}->{$index_name};
}
78    
79     =head2 insert
80    
81     Insert data into index
82    
83     $index->insert(
84     index_name => 'index_name',
85     path => 'path',
86     headline => 'headline text',
87     words => 'words to insert into index'
88     );
89    
90     =cut
91    
# Add the words of one document to an index.
#
# Named arguments:
#   index_name - name of the index (required)
#   path       - path of the document (required)
#   headline   - headline stored with each entry; when missing a warning is
#                issued and "no headline: <path>" is used instead
#   words      - whitespace-separated words; the call is a no-op when undef
#
# Words are lower-cased. Every distinct word is inserted exactly once, with
# the number of times it occurs in 'words' stored as its frequency 'f'.
# Dies (with a stack trace) when index_name or path is missing.
sub insert {
    my $self = shift;

    my $args = {@_};

    my $log = $self->_get_logger();

    confess "need index name" unless ($args->{'index_name'});
    confess "need path" unless ($args->{'path'});
    if (! $args->{'headline'}) {
        carp "no headline for ",$args->{'path'}," ?";
        $args->{'headline'} = "no headline: ".$args->{'path'};
    }
    return unless (defined($args->{'words'}));

    my $words = lc($args->{'words'});

    # chop leading and trailing spaces
    $words =~ s/^\s+//;
    $words =~ s/\s+$//;

    # Count occurrences of every word and, in the same pass, keep each
    # distinct word once in first-seen order. The frequency is already
    # carried in %usage, so repeating the insert per occurrence would only
    # resend identical data.
    my %usage;
    my @unique_words = grep { !$usage{$_}++ } split(/\s+/, $words);

    $log->debug("inserting '$words'",
        " into index ",$args->{'index_name'},
        " headline: ",$args->{'headline'},
        " path: ",$args->{'path'}
    );

    # nothing to index: do not create an (empty) tree for this index
    return unless (@unique_words);

    # the tree lookup does not depend on the word, so do it once
    my $tree = $self->tree($args->{'index_name'});

    foreach my $word (@unique_words) {
        $tree->B_search(
            Key => $word,
            Data => {
                $args->{'path'} => {
                    t => $args->{'headline'},
                    f => $usage{$word},
                },
            },
            Insert => 1,
            Append => 1,
        );
    }

    return;
}
140    
141     =head2 close
142    
143     This method will dump indexes to disk.
144    
145     $index->close;
146    
147     This method will create directories if needed and store tree xml files
148     for all indexes.
149    
150 dpavlin 445 Turning on debugging for this function by inserting
151 dpavlin 390
152 dpavlin 445 log4perl.logger.WebPAC.jsFind.close=DEBUG
153    
154     into C<log.conf> will also result in creation of GraphViz C<.dot> files
155     for each index in current directory.
156    
157 dpavlin 390 =cut
158    
# Dump every tree held in $self->{'tree'} to disk.
#
# For each index name the tree is written with to_jsfind() into
# "<index_path>/<index_name>". When the logger for this method has debug
# enabled, the output of the tree's to_dot() is additionally written to
# "<index_name>.dot" (relative path, i.e. the current directory).
# Failure to open, write or close a .dot file is fatal via logdie().
sub close {
    my $self = shift;

    my $log = $self->_get_logger();

    foreach my $index_name (keys %{$self->{'tree'}}) {
        my $path = $self->{'index_path'}."/".$index_name;
        my $tree = $self->tree($index_name);

        $log->debug("saving index '$index_name' xml files to '$path'");

        $tree->to_jsfind(
            dir => $path,
            data_codepage => 'ISO-8859-2',
            index_codepage => 'UTF-8'
        );

        if ($log->is_debug()) {
            my $dot_file = $index_name.".dot";

            $log->debug("saving graphviz file for '$index_name' to '$dot_file'");

            # lexical handle instead of the package-global bareword DOT
            open(my $dot_fh, ">", $dot_file)
                || $log->logdie("can't open '$dot_file': $!");
            print {$dot_fh} $tree->to_dot
                or $log->logdie("can't write '$dot_file': $!");

            # This sub is itself named close, so call the built-in
            # explicitly; buffered write errors only surface here, hence
            # the check.
            CORE::close($dot_fh)
                || $log->logdie("can't close '$dot_file': $!");
        }
    }

    return;
}
187    
188     #
189    
190     =head1 INTERNAL METHODS
191    
192     You shouldn't call these methods directly.
193    
194     =head2 _get_logger
195    
196     Get C<Log::Log4perl> object with a twist: domains are defined for each
197     method
198    
199     my $log = $webpac->_get_logger();
200    
201     =cut
202    
# Return a logger whose category is the fully qualified name of the sub
# that called this method (frame 1 as seen from here). When there is no
# such frame, the package name of the immediate caller is used instead.
# Must be called directly from the method that wants to log, since the
# category is derived from the call stack.
sub _get_logger {
    my ($self) = @_;

    my $calling_sub = (caller(1))[3];
    my $category = $calling_sub ? $calling_sub : caller;

    return get_logger($category);
}
209     1;

  ViewVC Help
Powered by ViewVC 1.1.26