1 |
package WebPAC::HyperEstraier; |
2 |
|
3 |
use warnings; |
4 |
use strict; |
5 |
|
6 |
use Carp; |
7 |
use HyperEstraier; |
8 |
use Log::Log4perl qw(get_logger :levels); |
9 |
use Text::Iconv; |
10 |
|
11 |
=head1 NAME |
12 |
|
13 |
WebPAC::HyperEstraier - create HyperEstraier index |
14 |
|
15 |
=head1 DESCRIPTION |
16 |
|
17 |
This module will create HyperEstraier index. Planned features include |
18 |
updating of index, but there are none yet. |
19 |
|
20 |
=head1 METHODS |
21 |
|
22 |
=head2 new |
23 |
|
24 |
Create new index object |
25 |
|
26 |
my $index = new WebPAC::HyperEstraier( |
27 |
index_path => '/path/to/casket', |
28 |
log => 'log4perl.conf', |
29 |
); |
30 |
|
31 |
C<index_path> is path to location where HyperEstraier index should be created. |
32 |
|
33 |
C<log> is optional parametar which specify filename of L<Log::Log4Perl> |
34 |
config file. Default is C<log.conf>. |
35 |
|
36 |
This function will trap C<INT> and C<QUIT> signals to sync index to disk before |
37 |
exit. |
38 |
|
39 |
=cut |
40 |
|
41 |
my $db; |
42 |
|
43 |
sub new { |
44 |
my $class = shift; |
45 |
my $self = {@_}; |
46 |
bless($self, $class); |
47 |
|
48 |
my $index_path = $self->{'index_path'} || confess "need index_path argument!"; |
49 |
|
50 |
my $log_file = $self->{'log'} || "log.conf"; |
51 |
Log::Log4perl->init($log_file); |
52 |
|
53 |
my $log = $self->_get_logger(); |
54 |
|
55 |
carp "database allready opened" if ($db); |
56 |
|
57 |
$db = HyperEstraier::Database->new(); |
58 |
|
59 |
my $flags = $HyperEstraier::Database::DBWRITER; |
60 |
$flags |= $HyperEstraier::Database::DBCREAT if (! -e $index_path); |
61 |
|
62 |
$log->debug("HyperEstraier::open($index_path, $flags)"); |
63 |
|
64 |
$db->open($index_path, $flags); |
65 |
$log->logdie("Can't open index '$index_path' database problem?") unless defined($db); |
66 |
|
67 |
my $signal = sub { |
68 |
my($sig) = @_; |
69 |
warn "\nCaught a SIG$sig--syncing database and shutting down\n"; |
70 |
$db->sync(); |
71 |
exit(0); |
72 |
}; |
73 |
|
74 |
$SIG{'INT'} = $signal; |
75 |
$SIG{'QUIT'} = $signal; |
76 |
|
77 |
$self->{'iconv'} = new Text::Iconv('iso-8859-2', 'utf-8') || croak "can't create iconv"; |
78 |
|
79 |
return $self; |
80 |
} |
81 |
|
82 |
=head2 insert |
83 |
|
84 |
Insert data into index |
85 |
|
86 |
$index->insert( |
87 |
index_name => 'index_name', |
88 |
path => 'path', |
89 |
headline => 'headline text', |
90 |
words => 'words to insert into index' |
91 |
); |
92 |
|
93 |
=cut |
94 |
|
95 |
sub insert { |
96 |
my $self = shift; |
97 |
|
98 |
my $args = {@_}; |
99 |
|
100 |
my $log = $self->_get_logger(); |
101 |
|
102 |
confess "need index name" unless ($args->{'index_name'}); |
103 |
confess "need path" unless ($args->{'path'}); |
104 |
if (! $args->{'headline'}) { |
105 |
carp "no headline for ",$args->{'path'}," ?"; |
106 |
$args->{'headline'} = "no headline: ".$args->{'path'}; |
107 |
} |
108 |
if (! defined($args->{'words'})) { |
109 |
$log->warn("no words to insert for headline ",$args->{'headline'}); |
110 |
return; |
111 |
} |
112 |
|
113 |
my $words = lc($args->{'words'}); |
114 |
|
115 |
# chop leading and trailing spaces |
116 |
$words =~ s/^\s+//; |
117 |
$words =~ s/\s+$//; |
118 |
|
119 |
my $url = 'file:///' . $args->{'path'}; |
120 |
|
121 |
my $doc = HyperEstraier::Document->new; |
122 |
my $iconv = $self->{'iconv'} || croak "no iconv?"; |
123 |
|
124 |
$doc->add_attr('@uri', "file:///" . $args->{'path'}); |
125 |
$doc->add_attr('@title', $iconv->convert($args->{'headline'}) ); |
126 |
$doc->add_attr('@size', length($words)); |
127 |
$doc->add_attr('@mtime', time()); |
128 |
|
129 |
$doc->add_attr('index_name', $iconv->convert( $args->{'index_name'}) ); |
130 |
|
131 |
$doc->add_text( $iconv->convert($words) ); |
132 |
|
133 |
$log->debug("draft:\n", $doc->dump_draft); |
134 |
|
135 |
$db->put_doc($doc, $HyperEstraier::Database::PDCLEAN); |
136 |
|
137 |
} |
138 |
|
139 |
=head2 close |
140 |
|
141 |
This method will close indexes and sync to disk. |
142 |
|
143 |
$index->close; |
144 |
|
145 |
=cut |
146 |
|
147 |
sub close { |
148 |
my $self = shift; |
149 |
|
150 |
my $log = $self->_get_logger(); |
151 |
|
152 |
$log->debug("sync database"); |
153 |
|
154 |
$db->sync(); |
155 |
|
156 |
$log->debug("optimize database"); |
157 |
|
158 |
$db->optimize(0); |
159 |
|
160 |
} |
161 |
|
162 |
# |
163 |
|
164 |
=head1 INTERNAL METHODS |
165 |
|
166 |
You shouldn't call this methods directly. |
167 |
|
168 |
=head2 _get_logger |
169 |
|
170 |
Get C<Log::Log4perl> object with a twist: domains are defined for each |
171 |
method |
172 |
|
173 |
my $log = $webpac->_get_logger(); |
174 |
|
175 |
=cut |
176 |
|
177 |
sub _get_logger { |
178 |
my $self = shift; |
179 |
|
180 |
my $name = (caller(1))[3] || caller; |
181 |
return get_logger($name); |
182 |
} |
183 |
1; |