/[webpac]/trunk2/lib/WebPAC/HyperEstraier.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk2/lib/WebPAC/HyperEstraier.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 708 - (show annotations)
Wed Jul 13 23:38:04 2005 UTC (18 years, 9 months ago) by dpavlin
File size: 3639 byte(s)
added HyperEstraier back-end indexer

1 package WebPAC::HyperEstraier;
2
3 use warnings;
4 use strict;
5
6 use Carp;
7 use HyperEstraier;
8 use Log::Log4perl qw(get_logger :levels);
9 use Text::Iconv;
10
11 =head1 NAME
12
13 WebPAC::HyperEstraier - create HyperEstraier index
14
15 =head1 DESCRIPTION
16
17 This module will create HyperEstraier index. Planned features include
18 updating of index, but there are none yet.
19
20 =head1 METHODS
21
22 =head2 new
23
24 Create new index object
25
26 my $index = new WebPAC::HyperEstraier(
27 index_path => '/path/to/casket',
28 log => 'log4perl.conf',
29 );
30
31 C<index_path> is path to location where HyperEstraier index should be created.
32
33 C<log> is optional parametar which specify filename of L<Log::Log4Perl>
34 config file. Default is C<log.conf>.
35
36 This function will trap C<INT> and C<QUIT> signals to sync index to disk before
37 exit.
38
39 =cut
40
41 my $db;
42
43 sub new {
44 my $class = shift;
45 my $self = {@_};
46 bless($self, $class);
47
48 my $index_path = $self->{'index_path'} || confess "need index_path argument!";
49
50 my $log_file = $self->{'log'} || "log.conf";
51 Log::Log4perl->init($log_file);
52
53 my $log = $self->_get_logger();
54
55 carp "database allready opened" if ($db);
56
57 $db = HyperEstraier::Database->new();
58
59 my $flags = $HyperEstraier::Database::DBWRITER;
60 $flags |= $HyperEstraier::Database::DBCREAT if (! -e $index_path);
61
62 $log->debug("HyperEstraier::open($index_path, $flags)");
63
64 $db->open($index_path, $flags);
65 $log->logdie("Can't open index '$index_path' database problem?") unless defined($db);
66
67 my $signal = sub {
68 my($sig) = @_;
69 warn "\nCaught a SIG$sig--syncing database and shutting down\n";
70 $db->sync();
71 exit(0);
72 };
73
74 $SIG{'INT'} = $signal;
75 $SIG{'QUIT'} = $signal;
76
77 $self->{'iconv'} = new Text::Iconv('iso-8859-2', 'utf-8') || croak "can't create iconv";
78
79 return $self;
80 }
81
82 =head2 insert
83
84 Insert data into index
85
86 $index->insert(
87 index_name => 'index_name',
88 path => 'path',
89 headline => 'headline text',
90 words => 'words to insert into index'
91 );
92
93 =cut
94
95 sub insert {
96 my $self = shift;
97
98 my $args = {@_};
99
100 my $log = $self->_get_logger();
101
102 confess "need index name" unless ($args->{'index_name'});
103 confess "need path" unless ($args->{'path'});
104 if (! $args->{'headline'}) {
105 carp "no headline for ",$args->{'path'}," ?";
106 $args->{'headline'} = "no headline: ".$args->{'path'};
107 }
108 if (! defined($args->{'words'})) {
109 $log->warn("no words to insert for headline ",$args->{'headline'});
110 return;
111 }
112
113 my $words = lc($args->{'words'});
114
115 # chop leading and trailing spaces
116 $words =~ s/^\s+//;
117 $words =~ s/\s+$//;
118
119 my $url = 'file:///' . $args->{'path'};
120
121 my $doc = HyperEstraier::Document->new;
122 my $iconv = $self->{'iconv'} || croak "no iconv?";
123
124 $doc->add_attr('@uri', "file:///" . $args->{'path'});
125 $doc->add_attr('@title', $iconv->convert($args->{'headline'}) );
126 $doc->add_attr('@size', length($words));
127 $doc->add_attr('@mtime', time());
128
129 $doc->add_attr('index_name', $iconv->convert( $args->{'index_name'}) );
130
131 $doc->add_text( $iconv->convert($words) );
132
133 $log->debug("draft:\n", $doc->dump_draft);
134
135 $db->put_doc($doc, $HyperEstraier::Database::PDCLEAN);
136
137 }
138
139 =head2 close
140
141 This method will close indexes and sync to disk.
142
143 $index->close;
144
145 =cut
146
147 sub close {
148 my $self = shift;
149
150 my $log = $self->_get_logger();
151
152 $log->debug("sync database");
153
154 $db->sync();
155
156 $log->debug("optimize database");
157
158 $db->optimize(0);
159
160 }
161
162 #
163
164 =head1 INTERNAL METHODS
165
166 You shouldn't call this methods directly.
167
168 =head2 _get_logger
169
170 Get C<Log::Log4perl> object with a twist: domains are defined for each
171 method
172
173 my $log = $webpac->_get_logger();
174
175 =cut
176
177 sub _get_logger {
178 my $self = shift;
179
180 my $name = (caller(1))[3] || caller;
181 return get_logger($name);
182 }
183 1;

  ViewVC Help
Powered by ViewVC 1.1.26