/[webpac]/trunk2/lib/WebPAC/HyperEstraier.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk2/lib/WebPAC/HyperEstraier.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 708 - (hide annotations)
Wed Jul 13 23:38:04 2005 UTC (18 years, 10 months ago) by dpavlin
File size: 3639 byte(s)
added HyperEstraier back-end indexer

1 dpavlin 708 package WebPAC::HyperEstraier;
2    
3     use warnings;
4     use strict;
5    
6     use Carp;
7     use HyperEstraier;
8     use Log::Log4perl qw(get_logger :levels);
9     use Text::Iconv;
10    
11     =head1 NAME
12    
13     WebPAC::HyperEstraier - create HyperEstraier index
14    
15     =head1 DESCRIPTION
16    
17     This module will create HyperEstraier index. Planned features include
18     updating of index, but there are none yet.
19    
20     =head1 METHODS
21    
22     =head2 new
23    
24     Create new index object
25    
26     my $index = new WebPAC::HyperEstraier(
27     index_path => '/path/to/casket',
28     log => 'log4perl.conf',
29     );
30    
31     C<index_path> is path to location where HyperEstraier index should be created.
32    
33     C<log> is an optional parameter which specifies the filename of the L<Log::Log4perl>
34     config file. Default is C<log.conf>.
35    
36     This function will trap C<INT> and C<QUIT> signals to sync index to disk before
37     exit.
38    
39     =cut
40    
# File-scoped HyperEstraier::Database handle: assigned in new(), then used by
# insert(), close() and the INT/QUIT signal handler installed in new().
41     my $db;
42    
sub new {
    # Constructor. Accepts a flat list of key/value pairs:
    #   index_path - (required) location of the HyperEstraier index
    #   log        - (optional) Log::Log4perl config file, defaults to "log.conf"
    # Returns the blessed object. Dies (confess/logdie/croak) when index_path
    # is missing, the index cannot be opened, or iconv cannot be created.
    my $class = shift;
    my $self  = {@_};
    bless($self, $class);

    my $index_path = $self->{'index_path'} || confess "need index_path argument!";

    my $log_file = $self->{'log'} || "log.conf";
    Log::Log4perl->init($log_file);

    my $log = $self->_get_logger();

    # $db is shared by the whole file, so a second new() replaces the handle
    carp "database allready opened" if ($db);

    $db = HyperEstraier::Database->new();

    # always open for writing; create the index only if nothing exists at the path
    my $flags = $HyperEstraier::Database::DBWRITER;
    $flags |= $HyperEstraier::Database::DBCREAT if (! -e $index_path);

    $log->debug("HyperEstraier::open($index_path, $flags)");

    # The handle itself is always defined at this point, so testing
    # defined($db) can never detect a failed open; check open()'s result.
    # NOTE(review): assumes open() returns false on failure, as documented
    # for the Hyper Estraier API -- confirm against the binding in use.
    $db->open($index_path, $flags)
        or $log->logdie("Can't open index '$index_path' database problem?");

    # flush the index to disk before exiting when interrupted
    my $signal = sub {
        my ($sig) = @_;
        warn "\nCaught a SIG$sig--syncing database and shutting down\n";
        $db->sync();
        exit(0);
    };

    $SIG{'INT'}  = $signal;
    $SIG{'QUIT'} = $signal;

    # converter applied by insert() to every string stored in the index
    $self->{'iconv'} = Text::Iconv->new('iso-8859-2', 'utf-8') || croak "can't create iconv";

    return $self;
}
81    
82     =head2 insert
83    
84     Insert data into index
85    
86     $index->insert(
87     index_name => 'index_name',
88     path => 'path',
89     headline => 'headline text',
90     words => 'words to insert into index'
91     );
92    
93     =cut
94    
sub insert {
    # Add one document to the index. Accepts a flat list of key/value pairs:
    #   index_name - (required) stored as the 'index_name' attribute
    #   path       - (required) used to build the document's @uri
    #   headline   - stored as @title; a placeholder is generated if missing
    #   words      - text to index; when undefined nothing is inserted
    # Returns nothing when there are no words, otherwise the result of put_doc.
    my $self = shift;

    my $args = {@_};

    my $log = $self->_get_logger();

    confess "need index name" unless ($args->{'index_name'});
    confess "need path" unless ($args->{'path'});
    if (! $args->{'headline'}) {
        carp "no headline for ",$args->{'path'}," ?";
        $args->{'headline'} = "no headline: ".$args->{'path'};
    }
    if (! defined($args->{'words'})) {
        $log->warn("no words to insert for headline ",$args->{'headline'});
        return;
    }

    my $words = lc($args->{'words'});

    # chop leading and trailing spaces
    $words =~ s/^\s+//;
    $words =~ s/\s+$//;

    my $url = 'file:///' . $args->{'path'};

    my $doc = HyperEstraier::Document->new;
    my $iconv = $self->{'iconv'} || croak "no iconv?";

    $doc->add_attr('@uri', $url);
    $doc->add_attr('@title', $iconv->convert($args->{'headline'}) );
    # size is measured on the words before they are passed through iconv
    $doc->add_attr('@size', length($words));
    $doc->add_attr('@mtime', time());

    $doc->add_attr('index_name', $iconv->convert( $args->{'index_name'}) );

    $doc->add_text( $iconv->convert($words) );

    # building the draft dump is wasted work unless debug output is enabled
    $log->debug("draft:\n", $doc->dump_draft) if $log->is_debug;

    return $db->put_doc($doc, $HyperEstraier::Database::PDCLEAN);
}
138    
139     =head2 close
140    
141     This method will close indexes and sync to disk.
142    
143     $index->close;
144    
145     =cut
146    
sub close {
    # Flush the shared index handle to disk, then run the optimizer on it.
    # Returns whatever optimize() returns.
    my ($self) = @_;

    # must be fetched directly in this sub: the logger name is derived from
    # the name of the sub that calls _get_logger
    my $logger = $self->_get_logger();

    $logger->debug("sync database");
    $db->sync();

    $logger->debug("optimize database");
    return $db->optimize(0);
}
161    
162     #
163    
164     =head1 INTERNAL METHODS
165    
166     You shouldn't call these methods directly.
167    
168     =head2 _get_logger
169    
170     Get C<Log::Log4perl> object with a twist: domains are defined for each
171     method
172    
173     my $log = $webpac->_get_logger();
174    
175     =cut
176    
sub _get_logger {
    # Return a Log::Log4perl logger named after the sub that called this
    # method; when there is no calling sub, use the caller's package name.
    my ($self) = @_;

    my $category = (caller(1))[3];
    $category = caller unless $category;

    return get_logger($category);
}
183     1;

  ViewVC Help
Powered by ViewVC 1.1.26