/[webpac]/trunk2/lib/WebPAC.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk2/lib/WebPAC.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 357 - (show annotations)
Wed Jun 16 13:39:17 2004 UTC (19 years, 9 months ago) by dpavlin
File size: 7801 byte(s)
implement limit_mfn

1 package WebPAC;
2
3 use Carp;
4 use Text::Iconv;
5 use Config::IniFiles;
6
7 =head1 NAME
8
9 WebPAC - base class for WebPAC
10
11 =head1 DESCRIPTION
12
13 This module implements methods used by WebPAC.
14
15 =head1 METHODS
16
17 =head2 new
18
19 This will create new instance of WebPAC using configuration specified by C<config_file>.
20
21 my $webpac = new WebPAC(
22 config_file => 'name.conf',
23 [code_page => 'ISO-8859-2',]
24 );
25
26 Default C<code_page> is C<ISO-8859-2>.
27
28 It will also read configuration files
29 C<global.conf> (used by indexer and Web font-end)
30 and configuration file specified by C<config_file>
31 which describes databases to be indexed.
32
33 =cut
34
35 sub new {
36 my $class = shift;
37 my $self = {@_};
38 bless($self, $class);
39
40 # fill in default values
41 # output codepage
42 $self->{'code_page'} = 'ISO-8859-2' if (! $self->{'code_page'});
43
44 #
45 # read global.conf
46 #
47
48 $self->{global_config_file} = new Config::IniFiles( -file => 'global.conf' ) || croak "can't open 'global.conf'";
49
50 # read global config parametars
51 foreach my $var (qw(
52 dbi_dbd
53 dbi_dsn
54 dbi_user
55 dbi_passwd
56 show_progress
57 my_unac_filter
58 )) {
59 $self->{global_config}->{$var} = $self->{global_config_file}->val('global', $var);
60 }
61
62 #
63 # read indexer config file
64 #
65
66 $self->{indexer_config_file} = new Config::IniFiles( -file => $self->{config_file} ) || croak "can't open '$self->{config_file}'";
67
68 # read global config parametars
69 foreach my $var (qw(
70 dbi_dbd
71 dbi_dsn
72 dbi_user
73 dbi_passwd
74 show_progress
75 my_unac_filter
76 )) {
77 $self->{global_config}->{$var} = $self->{global_config_file}->val('global', $var);
78 }
79
80 return $self;
81 }
82
83 =head2 open_isis
84
85 Open CDS/ISIS database using OpenIsis module and read all records to memory.
86
87 $webpac->open_isis(
88 filename => '/data/ISIS/ISIS',
89 code_page => '852',
90 limit_mfn => '500',
91 lookup => [ ... ],
92 );
93
94 By default, ISIS code page is assumed to be C<852>.
95
96 If optional parametar C<limit_mfn> is set, it will read just 500 records
97 from database in example above.
98
99 Returns number of last record read into memory (size of database, really).
100
101 C<lookup> argument is an array of lookups to create. Each lookup must have C<key> and
102 C<val>. Optional parametar C<eval> is perl code to evaluate before storing
103 value in index.
104
105 lookup => [
106 { 'key' => 'd:v900', 'val' => 'v250^a' },
107 { 'eval' => '"v901^a" eq "Podruèje"',
108 'key' => 'pa:v561^4:v562^4:v461^1',
109 'val' => 'v900' },
110 ]
111
112 =cut
113
114 sub open_isis {
115 my $self = shift;
116 my $arg = {@_};
117
118 croak "need filename" if (! $arg->{'filename'});
119 my $code_page = $arg->{'code_page'} || '852';
120
121 use OpenIsis;
122
123 #$self->{'isis_code_page'} = $code_page;
124
125 # create Text::Iconv object
126 my $cp = Text::Iconv->new($code_page,$self->{'code_page'});
127
128 my $isis_db = OpenIsis::open($arg->{'filename'});
129
130 my $maxmfn = OpenIsis::maxRowid( $isis_db ) || 1;
131
132 $maxmfn = $self->{limit_mfn} if ($self->{limit_mfn});
133
134 # read database
135 for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) {
136
137 # read record
138 my $row = OpenIsis::read( $isis_db, $mfn );
139 foreach my $k (keys %{$row}) {
140 if ($k ne "mfn") {
141 foreach my $l (@{$row->{$k}}) {
142 $l = $cp->convert($l);
143 # has subfields?
144 my $val;
145 if ($l =~ m/\^/) {
146 foreach my $t (split(/\^/,$l)) {
147 next if (! $t);
148 $val->{substr($t,0,1)} = substr($t,1);
149 }
150 } else {
151 $val = $l;
152 }
153
154 push @{$self->{'data'}->{$mfn}->{$k}}, $val;
155 }
156 }
157
158 }
159
160 # create lookup
161 my $rec = $self->{'data'}->{$mfn};
162 $self->create_lookup($rec, @{$arg->{'lookup'}});
163
164 }
165
166 # store max mfn and return it.
167 return $self->{'max_mfn'} = $maxmfn;
168 }
169
170 =head2 create_lookup
171
172 Create lookup from record using lookup definition.
173
174 =cut
175
176 sub create_lookup {
177 my $self = shift;
178
179 my $rec = shift || confess "need record to create lookup";
180 confess("need HASH as first argument!") if ($rec !~ /HASH/o);
181
182 foreach my $i (@_) {
183 if ($i->{'eval'}) {
184 my $eval = $self->fill_in($rec,$i->{'eval'});
185 my $key = $self->fill_in($rec,$i->{'key'});
186 my @val = $self->fill_in($rec,$i->{'val'});
187 if ($key && @val && eval $eval) {
188 push @{$self->{'lookup'}->{$key}}, @val;
189 }
190 } else {
191 my $key = $self->fill_in($rec,$i->{'key'});
192 my @val = $self->fill_in($rec,$i->{'val'});
193 if ($key && @val) {
194 push @{$self->{'lookup'}->{$key}}, @val;
195 }
196 }
197 }
198 }
199
200 =head2 get_data
201
202 Returns value from record.
203
204 $self->get_data(\$rec,$f,$sf,$i,\$found);
205
206 Arguments are:
207 record reference C<$rec>,
208 field C<$f>,
209 optional subfiled C<$sf>,
210 index for repeatable values C<$i>.
211
212 Optinal variable C<$found> will be incremeted if thre
213 is field.
214
215 Returns value or empty string.
216
217 =cut
218
219 sub get_data {
220 my $self = shift;
221
222 my ($rec,$f,$sf,$i,$found) = @_;
223 if ($$rec->{$f}) {
224 if ($sf && $$rec->{$f}->[$i]->{$sf}) {
225 $$found++ if (defined($$found));
226 return $$rec->{$f}->[$i]->{$sf};
227 } elsif ($$rec->{$f}->[$i]) {
228 $$found++ if (defined($$found));
229 return $$rec->{$f}->[$i];
230 }
231 } else {
232 return '';
233 }
234 }
235
236 =head2 fill_in
237
238 Workhourse of all: takes record from in-memory structure of database and
239 strings with placeholders and returns string or array of with substituted
240 values from record.
241
242 $webpac->fill_in($rec,'v250^a');
243
244 Optional argument is ordinal number for repeatable fields. By default,
245 it's assume to be first repeatable field (fields are perl array, so first
246 element is 0).
247 Following example will read second value from repeatable field.
248
249 $webpac->fill_in($rec,'Title: v250^a',1);
250
251 This function B<does not> perform parsing of format to inteligenty skip
252 delimiters before fields which aren't used.
253
254 =cut
255
256 sub fill_in {
257 my $self = shift;
258
259 my $rec = shift || confess "need data record";
260 my $format = shift || confess "need format to parse";
261 # iteration (for repeatable fields)
262 my $i = shift || 0;
263
264 # FIXME remove for speedup?
265 confess("need HASH as first argument!") if ($rec !~ /HASH/o);
266
267 my $found = 0;
268
269 # do actual replacement of placeholders
270 $format =~ s/v(\d+)(?:\^(\w))*/$self->get_data(\$rec,$1,$2,$i,\$found)/ges;
271
272 if ($found) {
273 # do we have lookups?
274 if ($format =~ /\[[^\[\]]+\]/o) {
275 return $self->lookup($format);
276 } else {
277 return $format;
278 }
279 } else {
280 return;
281 }
282 }
283
284 =head2 lookup
285
286 Perform lookups on format supplied to it.
287
288 my $txt = $self->lookup('[v900]');
289
290 Lookups can be nested (like C<[d:[a:[v900]]]>).
291
292 =cut
293
294 sub lookup {
295 my $self = shift;
296
297 my $tmp = shift || confess "need format";
298
299 if ($tmp =~ /\[[^\[\]]+\]/o) {
300 my @in = ( $tmp );
301 #print "##lookup $tmp\n";
302 my @out;
303 while (my $f = shift @in) {
304 if ($f =~ /\[([^\[\]]+)\]/) {
305 my $k = $1;
306 if ($self->{'lookup'}->{$k}) {
307 #print "## lookup key = $k\n";
308 foreach my $nv (@{$self->{'lookup'}->{$k}}) {
309 my $tmp2 = $f;
310 $tmp2 =~ s/\[$k\]/$nv/g;
311 push @in, $tmp2;
312 #print "## lookup in => $tmp2\n";
313 }
314 } else {
315 undef $f;
316 }
317 } elsif ($f) {
318 push @out, $f;
319 #print "## lookup out => $f\n";
320 }
321 }
322 return @out;
323 } else {
324 return $tmp;
325 }
326 }
327
328 =head2 parse
329
330 Perform smart parsing of string, skipping delimiters for fields which aren't
331 defined. It can also eval code in format starting with C<eval{...}> and
332 return output or nothing depending on eval code.
333
334 $webpac->parse($rec,'eval{"v901^a" eq "Deskriptor"}descriptor: v250^a', $i);
335
336 =cut
337
338 sub parse {
339 my $self = shift;
340
341 my ($rec, $format, $i) = @_;
342
343 my @out;
344
345 my $eval_code;
346 # remove eval{...} from beginning
347 $eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s);
348
349 my $prefix = '';
350 $prefix = $1 if ($format =~ s/^(.+)(v\d+(?:\^\w)*)/$2/s);
351
352 sub f_sf_del {
353 my ($self,$rec,$out,$f,$sf,$del,$i) = @_;
354
355 my $found=0;
356 my $tmp = $self->get_data($rec,$f,$sf,$i,\$found);
357 if ($found) {
358 push @{$$out}, $tmp;
359 push @{$$out}, $del;
360 }
361 return '';
362 }
363
364 #$format =~ s/(.*)v(\d+)(?:\^(\w))*/f_sf_del($self,\$rec,\@out,$2,$3,$1,$i/ges;
365
366 print Dumper(@out);
367
368 }
369
370 1;

  ViewVC Help
Powered by ViewVC 1.1.26