/[webpac]/trunk2/lib/WebPAC.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk2/lib/WebPAC.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 362 - (show annotations)
Wed Jun 16 16:50:30 2004 UTC (15 years, 7 months ago) by dpavlin
File size: 8795 byte(s)
fetch_rec method

1 package WebPAC;
2
3 use Carp;
4 use Text::Iconv;
5 use Config::IniFiles;
6
7 use Data::Dumper;
8
9 =head1 NAME
10
11 WebPAC - base class for WebPAC
12
13 =head1 DESCRIPTION
14
15 This module implements methods used by WebPAC.
16
17 =head1 METHODS
18
19 =head2 new
20
21 This will create new instance of WebPAC using configuration specified by C<config_file>.
22
23 my $webpac = new WebPAC(
24 config_file => 'name.conf',
25 [code_page => 'ISO-8859-2',]
26 );
27
28 Default C<code_page> is C<ISO-8859-2>.
29
30 It will also read configuration files
31 C<global.conf> (used by indexer and Web font-end)
32 and configuration file specified by C<config_file>
33 which describes databases to be indexed.
34
35 =cut
36
37 sub new {
38 my $class = shift;
39 my $self = {@_};
40 bless($self, $class);
41
42 # fill in default values
43 # output codepage
44 $self->{'code_page'} = 'ISO-8859-2' if (! $self->{'code_page'});
45
46 #
47 # read global.conf
48 #
49
50 $self->{global_config_file} = new Config::IniFiles( -file => 'global.conf' ) || croak "can't open 'global.conf'";
51
52 # read global config parametars
53 foreach my $var (qw(
54 dbi_dbd
55 dbi_dsn
56 dbi_user
57 dbi_passwd
58 show_progress
59 my_unac_filter
60 )) {
61 $self->{global_config}->{$var} = $self->{global_config_file}->val('global', $var);
62 }
63
64 #
65 # read indexer config file
66 #
67
68 $self->{indexer_config_file} = new Config::IniFiles( -file => $self->{config_file} ) || croak "can't open '$self->{config_file}'";
69
70 # read global config parametars
71 foreach my $var (qw(
72 dbi_dbd
73 dbi_dsn
74 dbi_user
75 dbi_passwd
76 show_progress
77 my_unac_filter
78 )) {
79 $self->{global_config}->{$var} = $self->{global_config_file}->val('global', $var);
80 }
81
82 return $self;
83 }
84
85 =head2 open_isis
86
87 Open CDS/ISIS database using OpenIsis module and read all records to memory.
88
89 $webpac->open_isis(
90 filename => '/data/ISIS/ISIS',
91 code_page => '852',
92 limit_mfn => '500',
93 lookup => [ ... ],
94 );
95
96 By default, ISIS code page is assumed to be C<852>.
97
98 If optional parametar C<limit_mfn> is set, it will read just 500 records
99 from database in example above.
100
101 Returns number of last record read into memory (size of database, really).
102
103 C<lookup> argument is an array of lookups to create. Each lookup must have C<key> and
104 C<val>. Optional parametar C<eval> is perl code to evaluate before storing
105 value in index.
106
107 lookup => [
108 { 'key' => 'd:v900', 'val' => 'v250^a' },
109 { 'eval' => '"v901^a" eq "Podruèje"',
110 'key' => 'pa:v561^4:v562^4:v461^1',
111 'val' => 'v900' },
112 ]
113
114 =cut
115
116 sub open_isis {
117 my $self = shift;
118 my $arg = {@_};
119
120 croak "need filename" if (! $arg->{'filename'});
121 my $code_page = $arg->{'code_page'} || '852';
122
123 use OpenIsis;
124
125 #$self->{'isis_code_page'} = $code_page;
126
127 # create Text::Iconv object
128 my $cp = Text::Iconv->new($code_page,$self->{'code_page'});
129
130 my $isis_db = OpenIsis::open($arg->{'filename'});
131
132 my $maxmfn = OpenIsis::maxRowid( $isis_db ) || 1;
133
134 $maxmfn = $self->{limit_mfn} if ($self->{limit_mfn});
135
136 # read database
137 for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) {
138
139 # read record
140 my $row = OpenIsis::read( $isis_db, $mfn );
141 foreach my $k (keys %{$row}) {
142 if ($k ne "mfn") {
143 foreach my $l (@{$row->{$k}}) {
144 $l = $cp->convert($l);
145 # has subfields?
146 my $val;
147 if ($l =~ m/\^/) {
148 foreach my $t (split(/\^/,$l)) {
149 next if (! $t);
150 $val->{substr($t,0,1)} = substr($t,1);
151 }
152 } else {
153 $val = $l;
154 }
155
156 push @{$self->{'data'}->{$mfn}->{$k}}, $val;
157 }
158 }
159
160 }
161
162 # create lookup
163 my $rec = $self->{'data'}->{$mfn};
164 $self->create_lookup($rec, @{$arg->{'lookup'}});
165
166 }
167
168 $self->{'current_mfn'} = 1;
169
170 # store max mfn and return it.
171 return $self->{'max_mfn'} = $maxmfn;
172 }
173
174 =head2 fetch_rec
175
176 Fetch next record from database. It will also display progress bar (once
177 it's implemented, that is).
178
179 my $rec = $webpac->fetch_rec;
180
181 =cut
182
183 sub fetch_rec {
184 my $self = shift;
185
186 my $mfn = $self->{'current_mfn'}++ || confess "it seems that you didn't load database!";
187
188 if ($mfn > $self->{'max_mfn'}) {
189 $self->{'current_mfn'} = $self->{'max_mfn'};
190 return;
191 }
192
193 return $self->{'data'}->{$mfn};
194 }
195
196 =head2 create_lookup
197
198 Create lookup from record using lookup definition.
199
200 =cut
201
202 sub create_lookup {
203 my $self = shift;
204
205 my $rec = shift || confess "need record to create lookup";
206 confess("need HASH as first argument!") if ($rec !~ /HASH/o);
207
208 foreach my $i (@_) {
209 if ($i->{'eval'}) {
210 my $eval = $self->fill_in($rec,$i->{'eval'});
211 my $key = $self->fill_in($rec,$i->{'key'});
212 my @val = $self->fill_in($rec,$i->{'val'});
213 if ($key && @val && eval $eval) {
214 push @{$self->{'lookup'}->{$key}}, @val;
215 }
216 } else {
217 my $key = $self->fill_in($rec,$i->{'key'});
218 my @val = $self->fill_in($rec,$i->{'val'});
219 if ($key && @val) {
220 push @{$self->{'lookup'}->{$key}}, @val;
221 }
222 }
223 }
224 }
225
226 =head2 get_data
227
228 Returns value from record.
229
230 $self->get_data(\$rec,$f,$sf,$i,\$found);
231
232 Arguments are:
233 record reference C<$rec>,
234 field C<$f>,
235 optional subfiled C<$sf>,
236 index for repeatable values C<$i>.
237
238 Optinal variable C<$found> will be incremeted if thre
239 is field.
240
241 Returns value or empty string.
242
243 =cut
244
245 sub get_data {
246 my $self = shift;
247
248 my ($rec,$f,$sf,$i,$found) = @_;
249 if ($$rec->{$f}) {
250 if ($sf && $$rec->{$f}->[$i]->{$sf}) {
251 $$found++ if (defined($$found));
252 return $$rec->{$f}->[$i]->{$sf};
253 } elsif ($$rec->{$f}->[$i]) {
254 $$found++ if (defined($$found));
255 return $$rec->{$f}->[$i];
256 }
257 } else {
258 return '';
259 }
260 }
261
262 =head2 fill_in
263
264 Workhourse of all: takes record from in-memory structure of database and
265 strings with placeholders and returns string or array of with substituted
266 values from record.
267
268 $webpac->fill_in($rec,'v250^a');
269
270 Optional argument is ordinal number for repeatable fields. By default,
271 it's assume to be first repeatable field (fields are perl array, so first
272 element is 0).
273 Following example will read second value from repeatable field.
274
275 $webpac->fill_in($rec,'Title: v250^a',1);
276
277 This function B<does not> perform parsing of format to inteligenty skip
278 delimiters before fields which aren't used.
279
280 =cut
281
282 sub fill_in {
283 my $self = shift;
284
285 my $rec = shift || confess "need data record";
286 my $format = shift || confess "need format to parse";
287 # iteration (for repeatable fields)
288 my $i = shift || 0;
289
290 # FIXME remove for speedup?
291 confess("need HASH as first argument!") if ($rec !~ /HASH/o);
292
293 my $found = 0;
294
295 my $eval_code;
296 # remove eval{...} from beginning
297 $eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s);
298
299 # do actual replacement of placeholders
300 $format =~ s/v(\d+)(?:\^(\w))*/$self->get_data(\$rec,$1,$2,$i,\$found)/ges;
301
302 if ($found) {
303 if ($eval_code) {
304 my $eval = $self->fill_in($rec,$eval_code,$i);
305 return if (! eval $eval);
306 }
307 # do we have lookups?
308 if ($format =~ /\[[^\[\]]+\]/o) {
309 return $self->lookup($format);
310 } else {
311 return $format;
312 }
313 } else {
314 return;
315 }
316 }
317
318 =head2 lookup
319
320 Perform lookups on format supplied to it.
321
322 my $txt = $self->lookup('[v900]');
323
324 Lookups can be nested (like C<[d:[a:[v900]]]>).
325
326 =cut
327
328 sub lookup {
329 my $self = shift;
330
331 my $tmp = shift || confess "need format";
332
333 if ($tmp =~ /\[[^\[\]]+\]/o) {
334 my @in = ( $tmp );
335 #print "##lookup $tmp\n";
336 my @out;
337 while (my $f = shift @in) {
338 if ($f =~ /\[([^\[\]]+)\]/) {
339 my $k = $1;
340 if ($self->{'lookup'}->{$k}) {
341 #print "## lookup key = $k\n";
342 foreach my $nv (@{$self->{'lookup'}->{$k}}) {
343 my $tmp2 = $f;
344 $tmp2 =~ s/\[$k\]/$nv/g;
345 push @in, $tmp2;
346 #print "## lookup in => $tmp2\n";
347 }
348 } else {
349 undef $f;
350 }
351 } elsif ($f) {
352 push @out, $f;
353 #print "## lookup out => $f\n";
354 }
355 }
356 return @out;
357 } else {
358 return $tmp;
359 }
360 }
361
362 =head2 parse
363
364 Perform smart parsing of string, skipping delimiters for fields which aren't
365 defined. It can also eval code in format starting with C<eval{...}> and
366 return output or nothing depending on eval code.
367
368 $webpac->parse($rec,'eval{"v901^a" eq "Deskriptor"}descriptor: v250^a', $i);
369
370 =cut
371
372 sub parse {
373 my $self = shift;
374
375 my ($rec, $format, $i) = @_;
376
377 confess("need HASH as first argument!") if ($rec !~ /HASH/o);
378
379 $i = 0 if (! $i);
380
381 my @out;
382
383 my $eval_code;
384 # remove eval{...} from beginning
385 $eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s);
386
387 my $prefix;
388 my $all_found=0;
389
390 #print "## $format\n";
391 while ($format =~ s/^(.*?)v(\d+)(?:\^(\w))*//s) {
392 #print "## [ $1 | $2 | $3 ] $format\n";
393
394 my $del = $1 || '';
395 $prefix ||= $del if ($all_found == 0);
396
397 my $found = 0;
398 my $tmp = $self->get_data(\$rec,$2,$3,$i,\$found);
399
400 if ($found) {
401 push @out, $del;
402 push @out, $tmp;
403 $all_found += $found;
404 }
405 }
406
407 return if (! $all_found);
408
409 my $out = join('',@out) . $format;
410
411 # add prefix if not there
412 $out = $prefix . $out if ($out !~ m/^\Q$prefix\E/);
413
414 if ($eval_code) {
415 my $eval = $self->fill_in($rec,$eval_code,$i);
416 return if (! eval $eval);
417 }
418
419 return $out;
420 }
421
422 1;

  ViewVC Help
Powered by ViewVC 1.1.26