/[webpac]/trunk2/lib/WebPAC.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk2/lib/WebPAC.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 370 - (show annotations)
Thu Jun 17 17:25:12 2004 UTC (19 years, 9 months ago) by dpavlin
File size: 12956 byte(s)
method output using Template Toolkit to produce output

1 package WebPAC;
2
3 use warnings;
4 use strict;
5
6 use Carp;
7 use Text::Iconv;
8 use Config::IniFiles;
9 use XML::Simple;
10 use Template;
11
12 use Data::Dumper;
13
14 =head1 NAME
15
16 WebPAC - base class for WebPAC
17
18 =head1 DESCRIPTION
19
20 This module implements methods used by WebPAC.
21
22 =head1 METHODS
23
24 =head2 new
25
26 This will create new instance of WebPAC using configuration specified by C<config_file>.
27
28 my $webpac = new WebPAC(
29 config_file => 'name.conf',
30 [code_page => 'ISO-8859-2',]
31 );
32
33 Default C<code_page> is C<ISO-8859-2>.
34
35 It will also read configuration files
36 C<global.conf> (used by indexer and Web font-end)
37 and configuration file specified by C<config_file>
38 which describes databases to be indexed.
39
40 =cut
41
42 # mapping between data type and tag which specify
43 # format in XML file
44 my %type2tag = (
45 'isis' => 'isis',
46 # 'excel' => 'column',
47 # 'marc' => 'marc',
48 # 'feed' => 'feed'
49 );
50
51 sub new {
52 my $class = shift;
53 my $self = {@_};
54 bless($self, $class);
55
56 # fill in default values
57 # output codepage
58 $self->{'code_page'} = 'ISO-8859-2' if (! $self->{'code_page'});
59
60 #
61 # read global.conf
62 #
63
64 my $config = new Config::IniFiles( -file => 'global.conf' ) || croak "can't open 'global.conf'";
65
66 # read global config parametars
67 foreach my $var (qw(
68 dbi_dbd
69 dbi_dsn
70 dbi_user
71 dbi_passwd
72 show_progress
73 my_unac_filter
74 output_template
75 )) {
76 $self->{'global_config'}->{$var} = $config->val('global', $var);
77 }
78
79 #
80 # read indexer config file
81 #
82
83 $self->{indexer_config_file} = new Config::IniFiles( -file => $self->{config_file} ) || croak "can't open '$self->{config_file}'";
84
85 # create UTF-8 convertor for import_xml files
86 $self->{'utf2cp'} = Text::Iconv->new('UTF-8' ,$self->{'code_page'});
87
88 # create Template toolkit instance
89 $self->{'tt'} = Template->new(
90 INCLUDE_PATH => ($self->{'global_config_file'}->{'output_template'} || './output_template'),
91 # FILTERS => {
92 # 'foo' => \&foo_filter,
93 # },
94 EVAL_PERL => 1,
95 );
96
97 return $self;
98 }
99
100 =head2 open_isis
101
102 Open CDS/ISIS database using OpenIsis module and read all records to memory.
103
104 $webpac->open_isis(
105 filename => '/data/ISIS/ISIS',
106 code_page => '852',
107 limit_mfn => '500',
108 lookup => [ ... ],
109 );
110
111 By default, ISIS code page is assumed to be C<852>.
112
113 If optional parametar C<limit_mfn> is set, it will read just 500 records
114 from database in example above.
115
116 C<lookup> argument is an array of lookups to create. Each lookup must have C<key> and
117 C<val>. Optional parametar C<eval> is perl code to evaluate before storing
118 value in index.
119
120 lookup => [
121 { 'key' => 'd:v900', 'val' => 'v250^a' },
122 { 'eval' => '"v901^a" eq "Podruèje"',
123 'key' => 'pa:v561^4:v562^4:v461^1',
124 'val' => 'v900' },
125 ]
126
127 Returns number of last record read into memory (size of database, really).
128
129 =cut
130
131 sub open_isis {
132 my $self = shift;
133 my $arg = {@_};
134
135 croak "need filename" if (! $arg->{'filename'});
136 my $code_page = $arg->{'code_page'} || '852';
137
138 use OpenIsis;
139
140 #$self->{'isis_code_page'} = $code_page;
141
142 # create Text::Iconv object
143 my $cp = Text::Iconv->new($code_page,$self->{'code_page'});
144
145 print STDERR "reading ISIS database '",$arg->{'filename'},"'\n" if ($self->{'debug'});
146
147 my $isis_db = OpenIsis::open($arg->{'filename'});
148
149 my $maxmfn = OpenIsis::maxRowid( $isis_db ) || 1;
150
151 $maxmfn = $self->{limit_mfn} if ($self->{limit_mfn});
152
153 print STDERR "processing $maxmfn records...\n" if ($self->{'debug'});
154
155 # read database
156 for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) {
157
158 # read record
159 my $row = OpenIsis::read( $isis_db, $mfn );
160 foreach my $k (keys %{$row}) {
161 if ($k ne "mfn") {
162 foreach my $l (@{$row->{$k}}) {
163 $l = $cp->convert($l);
164 # has subfields?
165 my $val;
166 if ($l =~ m/\^/) {
167 foreach my $t (split(/\^/,$l)) {
168 next if (! $t);
169 $val->{substr($t,0,1)} = substr($t,1);
170 }
171 } else {
172 $val = $l;
173 }
174
175 push @{$self->{'data'}->{$mfn}->{$k}}, $val;
176 }
177 }
178
179 }
180
181 # create lookup
182 my $rec = $self->{'data'}->{$mfn};
183 $self->create_lookup($rec, @{$arg->{'lookup'}});
184
185 }
186
187 $self->{'current_mfn'} = 1;
188
189 # store max mfn and return it.
190 return $self->{'max_mfn'} = $maxmfn;
191 }
192
193 =head2 fetch_rec
194
195 Fetch next record from database. It will also display progress bar (once
196 it's implemented, that is).
197
198 my $rec = $webpac->fetch_rec;
199
200 =cut
201
202 sub fetch_rec {
203 my $self = shift;
204
205 my $mfn = $self->{'current_mfn'}++ || confess "it seems that you didn't load database!";
206
207 if ($mfn > $self->{'max_mfn'}) {
208 $self->{'current_mfn'} = $self->{'max_mfn'};
209 return;
210 }
211
212 return $self->{'data'}->{$mfn};
213 }
214
215 =head2 open_import_xml
216
217 Read file from C<import_xml/> directory and parse it.
218
219 $webpac->open_import_xml(type => 'isis');
220
221 =cut
222
223 sub open_import_xml {
224 my $self = shift;
225
226 my $arg = {@_};
227 confess "need type to load file from import_xml/" if (! $arg->{'type'});
228
229 $self->{'type'} = $arg->{'type'};
230
231 my $type_base = $arg->{'type'};
232 $type_base =~ s/_.*$//g;
233
234 $self->{'tag'} = $type2tag{$type_base};
235
236 print STDERR "using type ",$self->{'type'}," tag ",$self->{'tag'},"\n" if ($self->{'debug'});
237
238 my $f = "./import_xml/".$self->{'type'}.".xml";
239 confess "import_xml file '$f' doesn't exist!" if (! -e "$f");
240
241 print STDERR "reading '$f'\n" if ($self->{'debug'});
242
243 $self->{'import_xml'} = XMLin($f,
244 ForceArray => [ $self->{'tag'}, 'config', 'format' ],
245 ForceContent => 1
246 );
247
248 }
249
250 =head2 create_lookup
251
252 Create lookup from record using lookup definition.
253
254 $self->create_lookup($rec, @lookups);
255
256 Called internally by C<open_*> methods.
257
258 =cut
259
260 sub create_lookup {
261 my $self = shift;
262
263 my $rec = shift || confess "need record to create lookup";
264 confess("need HASH as first argument!") if ($rec !~ /HASH/o);
265
266 foreach my $i (@_) {
267 if ($i->{'eval'}) {
268 my $eval = $self->fill_in($rec,$i->{'eval'});
269 my $key = $self->fill_in($rec,$i->{'key'});
270 my @val = $self->fill_in($rec,$i->{'val'});
271 if ($key && @val && eval $eval) {
272 push @{$self->{'lookup'}->{$key}}, @val;
273 }
274 } else {
275 my $key = $self->fill_in($rec,$i->{'key'});
276 my @val = $self->fill_in($rec,$i->{'val'});
277 if ($key && @val) {
278 push @{$self->{'lookup'}->{$key}}, @val;
279 }
280 }
281 }
282 }
283
284 =head2 get_data
285
286 Returns value from record.
287
288 my $text = $self->get_data(\$rec,$f,$sf,$i,\$found);
289
290 Arguments are:
291 record reference C<$rec>,
292 field C<$f>,
293 optional subfiled C<$sf>,
294 index for repeatable values C<$i>.
295
296 Optinal variable C<$found> will be incremeted if there
297 is field.
298
299 Returns value or empty string.
300
301 =cut
302
303 sub get_data {
304 my $self = shift;
305
306 my ($rec,$f,$sf,$i,$found) = @_;
307
308 if ($$rec->{$f}) {
309 return '' if (! $$rec->{$f}->[$i]);
310 if ($sf && $$rec->{$f}->[$i]->{$sf}) {
311 $$found++ if (defined($$found));
312 return $$rec->{$f}->[$i]->{$sf};
313 } elsif ($$rec->{$f}->[$i]) {
314 $$found++ if (defined($$found));
315 # it still might have subfield, just
316 # not specified, so we'll dump all
317 if ($$rec->{$f}->[$i] =~ /HASH/o) {
318 my $out;
319 foreach my $k (keys %{$$rec->{$f}->[$i]}) {
320 $out .= $$rec->{$f}->[$i]->{$k}." ";
321 }
322 return $out;
323 } else {
324 return $$rec->{$f}->[$i];
325 }
326 }
327 } else {
328 return '';
329 }
330 }
331
332 =head2 fill_in
333
334 Workhourse of all: takes record from in-memory structure of database and
335 strings with placeholders and returns string or array of with substituted
336 values from record.
337
338 my $text = $webpac->fill_in($rec,'v250^a');
339
340 Optional argument is ordinal number for repeatable fields. By default,
341 it's assume to be first repeatable field (fields are perl array, so first
342 element is 0).
343 Following example will read second value from repeatable field.
344
345 my $text = $webpac->fill_in($rec,'Title: v250^a',1);
346
347 This function B<does not> perform parsing of format to inteligenty skip
348 delimiters before fields which aren't used.
349
350 =cut
351
352 sub fill_in {
353 my $self = shift;
354
355 my $rec = shift || confess "need data record";
356 my $format = shift || confess "need format to parse";
357 # iteration (for repeatable fields)
358 my $i = shift || 0;
359
360 # FIXME remove for speedup?
361 confess("need HASH as first argument!") if ($rec !~ /HASH/o);
362
363 my $found = 0;
364
365 my $eval_code;
366 # remove eval{...} from beginning
367 $eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s);
368
369 # do actual replacement of placeholders
370 $format =~ s/v(\d+)(?:\^(\w))*/$self->get_data(\$rec,$1,$2,$i,\$found)/ges;
371
372 if ($found) {
373 if ($eval_code) {
374 my $eval = $self->fill_in($rec,$eval_code,$i);
375 return if (! eval $eval);
376 }
377 # do we have lookups?
378 if ($format =~ /\[[^\[\]]+\]/o) {
379 return $self->lookup($format);
380 } else {
381 return $format;
382 }
383 } else {
384 return;
385 }
386 }
387
388 =head2 lookup
389
390 Perform lookups on format supplied to it.
391
392 my $text = $self->lookup('[v900]');
393
394 Lookups can be nested (like C<[d:[a:[v900]]]>).
395
396 =cut
397
398 sub lookup {
399 my $self = shift;
400
401 my $tmp = shift || confess "need format";
402
403 if ($tmp =~ /\[[^\[\]]+\]/o) {
404 my @in = ( $tmp );
405 my @out;
406 while (my $f = shift @in) {
407 if ($f =~ /\[([^\[\]]+)\]/) {
408 my $k = $1;
409 if ($self->{'lookup'}->{$k}) {
410 foreach my $nv (@{$self->{'lookup'}->{$k}}) {
411 my $tmp2 = $f;
412 $tmp2 =~ s/\[$k\]/$nv/g;
413 push @in, $tmp2;
414 }
415 } else {
416 undef $f;
417 }
418 } elsif ($f) {
419 push @out, $f;
420 }
421 }
422 return @out;
423 } else {
424 return $tmp;
425 }
426 }
427
428 =head2 parse
429
430 Perform smart parsing of string, skipping delimiters for fields which aren't
431 defined. It can also eval code in format starting with C<eval{...}> and
432 return output or nothing depending on eval code.
433
434 my $text = $webpac->parse($rec,'eval{"v901^a" eq "Deskriptor"}descriptor: v250^a', $i);
435
436 =cut
437
438 sub parse {
439 my $self = shift;
440
441 my ($rec, $format_utf8, $i) = @_;
442
443 return if (! $format_utf8);
444
445 confess("need HASH as first argument!") if ($rec !~ /HASH/o);
446 confess("need utf2cp Text::Iconv object!") if (! $self->{'utf2cp'});
447
448 $i = 0 if (! $i);
449
450 my $format = $self->{'utf2cp'}->convert($format_utf8) || confess("can't convert '$format_utf8' from UTF-8 to ",$self->{'code_page'});
451
452 my @out;
453
454 my $eval_code;
455 # remove eval{...} from beginning
456 $eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s);
457
458 my $prefix;
459 my $all_found=0;
460
461 while ($format =~ s/^(.*?)v(\d+)(?:\^(\w))*//s) {
462
463 my $del = $1 || '';
464 $prefix ||= $del if ($all_found == 0);
465
466 my $found = 0;
467 my $tmp = $self->get_data(\$rec,$2,$3,$i,\$found);
468
469 if ($found) {
470 push @out, $del;
471 push @out, $tmp;
472 $all_found += $found;
473 }
474 }
475
476 return if (! $all_found);
477
478 my $out = join('',@out) . $format;
479
480 # add prefix if not there
481 $out = $prefix . $out if ($out !~ m/^\Q$prefix\E/);
482
483 if ($eval_code) {
484 my $eval = $self->fill_in($rec,$eval_code,$i);
485 return if (! eval $eval);
486 }
487
488 return $out;
489 }
490
491 =head2 parse_to_arr
492
493 Similar to C<parse>, but returns array of all repeatable fields
494
495 my @arr = $webpac->parse_to_arr($rec,'v250^a');
496
497 =cut
498
499 sub parse_to_arr {
500 my $self = shift;
501
502 my ($rec, $format_utf8) = @_;
503
504 confess("need HASH as first argument!") if ($rec !~ /HASH/o);
505 return if (! $format_utf8);
506
507 my $i = 0;
508 my @arr;
509
510 while (my $v = $self->parse($rec,$format_utf8,$i++)) {
511 push @arr, $v;
512 }
513
514 return @arr;
515 }
516
517 =head2 data_structure
518
519 Create in-memory data structure which represents layout from C<import_xml>.
520 It is used later to produce output.
521
522 my @ds = $webpac->data_structure($rec);
523
524 =cut
525
526 # private method _sort_by_order
527 # sort subrouting using order="" attribute
528 sub _sort_by_order {
529 my $self = shift;
530
531 my $va = $self->{'import_xml'}->{'indexer'}->{$a}->{'order'} ||
532 $self->{'import_xml'}->{'indexer'}->{$a};
533 my $vb = $self->{'import_xml'}->{'indexer'}->{$b}->{'order'} ||
534 $self->{'import_xml'}->{'indexer'}->{$b};
535
536 return $va <=> $vb;
537 }
538
539 sub data_structure {
540 my $self = shift;
541
542 my $rec = shift;
543 confess("need HASH as first argument!") if ($rec !~ /HASH/o);
544
545 my @sorted_tags;
546 if ($self->{tags_by_order}) {
547 @sorted_tags = @{$self->{tags_by_order}};
548 } else {
549 @sorted_tags = sort { $self->_sort_by_order } keys %{$self->{'import_xml'}->{'indexer'}};
550 $self->{tags_by_order} = \@sorted_tags;
551 }
552
553 my @ds;
554
555 foreach my $field (@sorted_tags) {
556
557 my $row;
558
559 #print "field $field [",$self->{'tag'},"] = ",Dumper($self->{'import_xml'}->{'indexer'}->{$field}->{$self->{'tag'}});
560
561 foreach my $tag (@{$self->{'import_xml'}->{'indexer'}->{$field}->{$self->{'tag'}}}) {
562 my @v = $self->parse_to_arr($rec,$tag->{'content'});
563
564 next if (! @v);
565
566 # does tag have type?
567 if ($tag->{'type'}) {
568 push @{$row->{$tag->{'type'}}}, @v;
569 } else {
570 push @{$row->{'display'}}, @v;
571 push @{$row->{'swish'}}, @v;
572 }
573 }
574
575 if ($row) {
576 $row->{'tag'} = $field;
577 push @ds, $row;
578 }
579
580 }
581
582 return @ds;
583
584 }
585
586 =head2 output
587
588 Create output from in-memory data structure using Template Toolkit template.
589
590 my $text = $webpac->output( template => 'text.tt', data => @ds );
591
592 =cut
593
594 sub output {
595 my $self = shift;
596
597 my $args = {@_};
598
599 confess("need template name") if (! $args->{'template'});
600 confess("need data array") if (! $args->{'data'});
601
602 my $out;
603
604 $self->{'tt'}->process(
605 $args->{'template'},
606 $args,
607 \$out
608 ) || confess $self->{'tt'}->error();
609
610 return $out;
611 }
612
613 1;

  ViewVC Help
Powered by ViewVC 1.1.26