/[webpac]/trunk2/lib/WebPAC.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk2/lib/WebPAC.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 370 - (hide annotations)
Thu Jun 17 17:25:12 2004 UTC (19 years, 9 months ago) by dpavlin
File size: 12956 byte(s)
method output using Template Toolkit to produce output

1 dpavlin 354 package WebPAC;
2 dpavlin 352
3 dpavlin 367 use warnings;
4     use strict;
5    
6 dpavlin 352 use Carp;
7 dpavlin 353 use Text::Iconv;
8     use Config::IniFiles;
9 dpavlin 363 use XML::Simple;
10 dpavlin 370 use Template;
11 dpavlin 352
12 dpavlin 358 use Data::Dumper;
13    
14 dpavlin 352 =head1 NAME
15    
16 dpavlin 354 WebPAC - base class for WebPAC
17 dpavlin 352
18     =head1 DESCRIPTION
19    
20 dpavlin 354 This module implements methods used by WebPAC.
21 dpavlin 352
22     =head1 METHODS
23    
24     =head2 new
25    
26 dpavlin 354 This will create new instance of WebPAC using configuration specified by C<config_file>.
27 dpavlin 352
28 dpavlin 354 my $webpac = new WebPAC(
29 dpavlin 352 config_file => 'name.conf',
30     [code_page => 'ISO-8859-2',]
31     );
32    
33     Default C<code_page> is C<ISO-8859-2>.
34    
35 dpavlin 353 It will also read configuration files
36     C<global.conf> (used by indexer and Web font-end)
37     and configuration file specified by C<config_file>
38     which describes databases to be indexed.
39    
40 dpavlin 352 =cut
41    
42 dpavlin 363 # mapping between data type and tag which specify
43     # format in XML file
44     my %type2tag = (
45     'isis' => 'isis',
46     # 'excel' => 'column',
47     # 'marc' => 'marc',
48     # 'feed' => 'feed'
49     );
50    
51 dpavlin 352 sub new {
52     my $class = shift;
53     my $self = {@_};
54     bless($self, $class);
55    
56     # fill in default values
57     # output codepage
58     $self->{'code_page'} = 'ISO-8859-2' if (! $self->{'code_page'});
59    
60 dpavlin 353 #
61     # read global.conf
62     #
63 dpavlin 352
64 dpavlin 370 my $config = new Config::IniFiles( -file => 'global.conf' ) || croak "can't open 'global.conf'";
65 dpavlin 352
66     # read global config parametars
67     foreach my $var (qw(
68     dbi_dbd
69     dbi_dsn
70     dbi_user
71     dbi_passwd
72     show_progress
73     my_unac_filter
74 dpavlin 370 output_template
75 dpavlin 352 )) {
76 dpavlin 370 $self->{'global_config'}->{$var} = $config->val('global', $var);
77 dpavlin 352 }
78    
79 dpavlin 353 #
80     # read indexer config file
81     #
82 dpavlin 352
83     $self->{indexer_config_file} = new Config::IniFiles( -file => $self->{config_file} ) || croak "can't open '$self->{config_file}'";
84    
85 dpavlin 370 # create UTF-8 convertor for import_xml files
86 dpavlin 366 $self->{'utf2cp'} = Text::Iconv->new('UTF-8' ,$self->{'code_page'});
87 dpavlin 370
88     # create Template toolkit instance
89     $self->{'tt'} = Template->new(
90     INCLUDE_PATH => ($self->{'global_config_file'}->{'output_template'} || './output_template'),
91     # FILTERS => {
92     # 'foo' => \&foo_filter,
93     # },
94     EVAL_PERL => 1,
95     );
96    
97 dpavlin 352 return $self;
98     }
99    
100     =head2 open_isis
101    
102     Open CDS/ISIS database using OpenIsis module and read all records to memory.
103    
104     $webpac->open_isis(
105     filename => '/data/ISIS/ISIS',
106     code_page => '852',
107     limit_mfn => '500',
108     lookup => [ ... ],
109     );
110    
111     By default, ISIS code page is assumed to be C<852>.
112    
113 dpavlin 353 If optional parametar C<limit_mfn> is set, it will read just 500 records
114     from database in example above.
115 dpavlin 352
116     C<lookup> argument is an array of lookups to create. Each lookup must have C<key> and
117     C<val>. Optional parametar C<eval> is perl code to evaluate before storing
118     value in index.
119    
120     lookup => [
121     { 'key' => 'd:v900', 'val' => 'v250^a' },
122     { 'eval' => '"v901^a" eq "Podruèje"',
123     'key' => 'pa:v561^4:v562^4:v461^1',
124     'val' => 'v900' },
125     ]
126    
127 dpavlin 367 Returns number of last record read into memory (size of database, really).
128    
129 dpavlin 352 =cut
130    
131     sub open_isis {
132     my $self = shift;
133     my $arg = {@_};
134    
135     croak "need filename" if (! $arg->{'filename'});
136     my $code_page = $arg->{'code_page'} || '852';
137    
138 dpavlin 353 use OpenIsis;
139    
140 dpavlin 352 #$self->{'isis_code_page'} = $code_page;
141    
142     # create Text::Iconv object
143     my $cp = Text::Iconv->new($code_page,$self->{'code_page'});
144    
145 dpavlin 363 print STDERR "reading ISIS database '",$arg->{'filename'},"'\n" if ($self->{'debug'});
146    
147 dpavlin 352 my $isis_db = OpenIsis::open($arg->{'filename'});
148    
149     my $maxmfn = OpenIsis::maxRowid( $isis_db ) || 1;
150    
151 dpavlin 357 $maxmfn = $self->{limit_mfn} if ($self->{limit_mfn});
152    
153 dpavlin 363 print STDERR "processing $maxmfn records...\n" if ($self->{'debug'});
154    
155 dpavlin 352 # read database
156     for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) {
157    
158     # read record
159     my $row = OpenIsis::read( $isis_db, $mfn );
160     foreach my $k (keys %{$row}) {
161     if ($k ne "mfn") {
162     foreach my $l (@{$row->{$k}}) {
163     $l = $cp->convert($l);
164     # has subfields?
165     my $val;
166     if ($l =~ m/\^/) {
167     foreach my $t (split(/\^/,$l)) {
168     next if (! $t);
169     $val->{substr($t,0,1)} = substr($t,1);
170     }
171     } else {
172     $val = $l;
173     }
174    
175     push @{$self->{'data'}->{$mfn}->{$k}}, $val;
176     }
177     }
178    
179     }
180    
181     # create lookup
182 dpavlin 355 my $rec = $self->{'data'}->{$mfn};
183     $self->create_lookup($rec, @{$arg->{'lookup'}});
184 dpavlin 352
185     }
186    
187 dpavlin 362 $self->{'current_mfn'} = 1;
188    
189 dpavlin 352 # store max mfn and return it.
190     return $self->{'max_mfn'} = $maxmfn;
191     }
192    
193 dpavlin 362 =head2 fetch_rec
194    
195     Fetch next record from database. It will also display progress bar (once
196     it's implemented, that is).
197    
198     my $rec = $webpac->fetch_rec;
199    
200     =cut
201    
202     sub fetch_rec {
203     my $self = shift;
204    
205     my $mfn = $self->{'current_mfn'}++ || confess "it seems that you didn't load database!";
206    
207     if ($mfn > $self->{'max_mfn'}) {
208     $self->{'current_mfn'} = $self->{'max_mfn'};
209     return;
210     }
211    
212     return $self->{'data'}->{$mfn};
213     }
214    
215 dpavlin 363 =head2 open_import_xml
216    
217     Read file from C<import_xml/> directory and parse it.
218    
219     $webpac->open_import_xml(type => 'isis');
220    
221     =cut
222    
223     sub open_import_xml {
224     my $self = shift;
225    
226     my $arg = {@_};
227     confess "need type to load file from import_xml/" if (! $arg->{'type'});
228    
229 dpavlin 366 $self->{'type'} = $arg->{'type'};
230 dpavlin 363
231 dpavlin 366 my $type_base = $arg->{'type'};
232 dpavlin 363 $type_base =~ s/_.*$//g;
233    
234 dpavlin 366 $self->{'tag'} = $type2tag{$type_base};
235    
236     print STDERR "using type ",$self->{'type'}," tag ",$self->{'tag'},"\n" if ($self->{'debug'});
237    
238     my $f = "./import_xml/".$self->{'type'}.".xml";
239 dpavlin 363 confess "import_xml file '$f' doesn't exist!" if (! -e "$f");
240    
241     print STDERR "reading '$f'\n" if ($self->{'debug'});
242    
243     $self->{'import_xml'} = XMLin($f,
244 dpavlin 366 ForceArray => [ $self->{'tag'}, 'config', 'format' ],
245 dpavlin 363 ForceContent => 1
246     );
247    
248     }
249    
250 dpavlin 355 =head2 create_lookup
251    
252     Create lookup from record using lookup definition.
253    
254 dpavlin 367 $self->create_lookup($rec, @lookups);
255    
256     Called internally by C<open_*> methods.
257    
258 dpavlin 355 =cut
259    
260     sub create_lookup {
261     my $self = shift;
262    
263     my $rec = shift || confess "need record to create lookup";
264     confess("need HASH as first argument!") if ($rec !~ /HASH/o);
265    
266     foreach my $i (@_) {
267     if ($i->{'eval'}) {
268     my $eval = $self->fill_in($rec,$i->{'eval'});
269     my $key = $self->fill_in($rec,$i->{'key'});
270     my @val = $self->fill_in($rec,$i->{'val'});
271     if ($key && @val && eval $eval) {
272     push @{$self->{'lookup'}->{$key}}, @val;
273     }
274     } else {
275     my $key = $self->fill_in($rec,$i->{'key'});
276     my @val = $self->fill_in($rec,$i->{'val'});
277     if ($key && @val) {
278     push @{$self->{'lookup'}->{$key}}, @val;
279     }
280     }
281     }
282     }
283    
284 dpavlin 356 =head2 get_data
285    
286     Returns value from record.
287    
288 dpavlin 367 my $text = $self->get_data(\$rec,$f,$sf,$i,\$found);
289 dpavlin 356
290     Arguments are:
291     record reference C<$rec>,
292     field C<$f>,
293     optional subfiled C<$sf>,
294     index for repeatable values C<$i>.
295    
296 dpavlin 367 Optinal variable C<$found> will be incremeted if there
297 dpavlin 356 is field.
298    
299     Returns value or empty string.
300    
301     =cut
302    
303     sub get_data {
304     my $self = shift;
305    
306     my ($rec,$f,$sf,$i,$found) = @_;
307 dpavlin 367
308 dpavlin 356 if ($$rec->{$f}) {
309 dpavlin 367 return '' if (! $$rec->{$f}->[$i]);
310 dpavlin 356 if ($sf && $$rec->{$f}->[$i]->{$sf}) {
311     $$found++ if (defined($$found));
312     return $$rec->{$f}->[$i]->{$sf};
313     } elsif ($$rec->{$f}->[$i]) {
314     $$found++ if (defined($$found));
315 dpavlin 366 # it still might have subfield, just
316     # not specified, so we'll dump all
317     if ($$rec->{$f}->[$i] =~ /HASH/o) {
318     my $out;
319     foreach my $k (keys %{$$rec->{$f}->[$i]}) {
320     $out .= $$rec->{$f}->[$i]->{$k}." ";
321     }
322     return $out;
323     } else {
324     return $$rec->{$f}->[$i];
325     }
326 dpavlin 356 }
327     } else {
328     return '';
329     }
330     }
331    
332 dpavlin 352 =head2 fill_in
333    
334     Workhourse of all: takes record from in-memory structure of database and
335     strings with placeholders and returns string or array of with substituted
336     values from record.
337    
338 dpavlin 367 my $text = $webpac->fill_in($rec,'v250^a');
339 dpavlin 352
340     Optional argument is ordinal number for repeatable fields. By default,
341 dpavlin 353 it's assume to be first repeatable field (fields are perl array, so first
342     element is 0).
343     Following example will read second value from repeatable field.
344 dpavlin 352
345 dpavlin 367 my $text = $webpac->fill_in($rec,'Title: v250^a',1);
346 dpavlin 353
347     This function B<does not> perform parsing of format to inteligenty skip
348     delimiters before fields which aren't used.
349    
350 dpavlin 352 =cut
351    
352     sub fill_in {
353     my $self = shift;
354    
355     my $rec = shift || confess "need data record";
356     my $format = shift || confess "need format to parse";
357     # iteration (for repeatable fields)
358     my $i = shift || 0;
359    
360     # FIXME remove for speedup?
361 dpavlin 355 confess("need HASH as first argument!") if ($rec !~ /HASH/o);
362 dpavlin 352
363     my $found = 0;
364    
365 dpavlin 359 my $eval_code;
366     # remove eval{...} from beginning
367     $eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s);
368    
369 dpavlin 352 # do actual replacement of placeholders
370 dpavlin 356 $format =~ s/v(\d+)(?:\^(\w))*/$self->get_data(\$rec,$1,$2,$i,\$found)/ges;
371 dpavlin 352
372 dpavlin 353 if ($found) {
373 dpavlin 359 if ($eval_code) {
374     my $eval = $self->fill_in($rec,$eval_code,$i);
375     return if (! eval $eval);
376     }
377 dpavlin 353 # do we have lookups?
378     if ($format =~ /\[[^\[\]]+\]/o) {
379     return $self->lookup($format);
380     } else {
381     return $format;
382     }
383 dpavlin 352 } else {
384     return;
385     }
386     }
387    
388     =head2 lookup
389    
390 dpavlin 355 Perform lookups on format supplied to it.
391 dpavlin 352
392 dpavlin 367 my $text = $self->lookup('[v900]');
393 dpavlin 352
394 dpavlin 355 Lookups can be nested (like C<[d:[a:[v900]]]>).
395    
396 dpavlin 352 =cut
397    
398     sub lookup {
399     my $self = shift;
400    
401     my $tmp = shift || confess "need format";
402    
403 dpavlin 353 if ($tmp =~ /\[[^\[\]]+\]/o) {
404 dpavlin 352 my @in = ( $tmp );
405     my @out;
406     while (my $f = shift @in) {
407     if ($f =~ /\[([^\[\]]+)\]/) {
408     my $k = $1;
409     if ($self->{'lookup'}->{$k}) {
410     foreach my $nv (@{$self->{'lookup'}->{$k}}) {
411     my $tmp2 = $f;
412     $tmp2 =~ s/\[$k\]/$nv/g;
413     push @in, $tmp2;
414     }
415     } else {
416     undef $f;
417     }
418     } elsif ($f) {
419     push @out, $f;
420     }
421     }
422     return @out;
423     } else {
424     return $tmp;
425     }
426     }
427    
428 dpavlin 356 =head2 parse
429    
430     Perform smart parsing of string, skipping delimiters for fields which aren't
431     defined. It can also eval code in format starting with C<eval{...}> and
432     return output or nothing depending on eval code.
433    
434 dpavlin 367 my $text = $webpac->parse($rec,'eval{"v901^a" eq "Deskriptor"}descriptor: v250^a', $i);
435 dpavlin 356
436     =cut
437    
438     sub parse {
439     my $self = shift;
440    
441 dpavlin 366 my ($rec, $format_utf8, $i) = @_;
442 dpavlin 356
443 dpavlin 366 return if (! $format_utf8);
444    
445 dpavlin 358 confess("need HASH as first argument!") if ($rec !~ /HASH/o);
446 dpavlin 366 confess("need utf2cp Text::Iconv object!") if (! $self->{'utf2cp'});
447 dpavlin 358
448     $i = 0 if (! $i);
449    
450 dpavlin 366 my $format = $self->{'utf2cp'}->convert($format_utf8) || confess("can't convert '$format_utf8' from UTF-8 to ",$self->{'code_page'});
451    
452 dpavlin 356 my @out;
453    
454     my $eval_code;
455     # remove eval{...} from beginning
456     $eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s);
457    
458 dpavlin 358 my $prefix;
459     my $all_found=0;
460 dpavlin 356
461 dpavlin 358 while ($format =~ s/^(.*?)v(\d+)(?:\^(\w))*//s) {
462 dpavlin 356
463 dpavlin 358 my $del = $1 || '';
464 dpavlin 359 $prefix ||= $del if ($all_found == 0);
465 dpavlin 358
466     my $found = 0;
467     my $tmp = $self->get_data(\$rec,$2,$3,$i,\$found);
468    
469 dpavlin 356 if ($found) {
470 dpavlin 358 push @out, $del;
471     push @out, $tmp;
472     $all_found += $found;
473 dpavlin 356 }
474     }
475    
476 dpavlin 358 return if (! $all_found);
477 dpavlin 356
478 dpavlin 358 my $out = join('',@out) . $format;
479    
480     # add prefix if not there
481     $out = $prefix . $out if ($out !~ m/^\Q$prefix\E/);
482 dpavlin 367
483 dpavlin 359 if ($eval_code) {
484     my $eval = $self->fill_in($rec,$eval_code,$i);
485     return if (! eval $eval);
486     }
487    
488 dpavlin 358 return $out;
489 dpavlin 356 }
490    
491 dpavlin 367 =head2 parse_to_arr
492    
493     Similar to C<parse>, but returns array of all repeatable fields
494    
495     my @arr = $webpac->parse_to_arr($rec,'v250^a');
496    
497     =cut
498    
499     sub parse_to_arr {
500     my $self = shift;
501    
502     my ($rec, $format_utf8) = @_;
503    
504     confess("need HASH as first argument!") if ($rec !~ /HASH/o);
505     return if (! $format_utf8);
506    
507     my $i = 0;
508     my @arr;
509    
510     while (my $v = $self->parse($rec,$format_utf8,$i++)) {
511     push @arr, $v;
512     }
513    
514     return @arr;
515     }
516    
517 dpavlin 366 =head2 data_structure
518    
519     Create in-memory data structure which represents layout from C<import_xml>.
520     It is used later to produce output.
521    
522 dpavlin 368 my @ds = $webpac->data_structure($rec);
523 dpavlin 366
524     =cut
525    
526     # private method _sort_by_order
527     # sort subrouting using order="" attribute
528     sub _sort_by_order {
529     my $self = shift;
530    
531     my $va = $self->{'import_xml'}->{'indexer'}->{$a}->{'order'} ||
532     $self->{'import_xml'}->{'indexer'}->{$a};
533     my $vb = $self->{'import_xml'}->{'indexer'}->{$b}->{'order'} ||
534     $self->{'import_xml'}->{'indexer'}->{$b};
535    
536     return $va <=> $vb;
537     }
538    
539     sub data_structure {
540     my $self = shift;
541    
542     my $rec = shift;
543     confess("need HASH as first argument!") if ($rec !~ /HASH/o);
544    
545     my @sorted_tags;
546     if ($self->{tags_by_order}) {
547     @sorted_tags = @{$self->{tags_by_order}};
548     } else {
549     @sorted_tags = sort { $self->_sort_by_order } keys %{$self->{'import_xml'}->{'indexer'}};
550     $self->{tags_by_order} = \@sorted_tags;
551     }
552    
553 dpavlin 368 my @ds;
554 dpavlin 366
555     foreach my $field (@sorted_tags) {
556    
557     my $row;
558    
559     #print "field $field [",$self->{'tag'},"] = ",Dumper($self->{'import_xml'}->{'indexer'}->{$field}->{$self->{'tag'}});
560    
561     foreach my $tag (@{$self->{'import_xml'}->{'indexer'}->{$field}->{$self->{'tag'}}}) {
562 dpavlin 367 my @v = $self->parse_to_arr($rec,$tag->{'content'});
563 dpavlin 366
564 dpavlin 367 next if (! @v);
565 dpavlin 366
566     # does tag have type?
567     if ($tag->{'type'}) {
568 dpavlin 367 push @{$row->{$tag->{'type'}}}, @v;
569 dpavlin 366 } else {
570 dpavlin 367 push @{$row->{'display'}}, @v;
571     push @{$row->{'swish'}}, @v;
572 dpavlin 366 }
573     }
574    
575 dpavlin 368 if ($row) {
576     $row->{'tag'} = $field;
577     push @ds, $row;
578     }
579 dpavlin 366
580     }
581    
582 dpavlin 370 return @ds;
583 dpavlin 366
584     }
585    
586 dpavlin 370 =head2 output
587    
588     Create output from in-memory data structure using Template Toolkit template.
589    
590     my $text = $webpac->output( template => 'text.tt', data => @ds );
591    
592     =cut
593    
594     sub output {
595     my $self = shift;
596    
597     my $args = {@_};
598    
599     confess("need template name") if (! $args->{'template'});
600     confess("need data array") if (! $args->{'data'});
601    
602     my $out;
603    
604     $self->{'tt'}->process(
605     $args->{'template'},
606     $args,
607     \$out
608     ) || confess $self->{'tt'}->error();
609    
610     return $out;
611     }
612    
613 dpavlin 352 1;

  ViewVC Help
Powered by ViewVC 1.1.26