/[webpac]/trunk2/lib/WebPAC.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk2/lib/WebPAC.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 389 - (hide annotations)
Tue Jul 20 17:15:48 2004 UTC (16 years, 3 months ago) by dpavlin
File size: 19563 byte(s)
added progress_bar

1 dpavlin 354 package WebPAC;
2 dpavlin 352
3 dpavlin 367 use warnings;
4     use strict;
5    
6 dpavlin 352 use Carp;
7 dpavlin 353 use Text::Iconv;
8     use Config::IniFiles;
9 dpavlin 363 use XML::Simple;
10 dpavlin 370 use Template;
11 dpavlin 372 use Log::Log4perl qw(get_logger :levels);
12 dpavlin 352
13 dpavlin 358 use Data::Dumper;
14    
15 dpavlin 373 #my $LOOKUP_REGEX = '\[[^\[\]]+\]';
16     #my $LOOKUP_REGEX_SAVE = '\[([^\[\]]+)\]';
17     my $LOOKUP_REGEX = 'lookup{[^\{\}]+}';
18     my $LOOKUP_REGEX_SAVE = 'lookup{([^\{\}]+)}';
19    
20 dpavlin 352 =head1 NAME
21    
22 dpavlin 354 WebPAC - base class for WebPAC
23 dpavlin 352
24     =head1 DESCRIPTION
25    
26 dpavlin 354 This module implements methods used by WebPAC.
27 dpavlin 352
28     =head1 METHODS
29    
30     =head2 new
31    
32 dpavlin 354 This will create new instance of WebPAC using configuration specified by C<config_file>.
33 dpavlin 352
34 dpavlin 354 my $webpac = new WebPAC(
35 dpavlin 352 config_file => 'name.conf',
36     [code_page => 'ISO-8859-2',]
37     );
38    
39     Default C<code_page> is C<ISO-8859-2>.
40    
41 dpavlin 353 It will also read configuration files
42     C<global.conf> (used by indexer and Web font-end)
43     and configuration file specified by C<config_file>
44     which describes databases to be indexed.
45    
46 dpavlin 352 =cut
47    
48 dpavlin 363 # mapping between data type and tag which specify
49     # format in XML file
50     my %type2tag = (
51     'isis' => 'isis',
52     # 'excel' => 'column',
53     # 'marc' => 'marc',
54     # 'feed' => 'feed'
55     );
56    
57 dpavlin 352 sub new {
58     my $class = shift;
59     my $self = {@_};
60     bless($self, $class);
61    
62 dpavlin 372 my $log_file = $self->{'log'} || "log.conf";
63     Log::Log4perl->init($log_file);
64    
65     my $log = $self->_get_logger();
66    
67 dpavlin 352 # fill in default values
68     # output codepage
69     $self->{'code_page'} = 'ISO-8859-2' if (! $self->{'code_page'});
70    
71 dpavlin 353 #
72     # read global.conf
73     #
74 dpavlin 372 $log->debug("read 'global.conf'");
75 dpavlin 352
76 dpavlin 372 my $config = new Config::IniFiles( -file => 'global.conf' ) || $log->logcroak("can't open 'global.conf'");
77 dpavlin 352
78     # read global config parametars
79     foreach my $var (qw(
80     dbi_dbd
81     dbi_dsn
82     dbi_user
83     dbi_passwd
84     show_progress
85     my_unac_filter
86 dpavlin 370 output_template
87 dpavlin 352 )) {
88 dpavlin 370 $self->{'global_config'}->{$var} = $config->val('global', $var);
89 dpavlin 352 }
90    
91 dpavlin 353 #
92     # read indexer config file
93     #
94 dpavlin 352
95 dpavlin 372 $self->{indexer_config_file} = new Config::IniFiles( -file => $self->{config_file} ) || $log->logcroak("can't open '",$self->{config_file},"'");
96 dpavlin 352
97 dpavlin 370 # create UTF-8 convertor for import_xml files
98 dpavlin 366 $self->{'utf2cp'} = Text::Iconv->new('UTF-8' ,$self->{'code_page'});
99 dpavlin 370
100     # create Template toolkit instance
101     $self->{'tt'} = Template->new(
102     INCLUDE_PATH => ($self->{'global_config_file'}->{'output_template'} || './output_template'),
103     # FILTERS => {
104     # 'foo' => \&foo_filter,
105     # },
106     EVAL_PERL => 1,
107     );
108    
109 dpavlin 352 return $self;
110     }
111    
112     =head2 open_isis
113    
114     Open CDS/ISIS database using OpenIsis module and read all records to memory.
115    
116     $webpac->open_isis(
117     filename => '/data/ISIS/ISIS',
118     code_page => '852',
119     limit_mfn => '500',
120     lookup => [ ... ],
121     );
122    
123     By default, ISIS code page is assumed to be C<852>.
124    
125 dpavlin 353 If optional parametar C<limit_mfn> is set, it will read just 500 records
126     from database in example above.
127 dpavlin 352
128     C<lookup> argument is an array of lookups to create. Each lookup must have C<key> and
129     C<val>. Optional parametar C<eval> is perl code to evaluate before storing
130     value in index.
131    
132     lookup => [
133     { 'key' => 'd:v900', 'val' => 'v250^a' },
134     { 'eval' => '"v901^a" eq "Podruèje"',
135     'key' => 'pa:v561^4:v562^4:v461^1',
136     'val' => 'v900' },
137     ]
138    
139 dpavlin 367 Returns number of last record read into memory (size of database, really).
140    
141 dpavlin 352 =cut
142    
143     sub open_isis {
144     my $self = shift;
145     my $arg = {@_};
146    
147 dpavlin 372 my $log = $self->_get_logger();
148    
149     $log->logcroak("need filename") if (! $arg->{'filename'});
150 dpavlin 352 my $code_page = $arg->{'code_page'} || '852';
151    
152 dpavlin 353 use OpenIsis;
153    
154 dpavlin 352 #$self->{'isis_code_page'} = $code_page;
155    
156     # create Text::Iconv object
157     my $cp = Text::Iconv->new($code_page,$self->{'code_page'});
158    
159 dpavlin 372 $log->info("reading ISIS database '",$arg->{'filename'},"'");
160 dpavlin 363
161 dpavlin 352 my $isis_db = OpenIsis::open($arg->{'filename'});
162    
163     my $maxmfn = OpenIsis::maxRowid( $isis_db ) || 1;
164    
165 dpavlin 357 $maxmfn = $self->{limit_mfn} if ($self->{limit_mfn});
166    
167 dpavlin 372 $log->info("processing $maxmfn records...");
168 dpavlin 363
169 dpavlin 352 # read database
170     for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) {
171    
172     # read record
173     my $row = OpenIsis::read( $isis_db, $mfn );
174     foreach my $k (keys %{$row}) {
175     if ($k ne "mfn") {
176     foreach my $l (@{$row->{$k}}) {
177     $l = $cp->convert($l);
178     # has subfields?
179     my $val;
180     if ($l =~ m/\^/) {
181     foreach my $t (split(/\^/,$l)) {
182     next if (! $t);
183     $val->{substr($t,0,1)} = substr($t,1);
184     }
185     } else {
186     $val = $l;
187     }
188    
189     push @{$self->{'data'}->{$mfn}->{$k}}, $val;
190     }
191 dpavlin 374 } else {
192     push @{$self->{'data'}->{$mfn}->{'000'}}, $mfn;
193 dpavlin 352 }
194    
195     }
196    
197     # create lookup
198 dpavlin 355 my $rec = $self->{'data'}->{$mfn};
199     $self->create_lookup($rec, @{$arg->{'lookup'}});
200 dpavlin 352
201 dpavlin 389 $self->progress_bar($mfn,$maxmfn);
202    
203 dpavlin 352 }
204    
205 dpavlin 362 $self->{'current_mfn'} = 1;
206 dpavlin 389 $self->{'last_pcnt'} = 0;
207 dpavlin 362
208 dpavlin 352 # store max mfn and return it.
209     return $self->{'max_mfn'} = $maxmfn;
210     }
211    
212 dpavlin 362 =head2 fetch_rec
213    
214     Fetch next record from database. It will also display progress bar (once
215     it's implemented, that is).
216    
217     my $rec = $webpac->fetch_rec;
218    
219     =cut
220    
221     sub fetch_rec {
222     my $self = shift;
223    
224 dpavlin 372 my $log = $self->_get_logger();
225 dpavlin 362
226 dpavlin 372 my $mfn = $self->{'current_mfn'}++ || $log->logconfess("it seems that you didn't load database!");
227    
228 dpavlin 362 if ($mfn > $self->{'max_mfn'}) {
229     $self->{'current_mfn'} = $self->{'max_mfn'};
230 dpavlin 373 $log->debug("at EOF");
231 dpavlin 362 return;
232     }
233    
234 dpavlin 389 $self->progress_bar($mfn,$self->{'max_mfn'});
235    
236 dpavlin 362 return $self->{'data'}->{$mfn};
237     }
238    
239 dpavlin 389 =head2 progress_bar
240    
241     Draw progress bar on STDERR.
242    
243     $webpac->progress_bar($current, $max);
244    
245     =cut
246    
247     sub progress_bar {
248     my $self = shift;
249    
250     my ($curr,$max) = @_;
251    
252     my $log = $self->_get_logger();
253    
254     $log->logconfess("no current value!") if (! $curr);
255     $log->logconfess("no maximum value!") if (! $max);
256    
257     if ($curr > $max) {
258     $max = $curr;
259     $log->debug("overflow to $curr");
260     }
261    
262     $self->{'last_pcnt'} ||= 1;
263    
264     $self->{'last_pcnt'} = $curr if ($curr < $self->{'last_pcnt'});
265    
266     my $p = int($curr * 100 / $max);
267     if ($p != $self->{'last_pcnt'}) {
268     printf STDERR ("%5d / %5d [%-51s] %-2d %% \r",$curr,$max,"=" x ($p/2).">", $p );
269     $self->{'last_pcnt'} = $p;
270     }
271     }
272    
273 dpavlin 363 =head2 open_import_xml
274    
275     Read file from C<import_xml/> directory and parse it.
276    
277     $webpac->open_import_xml(type => 'isis');
278    
279     =cut
280    
281     sub open_import_xml {
282     my $self = shift;
283    
284 dpavlin 372 my $log = $self->_get_logger();
285    
286 dpavlin 363 my $arg = {@_};
287 dpavlin 372 $log->logconfess("need type to load file from import_xml/") if (! $arg->{'type'});
288 dpavlin 363
289 dpavlin 366 $self->{'type'} = $arg->{'type'};
290 dpavlin 363
291 dpavlin 366 my $type_base = $arg->{'type'};
292 dpavlin 363 $type_base =~ s/_.*$//g;
293    
294 dpavlin 366 $self->{'tag'} = $type2tag{$type_base};
295    
296 dpavlin 375 $log->info("using type '",$self->{'type'},"' tag <",$self->{'tag'},">");
297 dpavlin 366
298     my $f = "./import_xml/".$self->{'type'}.".xml";
299 dpavlin 372 $log->logconfess("import_xml file '$f' doesn't exist!") if (! -e "$f");
300 dpavlin 363
301 dpavlin 375 $log->info("reading '$f'");
302 dpavlin 363
303 dpavlin 375 $self->{'import_xml_file'} = $f;
304    
305 dpavlin 363 $self->{'import_xml'} = XMLin($f,
306 dpavlin 366 ForceArray => [ $self->{'tag'}, 'config', 'format' ],
307 dpavlin 363 );
308    
309 dpavlin 375 $log->debug("import xml is ",sub { Dumper($self->{'import_xml'}) });
310    
311 dpavlin 363 }
312    
313 dpavlin 355 =head2 create_lookup
314    
315     Create lookup from record using lookup definition.
316    
317 dpavlin 367 $self->create_lookup($rec, @lookups);
318    
319     Called internally by C<open_*> methods.
320    
321 dpavlin 355 =cut
322    
323     sub create_lookup {
324     my $self = shift;
325    
326 dpavlin 372 my $log = $self->_get_logger();
327 dpavlin 355
328 dpavlin 372 my $rec = shift || $log->logconfess("need record to create lookup");
329     $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
330    
331 dpavlin 355 foreach my $i (@_) {
332     if ($i->{'eval'}) {
333     my $eval = $self->fill_in($rec,$i->{'eval'});
334     my $key = $self->fill_in($rec,$i->{'key'});
335     my @val = $self->fill_in($rec,$i->{'val'});
336     if ($key && @val && eval $eval) {
337 dpavlin 373 $log->debug("stored $key = ",sub { join(" | ",@val) });
338 dpavlin 355 push @{$self->{'lookup'}->{$key}}, @val;
339     }
340     } else {
341     my $key = $self->fill_in($rec,$i->{'key'});
342     my @val = $self->fill_in($rec,$i->{'val'});
343     if ($key && @val) {
344 dpavlin 373 $log->debug("stored $key = ",sub { join(" | ",@val) });
345 dpavlin 355 push @{$self->{'lookup'}->{$key}}, @val;
346     }
347     }
348     }
349     }
350    
351 dpavlin 356 =head2 get_data
352    
353     Returns value from record.
354    
355 dpavlin 367 my $text = $self->get_data(\$rec,$f,$sf,$i,\$found);
356 dpavlin 356
357     Arguments are:
358     record reference C<$rec>,
359     field C<$f>,
360     optional subfiled C<$sf>,
361     index for repeatable values C<$i>.
362    
363 dpavlin 367 Optinal variable C<$found> will be incremeted if there
364 dpavlin 356 is field.
365    
366     Returns value or empty string.
367    
368     =cut
369    
370     sub get_data {
371     my $self = shift;
372    
373     my ($rec,$f,$sf,$i,$found) = @_;
374 dpavlin 367
375 dpavlin 356 if ($$rec->{$f}) {
376 dpavlin 367 return '' if (! $$rec->{$f}->[$i]);
377 dpavlin 389 no strict 'refs';
378 dpavlin 356 if ($sf && $$rec->{$f}->[$i]->{$sf}) {
379     $$found++ if (defined($$found));
380     return $$rec->{$f}->[$i]->{$sf};
381     } elsif ($$rec->{$f}->[$i]) {
382     $$found++ if (defined($$found));
383 dpavlin 366 # it still might have subfield, just
384     # not specified, so we'll dump all
385     if ($$rec->{$f}->[$i] =~ /HASH/o) {
386     my $out;
387     foreach my $k (keys %{$$rec->{$f}->[$i]}) {
388     $out .= $$rec->{$f}->[$i]->{$k}." ";
389     }
390     return $out;
391     } else {
392     return $$rec->{$f}->[$i];
393     }
394 dpavlin 356 }
395     } else {
396     return '';
397     }
398     }
399    
400 dpavlin 352 =head2 fill_in
401    
402     Workhourse of all: takes record from in-memory structure of database and
403     strings with placeholders and returns string or array of with substituted
404     values from record.
405    
406 dpavlin 367 my $text = $webpac->fill_in($rec,'v250^a');
407 dpavlin 352
408     Optional argument is ordinal number for repeatable fields. By default,
409 dpavlin 353 it's assume to be first repeatable field (fields are perl array, so first
410     element is 0).
411     Following example will read second value from repeatable field.
412 dpavlin 352
413 dpavlin 367 my $text = $webpac->fill_in($rec,'Title: v250^a',1);
414 dpavlin 353
415     This function B<does not> perform parsing of format to inteligenty skip
416     delimiters before fields which aren't used.
417    
418 dpavlin 376 This method will automatically decode UTF-8 string to local code page
419     if needed.
420    
421 dpavlin 352 =cut
422    
423 dpavlin 372 sub fill_in {
424 dpavlin 371 my $self = shift;
425    
426 dpavlin 372 my $log = $self->_get_logger();
427 dpavlin 371
428 dpavlin 372 my $rec = shift || $log->logconfess("need data record");
429     my $format = shift || $log->logconfess("need format to parse");
430 dpavlin 352 # iteration (for repeatable fields)
431     my $i = shift || 0;
432    
433     # FIXME remove for speedup?
434 dpavlin 372 $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
435 dpavlin 352
436 dpavlin 376 if (utf8::is_utf8($format)) {
437     $format = $self->_x($format);
438     }
439    
440 dpavlin 352 my $found = 0;
441    
442 dpavlin 359 my $eval_code;
443     # remove eval{...} from beginning
444     $eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s);
445    
446 dpavlin 352 # do actual replacement of placeholders
447 dpavlin 373 $format =~ s/v(\d+)(?:\^(\w))?/$self->get_data(\$rec,$1,$2,$i,\$found)/ges;
448 dpavlin 352
449 dpavlin 353 if ($found) {
450 dpavlin 373 $log->debug("format: $format");
451 dpavlin 359 if ($eval_code) {
452     my $eval = $self->fill_in($rec,$eval_code,$i);
453 dpavlin 371 return if (! $self->_eval($eval));
454 dpavlin 359 }
455 dpavlin 353 # do we have lookups?
456 dpavlin 373 if ($format =~ /$LOOKUP_REGEX/o) {
457     $log->debug("format '$format' has lookup");
458 dpavlin 353 return $self->lookup($format);
459     } else {
460     return $format;
461     }
462 dpavlin 352 } else {
463     return;
464     }
465     }
466    
467     =head2 lookup
468    
469 dpavlin 355 Perform lookups on format supplied to it.
470 dpavlin 352
471 dpavlin 367 my $text = $self->lookup('[v900]');
472 dpavlin 352
473 dpavlin 355 Lookups can be nested (like C<[d:[a:[v900]]]>).
474    
475 dpavlin 352 =cut
476    
477     sub lookup {
478     my $self = shift;
479    
480 dpavlin 372 my $log = $self->_get_logger();
481 dpavlin 352
482 dpavlin 372 my $tmp = shift || $log->logconfess("need format");
483    
484 dpavlin 373 if ($tmp =~ /$LOOKUP_REGEX/o) {
485 dpavlin 352 my @in = ( $tmp );
486 dpavlin 372
487     $log->debug("lookup for: ",$tmp);
488    
489 dpavlin 352 my @out;
490     while (my $f = shift @in) {
491 dpavlin 373 if ($f =~ /$LOOKUP_REGEX_SAVE/o) {
492 dpavlin 352 my $k = $1;
493     if ($self->{'lookup'}->{$k}) {
494     foreach my $nv (@{$self->{'lookup'}->{$k}}) {
495     my $tmp2 = $f;
496 dpavlin 373 $tmp2 =~ s/lookup{$k}/$nv/g;
497 dpavlin 352 push @in, $tmp2;
498     }
499     } else {
500     undef $f;
501     }
502     } elsif ($f) {
503     push @out, $f;
504     }
505     }
506 dpavlin 373 $log->logconfess("return is array and it's not expected!") unless wantarray;
507 dpavlin 352 return @out;
508     } else {
509     return $tmp;
510     }
511     }
512    
513 dpavlin 356 =head2 parse
514    
515     Perform smart parsing of string, skipping delimiters for fields which aren't
516     defined. It can also eval code in format starting with C<eval{...}> and
517     return output or nothing depending on eval code.
518    
519 dpavlin 367 my $text = $webpac->parse($rec,'eval{"v901^a" eq "Deskriptor"}descriptor: v250^a', $i);
520 dpavlin 356
521     =cut
522    
523     sub parse {
524     my $self = shift;
525    
526 dpavlin 366 my ($rec, $format_utf8, $i) = @_;
527 dpavlin 356
528 dpavlin 366 return if (! $format_utf8);
529    
530 dpavlin 372 my $log = $self->_get_logger();
531 dpavlin 358
532 dpavlin 372 $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
533     $log->logconfess("need utf2cp Text::Iconv object!") if (! $self->{'utf2cp'});
534    
535 dpavlin 358 $i = 0 if (! $i);
536    
537 dpavlin 375 my $format = $self->_x($format_utf8) || $log->logconfess("can't convert '$format_utf8' from UTF-8 to ",$self->{'code_page'});
538 dpavlin 366
539 dpavlin 356 my @out;
540    
541 dpavlin 373 $log->debug("format: $format");
542    
543 dpavlin 356 my $eval_code;
544     # remove eval{...} from beginning
545     $eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s);
546    
547 dpavlin 358 my $prefix;
548     my $all_found=0;
549 dpavlin 356
550 dpavlin 373 while ($format =~ s/^(.*?)v(\d+)(?:\^(\w))?//s) {
551 dpavlin 356
552 dpavlin 358 my $del = $1 || '';
553 dpavlin 359 $prefix ||= $del if ($all_found == 0);
554 dpavlin 358
555     my $found = 0;
556     my $tmp = $self->get_data(\$rec,$2,$3,$i,\$found);
557    
558 dpavlin 356 if ($found) {
559 dpavlin 358 push @out, $del;
560     push @out, $tmp;
561     $all_found += $found;
562 dpavlin 356 }
563     }
564    
565 dpavlin 358 return if (! $all_found);
566 dpavlin 356
567 dpavlin 373 my $out = join('',@out);
568 dpavlin 358
569 dpavlin 373 if ($out) {
570     # add rest of format (suffix)
571     $out .= $format;
572 dpavlin 367
573 dpavlin 373 # add prefix if not there
574     $out = $prefix . $out if ($out !~ m/^\Q$prefix\E/);
575    
576     $log->debug("result: $out");
577     }
578    
579 dpavlin 359 if ($eval_code) {
580     my $eval = $self->fill_in($rec,$eval_code,$i);
581 dpavlin 373 $log->debug("about to eval{",$eval,"} format: $out");
582 dpavlin 371 return if (! $self->_eval($eval));
583 dpavlin 359 }
584    
585 dpavlin 358 return $out;
586 dpavlin 356 }
587    
588 dpavlin 367 =head2 parse_to_arr
589    
590     Similar to C<parse>, but returns array of all repeatable fields
591    
592     my @arr = $webpac->parse_to_arr($rec,'v250^a');
593    
594     =cut
595    
596     sub parse_to_arr {
597     my $self = shift;
598    
599     my ($rec, $format_utf8) = @_;
600    
601 dpavlin 372 my $log = $self->_get_logger();
602    
603     $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
604 dpavlin 367 return if (! $format_utf8);
605    
606     my $i = 0;
607     my @arr;
608    
609     while (my $v = $self->parse($rec,$format_utf8,$i++)) {
610     push @arr, $v;
611     }
612    
613 dpavlin 373 $log->debug("format '$format_utf8' returned ",--$i," elements: ", sub { join(" | ",@arr) }) if (@arr);
614    
615 dpavlin 367 return @arr;
616     }
617    
618 dpavlin 373 =head2 fill_in_to_arr
619    
620     Similar to C<fill_in>, but returns array of all repeatable fields. Usable
621     for fields which have lookups, so they shouldn't be parsed but rather
622     C<fill_id>ed.
623    
624     my @arr = $webpac->fill_in_to_arr($rec,'[v900];;[v250^a]');
625    
626     =cut
627    
628     sub fill_in_to_arr {
629     my $self = shift;
630    
631     my ($rec, $format_utf8) = @_;
632    
633     my $log = $self->_get_logger();
634    
635     $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
636     return if (! $format_utf8);
637    
638     my $i = 0;
639     my @arr;
640    
641     while (my @v = $self->fill_in($rec,$format_utf8,$i++)) {
642     push @arr, @v;
643     }
644    
645     $log->debug("format '$format_utf8' returned ",--$i," elements: ", sub { join(" | ",@arr) }) if (@arr);
646    
647     return @arr;
648     }
649    
650    
651 dpavlin 366 =head2 data_structure
652    
653     Create in-memory data structure which represents layout from C<import_xml>.
654     It is used later to produce output.
655    
656 dpavlin 368 my @ds = $webpac->data_structure($rec);
657 dpavlin 366
658 dpavlin 374 This method will also set C<$webpac->{'currnet_filename'}> if there is
659     <filename> tag in C<import_xml>.
660    
661 dpavlin 366 =cut
662    
663 dpavlin 372 sub data_structure {
664 dpavlin 366 my $self = shift;
665    
666 dpavlin 372 my $log = $self->_get_logger();
667 dpavlin 366
668     my $rec = shift;
669 dpavlin 372 $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
670 dpavlin 366
671 dpavlin 374 undef $self->{'currnet_filename'};
672    
673 dpavlin 366 my @sorted_tags;
674     if ($self->{tags_by_order}) {
675     @sorted_tags = @{$self->{tags_by_order}};
676     } else {
677     @sorted_tags = sort { $self->_sort_by_order } keys %{$self->{'import_xml'}->{'indexer'}};
678     $self->{tags_by_order} = \@sorted_tags;
679     }
680    
681 dpavlin 368 my @ds;
682 dpavlin 366
683 dpavlin 373 $log->debug("tags: ",sub { join(", ",@sorted_tags) });
684    
685 dpavlin 366 foreach my $field (@sorted_tags) {
686    
687     my $row;
688    
689     #print "field $field [",$self->{'tag'},"] = ",Dumper($self->{'import_xml'}->{'indexer'}->{$field}->{$self->{'tag'}});
690    
691     foreach my $tag (@{$self->{'import_xml'}->{'indexer'}->{$field}->{$self->{'tag'}}}) {
692 dpavlin 373 my $format = $tag->{'value'} || $tag->{'content'};
693 dpavlin 366
694 dpavlin 373 $log->debug("format: $format");
695    
696     my @v;
697     if ($format =~ /$LOOKUP_REGEX/o) {
698     @v = $self->fill_in_to_arr($rec,$format);
699     } else {
700     @v = $self->parse_to_arr($rec,$format);
701     }
702 dpavlin 367 next if (! @v);
703 dpavlin 366
704 dpavlin 375 # use format?
705     if ($tag->{'format_name'}) {
706     @v = map { $self->apply_format($tag->{'format_name'},$tag->{'format_delimiter'},$_) } @v;
707     }
708    
709 dpavlin 366 # does tag have type?
710     if ($tag->{'type'}) {
711 dpavlin 367 push @{$row->{$tag->{'type'}}}, @v;
712 dpavlin 366 } else {
713 dpavlin 367 push @{$row->{'display'}}, @v;
714     push @{$row->{'swish'}}, @v;
715 dpavlin 366 }
716 dpavlin 373
717 dpavlin 374 if ($field eq 'filename') {
718     $self->{'current_filename'} = join('',@v);
719     $log->debug("filename: ",$self->{'current_filename'});
720     }
721    
722 dpavlin 366 }
723    
724 dpavlin 368 if ($row) {
725     $row->{'tag'} = $field;
726 dpavlin 375
727     # TODO: name_sigular, name_plural
728     my $name = $self->{'import_xml'}->{'indexer'}->{$field}->{'name'};
729     $row->{'name'} = $name ? $self->_x($name) : $field;
730    
731 dpavlin 368 push @ds, $row;
732 dpavlin 374
733 dpavlin 373 $log->debug("row $field: ",sub { Dumper($row) });
734 dpavlin 368 }
735 dpavlin 366
736     }
737    
738 dpavlin 370 return @ds;
739 dpavlin 366
740     }
741    
742 dpavlin 370 =head2 output
743    
744     Create output from in-memory data structure using Template Toolkit template.
745    
746     my $text = $webpac->output( template => 'text.tt', data => @ds );
747    
748     =cut
749    
750     sub output {
751     my $self = shift;
752    
753     my $args = {@_};
754    
755 dpavlin 372 my $log = $self->_get_logger();
756 dpavlin 370
757 dpavlin 372 $log->logconfess("need template name") if (! $args->{'template'});
758     $log->logconfess("need data array") if (! $args->{'data'});
759    
760 dpavlin 370 my $out;
761    
762     $self->{'tt'}->process(
763     $args->{'template'},
764     $args,
765     \$out
766     ) || confess $self->{'tt'}->error();
767    
768     return $out;
769     }
770    
771 dpavlin 375 =head2 apply_format
772    
773     Apply format specified in tag with C<format_name="name"> and
774     C<format_delimiter=";;">.
775    
776     my $text = $webpac->apply_format($format_name,$format_delimiter,$data);
777    
778     Formats can contain C<lookup{...}> if you need them.
779    
780     =cut
781    
782     sub apply_format {
783     my $self = shift;
784    
785     my ($name,$delimiter,$data) = @_;
786    
787     my $log = $self->_get_logger();
788    
789     if (! $self->{'import_xml'}->{'format'}->{$name}) {
790     $log->warn("<format name=\"$name\"> is not defined in ",$self->{'import_xml_file'});
791     return $data;
792     }
793    
794     $log->warn("no delimiter for format $name") if (! $delimiter);
795    
796     my $format = $self->_x($self->{'import_xml'}->{'format'}->{$name}->{'content'}) || $log->logdie("can't find format '$name'");
797    
798     my @data = split(/\Q$delimiter\E/, $data);
799    
800     my $out = sprintf($format, @data);
801     $log->debug("using format $name [$format] on $data to produce: $out");
802    
803     if ($out =~ m/$LOOKUP_REGEX/o) {
804     return $self->lookup($out);
805     } else {
806     return $out;
807     }
808    
809     }
810    
811    
812 dpavlin 372 #
813     #
814     #
815    
816     =head1 INTERNAL METHODS
817    
818     Here is a quick list of internal methods, mostly useful to turn debugging
819     on them (see L<LOGGING> below for explanation).
820    
821     =cut
822    
823     =head2 _eval
824    
825     Internal function to eval code without C<strict 'subs'>.
826    
827     =cut
828    
829     sub _eval {
830     my $self = shift;
831    
832     my $code = shift || return;
833    
834     my $log = $self->_get_logger();
835    
836     no strict 'subs';
837     my $ret = eval $code;
838     if ($@) {
839     $log->error("problem with eval code [$code]: $@");
840     }
841    
842     $log->debug("eval: ",$code," [",$ret,"]");
843    
844     return $ret || 0;
845     }
846    
847     =head2 _sort_by_order
848    
849     Sort xml tags data structure accoding to C<order=""> attribute.
850    
851     =cut
852    
853     sub _sort_by_order {
854     my $self = shift;
855    
856     my $va = $self->{'import_xml'}->{'indexer'}->{$a}->{'order'} ||
857     $self->{'import_xml'}->{'indexer'}->{$a};
858     my $vb = $self->{'import_xml'}->{'indexer'}->{$b}->{'order'} ||
859     $self->{'import_xml'}->{'indexer'}->{$b};
860    
861     return $va <=> $vb;
862     }
863    
864 dpavlin 375 =head2 _get_logger
865    
866     Get C<Log::Log4perl> object with a twist: domains are defined for each
867     method
868    
869     my $log = $webpac->_get_logger();
870    
871     =cut
872    
873 dpavlin 372 sub _get_logger {
874     my $self = shift;
875    
876 dpavlin 374 my $name = (caller(1))[3] || caller;
877     return get_logger($name);
878 dpavlin 372 }
879    
880 dpavlin 375 =head2 _x
881    
882     Convert string from UTF-8 to code page defined in C<import_xml>.
883    
884     my $text = $webpac->_x('utf8 text');
885    
886     =cut
887    
888     sub _x {
889     my $self = shift;
890     my $utf8 = shift || return;
891    
892     return $self->{'utf2cp'}->convert($utf8) ||
893     $self->_get_logger()->logwarn("can't convert '$utf8'");
894     }
895    
896 dpavlin 372 #
897     #
898     #
899    
900     =head1 LOGGING
901    
902     Logging in WebPAC is performed by L<Log::Log4perl> with config file
903     C<log.conf>.
904    
905     Methods defined above have different levels of logging, so
906     it's descriptions will be useful to turn (mostry B<debug> logging) on
907     or off to see why WabPAC isn't perforing as you expect it (it might even
908     be a bug!).
909    
910     B<This is different from normal Log4perl behaviour>. To repeat, you can
911     also use method names, and not only classes (which are just few)
912     to filter logging.
913    
914     =cut
915    
916 dpavlin 352 1;

  ViewVC Help
Powered by ViewVC 1.1.26