/[webpac]/trunk2/lib/WebPAC.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk2/lib/WebPAC.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 376 - (hide annotations)
Sun Jun 20 18:39:30 2004 UTC (19 years, 9 months ago) by dpavlin
File size: 18769 byte(s)
fill_in will automatically decode utf8 formats if passed to it.

1 dpavlin 354 package WebPAC;
2 dpavlin 352
3 dpavlin 367 use warnings;
4     use strict;
5    
6 dpavlin 352 use Carp;
7 dpavlin 353 use Text::Iconv;
8     use Config::IniFiles;
9 dpavlin 363 use XML::Simple;
10 dpavlin 370 use Template;
11 dpavlin 372 use Log::Log4perl qw(get_logger :levels);
12 dpavlin 352
13 dpavlin 358 use Data::Dumper;
14    
15 dpavlin 373 #my $LOOKUP_REGEX = '\[[^\[\]]+\]';
16     #my $LOOKUP_REGEX_SAVE = '\[([^\[\]]+)\]';
17     my $LOOKUP_REGEX = 'lookup{[^\{\}]+}';
18     my $LOOKUP_REGEX_SAVE = 'lookup{([^\{\}]+)}';
19    
20 dpavlin 352 =head1 NAME
21    
22 dpavlin 354 WebPAC - base class for WebPAC
23 dpavlin 352
24     =head1 DESCRIPTION
25    
26 dpavlin 354 This module implements methods used by WebPAC.
27 dpavlin 352
28     =head1 METHODS
29    
30     =head2 new
31    
32 dpavlin 354 This will create new instance of WebPAC using configuration specified by C<config_file>.
33 dpavlin 352
34 dpavlin 354 my $webpac = new WebPAC(
35 dpavlin 352 config_file => 'name.conf',
36     [code_page => 'ISO-8859-2',]
37     );
38    
39     Default C<code_page> is C<ISO-8859-2>.
40    
41 dpavlin 353 It will also read configuration files
42     C<global.conf> (used by indexer and Web font-end)
43     and configuration file specified by C<config_file>
44     which describes databases to be indexed.
45    
46 dpavlin 352 =cut
47    
48 dpavlin 363 # mapping between data type and tag which specify
49     # format in XML file
50     my %type2tag = (
51     'isis' => 'isis',
52     # 'excel' => 'column',
53     # 'marc' => 'marc',
54     # 'feed' => 'feed'
55     );
56    
57 dpavlin 352 sub new {
58     my $class = shift;
59     my $self = {@_};
60     bless($self, $class);
61    
62 dpavlin 372 my $log_file = $self->{'log'} || "log.conf";
63     Log::Log4perl->init($log_file);
64    
65     my $log = $self->_get_logger();
66    
67 dpavlin 352 # fill in default values
68     # output codepage
69     $self->{'code_page'} = 'ISO-8859-2' if (! $self->{'code_page'});
70    
71 dpavlin 353 #
72     # read global.conf
73     #
74 dpavlin 372 $log->debug("read 'global.conf'");
75 dpavlin 352
76 dpavlin 372 my $config = new Config::IniFiles( -file => 'global.conf' ) || $log->logcroak("can't open 'global.conf'");
77 dpavlin 352
78     # read global config parametars
79     foreach my $var (qw(
80     dbi_dbd
81     dbi_dsn
82     dbi_user
83     dbi_passwd
84     show_progress
85     my_unac_filter
86 dpavlin 370 output_template
87 dpavlin 352 )) {
88 dpavlin 370 $self->{'global_config'}->{$var} = $config->val('global', $var);
89 dpavlin 352 }
90    
91 dpavlin 353 #
92     # read indexer config file
93     #
94 dpavlin 352
95 dpavlin 372 $self->{indexer_config_file} = new Config::IniFiles( -file => $self->{config_file} ) || $log->logcroak("can't open '",$self->{config_file},"'");
96 dpavlin 352
97 dpavlin 370 # create UTF-8 convertor for import_xml files
98 dpavlin 366 $self->{'utf2cp'} = Text::Iconv->new('UTF-8' ,$self->{'code_page'});
99 dpavlin 370
100     # create Template toolkit instance
101     $self->{'tt'} = Template->new(
102     INCLUDE_PATH => ($self->{'global_config_file'}->{'output_template'} || './output_template'),
103     # FILTERS => {
104     # 'foo' => \&foo_filter,
105     # },
106     EVAL_PERL => 1,
107     );
108    
109 dpavlin 352 return $self;
110     }
111    
112     =head2 open_isis
113    
114     Open CDS/ISIS database using OpenIsis module and read all records to memory.
115    
116     $webpac->open_isis(
117     filename => '/data/ISIS/ISIS',
118     code_page => '852',
119     limit_mfn => '500',
120     lookup => [ ... ],
121     );
122    
123     By default, ISIS code page is assumed to be C<852>.
124    
125 dpavlin 353 If optional parametar C<limit_mfn> is set, it will read just 500 records
126     from database in example above.
127 dpavlin 352
128     C<lookup> argument is an array of lookups to create. Each lookup must have C<key> and
129     C<val>. Optional parametar C<eval> is perl code to evaluate before storing
130     value in index.
131    
132     lookup => [
133     { 'key' => 'd:v900', 'val' => 'v250^a' },
134     { 'eval' => '"v901^a" eq "Podruèje"',
135     'key' => 'pa:v561^4:v562^4:v461^1',
136     'val' => 'v900' },
137     ]
138    
139 dpavlin 367 Returns number of last record read into memory (size of database, really).
140    
141 dpavlin 352 =cut
142    
143     sub open_isis {
144     my $self = shift;
145     my $arg = {@_};
146    
147 dpavlin 372 my $log = $self->_get_logger();
148    
149     $log->logcroak("need filename") if (! $arg->{'filename'});
150 dpavlin 352 my $code_page = $arg->{'code_page'} || '852';
151    
152 dpavlin 353 use OpenIsis;
153    
154 dpavlin 352 #$self->{'isis_code_page'} = $code_page;
155    
156     # create Text::Iconv object
157     my $cp = Text::Iconv->new($code_page,$self->{'code_page'});
158    
159 dpavlin 372 $log->info("reading ISIS database '",$arg->{'filename'},"'");
160 dpavlin 363
161 dpavlin 352 my $isis_db = OpenIsis::open($arg->{'filename'});
162    
163     my $maxmfn = OpenIsis::maxRowid( $isis_db ) || 1;
164    
165 dpavlin 357 $maxmfn = $self->{limit_mfn} if ($self->{limit_mfn});
166    
167 dpavlin 372 $log->info("processing $maxmfn records...");
168 dpavlin 363
169 dpavlin 352 # read database
170     for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) {
171    
172     # read record
173     my $row = OpenIsis::read( $isis_db, $mfn );
174     foreach my $k (keys %{$row}) {
175     if ($k ne "mfn") {
176     foreach my $l (@{$row->{$k}}) {
177     $l = $cp->convert($l);
178     # has subfields?
179     my $val;
180     if ($l =~ m/\^/) {
181     foreach my $t (split(/\^/,$l)) {
182     next if (! $t);
183     $val->{substr($t,0,1)} = substr($t,1);
184     }
185     } else {
186     $val = $l;
187     }
188    
189     push @{$self->{'data'}->{$mfn}->{$k}}, $val;
190     }
191 dpavlin 374 } else {
192     push @{$self->{'data'}->{$mfn}->{'000'}}, $mfn;
193 dpavlin 352 }
194    
195     }
196    
197     # create lookup
198 dpavlin 355 my $rec = $self->{'data'}->{$mfn};
199     $self->create_lookup($rec, @{$arg->{'lookup'}});
200 dpavlin 352
201     }
202    
203 dpavlin 362 $self->{'current_mfn'} = 1;
204    
205 dpavlin 352 # store max mfn and return it.
206     return $self->{'max_mfn'} = $maxmfn;
207     }
208    
209 dpavlin 362 =head2 fetch_rec
210    
211     Fetch next record from database. It will also display progress bar (once
212     it's implemented, that is).
213    
214     my $rec = $webpac->fetch_rec;
215    
216     =cut
217    
218     sub fetch_rec {
219     my $self = shift;
220    
221 dpavlin 372 my $log = $self->_get_logger();
222 dpavlin 362
223 dpavlin 372 my $mfn = $self->{'current_mfn'}++ || $log->logconfess("it seems that you didn't load database!");
224    
225 dpavlin 362 if ($mfn > $self->{'max_mfn'}) {
226     $self->{'current_mfn'} = $self->{'max_mfn'};
227 dpavlin 373 $log->debug("at EOF");
228 dpavlin 362 return;
229     }
230    
231     return $self->{'data'}->{$mfn};
232     }
233    
234 dpavlin 363 =head2 open_import_xml
235    
236     Read file from C<import_xml/> directory and parse it.
237    
238     $webpac->open_import_xml(type => 'isis');
239    
240     =cut
241    
242     sub open_import_xml {
243     my $self = shift;
244    
245 dpavlin 372 my $log = $self->_get_logger();
246    
247 dpavlin 363 my $arg = {@_};
248 dpavlin 372 $log->logconfess("need type to load file from import_xml/") if (! $arg->{'type'});
249 dpavlin 363
250 dpavlin 366 $self->{'type'} = $arg->{'type'};
251 dpavlin 363
252 dpavlin 366 my $type_base = $arg->{'type'};
253 dpavlin 363 $type_base =~ s/_.*$//g;
254    
255 dpavlin 366 $self->{'tag'} = $type2tag{$type_base};
256    
257 dpavlin 375 $log->info("using type '",$self->{'type'},"' tag <",$self->{'tag'},">");
258 dpavlin 366
259     my $f = "./import_xml/".$self->{'type'}.".xml";
260 dpavlin 372 $log->logconfess("import_xml file '$f' doesn't exist!") if (! -e "$f");
261 dpavlin 363
262 dpavlin 375 $log->info("reading '$f'");
263 dpavlin 363
264 dpavlin 375 $self->{'import_xml_file'} = $f;
265    
266 dpavlin 363 $self->{'import_xml'} = XMLin($f,
267 dpavlin 366 ForceArray => [ $self->{'tag'}, 'config', 'format' ],
268 dpavlin 363 );
269    
270 dpavlin 375 $log->debug("import xml is ",sub { Dumper($self->{'import_xml'}) });
271    
272 dpavlin 363 }
273    
274 dpavlin 355 =head2 create_lookup
275    
276     Create lookup from record using lookup definition.
277    
278 dpavlin 367 $self->create_lookup($rec, @lookups);
279    
280     Called internally by C<open_*> methods.
281    
282 dpavlin 355 =cut
283    
284     sub create_lookup {
285     my $self = shift;
286    
287 dpavlin 372 my $log = $self->_get_logger();
288 dpavlin 355
289 dpavlin 372 my $rec = shift || $log->logconfess("need record to create lookup");
290     $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
291    
292 dpavlin 355 foreach my $i (@_) {
293     if ($i->{'eval'}) {
294     my $eval = $self->fill_in($rec,$i->{'eval'});
295     my $key = $self->fill_in($rec,$i->{'key'});
296     my @val = $self->fill_in($rec,$i->{'val'});
297     if ($key && @val && eval $eval) {
298 dpavlin 373 $log->debug("stored $key = ",sub { join(" | ",@val) });
299 dpavlin 355 push @{$self->{'lookup'}->{$key}}, @val;
300     }
301     } else {
302     my $key = $self->fill_in($rec,$i->{'key'});
303     my @val = $self->fill_in($rec,$i->{'val'});
304     if ($key && @val) {
305 dpavlin 373 $log->debug("stored $key = ",sub { join(" | ",@val) });
306 dpavlin 355 push @{$self->{'lookup'}->{$key}}, @val;
307     }
308     }
309     }
310     }
311    
312 dpavlin 356 =head2 get_data
313    
314     Returns value from record.
315    
316 dpavlin 367 my $text = $self->get_data(\$rec,$f,$sf,$i,\$found);
317 dpavlin 356
318     Arguments are:
319     record reference C<$rec>,
320     field C<$f>,
321     optional subfiled C<$sf>,
322     index for repeatable values C<$i>.
323    
324 dpavlin 367 Optinal variable C<$found> will be incremeted if there
325 dpavlin 356 is field.
326    
327     Returns value or empty string.
328    
329     =cut
330    
331     sub get_data {
332     my $self = shift;
333    
334     my ($rec,$f,$sf,$i,$found) = @_;
335 dpavlin 367
336 dpavlin 356 if ($$rec->{$f}) {
337 dpavlin 367 return '' if (! $$rec->{$f}->[$i]);
338 dpavlin 356 if ($sf && $$rec->{$f}->[$i]->{$sf}) {
339     $$found++ if (defined($$found));
340     return $$rec->{$f}->[$i]->{$sf};
341     } elsif ($$rec->{$f}->[$i]) {
342     $$found++ if (defined($$found));
343 dpavlin 366 # it still might have subfield, just
344     # not specified, so we'll dump all
345     if ($$rec->{$f}->[$i] =~ /HASH/o) {
346     my $out;
347     foreach my $k (keys %{$$rec->{$f}->[$i]}) {
348     $out .= $$rec->{$f}->[$i]->{$k}." ";
349     }
350     return $out;
351     } else {
352     return $$rec->{$f}->[$i];
353     }
354 dpavlin 356 }
355     } else {
356     return '';
357     }
358     }
359    
360 dpavlin 352 =head2 fill_in
361    
362     Workhourse of all: takes record from in-memory structure of database and
363     strings with placeholders and returns string or array of with substituted
364     values from record.
365    
366 dpavlin 367 my $text = $webpac->fill_in($rec,'v250^a');
367 dpavlin 352
368     Optional argument is ordinal number for repeatable fields. By default,
369 dpavlin 353 it's assume to be first repeatable field (fields are perl array, so first
370     element is 0).
371     Following example will read second value from repeatable field.
372 dpavlin 352
373 dpavlin 367 my $text = $webpac->fill_in($rec,'Title: v250^a',1);
374 dpavlin 353
375     This function B<does not> perform parsing of format to inteligenty skip
376     delimiters before fields which aren't used.
377    
378 dpavlin 376 This method will automatically decode UTF-8 string to local code page
379     if needed.
380    
381 dpavlin 352 =cut
382    
383 dpavlin 372 sub fill_in {
384 dpavlin 371 my $self = shift;
385    
386 dpavlin 372 my $log = $self->_get_logger();
387 dpavlin 371
388 dpavlin 372 my $rec = shift || $log->logconfess("need data record");
389     my $format = shift || $log->logconfess("need format to parse");
390 dpavlin 352 # iteration (for repeatable fields)
391     my $i = shift || 0;
392    
393     # FIXME remove for speedup?
394 dpavlin 372 $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
395 dpavlin 352
396 dpavlin 376 if (utf8::is_utf8($format)) {
397     $format = $self->_x($format);
398     }
399    
400 dpavlin 352 my $found = 0;
401    
402 dpavlin 359 my $eval_code;
403     # remove eval{...} from beginning
404     $eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s);
405    
406 dpavlin 352 # do actual replacement of placeholders
407 dpavlin 373 $format =~ s/v(\d+)(?:\^(\w))?/$self->get_data(\$rec,$1,$2,$i,\$found)/ges;
408 dpavlin 352
409 dpavlin 353 if ($found) {
410 dpavlin 373 $log->debug("format: $format");
411 dpavlin 359 if ($eval_code) {
412     my $eval = $self->fill_in($rec,$eval_code,$i);
413 dpavlin 371 return if (! $self->_eval($eval));
414 dpavlin 359 }
415 dpavlin 353 # do we have lookups?
416 dpavlin 373 if ($format =~ /$LOOKUP_REGEX/o) {
417     $log->debug("format '$format' has lookup");
418 dpavlin 353 return $self->lookup($format);
419     } else {
420     return $format;
421     }
422 dpavlin 352 } else {
423     return;
424     }
425     }
426    
427     =head2 lookup
428    
429 dpavlin 355 Perform lookups on format supplied to it.
430 dpavlin 352
431 dpavlin 367 my $text = $self->lookup('[v900]');
432 dpavlin 352
433 dpavlin 355 Lookups can be nested (like C<[d:[a:[v900]]]>).
434    
435 dpavlin 352 =cut
436    
437     sub lookup {
438     my $self = shift;
439    
440 dpavlin 372 my $log = $self->_get_logger();
441 dpavlin 352
442 dpavlin 372 my $tmp = shift || $log->logconfess("need format");
443    
444 dpavlin 373 if ($tmp =~ /$LOOKUP_REGEX/o) {
445 dpavlin 352 my @in = ( $tmp );
446 dpavlin 372
447     $log->debug("lookup for: ",$tmp);
448    
449 dpavlin 352 my @out;
450     while (my $f = shift @in) {
451 dpavlin 373 if ($f =~ /$LOOKUP_REGEX_SAVE/o) {
452 dpavlin 352 my $k = $1;
453     if ($self->{'lookup'}->{$k}) {
454     foreach my $nv (@{$self->{'lookup'}->{$k}}) {
455     my $tmp2 = $f;
456 dpavlin 373 $tmp2 =~ s/lookup{$k}/$nv/g;
457 dpavlin 352 push @in, $tmp2;
458     }
459     } else {
460     undef $f;
461     }
462     } elsif ($f) {
463     push @out, $f;
464     }
465     }
466 dpavlin 373 $log->logconfess("return is array and it's not expected!") unless wantarray;
467 dpavlin 352 return @out;
468     } else {
469     return $tmp;
470     }
471     }
472    
473 dpavlin 356 =head2 parse
474    
475     Perform smart parsing of string, skipping delimiters for fields which aren't
476     defined. It can also eval code in format starting with C<eval{...}> and
477     return output or nothing depending on eval code.
478    
479 dpavlin 367 my $text = $webpac->parse($rec,'eval{"v901^a" eq "Deskriptor"}descriptor: v250^a', $i);
480 dpavlin 356
481     =cut
482    
483     sub parse {
484     my $self = shift;
485    
486 dpavlin 366 my ($rec, $format_utf8, $i) = @_;
487 dpavlin 356
488 dpavlin 366 return if (! $format_utf8);
489    
490 dpavlin 372 my $log = $self->_get_logger();
491 dpavlin 358
492 dpavlin 372 $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
493     $log->logconfess("need utf2cp Text::Iconv object!") if (! $self->{'utf2cp'});
494    
495 dpavlin 358 $i = 0 if (! $i);
496    
497 dpavlin 375 my $format = $self->_x($format_utf8) || $log->logconfess("can't convert '$format_utf8' from UTF-8 to ",$self->{'code_page'});
498 dpavlin 366
499 dpavlin 356 my @out;
500    
501 dpavlin 373 $log->debug("format: $format");
502    
503 dpavlin 356 my $eval_code;
504     # remove eval{...} from beginning
505     $eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s);
506    
507 dpavlin 358 my $prefix;
508     my $all_found=0;
509 dpavlin 356
510 dpavlin 373 while ($format =~ s/^(.*?)v(\d+)(?:\^(\w))?//s) {
511 dpavlin 356
512 dpavlin 358 my $del = $1 || '';
513 dpavlin 359 $prefix ||= $del if ($all_found == 0);
514 dpavlin 358
515     my $found = 0;
516     my $tmp = $self->get_data(\$rec,$2,$3,$i,\$found);
517    
518 dpavlin 356 if ($found) {
519 dpavlin 358 push @out, $del;
520     push @out, $tmp;
521     $all_found += $found;
522 dpavlin 356 }
523     }
524    
525 dpavlin 358 return if (! $all_found);
526 dpavlin 356
527 dpavlin 373 my $out = join('',@out);
528 dpavlin 358
529 dpavlin 373 if ($out) {
530     # add rest of format (suffix)
531     $out .= $format;
532 dpavlin 367
533 dpavlin 373 # add prefix if not there
534     $out = $prefix . $out if ($out !~ m/^\Q$prefix\E/);
535    
536     $log->debug("result: $out");
537     }
538    
539 dpavlin 359 if ($eval_code) {
540     my $eval = $self->fill_in($rec,$eval_code,$i);
541 dpavlin 373 $log->debug("about to eval{",$eval,"} format: $out");
542 dpavlin 371 return if (! $self->_eval($eval));
543 dpavlin 359 }
544    
545 dpavlin 358 return $out;
546 dpavlin 356 }
547    
548 dpavlin 367 =head2 parse_to_arr
549    
550     Similar to C<parse>, but returns array of all repeatable fields
551    
552     my @arr = $webpac->parse_to_arr($rec,'v250^a');
553    
554     =cut
555    
556     sub parse_to_arr {
557     my $self = shift;
558    
559     my ($rec, $format_utf8) = @_;
560    
561 dpavlin 372 my $log = $self->_get_logger();
562    
563     $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
564 dpavlin 367 return if (! $format_utf8);
565    
566     my $i = 0;
567     my @arr;
568    
569     while (my $v = $self->parse($rec,$format_utf8,$i++)) {
570     push @arr, $v;
571     }
572    
573 dpavlin 373 $log->debug("format '$format_utf8' returned ",--$i," elements: ", sub { join(" | ",@arr) }) if (@arr);
574    
575 dpavlin 367 return @arr;
576     }
577    
578 dpavlin 373 =head2 fill_in_to_arr
579    
580     Similar to C<fill_in>, but returns array of all repeatable fields. Usable
581     for fields which have lookups, so they shouldn't be parsed but rather
582     C<fill_id>ed.
583    
584     my @arr = $webpac->fill_in_to_arr($rec,'[v900];;[v250^a]');
585    
586     =cut
587    
588     sub fill_in_to_arr {
589     my $self = shift;
590    
591     my ($rec, $format_utf8) = @_;
592    
593     my $log = $self->_get_logger();
594    
595     $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
596     return if (! $format_utf8);
597    
598     my $i = 0;
599     my @arr;
600    
601     while (my @v = $self->fill_in($rec,$format_utf8,$i++)) {
602     push @arr, @v;
603     }
604    
605     $log->debug("format '$format_utf8' returned ",--$i," elements: ", sub { join(" | ",@arr) }) if (@arr);
606    
607     return @arr;
608     }
609    
610    
611 dpavlin 366 =head2 data_structure
612    
613     Create in-memory data structure which represents layout from C<import_xml>.
614     It is used later to produce output.
615    
616 dpavlin 368 my @ds = $webpac->data_structure($rec);
617 dpavlin 366
618 dpavlin 374 This method will also set C<$webpac->{'currnet_filename'}> if there is
619     <filename> tag in C<import_xml>.
620    
621 dpavlin 366 =cut
622    
623 dpavlin 372 sub data_structure {
624 dpavlin 366 my $self = shift;
625    
626 dpavlin 372 my $log = $self->_get_logger();
627 dpavlin 366
628     my $rec = shift;
629 dpavlin 372 $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
630 dpavlin 366
631 dpavlin 374 undef $self->{'currnet_filename'};
632    
633 dpavlin 366 my @sorted_tags;
634     if ($self->{tags_by_order}) {
635     @sorted_tags = @{$self->{tags_by_order}};
636     } else {
637     @sorted_tags = sort { $self->_sort_by_order } keys %{$self->{'import_xml'}->{'indexer'}};
638     $self->{tags_by_order} = \@sorted_tags;
639     }
640    
641 dpavlin 368 my @ds;
642 dpavlin 366
643 dpavlin 373 $log->debug("tags: ",sub { join(", ",@sorted_tags) });
644    
645 dpavlin 366 foreach my $field (@sorted_tags) {
646    
647     my $row;
648    
649     #print "field $field [",$self->{'tag'},"] = ",Dumper($self->{'import_xml'}->{'indexer'}->{$field}->{$self->{'tag'}});
650    
651     foreach my $tag (@{$self->{'import_xml'}->{'indexer'}->{$field}->{$self->{'tag'}}}) {
652 dpavlin 373 my $format = $tag->{'value'} || $tag->{'content'};
653 dpavlin 366
654 dpavlin 373 $log->debug("format: $format");
655    
656     my @v;
657     if ($format =~ /$LOOKUP_REGEX/o) {
658     @v = $self->fill_in_to_arr($rec,$format);
659     } else {
660     @v = $self->parse_to_arr($rec,$format);
661     }
662 dpavlin 367 next if (! @v);
663 dpavlin 366
664 dpavlin 375 # use format?
665     if ($tag->{'format_name'}) {
666     @v = map { $self->apply_format($tag->{'format_name'},$tag->{'format_delimiter'},$_) } @v;
667     }
668    
669 dpavlin 366 # does tag have type?
670     if ($tag->{'type'}) {
671 dpavlin 367 push @{$row->{$tag->{'type'}}}, @v;
672 dpavlin 366 } else {
673 dpavlin 367 push @{$row->{'display'}}, @v;
674     push @{$row->{'swish'}}, @v;
675 dpavlin 366 }
676 dpavlin 373
677 dpavlin 374 if ($field eq 'filename') {
678     $self->{'current_filename'} = join('',@v);
679     $log->debug("filename: ",$self->{'current_filename'});
680     }
681    
682 dpavlin 366 }
683    
684 dpavlin 368 if ($row) {
685     $row->{'tag'} = $field;
686 dpavlin 375
687     # TODO: name_sigular, name_plural
688     my $name = $self->{'import_xml'}->{'indexer'}->{$field}->{'name'};
689     $row->{'name'} = $name ? $self->_x($name) : $field;
690    
691 dpavlin 368 push @ds, $row;
692 dpavlin 374
693 dpavlin 373 $log->debug("row $field: ",sub { Dumper($row) });
694 dpavlin 368 }
695 dpavlin 366
696     }
697    
698 dpavlin 370 return @ds;
699 dpavlin 366
700     }
701    
702 dpavlin 370 =head2 output
703    
704     Create output from in-memory data structure using Template Toolkit template.
705    
706     my $text = $webpac->output( template => 'text.tt', data => @ds );
707    
708     =cut
709    
710     sub output {
711     my $self = shift;
712    
713     my $args = {@_};
714    
715 dpavlin 372 my $log = $self->_get_logger();
716 dpavlin 370
717 dpavlin 372 $log->logconfess("need template name") if (! $args->{'template'});
718     $log->logconfess("need data array") if (! $args->{'data'});
719    
720 dpavlin 370 my $out;
721    
722     $self->{'tt'}->process(
723     $args->{'template'},
724     $args,
725     \$out
726     ) || confess $self->{'tt'}->error();
727    
728     return $out;
729     }
730    
731 dpavlin 375 =head2 apply_format
732    
733     Apply format specified in tag with C<format_name="name"> and
734     C<format_delimiter=";;">.
735    
736     my $text = $webpac->apply_format($format_name,$format_delimiter,$data);
737    
738     Formats can contain C<lookup{...}> if you need them.
739    
740     =cut
741    
742     sub apply_format {
743     my $self = shift;
744    
745     my ($name,$delimiter,$data) = @_;
746    
747     my $log = $self->_get_logger();
748    
749     if (! $self->{'import_xml'}->{'format'}->{$name}) {
750     $log->warn("<format name=\"$name\"> is not defined in ",$self->{'import_xml_file'});
751     return $data;
752     }
753    
754     $log->warn("no delimiter for format $name") if (! $delimiter);
755    
756     my $format = $self->_x($self->{'import_xml'}->{'format'}->{$name}->{'content'}) || $log->logdie("can't find format '$name'");
757    
758     my @data = split(/\Q$delimiter\E/, $data);
759    
760     my $out = sprintf($format, @data);
761     $log->debug("using format $name [$format] on $data to produce: $out");
762    
763     if ($out =~ m/$LOOKUP_REGEX/o) {
764     return $self->lookup($out);
765     } else {
766     return $out;
767     }
768    
769     }
770    
771    
772 dpavlin 372 #
773     #
774     #
775    
776     =head1 INTERNAL METHODS
777    
778     Here is a quick list of internal methods, mostly useful to turn debugging
779     on them (see L<LOGGING> below for explanation).
780    
781     =cut
782    
783     =head2 _eval
784    
785     Internal function to eval code without C<strict 'subs'>.
786    
787     =cut
788    
789     sub _eval {
790     my $self = shift;
791    
792     my $code = shift || return;
793    
794     my $log = $self->_get_logger();
795    
796     no strict 'subs';
797     my $ret = eval $code;
798     if ($@) {
799     $log->error("problem with eval code [$code]: $@");
800     }
801    
802     $log->debug("eval: ",$code," [",$ret,"]");
803    
804     return $ret || 0;
805     }
806    
807     =head2 _sort_by_order
808    
809     Sort xml tags data structure accoding to C<order=""> attribute.
810    
811     =cut
812    
813     sub _sort_by_order {
814     my $self = shift;
815    
816     my $va = $self->{'import_xml'}->{'indexer'}->{$a}->{'order'} ||
817     $self->{'import_xml'}->{'indexer'}->{$a};
818     my $vb = $self->{'import_xml'}->{'indexer'}->{$b}->{'order'} ||
819     $self->{'import_xml'}->{'indexer'}->{$b};
820    
821     return $va <=> $vb;
822     }
823    
824 dpavlin 375 =head2 _get_logger
825    
826     Get C<Log::Log4perl> object with a twist: domains are defined for each
827     method
828    
829     my $log = $webpac->_get_logger();
830    
831     =cut
832    
833 dpavlin 372 sub _get_logger {
834     my $self = shift;
835    
836 dpavlin 374 my $name = (caller(1))[3] || caller;
837     return get_logger($name);
838 dpavlin 372 }
839    
840 dpavlin 375 =head2 _x
841    
842     Convert string from UTF-8 to code page defined in C<import_xml>.
843    
844     my $text = $webpac->_x('utf8 text');
845    
846     =cut
847    
848     sub _x {
849     my $self = shift;
850     my $utf8 = shift || return;
851    
852     return $self->{'utf2cp'}->convert($utf8) ||
853     $self->_get_logger()->logwarn("can't convert '$utf8'");
854     }
855    
856 dpavlin 372 #
857     #
858     #
859    
860     =head1 LOGGING
861    
862     Logging in WebPAC is performed by L<Log::Log4perl> with config file
863     C<log.conf>.
864    
865     Methods defined above have different levels of logging, so
866     it's descriptions will be useful to turn (mostry B<debug> logging) on
867     or off to see why WabPAC isn't perforing as you expect it (it might even
868     be a bug!).
869    
870     B<This is different from normal Log4perl behaviour>. To repeat, you can
871     also use method names, and not only classes (which are just few)
872     to filter logging.
873    
874     =cut
875    
876 dpavlin 352 1;

  ViewVC Help
Powered by ViewVC 1.1.26