/[webpac]/trunk2/lib/WebPAC.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk2/lib/WebPAC.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 375 - (hide annotations)
Sun Jun 20 17:52:41 2004 UTC (19 years, 9 months ago) by dpavlin
File size: 18620 byte(s)
implement formats

1 dpavlin 354 package WebPAC;
2 dpavlin 352
3 dpavlin 367 use warnings;
4     use strict;
5    
6 dpavlin 352 use Carp;
7 dpavlin 353 use Text::Iconv;
8     use Config::IniFiles;
9 dpavlin 363 use XML::Simple;
10 dpavlin 370 use Template;
11 dpavlin 372 use Log::Log4perl qw(get_logger :levels);
12 dpavlin 352
13 dpavlin 358 use Data::Dumper;
14    
15 dpavlin 373 #my $LOOKUP_REGEX = '\[[^\[\]]+\]';
16     #my $LOOKUP_REGEX_SAVE = '\[([^\[\]]+)\]';
17     my $LOOKUP_REGEX = 'lookup{[^\{\}]+}';
18     my $LOOKUP_REGEX_SAVE = 'lookup{([^\{\}]+)}';
19    
20 dpavlin 352 =head1 NAME
21    
22 dpavlin 354 WebPAC - base class for WebPAC
23 dpavlin 352
24     =head1 DESCRIPTION
25    
26 dpavlin 354 This module implements methods used by WebPAC.
27 dpavlin 352
28     =head1 METHODS
29    
30     =head2 new
31    
32 dpavlin 354 This will create new instance of WebPAC using configuration specified by C<config_file>.
33 dpavlin 352
34 dpavlin 354 my $webpac = new WebPAC(
35 dpavlin 352 config_file => 'name.conf',
36     [code_page => 'ISO-8859-2',]
37     );
38    
39     Default C<code_page> is C<ISO-8859-2>.
40    
41 dpavlin 353 It will also read configuration files
42     C<global.conf> (used by indexer and Web font-end)
43     and configuration file specified by C<config_file>
44     which describes databases to be indexed.
45    
46 dpavlin 352 =cut
47    
48 dpavlin 363 # mapping between data type and tag which specify
49     # format in XML file
50     my %type2tag = (
51     'isis' => 'isis',
52     # 'excel' => 'column',
53     # 'marc' => 'marc',
54     # 'feed' => 'feed'
55     );
56    
57 dpavlin 352 sub new {
58     my $class = shift;
59     my $self = {@_};
60     bless($self, $class);
61    
62 dpavlin 372 my $log_file = $self->{'log'} || "log.conf";
63     Log::Log4perl->init($log_file);
64    
65     my $log = $self->_get_logger();
66    
67 dpavlin 352 # fill in default values
68     # output codepage
69     $self->{'code_page'} = 'ISO-8859-2' if (! $self->{'code_page'});
70    
71 dpavlin 353 #
72     # read global.conf
73     #
74 dpavlin 372 $log->debug("read 'global.conf'");
75 dpavlin 352
76 dpavlin 372 my $config = new Config::IniFiles( -file => 'global.conf' ) || $log->logcroak("can't open 'global.conf'");
77 dpavlin 352
78     # read global config parametars
79     foreach my $var (qw(
80     dbi_dbd
81     dbi_dsn
82     dbi_user
83     dbi_passwd
84     show_progress
85     my_unac_filter
86 dpavlin 370 output_template
87 dpavlin 352 )) {
88 dpavlin 370 $self->{'global_config'}->{$var} = $config->val('global', $var);
89 dpavlin 352 }
90    
91 dpavlin 353 #
92     # read indexer config file
93     #
94 dpavlin 352
95 dpavlin 372 $self->{indexer_config_file} = new Config::IniFiles( -file => $self->{config_file} ) || $log->logcroak("can't open '",$self->{config_file},"'");
96 dpavlin 352
97 dpavlin 370 # create UTF-8 convertor for import_xml files
98 dpavlin 366 $self->{'utf2cp'} = Text::Iconv->new('UTF-8' ,$self->{'code_page'});
99 dpavlin 370
100     # create Template toolkit instance
101     $self->{'tt'} = Template->new(
102     INCLUDE_PATH => ($self->{'global_config_file'}->{'output_template'} || './output_template'),
103     # FILTERS => {
104     # 'foo' => \&foo_filter,
105     # },
106     EVAL_PERL => 1,
107     );
108    
109 dpavlin 352 return $self;
110     }
111    
112     =head2 open_isis
113    
114     Open CDS/ISIS database using OpenIsis module and read all records to memory.
115    
116     $webpac->open_isis(
117     filename => '/data/ISIS/ISIS',
118     code_page => '852',
119     limit_mfn => '500',
120     lookup => [ ... ],
121     );
122    
123     By default, ISIS code page is assumed to be C<852>.
124    
125 dpavlin 353 If optional parametar C<limit_mfn> is set, it will read just 500 records
126     from database in example above.
127 dpavlin 352
128     C<lookup> argument is an array of lookups to create. Each lookup must have C<key> and
129     C<val>. Optional parametar C<eval> is perl code to evaluate before storing
130     value in index.
131    
132     lookup => [
133     { 'key' => 'd:v900', 'val' => 'v250^a' },
134     { 'eval' => '"v901^a" eq "Podruèje"',
135     'key' => 'pa:v561^4:v562^4:v461^1',
136     'val' => 'v900' },
137     ]
138    
139 dpavlin 367 Returns number of last record read into memory (size of database, really).
140    
141 dpavlin 352 =cut
142    
143     sub open_isis {
144     my $self = shift;
145     my $arg = {@_};
146    
147 dpavlin 372 my $log = $self->_get_logger();
148    
149     $log->logcroak("need filename") if (! $arg->{'filename'});
150 dpavlin 352 my $code_page = $arg->{'code_page'} || '852';
151    
152 dpavlin 353 use OpenIsis;
153    
154 dpavlin 352 #$self->{'isis_code_page'} = $code_page;
155    
156     # create Text::Iconv object
157     my $cp = Text::Iconv->new($code_page,$self->{'code_page'});
158    
159 dpavlin 372 $log->info("reading ISIS database '",$arg->{'filename'},"'");
160 dpavlin 363
161 dpavlin 352 my $isis_db = OpenIsis::open($arg->{'filename'});
162    
163     my $maxmfn = OpenIsis::maxRowid( $isis_db ) || 1;
164    
165 dpavlin 357 $maxmfn = $self->{limit_mfn} if ($self->{limit_mfn});
166    
167 dpavlin 372 $log->info("processing $maxmfn records...");
168 dpavlin 363
169 dpavlin 352 # read database
170     for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) {
171    
172     # read record
173     my $row = OpenIsis::read( $isis_db, $mfn );
174     foreach my $k (keys %{$row}) {
175     if ($k ne "mfn") {
176     foreach my $l (@{$row->{$k}}) {
177     $l = $cp->convert($l);
178     # has subfields?
179     my $val;
180     if ($l =~ m/\^/) {
181     foreach my $t (split(/\^/,$l)) {
182     next if (! $t);
183     $val->{substr($t,0,1)} = substr($t,1);
184     }
185     } else {
186     $val = $l;
187     }
188    
189     push @{$self->{'data'}->{$mfn}->{$k}}, $val;
190     }
191 dpavlin 374 } else {
192     push @{$self->{'data'}->{$mfn}->{'000'}}, $mfn;
193 dpavlin 352 }
194    
195     }
196    
197     # create lookup
198 dpavlin 355 my $rec = $self->{'data'}->{$mfn};
199     $self->create_lookup($rec, @{$arg->{'lookup'}});
200 dpavlin 352
201     }
202    
203 dpavlin 362 $self->{'current_mfn'} = 1;
204    
205 dpavlin 352 # store max mfn and return it.
206     return $self->{'max_mfn'} = $maxmfn;
207     }
208    
209 dpavlin 362 =head2 fetch_rec
210    
211     Fetch next record from database. It will also display progress bar (once
212     it's implemented, that is).
213    
214     my $rec = $webpac->fetch_rec;
215    
216     =cut
217    
218     sub fetch_rec {
219     my $self = shift;
220    
221 dpavlin 372 my $log = $self->_get_logger();
222 dpavlin 362
223 dpavlin 372 my $mfn = $self->{'current_mfn'}++ || $log->logconfess("it seems that you didn't load database!");
224    
225 dpavlin 362 if ($mfn > $self->{'max_mfn'}) {
226     $self->{'current_mfn'} = $self->{'max_mfn'};
227 dpavlin 373 $log->debug("at EOF");
228 dpavlin 362 return;
229     }
230    
231     return $self->{'data'}->{$mfn};
232     }
233    
234 dpavlin 363 =head2 open_import_xml
235    
236     Read file from C<import_xml/> directory and parse it.
237    
238     $webpac->open_import_xml(type => 'isis');
239    
240     =cut
241    
242     sub open_import_xml {
243     my $self = shift;
244    
245 dpavlin 372 my $log = $self->_get_logger();
246    
247 dpavlin 363 my $arg = {@_};
248 dpavlin 372 $log->logconfess("need type to load file from import_xml/") if (! $arg->{'type'});
249 dpavlin 363
250 dpavlin 366 $self->{'type'} = $arg->{'type'};
251 dpavlin 363
252 dpavlin 366 my $type_base = $arg->{'type'};
253 dpavlin 363 $type_base =~ s/_.*$//g;
254    
255 dpavlin 366 $self->{'tag'} = $type2tag{$type_base};
256    
257 dpavlin 375 $log->info("using type '",$self->{'type'},"' tag <",$self->{'tag'},">");
258 dpavlin 366
259     my $f = "./import_xml/".$self->{'type'}.".xml";
260 dpavlin 372 $log->logconfess("import_xml file '$f' doesn't exist!") if (! -e "$f");
261 dpavlin 363
262 dpavlin 375 $log->info("reading '$f'");
263 dpavlin 363
264 dpavlin 375 $self->{'import_xml_file'} = $f;
265    
266 dpavlin 363 $self->{'import_xml'} = XMLin($f,
267 dpavlin 366 ForceArray => [ $self->{'tag'}, 'config', 'format' ],
268 dpavlin 363 );
269    
270 dpavlin 375 $log->debug("import xml is ",sub { Dumper($self->{'import_xml'}) });
271    
272 dpavlin 363 }
273    
274 dpavlin 355 =head2 create_lookup
275    
276     Create lookup from record using lookup definition.
277    
278 dpavlin 367 $self->create_lookup($rec, @lookups);
279    
280     Called internally by C<open_*> methods.
281    
282 dpavlin 355 =cut
283    
284     sub create_lookup {
285     my $self = shift;
286    
287 dpavlin 372 my $log = $self->_get_logger();
288 dpavlin 355
289 dpavlin 372 my $rec = shift || $log->logconfess("need record to create lookup");
290     $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
291    
292 dpavlin 355 foreach my $i (@_) {
293     if ($i->{'eval'}) {
294     my $eval = $self->fill_in($rec,$i->{'eval'});
295     my $key = $self->fill_in($rec,$i->{'key'});
296     my @val = $self->fill_in($rec,$i->{'val'});
297     if ($key && @val && eval $eval) {
298 dpavlin 373 $log->debug("stored $key = ",sub { join(" | ",@val) });
299 dpavlin 355 push @{$self->{'lookup'}->{$key}}, @val;
300     }
301     } else {
302     my $key = $self->fill_in($rec,$i->{'key'});
303     my @val = $self->fill_in($rec,$i->{'val'});
304     if ($key && @val) {
305 dpavlin 373 $log->debug("stored $key = ",sub { join(" | ",@val) });
306 dpavlin 355 push @{$self->{'lookup'}->{$key}}, @val;
307     }
308     }
309     }
310     }
311    
312 dpavlin 356 =head2 get_data
313    
314     Returns value from record.
315    
316 dpavlin 367 my $text = $self->get_data(\$rec,$f,$sf,$i,\$found);
317 dpavlin 356
318     Arguments are:
319     record reference C<$rec>,
320     field C<$f>,
321     optional subfiled C<$sf>,
322     index for repeatable values C<$i>.
323    
324 dpavlin 367 Optinal variable C<$found> will be incremeted if there
325 dpavlin 356 is field.
326    
327     Returns value or empty string.
328    
329     =cut
330    
331     sub get_data {
332     my $self = shift;
333    
334     my ($rec,$f,$sf,$i,$found) = @_;
335 dpavlin 367
336 dpavlin 356 if ($$rec->{$f}) {
337 dpavlin 367 return '' if (! $$rec->{$f}->[$i]);
338 dpavlin 356 if ($sf && $$rec->{$f}->[$i]->{$sf}) {
339     $$found++ if (defined($$found));
340     return $$rec->{$f}->[$i]->{$sf};
341     } elsif ($$rec->{$f}->[$i]) {
342     $$found++ if (defined($$found));
343 dpavlin 366 # it still might have subfield, just
344     # not specified, so we'll dump all
345     if ($$rec->{$f}->[$i] =~ /HASH/o) {
346     my $out;
347     foreach my $k (keys %{$$rec->{$f}->[$i]}) {
348     $out .= $$rec->{$f}->[$i]->{$k}." ";
349     }
350     return $out;
351     } else {
352     return $$rec->{$f}->[$i];
353     }
354 dpavlin 356 }
355     } else {
356     return '';
357     }
358     }
359    
360 dpavlin 352 =head2 fill_in
361    
362     Workhourse of all: takes record from in-memory structure of database and
363     strings with placeholders and returns string or array of with substituted
364     values from record.
365    
366 dpavlin 367 my $text = $webpac->fill_in($rec,'v250^a');
367 dpavlin 352
368     Optional argument is ordinal number for repeatable fields. By default,
369 dpavlin 353 it's assume to be first repeatable field (fields are perl array, so first
370     element is 0).
371     Following example will read second value from repeatable field.
372 dpavlin 352
373 dpavlin 367 my $text = $webpac->fill_in($rec,'Title: v250^a',1);
374 dpavlin 353
375     This function B<does not> perform parsing of format to inteligenty skip
376     delimiters before fields which aren't used.
377    
378 dpavlin 352 =cut
379    
380 dpavlin 372 sub fill_in {
381 dpavlin 371 my $self = shift;
382    
383 dpavlin 372 my $log = $self->_get_logger();
384 dpavlin 371
385 dpavlin 372 my $rec = shift || $log->logconfess("need data record");
386     my $format = shift || $log->logconfess("need format to parse");
387 dpavlin 352 # iteration (for repeatable fields)
388     my $i = shift || 0;
389    
390     # FIXME remove for speedup?
391 dpavlin 372 $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
392 dpavlin 352
393     my $found = 0;
394    
395 dpavlin 359 my $eval_code;
396     # remove eval{...} from beginning
397     $eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s);
398    
399 dpavlin 352 # do actual replacement of placeholders
400 dpavlin 373 $format =~ s/v(\d+)(?:\^(\w))?/$self->get_data(\$rec,$1,$2,$i,\$found)/ges;
401 dpavlin 352
402 dpavlin 353 if ($found) {
403 dpavlin 373 $log->debug("format: $format");
404 dpavlin 359 if ($eval_code) {
405     my $eval = $self->fill_in($rec,$eval_code,$i);
406 dpavlin 371 return if (! $self->_eval($eval));
407 dpavlin 359 }
408 dpavlin 353 # do we have lookups?
409 dpavlin 373 if ($format =~ /$LOOKUP_REGEX/o) {
410     $log->debug("format '$format' has lookup");
411 dpavlin 353 return $self->lookup($format);
412     } else {
413     return $format;
414     }
415 dpavlin 352 } else {
416     return;
417     }
418     }
419    
420     =head2 lookup
421    
422 dpavlin 355 Perform lookups on format supplied to it.
423 dpavlin 352
424 dpavlin 367 my $text = $self->lookup('[v900]');
425 dpavlin 352
426 dpavlin 355 Lookups can be nested (like C<[d:[a:[v900]]]>).
427    
428 dpavlin 352 =cut
429    
430     sub lookup {
431     my $self = shift;
432    
433 dpavlin 372 my $log = $self->_get_logger();
434 dpavlin 352
435 dpavlin 372 my $tmp = shift || $log->logconfess("need format");
436    
437 dpavlin 373 if ($tmp =~ /$LOOKUP_REGEX/o) {
438 dpavlin 352 my @in = ( $tmp );
439 dpavlin 372
440     $log->debug("lookup for: ",$tmp);
441    
442 dpavlin 352 my @out;
443     while (my $f = shift @in) {
444 dpavlin 373 if ($f =~ /$LOOKUP_REGEX_SAVE/o) {
445 dpavlin 352 my $k = $1;
446     if ($self->{'lookup'}->{$k}) {
447     foreach my $nv (@{$self->{'lookup'}->{$k}}) {
448     my $tmp2 = $f;
449 dpavlin 373 $tmp2 =~ s/lookup{$k}/$nv/g;
450 dpavlin 352 push @in, $tmp2;
451     }
452     } else {
453     undef $f;
454     }
455     } elsif ($f) {
456     push @out, $f;
457     }
458     }
459 dpavlin 373 $log->logconfess("return is array and it's not expected!") unless wantarray;
460 dpavlin 352 return @out;
461     } else {
462     return $tmp;
463     }
464     }
465    
466 dpavlin 356 =head2 parse
467    
468     Perform smart parsing of string, skipping delimiters for fields which aren't
469     defined. It can also eval code in format starting with C<eval{...}> and
470     return output or nothing depending on eval code.
471    
472 dpavlin 367 my $text = $webpac->parse($rec,'eval{"v901^a" eq "Deskriptor"}descriptor: v250^a', $i);
473 dpavlin 356
474     =cut
475    
476     sub parse {
477     my $self = shift;
478    
479 dpavlin 366 my ($rec, $format_utf8, $i) = @_;
480 dpavlin 356
481 dpavlin 366 return if (! $format_utf8);
482    
483 dpavlin 372 my $log = $self->_get_logger();
484 dpavlin 358
485 dpavlin 372 $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
486     $log->logconfess("need utf2cp Text::Iconv object!") if (! $self->{'utf2cp'});
487    
488 dpavlin 358 $i = 0 if (! $i);
489    
490 dpavlin 375 my $format = $self->_x($format_utf8) || $log->logconfess("can't convert '$format_utf8' from UTF-8 to ",$self->{'code_page'});
491 dpavlin 366
492 dpavlin 356 my @out;
493    
494 dpavlin 373 $log->debug("format: $format");
495    
496 dpavlin 356 my $eval_code;
497     # remove eval{...} from beginning
498     $eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s);
499    
500 dpavlin 358 my $prefix;
501     my $all_found=0;
502 dpavlin 356
503 dpavlin 373 while ($format =~ s/^(.*?)v(\d+)(?:\^(\w))?//s) {
504 dpavlin 356
505 dpavlin 358 my $del = $1 || '';
506 dpavlin 359 $prefix ||= $del if ($all_found == 0);
507 dpavlin 358
508     my $found = 0;
509     my $tmp = $self->get_data(\$rec,$2,$3,$i,\$found);
510    
511 dpavlin 356 if ($found) {
512 dpavlin 358 push @out, $del;
513     push @out, $tmp;
514     $all_found += $found;
515 dpavlin 356 }
516     }
517    
518 dpavlin 358 return if (! $all_found);
519 dpavlin 356
520 dpavlin 373 my $out = join('',@out);
521 dpavlin 358
522 dpavlin 373 if ($out) {
523     # add rest of format (suffix)
524     $out .= $format;
525 dpavlin 367
526 dpavlin 373 # add prefix if not there
527     $out = $prefix . $out if ($out !~ m/^\Q$prefix\E/);
528    
529     $log->debug("result: $out");
530     }
531    
532 dpavlin 359 if ($eval_code) {
533     my $eval = $self->fill_in($rec,$eval_code,$i);
534 dpavlin 373 $log->debug("about to eval{",$eval,"} format: $out");
535 dpavlin 371 return if (! $self->_eval($eval));
536 dpavlin 359 }
537    
538 dpavlin 358 return $out;
539 dpavlin 356 }
540    
541 dpavlin 367 =head2 parse_to_arr
542    
543     Similar to C<parse>, but returns array of all repeatable fields
544    
545     my @arr = $webpac->parse_to_arr($rec,'v250^a');
546    
547     =cut
548    
549     sub parse_to_arr {
550     my $self = shift;
551    
552     my ($rec, $format_utf8) = @_;
553    
554 dpavlin 372 my $log = $self->_get_logger();
555    
556     $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
557 dpavlin 367 return if (! $format_utf8);
558    
559     my $i = 0;
560     my @arr;
561    
562     while (my $v = $self->parse($rec,$format_utf8,$i++)) {
563     push @arr, $v;
564     }
565    
566 dpavlin 373 $log->debug("format '$format_utf8' returned ",--$i," elements: ", sub { join(" | ",@arr) }) if (@arr);
567    
568 dpavlin 367 return @arr;
569     }
570    
571 dpavlin 373 =head2 fill_in_to_arr
572    
573     Similar to C<fill_in>, but returns array of all repeatable fields. Usable
574     for fields which have lookups, so they shouldn't be parsed but rather
575     C<fill_id>ed.
576    
577     my @arr = $webpac->fill_in_to_arr($rec,'[v900];;[v250^a]');
578    
579     =cut
580    
581     sub fill_in_to_arr {
582     my $self = shift;
583    
584     my ($rec, $format_utf8) = @_;
585    
586     my $log = $self->_get_logger();
587    
588     $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
589     return if (! $format_utf8);
590    
591     my $i = 0;
592     my @arr;
593    
594     while (my @v = $self->fill_in($rec,$format_utf8,$i++)) {
595     push @arr, @v;
596     }
597    
598     $log->debug("format '$format_utf8' returned ",--$i," elements: ", sub { join(" | ",@arr) }) if (@arr);
599    
600     return @arr;
601     }
602    
603    
604 dpavlin 366 =head2 data_structure
605    
606     Create in-memory data structure which represents layout from C<import_xml>.
607     It is used later to produce output.
608    
609 dpavlin 368 my @ds = $webpac->data_structure($rec);
610 dpavlin 366
611 dpavlin 374 This method will also set C<$webpac->{'currnet_filename'}> if there is
612     <filename> tag in C<import_xml>.
613    
614 dpavlin 366 =cut
615    
616 dpavlin 372 sub data_structure {
617 dpavlin 366 my $self = shift;
618    
619 dpavlin 372 my $log = $self->_get_logger();
620 dpavlin 366
621     my $rec = shift;
622 dpavlin 372 $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
623 dpavlin 366
624 dpavlin 374 undef $self->{'currnet_filename'};
625    
626 dpavlin 366 my @sorted_tags;
627     if ($self->{tags_by_order}) {
628     @sorted_tags = @{$self->{tags_by_order}};
629     } else {
630     @sorted_tags = sort { $self->_sort_by_order } keys %{$self->{'import_xml'}->{'indexer'}};
631     $self->{tags_by_order} = \@sorted_tags;
632     }
633    
634 dpavlin 368 my @ds;
635 dpavlin 366
636 dpavlin 373 $log->debug("tags: ",sub { join(", ",@sorted_tags) });
637    
638 dpavlin 366 foreach my $field (@sorted_tags) {
639    
640     my $row;
641    
642     #print "field $field [",$self->{'tag'},"] = ",Dumper($self->{'import_xml'}->{'indexer'}->{$field}->{$self->{'tag'}});
643    
644     foreach my $tag (@{$self->{'import_xml'}->{'indexer'}->{$field}->{$self->{'tag'}}}) {
645 dpavlin 373 my $format = $tag->{'value'} || $tag->{'content'};
646 dpavlin 366
647 dpavlin 373 $log->debug("format: $format");
648    
649     my @v;
650     if ($format =~ /$LOOKUP_REGEX/o) {
651     @v = $self->fill_in_to_arr($rec,$format);
652     } else {
653     @v = $self->parse_to_arr($rec,$format);
654     }
655 dpavlin 367 next if (! @v);
656 dpavlin 366
657 dpavlin 375 # use format?
658     if ($tag->{'format_name'}) {
659     @v = map { $self->apply_format($tag->{'format_name'},$tag->{'format_delimiter'},$_) } @v;
660     }
661    
662 dpavlin 366 # does tag have type?
663     if ($tag->{'type'}) {
664 dpavlin 367 push @{$row->{$tag->{'type'}}}, @v;
665 dpavlin 366 } else {
666 dpavlin 367 push @{$row->{'display'}}, @v;
667     push @{$row->{'swish'}}, @v;
668 dpavlin 366 }
669 dpavlin 373
670 dpavlin 374 if ($field eq 'filename') {
671     $self->{'current_filename'} = join('',@v);
672     $log->debug("filename: ",$self->{'current_filename'});
673     }
674    
675 dpavlin 366 }
676    
677 dpavlin 368 if ($row) {
678     $row->{'tag'} = $field;
679 dpavlin 375
680     # TODO: name_sigular, name_plural
681     my $name = $self->{'import_xml'}->{'indexer'}->{$field}->{'name'};
682     $row->{'name'} = $name ? $self->_x($name) : $field;
683    
684 dpavlin 368 push @ds, $row;
685 dpavlin 374
686 dpavlin 373 $log->debug("row $field: ",sub { Dumper($row) });
687 dpavlin 368 }
688 dpavlin 366
689     }
690    
691 dpavlin 370 return @ds;
692 dpavlin 366
693     }
694    
695 dpavlin 370 =head2 output
696    
697     Create output from in-memory data structure using Template Toolkit template.
698    
699     my $text = $webpac->output( template => 'text.tt', data => @ds );
700    
701     =cut
702    
703     sub output {
704     my $self = shift;
705    
706     my $args = {@_};
707    
708 dpavlin 372 my $log = $self->_get_logger();
709 dpavlin 370
710 dpavlin 372 $log->logconfess("need template name") if (! $args->{'template'});
711     $log->logconfess("need data array") if (! $args->{'data'});
712    
713 dpavlin 370 my $out;
714    
715     $self->{'tt'}->process(
716     $args->{'template'},
717     $args,
718     \$out
719     ) || confess $self->{'tt'}->error();
720    
721     return $out;
722     }
723    
724 dpavlin 375 =head2 apply_format
725    
726     Apply format specified in tag with C<format_name="name"> and
727     C<format_delimiter=";;">.
728    
729     my $text = $webpac->apply_format($format_name,$format_delimiter,$data);
730    
731     Formats can contain C<lookup{...}> if you need them.
732    
733     =cut
734    
735     sub apply_format {
736     my $self = shift;
737    
738     my ($name,$delimiter,$data) = @_;
739    
740     my $log = $self->_get_logger();
741    
742     if (! $self->{'import_xml'}->{'format'}->{$name}) {
743     $log->warn("<format name=\"$name\"> is not defined in ",$self->{'import_xml_file'});
744     return $data;
745     }
746    
747     $log->warn("no delimiter for format $name") if (! $delimiter);
748    
749     my $format = $self->_x($self->{'import_xml'}->{'format'}->{$name}->{'content'}) || $log->logdie("can't find format '$name'");
750    
751     my @data = split(/\Q$delimiter\E/, $data);
752    
753     my $out = sprintf($format, @data);
754     $log->debug("using format $name [$format] on $data to produce: $out");
755    
756     if ($out =~ m/$LOOKUP_REGEX/o) {
757     return $self->lookup($out);
758     } else {
759     return $out;
760     }
761    
762     }
763    
764    
765 dpavlin 372 #
766     #
767     #
768    
769     =head1 INTERNAL METHODS
770    
771     Here is a quick list of internal methods, mostly useful to turn debugging
772     on them (see L<LOGGING> below for explanation).
773    
774     =cut
775    
776     =head2 _eval
777    
778     Internal function to eval code without C<strict 'subs'>.
779    
780     =cut
781    
782     sub _eval {
783     my $self = shift;
784    
785     my $code = shift || return;
786    
787     my $log = $self->_get_logger();
788    
789     no strict 'subs';
790     my $ret = eval $code;
791     if ($@) {
792     $log->error("problem with eval code [$code]: $@");
793     }
794    
795     $log->debug("eval: ",$code," [",$ret,"]");
796    
797     return $ret || 0;
798     }
799    
800     =head2 _sort_by_order
801    
802     Sort xml tags data structure accoding to C<order=""> attribute.
803    
804     =cut
805    
806     sub _sort_by_order {
807     my $self = shift;
808    
809     my $va = $self->{'import_xml'}->{'indexer'}->{$a}->{'order'} ||
810     $self->{'import_xml'}->{'indexer'}->{$a};
811     my $vb = $self->{'import_xml'}->{'indexer'}->{$b}->{'order'} ||
812     $self->{'import_xml'}->{'indexer'}->{$b};
813    
814     return $va <=> $vb;
815     }
816    
817 dpavlin 375 =head2 _get_logger
818    
819     Get C<Log::Log4perl> object with a twist: domains are defined for each
820     method
821    
822     my $log = $webpac->_get_logger();
823    
824     =cut
825    
826 dpavlin 372 sub _get_logger {
827     my $self = shift;
828    
829 dpavlin 374 my $name = (caller(1))[3] || caller;
830     return get_logger($name);
831 dpavlin 372 }
832    
833 dpavlin 375 =head2 _x
834    
835     Convert string from UTF-8 to code page defined in C<import_xml>.
836    
837     my $text = $webpac->_x('utf8 text');
838    
839     =cut
840    
841     sub _x {
842     my $self = shift;
843     my $utf8 = shift || return;
844    
845     return $self->{'utf2cp'}->convert($utf8) ||
846     $self->_get_logger()->logwarn("can't convert '$utf8'");
847     }
848    
849 dpavlin 372 #
850     #
851     #
852    
853     =head1 LOGGING
854    
855     Logging in WebPAC is performed by L<Log::Log4perl> with config file
856     C<log.conf>.
857    
858     Methods defined above have different levels of logging, so
859     it's descriptions will be useful to turn (mostry B<debug> logging) on
860     or off to see why WabPAC isn't perforing as you expect it (it might even
861     be a bug!).
862    
863     B<This is different from normal Log4perl behaviour>. To repeat, you can
864     also use method names, and not only classes (which are just few)
865     to filter logging.
866    
867     =cut
868    
869 dpavlin 352 1;

  ViewVC Help
Powered by ViewVC 1.1.26