/[webpac]/trunk2/lib/WebPAC.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk2/lib/WebPAC.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 398 - (hide annotations)
Sat Jul 24 13:48:08 2004 UTC (15 years, 6 months ago) by dpavlin
File size: 19828 byte(s)
moved headline information into $webpac->{'headline'} after data_structure is
called. This makes headline desapier from output templates, and namebles new
template veriable 'headline' to contain headline.

1 dpavlin 354 package WebPAC;
2 dpavlin 352
3 dpavlin 367 use warnings;
4     use strict;
5    
6 dpavlin 352 use Carp;
7 dpavlin 353 use Text::Iconv;
8     use Config::IniFiles;
9 dpavlin 363 use XML::Simple;
10 dpavlin 370 use Template;
11 dpavlin 372 use Log::Log4perl qw(get_logger :levels);
12 dpavlin 352
13 dpavlin 358 use Data::Dumper;
14    
15 dpavlin 373 #my $LOOKUP_REGEX = '\[[^\[\]]+\]';
16     #my $LOOKUP_REGEX_SAVE = '\[([^\[\]]+)\]';
17     my $LOOKUP_REGEX = 'lookup{[^\{\}]+}';
18     my $LOOKUP_REGEX_SAVE = 'lookup{([^\{\}]+)}';
19    
20 dpavlin 352 =head1 NAME
21    
22 dpavlin 354 WebPAC - base class for WebPAC
23 dpavlin 352
24     =head1 DESCRIPTION
25    
26 dpavlin 354 This module implements methods used by WebPAC.
27 dpavlin 352
28     =head1 METHODS
29    
30     =head2 new
31    
32 dpavlin 354 This will create new instance of WebPAC using configuration specified by C<config_file>.
33 dpavlin 352
34 dpavlin 354 my $webpac = new WebPAC(
35 dpavlin 352 config_file => 'name.conf',
36     [code_page => 'ISO-8859-2',]
37     );
38    
39     Default C<code_page> is C<ISO-8859-2>.
40    
41 dpavlin 353 It will also read configuration files
42     C<global.conf> (used by indexer and Web font-end)
43     and configuration file specified by C<config_file>
44     which describes databases to be indexed.
45    
46 dpavlin 352 =cut
47    
48 dpavlin 363 # mapping between data type and tag which specify
49     # format in XML file
50     my %type2tag = (
51     'isis' => 'isis',
52     # 'excel' => 'column',
53     # 'marc' => 'marc',
54     # 'feed' => 'feed'
55     );
56    
57 dpavlin 352 sub new {
58     my $class = shift;
59     my $self = {@_};
60     bless($self, $class);
61    
62 dpavlin 372 my $log_file = $self->{'log'} || "log.conf";
63     Log::Log4perl->init($log_file);
64    
65     my $log = $self->_get_logger();
66    
67 dpavlin 352 # fill in default values
68     # output codepage
69     $self->{'code_page'} = 'ISO-8859-2' if (! $self->{'code_page'});
70    
71 dpavlin 353 #
72     # read global.conf
73     #
74 dpavlin 372 $log->debug("read 'global.conf'");
75 dpavlin 352
76 dpavlin 372 my $config = new Config::IniFiles( -file => 'global.conf' ) || $log->logcroak("can't open 'global.conf'");
77 dpavlin 352
78     # read global config parametars
79     foreach my $var (qw(
80     dbi_dbd
81     dbi_dsn
82     dbi_user
83     dbi_passwd
84     show_progress
85     my_unac_filter
86 dpavlin 370 output_template
87 dpavlin 352 )) {
88 dpavlin 370 $self->{'global_config'}->{$var} = $config->val('global', $var);
89 dpavlin 352 }
90    
91 dpavlin 353 #
92     # read indexer config file
93     #
94 dpavlin 352
95 dpavlin 372 $self->{indexer_config_file} = new Config::IniFiles( -file => $self->{config_file} ) || $log->logcroak("can't open '",$self->{config_file},"'");
96 dpavlin 352
97 dpavlin 370 # create UTF-8 convertor for import_xml files
98 dpavlin 366 $self->{'utf2cp'} = Text::Iconv->new('UTF-8' ,$self->{'code_page'});
99 dpavlin 370
100     # create Template toolkit instance
101     $self->{'tt'} = Template->new(
102     INCLUDE_PATH => ($self->{'global_config_file'}->{'output_template'} || './output_template'),
103     # FILTERS => {
104     # 'foo' => \&foo_filter,
105     # },
106     EVAL_PERL => 1,
107     );
108    
109 dpavlin 352 return $self;
110     }
111    
112     =head2 open_isis
113    
114     Open CDS/ISIS database using OpenIsis module and read all records to memory.
115    
116     $webpac->open_isis(
117     filename => '/data/ISIS/ISIS',
118     code_page => '852',
119     limit_mfn => '500',
120     lookup => [ ... ],
121     );
122    
123     By default, ISIS code page is assumed to be C<852>.
124    
125 dpavlin 353 If optional parametar C<limit_mfn> is set, it will read just 500 records
126     from database in example above.
127 dpavlin 352
128     C<lookup> argument is an array of lookups to create. Each lookup must have C<key> and
129     C<val>. Optional parametar C<eval> is perl code to evaluate before storing
130     value in index.
131    
132     lookup => [
133     { 'key' => 'd:v900', 'val' => 'v250^a' },
134     { 'eval' => '"v901^a" eq "Podruèje"',
135     'key' => 'pa:v561^4:v562^4:v461^1',
136     'val' => 'v900' },
137     ]
138    
139 dpavlin 367 Returns number of last record read into memory (size of database, really).
140    
141 dpavlin 352 =cut
142    
143     sub open_isis {
144     my $self = shift;
145     my $arg = {@_};
146    
147 dpavlin 372 my $log = $self->_get_logger();
148    
149     $log->logcroak("need filename") if (! $arg->{'filename'});
150 dpavlin 352 my $code_page = $arg->{'code_page'} || '852';
151    
152 dpavlin 353 use OpenIsis;
153    
154 dpavlin 352 #$self->{'isis_code_page'} = $code_page;
155    
156     # create Text::Iconv object
157     my $cp = Text::Iconv->new($code_page,$self->{'code_page'});
158    
159 dpavlin 372 $log->info("reading ISIS database '",$arg->{'filename'},"'");
160 dpavlin 363
161 dpavlin 352 my $isis_db = OpenIsis::open($arg->{'filename'});
162    
163     my $maxmfn = OpenIsis::maxRowid( $isis_db ) || 1;
164    
165 dpavlin 357 $maxmfn = $self->{limit_mfn} if ($self->{limit_mfn});
166    
167 dpavlin 372 $log->info("processing $maxmfn records...");
168 dpavlin 363
169 dpavlin 352 # read database
170     for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) {
171    
172     # read record
173     my $row = OpenIsis::read( $isis_db, $mfn );
174     foreach my $k (keys %{$row}) {
175     if ($k ne "mfn") {
176     foreach my $l (@{$row->{$k}}) {
177     $l = $cp->convert($l);
178     # has subfields?
179     my $val;
180     if ($l =~ m/\^/) {
181     foreach my $t (split(/\^/,$l)) {
182     next if (! $t);
183     $val->{substr($t,0,1)} = substr($t,1);
184     }
185     } else {
186     $val = $l;
187     }
188    
189     push @{$self->{'data'}->{$mfn}->{$k}}, $val;
190     }
191 dpavlin 374 } else {
192     push @{$self->{'data'}->{$mfn}->{'000'}}, $mfn;
193 dpavlin 352 }
194    
195     }
196    
197     # create lookup
198 dpavlin 355 my $rec = $self->{'data'}->{$mfn};
199     $self->create_lookup($rec, @{$arg->{'lookup'}});
200 dpavlin 352
201 dpavlin 389 $self->progress_bar($mfn,$maxmfn);
202    
203 dpavlin 352 }
204    
205 dpavlin 362 $self->{'current_mfn'} = 1;
206 dpavlin 389 $self->{'last_pcnt'} = 0;
207 dpavlin 362
208 dpavlin 352 # store max mfn and return it.
209     return $self->{'max_mfn'} = $maxmfn;
210     }
211    
212 dpavlin 362 =head2 fetch_rec
213    
214     Fetch next record from database. It will also display progress bar (once
215     it's implemented, that is).
216    
217     my $rec = $webpac->fetch_rec;
218    
219     =cut
220    
221     sub fetch_rec {
222     my $self = shift;
223    
224 dpavlin 372 my $log = $self->_get_logger();
225 dpavlin 362
226 dpavlin 372 my $mfn = $self->{'current_mfn'}++ || $log->logconfess("it seems that you didn't load database!");
227    
228 dpavlin 362 if ($mfn > $self->{'max_mfn'}) {
229     $self->{'current_mfn'} = $self->{'max_mfn'};
230 dpavlin 373 $log->debug("at EOF");
231 dpavlin 362 return;
232     }
233    
234 dpavlin 389 $self->progress_bar($mfn,$self->{'max_mfn'});
235    
236 dpavlin 362 return $self->{'data'}->{$mfn};
237     }
238    
239 dpavlin 389 =head2 progress_bar
240    
241     Draw progress bar on STDERR.
242    
243     $webpac->progress_bar($current, $max);
244    
245     =cut
246    
247     sub progress_bar {
248     my $self = shift;
249    
250     my ($curr,$max) = @_;
251    
252     my $log = $self->_get_logger();
253    
254     $log->logconfess("no current value!") if (! $curr);
255     $log->logconfess("no maximum value!") if (! $max);
256    
257     if ($curr > $max) {
258     $max = $curr;
259     $log->debug("overflow to $curr");
260     }
261    
262     $self->{'last_pcnt'} ||= 1;
263    
264     $self->{'last_pcnt'} = $curr if ($curr < $self->{'last_pcnt'});
265    
266     my $p = int($curr * 100 / $max);
267     if ($p != $self->{'last_pcnt'}) {
268     printf STDERR ("%5d / %5d [%-51s] %-2d %% \r",$curr,$max,"=" x ($p/2).">", $p );
269     $self->{'last_pcnt'} = $p;
270     }
271     }
272    
273 dpavlin 363 =head2 open_import_xml
274    
275     Read file from C<import_xml/> directory and parse it.
276    
277     $webpac->open_import_xml(type => 'isis');
278    
279     =cut
280    
281     sub open_import_xml {
282     my $self = shift;
283    
284 dpavlin 372 my $log = $self->_get_logger();
285    
286 dpavlin 363 my $arg = {@_};
287 dpavlin 372 $log->logconfess("need type to load file from import_xml/") if (! $arg->{'type'});
288 dpavlin 363
289 dpavlin 366 $self->{'type'} = $arg->{'type'};
290 dpavlin 363
291 dpavlin 366 my $type_base = $arg->{'type'};
292 dpavlin 363 $type_base =~ s/_.*$//g;
293    
294 dpavlin 366 $self->{'tag'} = $type2tag{$type_base};
295    
296 dpavlin 375 $log->info("using type '",$self->{'type'},"' tag <",$self->{'tag'},">");
297 dpavlin 366
298     my $f = "./import_xml/".$self->{'type'}.".xml";
299 dpavlin 372 $log->logconfess("import_xml file '$f' doesn't exist!") if (! -e "$f");
300 dpavlin 363
301 dpavlin 375 $log->info("reading '$f'");
302 dpavlin 363
303 dpavlin 375 $self->{'import_xml_file'} = $f;
304    
305 dpavlin 363 $self->{'import_xml'} = XMLin($f,
306 dpavlin 366 ForceArray => [ $self->{'tag'}, 'config', 'format' ],
307 dpavlin 363 );
308    
309 dpavlin 375 $log->debug("import xml is ",sub { Dumper($self->{'import_xml'}) });
310    
311 dpavlin 363 }
312    
313 dpavlin 355 =head2 create_lookup
314    
315     Create lookup from record using lookup definition.
316    
317 dpavlin 367 $self->create_lookup($rec, @lookups);
318    
319     Called internally by C<open_*> methods.
320    
321 dpavlin 355 =cut
322    
323     sub create_lookup {
324     my $self = shift;
325    
326 dpavlin 372 my $log = $self->_get_logger();
327 dpavlin 355
328 dpavlin 372 my $rec = shift || $log->logconfess("need record to create lookup");
329     $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
330    
331 dpavlin 355 foreach my $i (@_) {
332     if ($i->{'eval'}) {
333     my $eval = $self->fill_in($rec,$i->{'eval'});
334     my $key = $self->fill_in($rec,$i->{'key'});
335     my @val = $self->fill_in($rec,$i->{'val'});
336     if ($key && @val && eval $eval) {
337 dpavlin 373 $log->debug("stored $key = ",sub { join(" | ",@val) });
338 dpavlin 355 push @{$self->{'lookup'}->{$key}}, @val;
339     }
340     } else {
341     my $key = $self->fill_in($rec,$i->{'key'});
342     my @val = $self->fill_in($rec,$i->{'val'});
343     if ($key && @val) {
344 dpavlin 373 $log->debug("stored $key = ",sub { join(" | ",@val) });
345 dpavlin 355 push @{$self->{'lookup'}->{$key}}, @val;
346     }
347     }
348     }
349     }
350    
351 dpavlin 356 =head2 get_data
352    
353     Returns value from record.
354    
355 dpavlin 367 my $text = $self->get_data(\$rec,$f,$sf,$i,\$found);
356 dpavlin 356
357     Arguments are:
358     record reference C<$rec>,
359     field C<$f>,
360     optional subfiled C<$sf>,
361     index for repeatable values C<$i>.
362    
363 dpavlin 367 Optinal variable C<$found> will be incremeted if there
364 dpavlin 356 is field.
365    
366     Returns value or empty string.
367    
368     =cut
369    
370     sub get_data {
371     my $self = shift;
372    
373     my ($rec,$f,$sf,$i,$found) = @_;
374 dpavlin 367
375 dpavlin 356 if ($$rec->{$f}) {
376 dpavlin 367 return '' if (! $$rec->{$f}->[$i]);
377 dpavlin 389 no strict 'refs';
378 dpavlin 356 if ($sf && $$rec->{$f}->[$i]->{$sf}) {
379     $$found++ if (defined($$found));
380     return $$rec->{$f}->[$i]->{$sf};
381     } elsif ($$rec->{$f}->[$i]) {
382     $$found++ if (defined($$found));
383 dpavlin 366 # it still might have subfield, just
384     # not specified, so we'll dump all
385     if ($$rec->{$f}->[$i] =~ /HASH/o) {
386     my $out;
387     foreach my $k (keys %{$$rec->{$f}->[$i]}) {
388     $out .= $$rec->{$f}->[$i]->{$k}." ";
389     }
390     return $out;
391     } else {
392     return $$rec->{$f}->[$i];
393     }
394 dpavlin 356 }
395     } else {
396     return '';
397     }
398     }
399    
400 dpavlin 352 =head2 fill_in
401    
402     Workhourse of all: takes record from in-memory structure of database and
403     strings with placeholders and returns string or array of with substituted
404     values from record.
405    
406 dpavlin 367 my $text = $webpac->fill_in($rec,'v250^a');
407 dpavlin 352
408     Optional argument is ordinal number for repeatable fields. By default,
409 dpavlin 353 it's assume to be first repeatable field (fields are perl array, so first
410     element is 0).
411     Following example will read second value from repeatable field.
412 dpavlin 352
413 dpavlin 367 my $text = $webpac->fill_in($rec,'Title: v250^a',1);
414 dpavlin 353
415     This function B<does not> perform parsing of format to inteligenty skip
416     delimiters before fields which aren't used.
417    
418 dpavlin 376 This method will automatically decode UTF-8 string to local code page
419     if needed.
420    
421 dpavlin 352 =cut
422    
423 dpavlin 372 sub fill_in {
424 dpavlin 371 my $self = shift;
425    
426 dpavlin 372 my $log = $self->_get_logger();
427 dpavlin 371
428 dpavlin 372 my $rec = shift || $log->logconfess("need data record");
429     my $format = shift || $log->logconfess("need format to parse");
430 dpavlin 352 # iteration (for repeatable fields)
431     my $i = shift || 0;
432    
433     # FIXME remove for speedup?
434 dpavlin 372 $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
435 dpavlin 352
436 dpavlin 376 if (utf8::is_utf8($format)) {
437     $format = $self->_x($format);
438     }
439    
440 dpavlin 352 my $found = 0;
441    
442 dpavlin 359 my $eval_code;
443     # remove eval{...} from beginning
444     $eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s);
445    
446 dpavlin 352 # do actual replacement of placeholders
447 dpavlin 373 $format =~ s/v(\d+)(?:\^(\w))?/$self->get_data(\$rec,$1,$2,$i,\$found)/ges;
448 dpavlin 352
449 dpavlin 353 if ($found) {
450 dpavlin 373 $log->debug("format: $format");
451 dpavlin 359 if ($eval_code) {
452     my $eval = $self->fill_in($rec,$eval_code,$i);
453 dpavlin 371 return if (! $self->_eval($eval));
454 dpavlin 359 }
455 dpavlin 353 # do we have lookups?
456 dpavlin 373 if ($format =~ /$LOOKUP_REGEX/o) {
457     $log->debug("format '$format' has lookup");
458 dpavlin 353 return $self->lookup($format);
459     } else {
460     return $format;
461     }
462 dpavlin 352 } else {
463     return;
464     }
465     }
466    
467     =head2 lookup
468    
469 dpavlin 355 Perform lookups on format supplied to it.
470 dpavlin 352
471 dpavlin 367 my $text = $self->lookup('[v900]');
472 dpavlin 352
473 dpavlin 355 Lookups can be nested (like C<[d:[a:[v900]]]>).
474    
475 dpavlin 352 =cut
476    
477     sub lookup {
478     my $self = shift;
479    
480 dpavlin 372 my $log = $self->_get_logger();
481 dpavlin 352
482 dpavlin 372 my $tmp = shift || $log->logconfess("need format");
483    
484 dpavlin 373 if ($tmp =~ /$LOOKUP_REGEX/o) {
485 dpavlin 352 my @in = ( $tmp );
486 dpavlin 372
487     $log->debug("lookup for: ",$tmp);
488    
489 dpavlin 352 my @out;
490     while (my $f = shift @in) {
491 dpavlin 373 if ($f =~ /$LOOKUP_REGEX_SAVE/o) {
492 dpavlin 352 my $k = $1;
493     if ($self->{'lookup'}->{$k}) {
494     foreach my $nv (@{$self->{'lookup'}->{$k}}) {
495     my $tmp2 = $f;
496 dpavlin 373 $tmp2 =~ s/lookup{$k}/$nv/g;
497 dpavlin 352 push @in, $tmp2;
498     }
499     } else {
500     undef $f;
501     }
502     } elsif ($f) {
503     push @out, $f;
504     }
505     }
506 dpavlin 373 $log->logconfess("return is array and it's not expected!") unless wantarray;
507 dpavlin 352 return @out;
508     } else {
509     return $tmp;
510     }
511     }
512    
513 dpavlin 356 =head2 parse
514    
515     Perform smart parsing of string, skipping delimiters for fields which aren't
516     defined. It can also eval code in format starting with C<eval{...}> and
517     return output or nothing depending on eval code.
518    
519 dpavlin 367 my $text = $webpac->parse($rec,'eval{"v901^a" eq "Deskriptor"}descriptor: v250^a', $i);
520 dpavlin 356
521     =cut
522    
523     sub parse {
524     my $self = shift;
525    
526 dpavlin 366 my ($rec, $format_utf8, $i) = @_;
527 dpavlin 356
528 dpavlin 366 return if (! $format_utf8);
529    
530 dpavlin 372 my $log = $self->_get_logger();
531 dpavlin 358
532 dpavlin 372 $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
533     $log->logconfess("need utf2cp Text::Iconv object!") if (! $self->{'utf2cp'});
534    
535 dpavlin 358 $i = 0 if (! $i);
536    
537 dpavlin 375 my $format = $self->_x($format_utf8) || $log->logconfess("can't convert '$format_utf8' from UTF-8 to ",$self->{'code_page'});
538 dpavlin 366
539 dpavlin 356 my @out;
540    
541 dpavlin 373 $log->debug("format: $format");
542    
543 dpavlin 356 my $eval_code;
544     # remove eval{...} from beginning
545     $eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s);
546    
547 dpavlin 358 my $prefix;
548     my $all_found=0;
549 dpavlin 356
550 dpavlin 373 while ($format =~ s/^(.*?)v(\d+)(?:\^(\w))?//s) {
551 dpavlin 356
552 dpavlin 358 my $del = $1 || '';
553 dpavlin 359 $prefix ||= $del if ($all_found == 0);
554 dpavlin 358
555     my $found = 0;
556     my $tmp = $self->get_data(\$rec,$2,$3,$i,\$found);
557    
558 dpavlin 356 if ($found) {
559 dpavlin 358 push @out, $del;
560     push @out, $tmp;
561     $all_found += $found;
562 dpavlin 356 }
563     }
564    
565 dpavlin 358 return if (! $all_found);
566 dpavlin 356
567 dpavlin 373 my $out = join('',@out);
568 dpavlin 358
569 dpavlin 373 if ($out) {
570     # add rest of format (suffix)
571     $out .= $format;
572 dpavlin 367
573 dpavlin 373 # add prefix if not there
574     $out = $prefix . $out if ($out !~ m/^\Q$prefix\E/);
575    
576     $log->debug("result: $out");
577     }
578    
579 dpavlin 359 if ($eval_code) {
580     my $eval = $self->fill_in($rec,$eval_code,$i);
581 dpavlin 373 $log->debug("about to eval{",$eval,"} format: $out");
582 dpavlin 371 return if (! $self->_eval($eval));
583 dpavlin 359 }
584    
585 dpavlin 358 return $out;
586 dpavlin 356 }
587    
588 dpavlin 367 =head2 parse_to_arr
589    
590     Similar to C<parse>, but returns array of all repeatable fields
591    
592     my @arr = $webpac->parse_to_arr($rec,'v250^a');
593    
594     =cut
595    
596     sub parse_to_arr {
597     my $self = shift;
598    
599     my ($rec, $format_utf8) = @_;
600    
601 dpavlin 372 my $log = $self->_get_logger();
602    
603     $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
604 dpavlin 367 return if (! $format_utf8);
605    
606     my $i = 0;
607     my @arr;
608    
609     while (my $v = $self->parse($rec,$format_utf8,$i++)) {
610     push @arr, $v;
611     }
612    
613 dpavlin 373 $log->debug("format '$format_utf8' returned ",--$i," elements: ", sub { join(" | ",@arr) }) if (@arr);
614    
615 dpavlin 367 return @arr;
616     }
617    
618 dpavlin 373 =head2 fill_in_to_arr
619    
620     Similar to C<fill_in>, but returns array of all repeatable fields. Usable
621     for fields which have lookups, so they shouldn't be parsed but rather
622     C<fill_id>ed.
623    
624     my @arr = $webpac->fill_in_to_arr($rec,'[v900];;[v250^a]');
625    
626     =cut
627    
628     sub fill_in_to_arr {
629     my $self = shift;
630    
631     my ($rec, $format_utf8) = @_;
632    
633     my $log = $self->_get_logger();
634    
635     $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
636     return if (! $format_utf8);
637    
638     my $i = 0;
639     my @arr;
640    
641     while (my @v = $self->fill_in($rec,$format_utf8,$i++)) {
642     push @arr, @v;
643     }
644    
645     $log->debug("format '$format_utf8' returned ",--$i," elements: ", sub { join(" | ",@arr) }) if (@arr);
646    
647     return @arr;
648     }
649    
650    
651 dpavlin 366 =head2 data_structure
652    
653     Create in-memory data structure which represents layout from C<import_xml>.
654     It is used later to produce output.
655    
656 dpavlin 368 my @ds = $webpac->data_structure($rec);
657 dpavlin 366
658 dpavlin 374 This method will also set C<$webpac->{'currnet_filename'}> if there is
659 dpavlin 398 <filename> tag in C<import_xml> and C<$webpac->{'headline'}> if there is
660     <headline> tag.
661 dpavlin 374
662 dpavlin 366 =cut
663    
664 dpavlin 372 sub data_structure {
665 dpavlin 366 my $self = shift;
666    
667 dpavlin 372 my $log = $self->_get_logger();
668 dpavlin 366
669     my $rec = shift;
670 dpavlin 372 $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
671 dpavlin 366
672 dpavlin 374 undef $self->{'currnet_filename'};
673 dpavlin 398 undef $self->{'headline'};
674 dpavlin 374
675 dpavlin 366 my @sorted_tags;
676     if ($self->{tags_by_order}) {
677     @sorted_tags = @{$self->{tags_by_order}};
678     } else {
679     @sorted_tags = sort { $self->_sort_by_order } keys %{$self->{'import_xml'}->{'indexer'}};
680     $self->{tags_by_order} = \@sorted_tags;
681     }
682    
683 dpavlin 368 my @ds;
684 dpavlin 366
685 dpavlin 373 $log->debug("tags: ",sub { join(", ",@sorted_tags) });
686    
687 dpavlin 366 foreach my $field (@sorted_tags) {
688    
689     my $row;
690    
691     #print "field $field [",$self->{'tag'},"] = ",Dumper($self->{'import_xml'}->{'indexer'}->{$field}->{$self->{'tag'}});
692    
693     foreach my $tag (@{$self->{'import_xml'}->{'indexer'}->{$field}->{$self->{'tag'}}}) {
694 dpavlin 373 my $format = $tag->{'value'} || $tag->{'content'};
695 dpavlin 366
696 dpavlin 373 $log->debug("format: $format");
697    
698     my @v;
699     if ($format =~ /$LOOKUP_REGEX/o) {
700     @v = $self->fill_in_to_arr($rec,$format);
701     } else {
702     @v = $self->parse_to_arr($rec,$format);
703     }
704 dpavlin 367 next if (! @v);
705 dpavlin 366
706 dpavlin 375 # use format?
707     if ($tag->{'format_name'}) {
708     @v = map { $self->apply_format($tag->{'format_name'},$tag->{'format_delimiter'},$_) } @v;
709     }
710    
711 dpavlin 398 if ($field eq 'filename') {
712     $self->{'current_filename'} = join('',@v);
713     $log->debug("filename: ",$self->{'current_filename'});
714     } elsif ($field eq 'headline') {
715     $self->{'headline'} .= join('',@v);
716     $log->debug("headline: ",$self->{'headline'});
717     next; # don't return headline in data_structure!
718     }
719    
720 dpavlin 366 # does tag have type?
721     if ($tag->{'type'}) {
722 dpavlin 367 push @{$row->{$tag->{'type'}}}, @v;
723 dpavlin 366 } else {
724 dpavlin 367 push @{$row->{'display'}}, @v;
725     push @{$row->{'swish'}}, @v;
726 dpavlin 366 }
727 dpavlin 373
728 dpavlin 374
729 dpavlin 366 }
730    
731 dpavlin 368 if ($row) {
732     $row->{'tag'} = $field;
733 dpavlin 375
734     # TODO: name_sigular, name_plural
735     my $name = $self->{'import_xml'}->{'indexer'}->{$field}->{'name'};
736     $row->{'name'} = $name ? $self->_x($name) : $field;
737    
738 dpavlin 368 push @ds, $row;
739 dpavlin 374
740 dpavlin 373 $log->debug("row $field: ",sub { Dumper($row) });
741 dpavlin 368 }
742 dpavlin 366
743     }
744    
745 dpavlin 370 return @ds;
746 dpavlin 366
747     }
748    
749 dpavlin 370 =head2 output
750    
751     Create output from in-memory data structure using Template Toolkit template.
752    
753     my $text = $webpac->output( template => 'text.tt', data => @ds );
754    
755     =cut
756    
757     sub output {
758     my $self = shift;
759    
760     my $args = {@_};
761    
762 dpavlin 372 my $log = $self->_get_logger();
763 dpavlin 370
764 dpavlin 372 $log->logconfess("need template name") if (! $args->{'template'});
765     $log->logconfess("need data array") if (! $args->{'data'});
766    
767 dpavlin 370 my $out;
768    
769     $self->{'tt'}->process(
770     $args->{'template'},
771     $args,
772     \$out
773     ) || confess $self->{'tt'}->error();
774    
775     return $out;
776     }
777    
778 dpavlin 375 =head2 apply_format
779    
780     Apply format specified in tag with C<format_name="name"> and
781     C<format_delimiter=";;">.
782    
783     my $text = $webpac->apply_format($format_name,$format_delimiter,$data);
784    
785     Formats can contain C<lookup{...}> if you need them.
786    
787     =cut
788    
789     sub apply_format {
790     my $self = shift;
791    
792     my ($name,$delimiter,$data) = @_;
793    
794     my $log = $self->_get_logger();
795    
796     if (! $self->{'import_xml'}->{'format'}->{$name}) {
797     $log->warn("<format name=\"$name\"> is not defined in ",$self->{'import_xml_file'});
798     return $data;
799     }
800    
801     $log->warn("no delimiter for format $name") if (! $delimiter);
802    
803     my $format = $self->_x($self->{'import_xml'}->{'format'}->{$name}->{'content'}) || $log->logdie("can't find format '$name'");
804    
805     my @data = split(/\Q$delimiter\E/, $data);
806    
807     my $out = sprintf($format, @data);
808     $log->debug("using format $name [$format] on $data to produce: $out");
809    
810     if ($out =~ m/$LOOKUP_REGEX/o) {
811     return $self->lookup($out);
812     } else {
813     return $out;
814     }
815    
816     }
817    
818    
819 dpavlin 372 #
820     #
821     #
822    
823     =head1 INTERNAL METHODS
824    
825     Here is a quick list of internal methods, mostly useful to turn debugging
826     on them (see L<LOGGING> below for explanation).
827    
828     =cut
829    
830     =head2 _eval
831    
832     Internal function to eval code without C<strict 'subs'>.
833    
834     =cut
835    
836     sub _eval {
837     my $self = shift;
838    
839     my $code = shift || return;
840    
841     my $log = $self->_get_logger();
842    
843     no strict 'subs';
844     my $ret = eval $code;
845     if ($@) {
846     $log->error("problem with eval code [$code]: $@");
847     }
848    
849     $log->debug("eval: ",$code," [",$ret,"]");
850    
851     return $ret || 0;
852     }
853    
854     =head2 _sort_by_order
855    
856     Sort xml tags data structure accoding to C<order=""> attribute.
857    
858     =cut
859    
860     sub _sort_by_order {
861     my $self = shift;
862    
863     my $va = $self->{'import_xml'}->{'indexer'}->{$a}->{'order'} ||
864     $self->{'import_xml'}->{'indexer'}->{$a};
865     my $vb = $self->{'import_xml'}->{'indexer'}->{$b}->{'order'} ||
866     $self->{'import_xml'}->{'indexer'}->{$b};
867    
868     return $va <=> $vb;
869     }
870    
871 dpavlin 375 =head2 _get_logger
872    
873     Get C<Log::Log4perl> object with a twist: domains are defined for each
874     method
875    
876     my $log = $webpac->_get_logger();
877    
878     =cut
879    
880 dpavlin 372 sub _get_logger {
881     my $self = shift;
882    
883 dpavlin 374 my $name = (caller(1))[3] || caller;
884     return get_logger($name);
885 dpavlin 372 }
886    
887 dpavlin 375 =head2 _x
888    
889     Convert string from UTF-8 to code page defined in C<import_xml>.
890    
891     my $text = $webpac->_x('utf8 text');
892    
893     =cut
894    
895     sub _x {
896     my $self = shift;
897     my $utf8 = shift || return;
898    
899     return $self->{'utf2cp'}->convert($utf8) ||
900     $self->_get_logger()->logwarn("can't convert '$utf8'");
901     }
902    
903 dpavlin 372 #
904     #
905     #
906    
907     =head1 LOGGING
908    
909     Logging in WebPAC is performed by L<Log::Log4perl> with config file
910     C<log.conf>.
911    
912     Methods defined above have different levels of logging, so
913     it's descriptions will be useful to turn (mostry B<debug> logging) on
914     or off to see why WabPAC isn't perforing as you expect it (it might even
915     be a bug!).
916    
917     B<This is different from normal Log4perl behaviour>. To repeat, you can
918     also use method names, and not only classes (which are just few)
919     to filter logging.
920    
921     =cut
922    
923 dpavlin 352 1;

  ViewVC Help
Powered by ViewVC 1.1.26