/[webpac]/trunk2/lib/WebPAC.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk2/lib/WebPAC.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 412 - (hide annotations)
Tue Sep 7 18:01:36 2004 UTC (19 years, 6 months ago) by dpavlin
File size: 20538 byte(s)
print lf is 100% (so that rest of output starts in new line)

1 dpavlin 354 package WebPAC;
2 dpavlin 352
3 dpavlin 367 use warnings;
4     use strict;
5    
6 dpavlin 352 use Carp;
7 dpavlin 353 use Text::Iconv;
8     use Config::IniFiles;
9 dpavlin 363 use XML::Simple;
10 dpavlin 370 use Template;
11 dpavlin 372 use Log::Log4perl qw(get_logger :levels);
12 dpavlin 352
13 dpavlin 358 use Data::Dumper;
14    
15 dpavlin 373 #my $LOOKUP_REGEX = '\[[^\[\]]+\]';
16     #my $LOOKUP_REGEX_SAVE = '\[([^\[\]]+)\]';
17     my $LOOKUP_REGEX = 'lookup{[^\{\}]+}';
18     my $LOOKUP_REGEX_SAVE = 'lookup{([^\{\}]+)}';
19    
20 dpavlin 352 =head1 NAME
21    
22 dpavlin 354 WebPAC - base class for WebPAC
23 dpavlin 352
24     =head1 DESCRIPTION
25    
26 dpavlin 354 This module implements methods used by WebPAC.
27 dpavlin 352
28     =head1 METHODS
29    
30     =head2 new
31    
32 dpavlin 354 This will create new instance of WebPAC using configuration specified by C<config_file>.
33 dpavlin 352
34 dpavlin 354 my $webpac = new WebPAC(
35 dpavlin 352 config_file => 'name.conf',
36     [code_page => 'ISO-8859-2',]
37     );
38    
39     Default C<code_page> is C<ISO-8859-2>.
40    
41 dpavlin 353 It will also read configuration files
42     C<global.conf> (used by indexer and Web font-end)
43     and configuration file specified by C<config_file>
44     which describes databases to be indexed.
45    
46 dpavlin 352 =cut
47    
48 dpavlin 363 # mapping between data type and tag which specify
49     # format in XML file
50     my %type2tag = (
51     'isis' => 'isis',
52     # 'excel' => 'column',
53     # 'marc' => 'marc',
54     # 'feed' => 'feed'
55     );
56    
57 dpavlin 352 sub new {
58     my $class = shift;
59     my $self = {@_};
60     bless($self, $class);
61    
62 dpavlin 372 my $log_file = $self->{'log'} || "log.conf";
63     Log::Log4perl->init($log_file);
64    
65     my $log = $self->_get_logger();
66    
67 dpavlin 352 # fill in default values
68     # output codepage
69     $self->{'code_page'} = 'ISO-8859-2' if (! $self->{'code_page'});
70    
71 dpavlin 353 #
72     # read global.conf
73     #
74 dpavlin 372 $log->debug("read 'global.conf'");
75 dpavlin 352
76 dpavlin 372 my $config = new Config::IniFiles( -file => 'global.conf' ) || $log->logcroak("can't open 'global.conf'");
77 dpavlin 352
78     # read global config parametars
79     foreach my $var (qw(
80     dbi_dbd
81     dbi_dsn
82     dbi_user
83     dbi_passwd
84     show_progress
85     my_unac_filter
86 dpavlin 370 output_template
87 dpavlin 352 )) {
88 dpavlin 370 $self->{'global_config'}->{$var} = $config->val('global', $var);
89 dpavlin 352 }
90    
91 dpavlin 353 #
92     # read indexer config file
93     #
94 dpavlin 352
95 dpavlin 372 $self->{indexer_config_file} = new Config::IniFiles( -file => $self->{config_file} ) || $log->logcroak("can't open '",$self->{config_file},"'");
96 dpavlin 352
97 dpavlin 370 # create UTF-8 convertor for import_xml files
98 dpavlin 366 $self->{'utf2cp'} = Text::Iconv->new('UTF-8' ,$self->{'code_page'});
99 dpavlin 370
100     # create Template toolkit instance
101     $self->{'tt'} = Template->new(
102     INCLUDE_PATH => ($self->{'global_config_file'}->{'output_template'} || './output_template'),
103     # FILTERS => {
104     # 'foo' => \&foo_filter,
105     # },
106     EVAL_PERL => 1,
107     );
108    
109 dpavlin 352 return $self;
110     }
111    
112     =head2 open_isis
113    
114     Open CDS/ISIS database using OpenIsis module and read all records to memory.
115    
116     $webpac->open_isis(
117     filename => '/data/ISIS/ISIS',
118     code_page => '852',
119     limit_mfn => '500',
120     lookup => [ ... ],
121     );
122    
123     By default, ISIS code page is assumed to be C<852>.
124    
125 dpavlin 353 If optional parametar C<limit_mfn> is set, it will read just 500 records
126     from database in example above.
127 dpavlin 352
128     C<lookup> argument is an array of lookups to create. Each lookup must have C<key> and
129     C<val>. Optional parametar C<eval> is perl code to evaluate before storing
130     value in index.
131    
132     lookup => [
133     { 'key' => 'd:v900', 'val' => 'v250^a' },
134     { 'eval' => '"v901^a" eq "Podruèje"',
135     'key' => 'pa:v561^4:v562^4:v461^1',
136     'val' => 'v900' },
137     ]
138    
139 dpavlin 367 Returns number of last record read into memory (size of database, really).
140    
141 dpavlin 352 =cut
142    
143     sub open_isis {
144     my $self = shift;
145     my $arg = {@_};
146    
147 dpavlin 372 my $log = $self->_get_logger();
148    
149     $log->logcroak("need filename") if (! $arg->{'filename'});
150 dpavlin 352 my $code_page = $arg->{'code_page'} || '852';
151    
152 dpavlin 353 use OpenIsis;
153    
154 dpavlin 352 #$self->{'isis_code_page'} = $code_page;
155    
156     # create Text::Iconv object
157     my $cp = Text::Iconv->new($code_page,$self->{'code_page'});
158    
159 dpavlin 372 $log->info("reading ISIS database '",$arg->{'filename'},"'");
160 dpavlin 363
161 dpavlin 352 my $isis_db = OpenIsis::open($arg->{'filename'});
162    
163     my $maxmfn = OpenIsis::maxRowid( $isis_db ) || 1;
164    
165 dpavlin 357 $maxmfn = $self->{limit_mfn} if ($self->{limit_mfn});
166    
167 dpavlin 372 $log->info("processing $maxmfn records...");
168 dpavlin 363
169 dpavlin 352 # read database
170     for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) {
171    
172     # read record
173     my $row = OpenIsis::read( $isis_db, $mfn );
174     foreach my $k (keys %{$row}) {
175     if ($k ne "mfn") {
176     foreach my $l (@{$row->{$k}}) {
177     $l = $cp->convert($l);
178     # has subfields?
179     my $val;
180     if ($l =~ m/\^/) {
181     foreach my $t (split(/\^/,$l)) {
182     next if (! $t);
183     $val->{substr($t,0,1)} = substr($t,1);
184     }
185     } else {
186     $val = $l;
187     }
188    
189     push @{$self->{'data'}->{$mfn}->{$k}}, $val;
190     }
191 dpavlin 374 } else {
192     push @{$self->{'data'}->{$mfn}->{'000'}}, $mfn;
193 dpavlin 352 }
194    
195     }
196    
197     # create lookup
198 dpavlin 355 my $rec = $self->{'data'}->{$mfn};
199     $self->create_lookup($rec, @{$arg->{'lookup'}});
200 dpavlin 352
201 dpavlin 389 $self->progress_bar($mfn,$maxmfn);
202    
203 dpavlin 352 }
204    
205 dpavlin 362 $self->{'current_mfn'} = 1;
206 dpavlin 389 $self->{'last_pcnt'} = 0;
207 dpavlin 362
208 dpavlin 352 # store max mfn and return it.
209     return $self->{'max_mfn'} = $maxmfn;
210     }
211    
212 dpavlin 362 =head2 fetch_rec
213    
214     Fetch next record from database. It will also display progress bar (once
215     it's implemented, that is).
216    
217     my $rec = $webpac->fetch_rec;
218    
219     =cut
220    
221     sub fetch_rec {
222     my $self = shift;
223    
224 dpavlin 372 my $log = $self->_get_logger();
225 dpavlin 362
226 dpavlin 372 my $mfn = $self->{'current_mfn'}++ || $log->logconfess("it seems that you didn't load database!");
227    
228 dpavlin 362 if ($mfn > $self->{'max_mfn'}) {
229     $self->{'current_mfn'} = $self->{'max_mfn'};
230 dpavlin 373 $log->debug("at EOF");
231 dpavlin 362 return;
232     }
233    
234 dpavlin 389 $self->progress_bar($mfn,$self->{'max_mfn'});
235    
236 dpavlin 362 return $self->{'data'}->{$mfn};
237     }
238    
239 dpavlin 389 =head2 progress_bar
240    
241     Draw progress bar on STDERR.
242    
243     $webpac->progress_bar($current, $max);
244    
245     =cut
246    
247     sub progress_bar {
248     my $self = shift;
249    
250     my ($curr,$max) = @_;
251    
252     my $log = $self->_get_logger();
253    
254     $log->logconfess("no current value!") if (! $curr);
255     $log->logconfess("no maximum value!") if (! $max);
256    
257     if ($curr > $max) {
258     $max = $curr;
259     $log->debug("overflow to $curr");
260     }
261    
262     $self->{'last_pcnt'} ||= 1;
263    
264     $self->{'last_pcnt'} = $curr if ($curr < $self->{'last_pcnt'});
265    
266     my $p = int($curr * 100 / $max);
267     if ($p != $self->{'last_pcnt'}) {
268     printf STDERR ("%5d / %5d [%-51s] %-2d %% \r",$curr,$max,"=" x ($p/2).">", $p );
269     $self->{'last_pcnt'} = $p;
270     }
271 dpavlin 412 print STDERR "\n" if ($p == 100);
272 dpavlin 389 }
273    
274 dpavlin 363 =head2 open_import_xml
275    
276     Read file from C<import_xml/> directory and parse it.
277    
278     $webpac->open_import_xml(type => 'isis');
279    
280     =cut
281    
282     sub open_import_xml {
283     my $self = shift;
284    
285 dpavlin 372 my $log = $self->_get_logger();
286    
287 dpavlin 363 my $arg = {@_};
288 dpavlin 372 $log->logconfess("need type to load file from import_xml/") if (! $arg->{'type'});
289 dpavlin 363
290 dpavlin 366 $self->{'type'} = $arg->{'type'};
291 dpavlin 363
292 dpavlin 366 my $type_base = $arg->{'type'};
293 dpavlin 363 $type_base =~ s/_.*$//g;
294    
295 dpavlin 366 $self->{'tag'} = $type2tag{$type_base};
296    
297 dpavlin 375 $log->info("using type '",$self->{'type'},"' tag <",$self->{'tag'},">");
298 dpavlin 366
299     my $f = "./import_xml/".$self->{'type'}.".xml";
300 dpavlin 372 $log->logconfess("import_xml file '$f' doesn't exist!") if (! -e "$f");
301 dpavlin 363
302 dpavlin 375 $log->info("reading '$f'");
303 dpavlin 363
304 dpavlin 375 $self->{'import_xml_file'} = $f;
305    
306 dpavlin 363 $self->{'import_xml'} = XMLin($f,
307 dpavlin 366 ForceArray => [ $self->{'tag'}, 'config', 'format' ],
308 dpavlin 363 );
309    
310 dpavlin 375 $log->debug("import xml is ",sub { Dumper($self->{'import_xml'}) });
311    
312 dpavlin 363 }
313    
314 dpavlin 355 =head2 create_lookup
315    
316     Create lookup from record using lookup definition.
317    
318 dpavlin 367 $self->create_lookup($rec, @lookups);
319    
320     Called internally by C<open_*> methods.
321    
322 dpavlin 355 =cut
323    
324     sub create_lookup {
325     my $self = shift;
326    
327 dpavlin 372 my $log = $self->_get_logger();
328 dpavlin 355
329 dpavlin 372 my $rec = shift || $log->logconfess("need record to create lookup");
330     $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
331    
332 dpavlin 355 foreach my $i (@_) {
333     if ($i->{'eval'}) {
334     my $eval = $self->fill_in($rec,$i->{'eval'});
335     my $key = $self->fill_in($rec,$i->{'key'});
336     my @val = $self->fill_in($rec,$i->{'val'});
337     if ($key && @val && eval $eval) {
338 dpavlin 373 $log->debug("stored $key = ",sub { join(" | ",@val) });
339 dpavlin 355 push @{$self->{'lookup'}->{$key}}, @val;
340     }
341     } else {
342     my $key = $self->fill_in($rec,$i->{'key'});
343     my @val = $self->fill_in($rec,$i->{'val'});
344     if ($key && @val) {
345 dpavlin 373 $log->debug("stored $key = ",sub { join(" | ",@val) });
346 dpavlin 355 push @{$self->{'lookup'}->{$key}}, @val;
347     }
348     }
349     }
350     }
351    
352 dpavlin 356 =head2 get_data
353    
354     Returns value from record.
355    
356 dpavlin 367 my $text = $self->get_data(\$rec,$f,$sf,$i,\$found);
357 dpavlin 356
358     Arguments are:
359     record reference C<$rec>,
360     field C<$f>,
361     optional subfiled C<$sf>,
362     index for repeatable values C<$i>.
363    
364 dpavlin 367 Optinal variable C<$found> will be incremeted if there
365 dpavlin 356 is field.
366    
367     Returns value or empty string.
368    
369     =cut
370    
371     sub get_data {
372     my $self = shift;
373    
374     my ($rec,$f,$sf,$i,$found) = @_;
375 dpavlin 367
376 dpavlin 356 if ($$rec->{$f}) {
377 dpavlin 367 return '' if (! $$rec->{$f}->[$i]);
378 dpavlin 389 no strict 'refs';
379 dpavlin 356 if ($sf && $$rec->{$f}->[$i]->{$sf}) {
380     $$found++ if (defined($$found));
381     return $$rec->{$f}->[$i]->{$sf};
382     } elsif ($$rec->{$f}->[$i]) {
383     $$found++ if (defined($$found));
384 dpavlin 366 # it still might have subfield, just
385     # not specified, so we'll dump all
386     if ($$rec->{$f}->[$i] =~ /HASH/o) {
387     my $out;
388     foreach my $k (keys %{$$rec->{$f}->[$i]}) {
389     $out .= $$rec->{$f}->[$i]->{$k}." ";
390     }
391     return $out;
392     } else {
393     return $$rec->{$f}->[$i];
394     }
395 dpavlin 356 }
396     } else {
397     return '';
398     }
399     }
400    
401 dpavlin 352 =head2 fill_in
402    
403     Workhourse of all: takes record from in-memory structure of database and
404     strings with placeholders and returns string or array of with substituted
405     values from record.
406    
407 dpavlin 367 my $text = $webpac->fill_in($rec,'v250^a');
408 dpavlin 352
409     Optional argument is ordinal number for repeatable fields. By default,
410 dpavlin 353 it's assume to be first repeatable field (fields are perl array, so first
411     element is 0).
412     Following example will read second value from repeatable field.
413 dpavlin 352
414 dpavlin 367 my $text = $webpac->fill_in($rec,'Title: v250^a',1);
415 dpavlin 353
416     This function B<does not> perform parsing of format to inteligenty skip
417     delimiters before fields which aren't used.
418    
419 dpavlin 376 This method will automatically decode UTF-8 string to local code page
420     if needed.
421    
422 dpavlin 352 =cut
423    
424 dpavlin 372 sub fill_in {
425 dpavlin 371 my $self = shift;
426    
427 dpavlin 372 my $log = $self->_get_logger();
428 dpavlin 371
429 dpavlin 372 my $rec = shift || $log->logconfess("need data record");
430     my $format = shift || $log->logconfess("need format to parse");
431 dpavlin 352 # iteration (for repeatable fields)
432     my $i = shift || 0;
433    
434     # FIXME remove for speedup?
435 dpavlin 372 $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
436 dpavlin 352
437 dpavlin 376 if (utf8::is_utf8($format)) {
438     $format = $self->_x($format);
439     }
440    
441 dpavlin 352 my $found = 0;
442    
443 dpavlin 359 my $eval_code;
444     # remove eval{...} from beginning
445     $eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s);
446    
447 dpavlin 352 # do actual replacement of placeholders
448 dpavlin 373 $format =~ s/v(\d+)(?:\^(\w))?/$self->get_data(\$rec,$1,$2,$i,\$found)/ges;
449 dpavlin 352
450 dpavlin 353 if ($found) {
451 dpavlin 373 $log->debug("format: $format");
452 dpavlin 359 if ($eval_code) {
453     my $eval = $self->fill_in($rec,$eval_code,$i);
454 dpavlin 371 return if (! $self->_eval($eval));
455 dpavlin 359 }
456 dpavlin 353 # do we have lookups?
457 dpavlin 373 if ($format =~ /$LOOKUP_REGEX/o) {
458     $log->debug("format '$format' has lookup");
459 dpavlin 353 return $self->lookup($format);
460     } else {
461     return $format;
462     }
463 dpavlin 352 } else {
464     return;
465     }
466     }
467    
468     =head2 lookup
469    
470 dpavlin 355 Perform lookups on format supplied to it.
471 dpavlin 352
472 dpavlin 367 my $text = $self->lookup('[v900]');
473 dpavlin 352
474 dpavlin 355 Lookups can be nested (like C<[d:[a:[v900]]]>).
475    
476 dpavlin 352 =cut
477    
478     sub lookup {
479     my $self = shift;
480    
481 dpavlin 372 my $log = $self->_get_logger();
482 dpavlin 352
483 dpavlin 372 my $tmp = shift || $log->logconfess("need format");
484    
485 dpavlin 373 if ($tmp =~ /$LOOKUP_REGEX/o) {
486 dpavlin 352 my @in = ( $tmp );
487 dpavlin 372
488     $log->debug("lookup for: ",$tmp);
489    
490 dpavlin 352 my @out;
491     while (my $f = shift @in) {
492 dpavlin 373 if ($f =~ /$LOOKUP_REGEX_SAVE/o) {
493 dpavlin 352 my $k = $1;
494     if ($self->{'lookup'}->{$k}) {
495     foreach my $nv (@{$self->{'lookup'}->{$k}}) {
496     my $tmp2 = $f;
497 dpavlin 373 $tmp2 =~ s/lookup{$k}/$nv/g;
498 dpavlin 352 push @in, $tmp2;
499     }
500     } else {
501     undef $f;
502     }
503     } elsif ($f) {
504     push @out, $f;
505     }
506     }
507 dpavlin 373 $log->logconfess("return is array and it's not expected!") unless wantarray;
508 dpavlin 352 return @out;
509     } else {
510     return $tmp;
511     }
512     }
513    
514 dpavlin 356 =head2 parse
515    
516     Perform smart parsing of string, skipping delimiters for fields which aren't
517     defined. It can also eval code in format starting with C<eval{...}> and
518     return output or nothing depending on eval code.
519    
520 dpavlin 367 my $text = $webpac->parse($rec,'eval{"v901^a" eq "Deskriptor"}descriptor: v250^a', $i);
521 dpavlin 356
522     =cut
523    
524     sub parse {
525     my $self = shift;
526    
527 dpavlin 366 my ($rec, $format_utf8, $i) = @_;
528 dpavlin 356
529 dpavlin 366 return if (! $format_utf8);
530    
531 dpavlin 372 my $log = $self->_get_logger();
532 dpavlin 358
533 dpavlin 372 $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
534     $log->logconfess("need utf2cp Text::Iconv object!") if (! $self->{'utf2cp'});
535    
536 dpavlin 358 $i = 0 if (! $i);
537    
538 dpavlin 375 my $format = $self->_x($format_utf8) || $log->logconfess("can't convert '$format_utf8' from UTF-8 to ",$self->{'code_page'});
539 dpavlin 366
540 dpavlin 356 my @out;
541    
542 dpavlin 373 $log->debug("format: $format");
543    
544 dpavlin 356 my $eval_code;
545     # remove eval{...} from beginning
546     $eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s);
547    
548 dpavlin 358 my $prefix;
549     my $all_found=0;
550 dpavlin 356
551 dpavlin 373 while ($format =~ s/^(.*?)v(\d+)(?:\^(\w))?//s) {
552 dpavlin 356
553 dpavlin 358 my $del = $1 || '';
554 dpavlin 359 $prefix ||= $del if ($all_found == 0);
555 dpavlin 358
556     my $found = 0;
557     my $tmp = $self->get_data(\$rec,$2,$3,$i,\$found);
558    
559 dpavlin 356 if ($found) {
560 dpavlin 358 push @out, $del;
561     push @out, $tmp;
562     $all_found += $found;
563 dpavlin 356 }
564     }
565    
566 dpavlin 358 return if (! $all_found);
567 dpavlin 356
568 dpavlin 373 my $out = join('',@out);
569 dpavlin 358
570 dpavlin 373 if ($out) {
571     # add rest of format (suffix)
572     $out .= $format;
573 dpavlin 367
574 dpavlin 373 # add prefix if not there
575     $out = $prefix . $out if ($out !~ m/^\Q$prefix\E/);
576    
577     $log->debug("result: $out");
578     }
579    
580 dpavlin 359 if ($eval_code) {
581     my $eval = $self->fill_in($rec,$eval_code,$i);
582 dpavlin 373 $log->debug("about to eval{",$eval,"} format: $out");
583 dpavlin 371 return if (! $self->_eval($eval));
584 dpavlin 359 }
585    
586 dpavlin 358 return $out;
587 dpavlin 356 }
588    
589 dpavlin 367 =head2 parse_to_arr
590    
591     Similar to C<parse>, but returns array of all repeatable fields
592    
593     my @arr = $webpac->parse_to_arr($rec,'v250^a');
594    
595     =cut
596    
597     sub parse_to_arr {
598     my $self = shift;
599    
600     my ($rec, $format_utf8) = @_;
601    
602 dpavlin 372 my $log = $self->_get_logger();
603    
604     $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
605 dpavlin 367 return if (! $format_utf8);
606    
607     my $i = 0;
608     my @arr;
609    
610     while (my $v = $self->parse($rec,$format_utf8,$i++)) {
611     push @arr, $v;
612     }
613    
614 dpavlin 373 $log->debug("format '$format_utf8' returned ",--$i," elements: ", sub { join(" | ",@arr) }) if (@arr);
615    
616 dpavlin 367 return @arr;
617     }
618    
619 dpavlin 373 =head2 fill_in_to_arr
620    
621     Similar to C<fill_in>, but returns array of all repeatable fields. Usable
622     for fields which have lookups, so they shouldn't be parsed but rather
623     C<fill_id>ed.
624    
625     my @arr = $webpac->fill_in_to_arr($rec,'[v900];;[v250^a]');
626    
627     =cut
628    
629     sub fill_in_to_arr {
630     my $self = shift;
631    
632     my ($rec, $format_utf8) = @_;
633    
634     my $log = $self->_get_logger();
635    
636     $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
637     return if (! $format_utf8);
638    
639     my $i = 0;
640     my @arr;
641    
642     while (my @v = $self->fill_in($rec,$format_utf8,$i++)) {
643     push @arr, @v;
644     }
645    
646     $log->debug("format '$format_utf8' returned ",--$i," elements: ", sub { join(" | ",@arr) }) if (@arr);
647    
648     return @arr;
649     }
650    
651    
652 dpavlin 366 =head2 data_structure
653    
654     Create in-memory data structure which represents layout from C<import_xml>.
655     It is used later to produce output.
656    
657 dpavlin 368 my @ds = $webpac->data_structure($rec);
658 dpavlin 366
659 dpavlin 374 This method will also set C<$webpac->{'currnet_filename'}> if there is
660 dpavlin 398 <filename> tag in C<import_xml> and C<$webpac->{'headline'}> if there is
661     <headline> tag.
662 dpavlin 374
663 dpavlin 366 =cut
664    
665 dpavlin 372 sub data_structure {
666 dpavlin 366 my $self = shift;
667    
668 dpavlin 372 my $log = $self->_get_logger();
669 dpavlin 366
670     my $rec = shift;
671 dpavlin 372 $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
672 dpavlin 366
673 dpavlin 374 undef $self->{'currnet_filename'};
674 dpavlin 398 undef $self->{'headline'};
675 dpavlin 374
676 dpavlin 366 my @sorted_tags;
677     if ($self->{tags_by_order}) {
678     @sorted_tags = @{$self->{tags_by_order}};
679     } else {
680     @sorted_tags = sort { $self->_sort_by_order } keys %{$self->{'import_xml'}->{'indexer'}};
681     $self->{tags_by_order} = \@sorted_tags;
682     }
683    
684 dpavlin 368 my @ds;
685 dpavlin 366
686 dpavlin 373 $log->debug("tags: ",sub { join(", ",@sorted_tags) });
687    
688 dpavlin 366 foreach my $field (@sorted_tags) {
689    
690     my $row;
691    
692     #print "field $field [",$self->{'tag'},"] = ",Dumper($self->{'import_xml'}->{'indexer'}->{$field}->{$self->{'tag'}});
693    
694     foreach my $tag (@{$self->{'import_xml'}->{'indexer'}->{$field}->{$self->{'tag'}}}) {
695 dpavlin 373 my $format = $tag->{'value'} || $tag->{'content'};
696 dpavlin 366
697 dpavlin 373 $log->debug("format: $format");
698    
699     my @v;
700     if ($format =~ /$LOOKUP_REGEX/o) {
701     @v = $self->fill_in_to_arr($rec,$format);
702     } else {
703     @v = $self->parse_to_arr($rec,$format);
704     }
705 dpavlin 367 next if (! @v);
706 dpavlin 366
707 dpavlin 375 # use format?
708     if ($tag->{'format_name'}) {
709     @v = map { $self->apply_format($tag->{'format_name'},$tag->{'format_delimiter'},$_) } @v;
710     }
711    
712 dpavlin 398 if ($field eq 'filename') {
713     $self->{'current_filename'} = join('',@v);
714     $log->debug("filename: ",$self->{'current_filename'});
715     } elsif ($field eq 'headline') {
716     $self->{'headline'} .= join('',@v);
717     $log->debug("headline: ",$self->{'headline'});
718     next; # don't return headline in data_structure!
719     }
720    
721 dpavlin 366 # does tag have type?
722     if ($tag->{'type'}) {
723 dpavlin 367 push @{$row->{$tag->{'type'}}}, @v;
724 dpavlin 366 } else {
725 dpavlin 367 push @{$row->{'display'}}, @v;
726     push @{$row->{'swish'}}, @v;
727 dpavlin 366 }
728 dpavlin 373
729 dpavlin 374
730 dpavlin 366 }
731    
732 dpavlin 368 if ($row) {
733     $row->{'tag'} = $field;
734 dpavlin 375
735     # TODO: name_sigular, name_plural
736     my $name = $self->{'import_xml'}->{'indexer'}->{$field}->{'name'};
737     $row->{'name'} = $name ? $self->_x($name) : $field;
738    
739 dpavlin 368 push @ds, $row;
740 dpavlin 374
741 dpavlin 373 $log->debug("row $field: ",sub { Dumper($row) });
742 dpavlin 368 }
743 dpavlin 366
744     }
745    
746 dpavlin 370 return @ds;
747 dpavlin 366
748     }
749    
750 dpavlin 370 =head2 output
751    
752     Create output from in-memory data structure using Template Toolkit template.
753    
754     my $text = $webpac->output( template => 'text.tt', data => @ds );
755    
756     =cut
757    
758     sub output {
759     my $self = shift;
760    
761     my $args = {@_};
762    
763 dpavlin 372 my $log = $self->_get_logger();
764 dpavlin 370
765 dpavlin 372 $log->logconfess("need template name") if (! $args->{'template'});
766     $log->logconfess("need data array") if (! $args->{'data'});
767    
768 dpavlin 370 my $out;
769    
770     $self->{'tt'}->process(
771     $args->{'template'},
772     $args,
773     \$out
774     ) || confess $self->{'tt'}->error();
775    
776     return $out;
777     }
778    
779 dpavlin 411 =head2 output_file
780    
781     Create output from in-memory data structure using Template Toolkit template
782     to a file.
783    
784     $webpac->output_file(
785     file => 'out.txt',
786     template => 'text.tt',
787     data => @ds
788     );
789    
790     =cut
791    
792     sub output_file {
793     my $self = shift;
794    
795     my $args = {@_};
796    
797     my $log = $self->_get_logger();
798    
799     $log->logconfess("need file name") if (! $args->{'file'});
800    
801     $log->debug("creating file ",$args->{'file'});
802    
803     open(my $fh, ">", $args->{'file'}) || $log->logdie("can't open output file '$self->{'file'}': $!");
804     print $fh $self->output(
805     template => $args->{'template'},
806     data => $args->{'data'},
807     ) || $log->logdie("print: $!");
808     close($fh) || $log->logdie("close: $!");
809     }
810    
811 dpavlin 375 =head2 apply_format
812    
813     Apply format specified in tag with C<format_name="name"> and
814     C<format_delimiter=";;">.
815    
816     my $text = $webpac->apply_format($format_name,$format_delimiter,$data);
817    
818     Formats can contain C<lookup{...}> if you need them.
819    
820     =cut
821    
822     sub apply_format {
823     my $self = shift;
824    
825     my ($name,$delimiter,$data) = @_;
826    
827     my $log = $self->_get_logger();
828    
829     if (! $self->{'import_xml'}->{'format'}->{$name}) {
830     $log->warn("<format name=\"$name\"> is not defined in ",$self->{'import_xml_file'});
831     return $data;
832     }
833    
834     $log->warn("no delimiter for format $name") if (! $delimiter);
835    
836     my $format = $self->_x($self->{'import_xml'}->{'format'}->{$name}->{'content'}) || $log->logdie("can't find format '$name'");
837    
838     my @data = split(/\Q$delimiter\E/, $data);
839    
840     my $out = sprintf($format, @data);
841     $log->debug("using format $name [$format] on $data to produce: $out");
842    
843     if ($out =~ m/$LOOKUP_REGEX/o) {
844     return $self->lookup($out);
845     } else {
846     return $out;
847     }
848    
849     }
850    
851    
852 dpavlin 372 #
853     #
854     #
855    
856     =head1 INTERNAL METHODS
857    
858     Here is a quick list of internal methods, mostly useful to turn debugging
859     on them (see L<LOGGING> below for explanation).
860    
861     =cut
862    
863     =head2 _eval
864    
865     Internal function to eval code without C<strict 'subs'>.
866    
867     =cut
868    
869     sub _eval {
870     my $self = shift;
871    
872     my $code = shift || return;
873    
874     my $log = $self->_get_logger();
875    
876     no strict 'subs';
877     my $ret = eval $code;
878     if ($@) {
879     $log->error("problem with eval code [$code]: $@");
880     }
881    
882     $log->debug("eval: ",$code," [",$ret,"]");
883    
884     return $ret || 0;
885     }
886    
887     =head2 _sort_by_order
888    
889     Sort xml tags data structure accoding to C<order=""> attribute.
890    
891     =cut
892    
893     sub _sort_by_order {
894     my $self = shift;
895    
896     my $va = $self->{'import_xml'}->{'indexer'}->{$a}->{'order'} ||
897     $self->{'import_xml'}->{'indexer'}->{$a};
898     my $vb = $self->{'import_xml'}->{'indexer'}->{$b}->{'order'} ||
899     $self->{'import_xml'}->{'indexer'}->{$b};
900    
901     return $va <=> $vb;
902     }
903    
904 dpavlin 375 =head2 _get_logger
905    
906     Get C<Log::Log4perl> object with a twist: domains are defined for each
907     method
908    
909     my $log = $webpac->_get_logger();
910    
911     =cut
912    
913 dpavlin 372 sub _get_logger {
914     my $self = shift;
915    
916 dpavlin 374 my $name = (caller(1))[3] || caller;
917     return get_logger($name);
918 dpavlin 372 }
919    
920 dpavlin 375 =head2 _x
921    
922     Convert string from UTF-8 to code page defined in C<import_xml>.
923    
924     my $text = $webpac->_x('utf8 text');
925    
926     =cut
927    
928     sub _x {
929     my $self = shift;
930     my $utf8 = shift || return;
931    
932     return $self->{'utf2cp'}->convert($utf8) ||
933     $self->_get_logger()->logwarn("can't convert '$utf8'");
934     }
935    
936 dpavlin 372 #
937     #
938     #
939    
940     =head1 LOGGING
941    
942     Logging in WebPAC is performed by L<Log::Log4perl> with config file
943     C<log.conf>.
944    
945     Methods defined above have different levels of logging, so
946     it's descriptions will be useful to turn (mostry B<debug> logging) on
947     or off to see why WabPAC isn't perforing as you expect it (it might even
948     be a bug!).
949    
950     B<This is different from normal Log4perl behaviour>. To repeat, you can
951     also use method names, and not only classes (which are just few)
952     to filter logging.
953    
954     =cut
955    
956 dpavlin 352 1;

  ViewVC Help
Powered by ViewVC 1.1.26