/[webpac]/trunk2/lib/WebPAC.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk2/lib/WebPAC.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 411 - (hide annotations)
Sun Sep 5 22:22:37 2004 UTC (19 years, 6 months ago) by dpavlin
File size: 20503 byte(s)
implemented filtered sorted indexes

1 dpavlin 354 package WebPAC;
2 dpavlin 352
3 dpavlin 367 use warnings;
4     use strict;
5    
6 dpavlin 352 use Carp;
7 dpavlin 353 use Text::Iconv;
8     use Config::IniFiles;
9 dpavlin 363 use XML::Simple;
10 dpavlin 370 use Template;
11 dpavlin 372 use Log::Log4perl qw(get_logger :levels);
12 dpavlin 352
13 dpavlin 358 use Data::Dumper;
14    
15 dpavlin 373 #my $LOOKUP_REGEX = '\[[^\[\]]+\]';
16     #my $LOOKUP_REGEX_SAVE = '\[([^\[\]]+)\]';
17     my $LOOKUP_REGEX = 'lookup{[^\{\}]+}';
18     my $LOOKUP_REGEX_SAVE = 'lookup{([^\{\}]+)}';
19    
20 dpavlin 352 =head1 NAME
21    
22 dpavlin 354 WebPAC - base class for WebPAC
23 dpavlin 352
24     =head1 DESCRIPTION
25    
26 dpavlin 354 This module implements methods used by WebPAC.
27 dpavlin 352
28     =head1 METHODS
29    
30     =head2 new
31    
32 dpavlin 354 This will create new instance of WebPAC using configuration specified by C<config_file>.
33 dpavlin 352
34 dpavlin 354 my $webpac = new WebPAC(
35 dpavlin 352 config_file => 'name.conf',
36     [code_page => 'ISO-8859-2',]
37     );
38    
39     Default C<code_page> is C<ISO-8859-2>.
40    
41 dpavlin 353 It will also read configuration files
42     C<global.conf> (used by indexer and Web font-end)
43     and configuration file specified by C<config_file>
44     which describes databases to be indexed.
45    
46 dpavlin 352 =cut
47    
48 dpavlin 363 # mapping between data type and tag which specify
49     # format in XML file
50     my %type2tag = (
51     'isis' => 'isis',
52     # 'excel' => 'column',
53     # 'marc' => 'marc',
54     # 'feed' => 'feed'
55     );
56    
57 dpavlin 352 sub new {
58     my $class = shift;
59     my $self = {@_};
60     bless($self, $class);
61    
62 dpavlin 372 my $log_file = $self->{'log'} || "log.conf";
63     Log::Log4perl->init($log_file);
64    
65     my $log = $self->_get_logger();
66    
67 dpavlin 352 # fill in default values
68     # output codepage
69     $self->{'code_page'} = 'ISO-8859-2' if (! $self->{'code_page'});
70    
71 dpavlin 353 #
72     # read global.conf
73     #
74 dpavlin 372 $log->debug("read 'global.conf'");
75 dpavlin 352
76 dpavlin 372 my $config = new Config::IniFiles( -file => 'global.conf' ) || $log->logcroak("can't open 'global.conf'");
77 dpavlin 352
78     # read global config parametars
79     foreach my $var (qw(
80     dbi_dbd
81     dbi_dsn
82     dbi_user
83     dbi_passwd
84     show_progress
85     my_unac_filter
86 dpavlin 370 output_template
87 dpavlin 352 )) {
88 dpavlin 370 $self->{'global_config'}->{$var} = $config->val('global', $var);
89 dpavlin 352 }
90    
91 dpavlin 353 #
92     # read indexer config file
93     #
94 dpavlin 352
95 dpavlin 372 $self->{indexer_config_file} = new Config::IniFiles( -file => $self->{config_file} ) || $log->logcroak("can't open '",$self->{config_file},"'");
96 dpavlin 352
97 dpavlin 370 # create UTF-8 convertor for import_xml files
98 dpavlin 366 $self->{'utf2cp'} = Text::Iconv->new('UTF-8' ,$self->{'code_page'});
99 dpavlin 370
100     # create Template toolkit instance
101     $self->{'tt'} = Template->new(
102     INCLUDE_PATH => ($self->{'global_config_file'}->{'output_template'} || './output_template'),
103     # FILTERS => {
104     # 'foo' => \&foo_filter,
105     # },
106     EVAL_PERL => 1,
107     );
108    
109 dpavlin 352 return $self;
110     }
111    
112     =head2 open_isis
113    
114     Open CDS/ISIS database using OpenIsis module and read all records to memory.
115    
116     $webpac->open_isis(
117     filename => '/data/ISIS/ISIS',
118     code_page => '852',
119     limit_mfn => '500',
120     lookup => [ ... ],
121     );
122    
123     By default, ISIS code page is assumed to be C<852>.
124    
125 dpavlin 353 If optional parametar C<limit_mfn> is set, it will read just 500 records
126     from database in example above.
127 dpavlin 352
128     C<lookup> argument is an array of lookups to create. Each lookup must have C<key> and
129     C<val>. Optional parametar C<eval> is perl code to evaluate before storing
130     value in index.
131    
132     lookup => [
133     { 'key' => 'd:v900', 'val' => 'v250^a' },
134     { 'eval' => '"v901^a" eq "Podruèje"',
135     'key' => 'pa:v561^4:v562^4:v461^1',
136     'val' => 'v900' },
137     ]
138    
139 dpavlin 367 Returns number of last record read into memory (size of database, really).
140    
141 dpavlin 352 =cut
142    
143     sub open_isis {
144     my $self = shift;
145     my $arg = {@_};
146    
147 dpavlin 372 my $log = $self->_get_logger();
148    
149     $log->logcroak("need filename") if (! $arg->{'filename'});
150 dpavlin 352 my $code_page = $arg->{'code_page'} || '852';
151    
152 dpavlin 353 use OpenIsis;
153    
154 dpavlin 352 #$self->{'isis_code_page'} = $code_page;
155    
156     # create Text::Iconv object
157     my $cp = Text::Iconv->new($code_page,$self->{'code_page'});
158    
159 dpavlin 372 $log->info("reading ISIS database '",$arg->{'filename'},"'");
160 dpavlin 363
161 dpavlin 352 my $isis_db = OpenIsis::open($arg->{'filename'});
162    
163     my $maxmfn = OpenIsis::maxRowid( $isis_db ) || 1;
164    
165 dpavlin 357 $maxmfn = $self->{limit_mfn} if ($self->{limit_mfn});
166    
167 dpavlin 372 $log->info("processing $maxmfn records...");
168 dpavlin 363
169 dpavlin 352 # read database
170     for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) {
171    
172     # read record
173     my $row = OpenIsis::read( $isis_db, $mfn );
174     foreach my $k (keys %{$row}) {
175     if ($k ne "mfn") {
176     foreach my $l (@{$row->{$k}}) {
177     $l = $cp->convert($l);
178     # has subfields?
179     my $val;
180     if ($l =~ m/\^/) {
181     foreach my $t (split(/\^/,$l)) {
182     next if (! $t);
183     $val->{substr($t,0,1)} = substr($t,1);
184     }
185     } else {
186     $val = $l;
187     }
188    
189     push @{$self->{'data'}->{$mfn}->{$k}}, $val;
190     }
191 dpavlin 374 } else {
192     push @{$self->{'data'}->{$mfn}->{'000'}}, $mfn;
193 dpavlin 352 }
194    
195     }
196    
197     # create lookup
198 dpavlin 355 my $rec = $self->{'data'}->{$mfn};
199     $self->create_lookup($rec, @{$arg->{'lookup'}});
200 dpavlin 352
201 dpavlin 389 $self->progress_bar($mfn,$maxmfn);
202    
203 dpavlin 352 }
204    
205 dpavlin 362 $self->{'current_mfn'} = 1;
206 dpavlin 389 $self->{'last_pcnt'} = 0;
207 dpavlin 362
208 dpavlin 352 # store max mfn and return it.
209     return $self->{'max_mfn'} = $maxmfn;
210     }
211    
212 dpavlin 362 =head2 fetch_rec
213    
214     Fetch next record from database. It will also display progress bar (once
215     it's implemented, that is).
216    
217     my $rec = $webpac->fetch_rec;
218    
219     =cut
220    
221     sub fetch_rec {
222     my $self = shift;
223    
224 dpavlin 372 my $log = $self->_get_logger();
225 dpavlin 362
226 dpavlin 372 my $mfn = $self->{'current_mfn'}++ || $log->logconfess("it seems that you didn't load database!");
227    
228 dpavlin 362 if ($mfn > $self->{'max_mfn'}) {
229     $self->{'current_mfn'} = $self->{'max_mfn'};
230 dpavlin 373 $log->debug("at EOF");
231 dpavlin 362 return;
232     }
233    
234 dpavlin 389 $self->progress_bar($mfn,$self->{'max_mfn'});
235    
236 dpavlin 362 return $self->{'data'}->{$mfn};
237     }
238    
239 dpavlin 389 =head2 progress_bar
240    
241     Draw progress bar on STDERR.
242    
243     $webpac->progress_bar($current, $max);
244    
245     =cut
246    
247     sub progress_bar {
248     my $self = shift;
249    
250     my ($curr,$max) = @_;
251    
252     my $log = $self->_get_logger();
253    
254     $log->logconfess("no current value!") if (! $curr);
255     $log->logconfess("no maximum value!") if (! $max);
256    
257     if ($curr > $max) {
258     $max = $curr;
259     $log->debug("overflow to $curr");
260     }
261    
262     $self->{'last_pcnt'} ||= 1;
263    
264     $self->{'last_pcnt'} = $curr if ($curr < $self->{'last_pcnt'});
265    
266     my $p = int($curr * 100 / $max);
267     if ($p != $self->{'last_pcnt'}) {
268     printf STDERR ("%5d / %5d [%-51s] %-2d %% \r",$curr,$max,"=" x ($p/2).">", $p );
269     $self->{'last_pcnt'} = $p;
270     }
271     }
272    
273 dpavlin 363 =head2 open_import_xml
274    
275     Read file from C<import_xml/> directory and parse it.
276    
277     $webpac->open_import_xml(type => 'isis');
278    
279     =cut
280    
281     sub open_import_xml {
282     my $self = shift;
283    
284 dpavlin 372 my $log = $self->_get_logger();
285    
286 dpavlin 363 my $arg = {@_};
287 dpavlin 372 $log->logconfess("need type to load file from import_xml/") if (! $arg->{'type'});
288 dpavlin 363
289 dpavlin 366 $self->{'type'} = $arg->{'type'};
290 dpavlin 363
291 dpavlin 366 my $type_base = $arg->{'type'};
292 dpavlin 363 $type_base =~ s/_.*$//g;
293    
294 dpavlin 366 $self->{'tag'} = $type2tag{$type_base};
295    
296 dpavlin 375 $log->info("using type '",$self->{'type'},"' tag <",$self->{'tag'},">");
297 dpavlin 366
298     my $f = "./import_xml/".$self->{'type'}.".xml";
299 dpavlin 372 $log->logconfess("import_xml file '$f' doesn't exist!") if (! -e "$f");
300 dpavlin 363
301 dpavlin 375 $log->info("reading '$f'");
302 dpavlin 363
303 dpavlin 375 $self->{'import_xml_file'} = $f;
304    
305 dpavlin 363 $self->{'import_xml'} = XMLin($f,
306 dpavlin 366 ForceArray => [ $self->{'tag'}, 'config', 'format' ],
307 dpavlin 363 );
308    
309 dpavlin 375 $log->debug("import xml is ",sub { Dumper($self->{'import_xml'}) });
310    
311 dpavlin 363 }
312    
313 dpavlin 355 =head2 create_lookup
314    
315     Create lookup from record using lookup definition.
316    
317 dpavlin 367 $self->create_lookup($rec, @lookups);
318    
319     Called internally by C<open_*> methods.
320    
321 dpavlin 355 =cut
322    
323     sub create_lookup {
324     my $self = shift;
325    
326 dpavlin 372 my $log = $self->_get_logger();
327 dpavlin 355
328 dpavlin 372 my $rec = shift || $log->logconfess("need record to create lookup");
329     $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
330    
331 dpavlin 355 foreach my $i (@_) {
332     if ($i->{'eval'}) {
333     my $eval = $self->fill_in($rec,$i->{'eval'});
334     my $key = $self->fill_in($rec,$i->{'key'});
335     my @val = $self->fill_in($rec,$i->{'val'});
336     if ($key && @val && eval $eval) {
337 dpavlin 373 $log->debug("stored $key = ",sub { join(" | ",@val) });
338 dpavlin 355 push @{$self->{'lookup'}->{$key}}, @val;
339     }
340     } else {
341     my $key = $self->fill_in($rec,$i->{'key'});
342     my @val = $self->fill_in($rec,$i->{'val'});
343     if ($key && @val) {
344 dpavlin 373 $log->debug("stored $key = ",sub { join(" | ",@val) });
345 dpavlin 355 push @{$self->{'lookup'}->{$key}}, @val;
346     }
347     }
348     }
349     }
350    
351 dpavlin 356 =head2 get_data
352    
353     Returns value from record.
354    
355 dpavlin 367 my $text = $self->get_data(\$rec,$f,$sf,$i,\$found);
356 dpavlin 356
357     Arguments are:
358     record reference C<$rec>,
359     field C<$f>,
360     optional subfiled C<$sf>,
361     index for repeatable values C<$i>.
362    
363 dpavlin 367 Optinal variable C<$found> will be incremeted if there
364 dpavlin 356 is field.
365    
366     Returns value or empty string.
367    
368     =cut
369    
370     sub get_data {
371     my $self = shift;
372    
373     my ($rec,$f,$sf,$i,$found) = @_;
374 dpavlin 367
375 dpavlin 356 if ($$rec->{$f}) {
376 dpavlin 367 return '' if (! $$rec->{$f}->[$i]);
377 dpavlin 389 no strict 'refs';
378 dpavlin 356 if ($sf && $$rec->{$f}->[$i]->{$sf}) {
379     $$found++ if (defined($$found));
380     return $$rec->{$f}->[$i]->{$sf};
381     } elsif ($$rec->{$f}->[$i]) {
382     $$found++ if (defined($$found));
383 dpavlin 366 # it still might have subfield, just
384     # not specified, so we'll dump all
385     if ($$rec->{$f}->[$i] =~ /HASH/o) {
386     my $out;
387     foreach my $k (keys %{$$rec->{$f}->[$i]}) {
388     $out .= $$rec->{$f}->[$i]->{$k}." ";
389     }
390     return $out;
391     } else {
392     return $$rec->{$f}->[$i];
393     }
394 dpavlin 356 }
395     } else {
396     return '';
397     }
398     }
399    
400 dpavlin 352 =head2 fill_in
401    
402     Workhourse of all: takes record from in-memory structure of database and
403     strings with placeholders and returns string or array of with substituted
404     values from record.
405    
406 dpavlin 367 my $text = $webpac->fill_in($rec,'v250^a');
407 dpavlin 352
408     Optional argument is ordinal number for repeatable fields. By default,
409 dpavlin 353 it's assume to be first repeatable field (fields are perl array, so first
410     element is 0).
411     Following example will read second value from repeatable field.
412 dpavlin 352
413 dpavlin 367 my $text = $webpac->fill_in($rec,'Title: v250^a',1);
414 dpavlin 353
415     This function B<does not> perform parsing of format to inteligenty skip
416     delimiters before fields which aren't used.
417    
418 dpavlin 376 This method will automatically decode UTF-8 string to local code page
419     if needed.
420    
421 dpavlin 352 =cut
422    
423 dpavlin 372 sub fill_in {
424 dpavlin 371 my $self = shift;
425    
426 dpavlin 372 my $log = $self->_get_logger();
427 dpavlin 371
428 dpavlin 372 my $rec = shift || $log->logconfess("need data record");
429     my $format = shift || $log->logconfess("need format to parse");
430 dpavlin 352 # iteration (for repeatable fields)
431     my $i = shift || 0;
432    
433     # FIXME remove for speedup?
434 dpavlin 372 $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
435 dpavlin 352
436 dpavlin 376 if (utf8::is_utf8($format)) {
437     $format = $self->_x($format);
438     }
439    
440 dpavlin 352 my $found = 0;
441    
442 dpavlin 359 my $eval_code;
443     # remove eval{...} from beginning
444     $eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s);
445    
446 dpavlin 352 # do actual replacement of placeholders
447 dpavlin 373 $format =~ s/v(\d+)(?:\^(\w))?/$self->get_data(\$rec,$1,$2,$i,\$found)/ges;
448 dpavlin 352
449 dpavlin 353 if ($found) {
450 dpavlin 373 $log->debug("format: $format");
451 dpavlin 359 if ($eval_code) {
452     my $eval = $self->fill_in($rec,$eval_code,$i);
453 dpavlin 371 return if (! $self->_eval($eval));
454 dpavlin 359 }
455 dpavlin 353 # do we have lookups?
456 dpavlin 373 if ($format =~ /$LOOKUP_REGEX/o) {
457     $log->debug("format '$format' has lookup");
458 dpavlin 353 return $self->lookup($format);
459     } else {
460     return $format;
461     }
462 dpavlin 352 } else {
463     return;
464     }
465     }
466    
467     =head2 lookup
468    
469 dpavlin 355 Perform lookups on format supplied to it.
470 dpavlin 352
471 dpavlin 367 my $text = $self->lookup('[v900]');
472 dpavlin 352
473 dpavlin 355 Lookups can be nested (like C<[d:[a:[v900]]]>).
474    
475 dpavlin 352 =cut
476    
477     sub lookup {
478     my $self = shift;
479    
480 dpavlin 372 my $log = $self->_get_logger();
481 dpavlin 352
482 dpavlin 372 my $tmp = shift || $log->logconfess("need format");
483    
484 dpavlin 373 if ($tmp =~ /$LOOKUP_REGEX/o) {
485 dpavlin 352 my @in = ( $tmp );
486 dpavlin 372
487     $log->debug("lookup for: ",$tmp);
488    
489 dpavlin 352 my @out;
490     while (my $f = shift @in) {
491 dpavlin 373 if ($f =~ /$LOOKUP_REGEX_SAVE/o) {
492 dpavlin 352 my $k = $1;
493     if ($self->{'lookup'}->{$k}) {
494     foreach my $nv (@{$self->{'lookup'}->{$k}}) {
495     my $tmp2 = $f;
496 dpavlin 373 $tmp2 =~ s/lookup{$k}/$nv/g;
497 dpavlin 352 push @in, $tmp2;
498     }
499     } else {
500     undef $f;
501     }
502     } elsif ($f) {
503     push @out, $f;
504     }
505     }
506 dpavlin 373 $log->logconfess("return is array and it's not expected!") unless wantarray;
507 dpavlin 352 return @out;
508     } else {
509     return $tmp;
510     }
511     }
512    
513 dpavlin 356 =head2 parse
514    
515     Perform smart parsing of string, skipping delimiters for fields which aren't
516     defined. It can also eval code in format starting with C<eval{...}> and
517     return output or nothing depending on eval code.
518    
519 dpavlin 367 my $text = $webpac->parse($rec,'eval{"v901^a" eq "Deskriptor"}descriptor: v250^a', $i);
520 dpavlin 356
521     =cut
522    
523     sub parse {
524     my $self = shift;
525    
526 dpavlin 366 my ($rec, $format_utf8, $i) = @_;
527 dpavlin 356
528 dpavlin 366 return if (! $format_utf8);
529    
530 dpavlin 372 my $log = $self->_get_logger();
531 dpavlin 358
532 dpavlin 372 $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
533     $log->logconfess("need utf2cp Text::Iconv object!") if (! $self->{'utf2cp'});
534    
535 dpavlin 358 $i = 0 if (! $i);
536    
537 dpavlin 375 my $format = $self->_x($format_utf8) || $log->logconfess("can't convert '$format_utf8' from UTF-8 to ",$self->{'code_page'});
538 dpavlin 366
539 dpavlin 356 my @out;
540    
541 dpavlin 373 $log->debug("format: $format");
542    
543 dpavlin 356 my $eval_code;
544     # remove eval{...} from beginning
545     $eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s);
546    
547 dpavlin 358 my $prefix;
548     my $all_found=0;
549 dpavlin 356
550 dpavlin 373 while ($format =~ s/^(.*?)v(\d+)(?:\^(\w))?//s) {
551 dpavlin 356
552 dpavlin 358 my $del = $1 || '';
553 dpavlin 359 $prefix ||= $del if ($all_found == 0);
554 dpavlin 358
555     my $found = 0;
556     my $tmp = $self->get_data(\$rec,$2,$3,$i,\$found);
557    
558 dpavlin 356 if ($found) {
559 dpavlin 358 push @out, $del;
560     push @out, $tmp;
561     $all_found += $found;
562 dpavlin 356 }
563     }
564    
565 dpavlin 358 return if (! $all_found);
566 dpavlin 356
567 dpavlin 373 my $out = join('',@out);
568 dpavlin 358
569 dpavlin 373 if ($out) {
570     # add rest of format (suffix)
571     $out .= $format;
572 dpavlin 367
573 dpavlin 373 # add prefix if not there
574     $out = $prefix . $out if ($out !~ m/^\Q$prefix\E/);
575    
576     $log->debug("result: $out");
577     }
578    
579 dpavlin 359 if ($eval_code) {
580     my $eval = $self->fill_in($rec,$eval_code,$i);
581 dpavlin 373 $log->debug("about to eval{",$eval,"} format: $out");
582 dpavlin 371 return if (! $self->_eval($eval));
583 dpavlin 359 }
584    
585 dpavlin 358 return $out;
586 dpavlin 356 }
587    
588 dpavlin 367 =head2 parse_to_arr
589    
590     Similar to C<parse>, but returns array of all repeatable fields
591    
592     my @arr = $webpac->parse_to_arr($rec,'v250^a');
593    
594     =cut
595    
596     sub parse_to_arr {
597     my $self = shift;
598    
599     my ($rec, $format_utf8) = @_;
600    
601 dpavlin 372 my $log = $self->_get_logger();
602    
603     $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
604 dpavlin 367 return if (! $format_utf8);
605    
606     my $i = 0;
607     my @arr;
608    
609     while (my $v = $self->parse($rec,$format_utf8,$i++)) {
610     push @arr, $v;
611     }
612    
613 dpavlin 373 $log->debug("format '$format_utf8' returned ",--$i," elements: ", sub { join(" | ",@arr) }) if (@arr);
614    
615 dpavlin 367 return @arr;
616     }
617    
618 dpavlin 373 =head2 fill_in_to_arr
619    
620     Similar to C<fill_in>, but returns array of all repeatable fields. Usable
621     for fields which have lookups, so they shouldn't be parsed but rather
622     C<fill_id>ed.
623    
624     my @arr = $webpac->fill_in_to_arr($rec,'[v900];;[v250^a]');
625    
626     =cut
627    
628     sub fill_in_to_arr {
629     my $self = shift;
630    
631     my ($rec, $format_utf8) = @_;
632    
633     my $log = $self->_get_logger();
634    
635     $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
636     return if (! $format_utf8);
637    
638     my $i = 0;
639     my @arr;
640    
641     while (my @v = $self->fill_in($rec,$format_utf8,$i++)) {
642     push @arr, @v;
643     }
644    
645     $log->debug("format '$format_utf8' returned ",--$i," elements: ", sub { join(" | ",@arr) }) if (@arr);
646    
647     return @arr;
648     }
649    
650    
651 dpavlin 366 =head2 data_structure
652    
653     Create in-memory data structure which represents layout from C<import_xml>.
654     It is used later to produce output.
655    
656 dpavlin 368 my @ds = $webpac->data_structure($rec);
657 dpavlin 366
658 dpavlin 374 This method will also set C<$webpac->{'currnet_filename'}> if there is
659 dpavlin 398 <filename> tag in C<import_xml> and C<$webpac->{'headline'}> if there is
660     <headline> tag.
661 dpavlin 374
662 dpavlin 366 =cut
663    
664 dpavlin 372 sub data_structure {
665 dpavlin 366 my $self = shift;
666    
667 dpavlin 372 my $log = $self->_get_logger();
668 dpavlin 366
669     my $rec = shift;
670 dpavlin 372 $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
671 dpavlin 366
672 dpavlin 374 undef $self->{'currnet_filename'};
673 dpavlin 398 undef $self->{'headline'};
674 dpavlin 374
675 dpavlin 366 my @sorted_tags;
676     if ($self->{tags_by_order}) {
677     @sorted_tags = @{$self->{tags_by_order}};
678     } else {
679     @sorted_tags = sort { $self->_sort_by_order } keys %{$self->{'import_xml'}->{'indexer'}};
680     $self->{tags_by_order} = \@sorted_tags;
681     }
682    
683 dpavlin 368 my @ds;
684 dpavlin 366
685 dpavlin 373 $log->debug("tags: ",sub { join(", ",@sorted_tags) });
686    
687 dpavlin 366 foreach my $field (@sorted_tags) {
688    
689     my $row;
690    
691     #print "field $field [",$self->{'tag'},"] = ",Dumper($self->{'import_xml'}->{'indexer'}->{$field}->{$self->{'tag'}});
692    
693     foreach my $tag (@{$self->{'import_xml'}->{'indexer'}->{$field}->{$self->{'tag'}}}) {
694 dpavlin 373 my $format = $tag->{'value'} || $tag->{'content'};
695 dpavlin 366
696 dpavlin 373 $log->debug("format: $format");
697    
698     my @v;
699     if ($format =~ /$LOOKUP_REGEX/o) {
700     @v = $self->fill_in_to_arr($rec,$format);
701     } else {
702     @v = $self->parse_to_arr($rec,$format);
703     }
704 dpavlin 367 next if (! @v);
705 dpavlin 366
706 dpavlin 375 # use format?
707     if ($tag->{'format_name'}) {
708     @v = map { $self->apply_format($tag->{'format_name'},$tag->{'format_delimiter'},$_) } @v;
709     }
710    
711 dpavlin 398 if ($field eq 'filename') {
712     $self->{'current_filename'} = join('',@v);
713     $log->debug("filename: ",$self->{'current_filename'});
714     } elsif ($field eq 'headline') {
715     $self->{'headline'} .= join('',@v);
716     $log->debug("headline: ",$self->{'headline'});
717     next; # don't return headline in data_structure!
718     }
719    
720 dpavlin 366 # does tag have type?
721     if ($tag->{'type'}) {
722 dpavlin 367 push @{$row->{$tag->{'type'}}}, @v;
723 dpavlin 366 } else {
724 dpavlin 367 push @{$row->{'display'}}, @v;
725     push @{$row->{'swish'}}, @v;
726 dpavlin 366 }
727 dpavlin 373
728 dpavlin 374
729 dpavlin 366 }
730    
731 dpavlin 368 if ($row) {
732     $row->{'tag'} = $field;
733 dpavlin 375
734     # TODO: name_sigular, name_plural
735     my $name = $self->{'import_xml'}->{'indexer'}->{$field}->{'name'};
736     $row->{'name'} = $name ? $self->_x($name) : $field;
737    
738 dpavlin 368 push @ds, $row;
739 dpavlin 374
740 dpavlin 373 $log->debug("row $field: ",sub { Dumper($row) });
741 dpavlin 368 }
742 dpavlin 366
743     }
744    
745 dpavlin 370 return @ds;
746 dpavlin 366
747     }
748    
749 dpavlin 370 =head2 output
750    
751     Create output from in-memory data structure using Template Toolkit template.
752    
753     my $text = $webpac->output( template => 'text.tt', data => @ds );
754    
755     =cut
756    
757     sub output {
758     my $self = shift;
759    
760     my $args = {@_};
761    
762 dpavlin 372 my $log = $self->_get_logger();
763 dpavlin 370
764 dpavlin 372 $log->logconfess("need template name") if (! $args->{'template'});
765     $log->logconfess("need data array") if (! $args->{'data'});
766    
767 dpavlin 370 my $out;
768    
769     $self->{'tt'}->process(
770     $args->{'template'},
771     $args,
772     \$out
773     ) || confess $self->{'tt'}->error();
774    
775     return $out;
776     }
777    
778 dpavlin 411 =head2 output_file
779    
780     Create output from in-memory data structure using Template Toolkit template
781     to a file.
782    
783     $webpac->output_file(
784     file => 'out.txt',
785     template => 'text.tt',
786     data => @ds
787     );
788    
789     =cut
790    
791     sub output_file {
792     my $self = shift;
793    
794     my $args = {@_};
795    
796     my $log = $self->_get_logger();
797    
798     $log->logconfess("need file name") if (! $args->{'file'});
799    
800     $log->debug("creating file ",$args->{'file'});
801    
802     open(my $fh, ">", $args->{'file'}) || $log->logdie("can't open output file '$self->{'file'}': $!");
803     print $fh $self->output(
804     template => $args->{'template'},
805     data => $args->{'data'},
806     ) || $log->logdie("print: $!");
807     close($fh) || $log->logdie("close: $!");
808     }
809    
810 dpavlin 375 =head2 apply_format
811    
812     Apply format specified in tag with C<format_name="name"> and
813     C<format_delimiter=";;">.
814    
815     my $text = $webpac->apply_format($format_name,$format_delimiter,$data);
816    
817     Formats can contain C<lookup{...}> if you need them.
818    
819     =cut
820    
821     sub apply_format {
822     my $self = shift;
823    
824     my ($name,$delimiter,$data) = @_;
825    
826     my $log = $self->_get_logger();
827    
828     if (! $self->{'import_xml'}->{'format'}->{$name}) {
829     $log->warn("<format name=\"$name\"> is not defined in ",$self->{'import_xml_file'});
830     return $data;
831     }
832    
833     $log->warn("no delimiter for format $name") if (! $delimiter);
834    
835     my $format = $self->_x($self->{'import_xml'}->{'format'}->{$name}->{'content'}) || $log->logdie("can't find format '$name'");
836    
837     my @data = split(/\Q$delimiter\E/, $data);
838    
839     my $out = sprintf($format, @data);
840     $log->debug("using format $name [$format] on $data to produce: $out");
841    
842     if ($out =~ m/$LOOKUP_REGEX/o) {
843     return $self->lookup($out);
844     } else {
845     return $out;
846     }
847    
848     }
849    
850    
851 dpavlin 372 #
852     #
853     #
854    
855     =head1 INTERNAL METHODS
856    
857     Here is a quick list of internal methods, mostly useful to turn debugging
858     on them (see L<LOGGING> below for explanation).
859    
860     =cut
861    
862     =head2 _eval
863    
864     Internal function to eval code without C<strict 'subs'>.
865    
866     =cut
867    
868     sub _eval {
869     my $self = shift;
870    
871     my $code = shift || return;
872    
873     my $log = $self->_get_logger();
874    
875     no strict 'subs';
876     my $ret = eval $code;
877     if ($@) {
878     $log->error("problem with eval code [$code]: $@");
879     }
880    
881     $log->debug("eval: ",$code," [",$ret,"]");
882    
883     return $ret || 0;
884     }
885    
886     =head2 _sort_by_order
887    
888     Sort xml tags data structure accoding to C<order=""> attribute.
889    
890     =cut
891    
892     sub _sort_by_order {
893     my $self = shift;
894    
895     my $va = $self->{'import_xml'}->{'indexer'}->{$a}->{'order'} ||
896     $self->{'import_xml'}->{'indexer'}->{$a};
897     my $vb = $self->{'import_xml'}->{'indexer'}->{$b}->{'order'} ||
898     $self->{'import_xml'}->{'indexer'}->{$b};
899    
900     return $va <=> $vb;
901     }
902    
903 dpavlin 375 =head2 _get_logger
904    
905     Get C<Log::Log4perl> object with a twist: domains are defined for each
906     method
907    
908     my $log = $webpac->_get_logger();
909    
910     =cut
911    
912 dpavlin 372 sub _get_logger {
913     my $self = shift;
914    
915 dpavlin 374 my $name = (caller(1))[3] || caller;
916     return get_logger($name);
917 dpavlin 372 }
918    
919 dpavlin 375 =head2 _x
920    
921     Convert string from UTF-8 to code page defined in C<import_xml>.
922    
923     my $text = $webpac->_x('utf8 text');
924    
925     =cut
926    
927     sub _x {
928     my $self = shift;
929     my $utf8 = shift || return;
930    
931     return $self->{'utf2cp'}->convert($utf8) ||
932     $self->_get_logger()->logwarn("can't convert '$utf8'");
933     }
934    
935 dpavlin 372 #
936     #
937     #
938    
939     =head1 LOGGING
940    
941     Logging in WebPAC is performed by L<Log::Log4perl> with config file
942     C<log.conf>.
943    
944     Methods defined above have different levels of logging, so
945     it's descriptions will be useful to turn (mostry B<debug> logging) on
946     or off to see why WabPAC isn't perforing as you expect it (it might even
947     be a bug!).
948    
949     B<This is different from normal Log4perl behaviour>. To repeat, you can
950     also use method names, and not only classes (which are just few)
951     to filter logging.
952    
953     =cut
954    
955 dpavlin 352 1;

  ViewVC Help
Powered by ViewVC 1.1.26