/[webpac]/trunk2/lib/WebPAC.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk2/lib/WebPAC.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 418 - (hide annotations)
Thu Sep 9 18:08:38 2004 UTC (16 years, 9 months ago) by dpavlin
File size: 20835 byte(s)
more debuging, refactore create_lookup

1 dpavlin 354 package WebPAC;
2 dpavlin 352
3 dpavlin 367 use warnings;
4     use strict;
5    
6 dpavlin 352 use Carp;
7 dpavlin 353 use Text::Iconv;
8     use Config::IniFiles;
9 dpavlin 363 use XML::Simple;
10 dpavlin 370 use Template;
11 dpavlin 372 use Log::Log4perl qw(get_logger :levels);
12 dpavlin 352
13 dpavlin 358 use Data::Dumper;
14    
15 dpavlin 373 #my $LOOKUP_REGEX = '\[[^\[\]]+\]';
16     #my $LOOKUP_REGEX_SAVE = '\[([^\[\]]+)\]';
17     my $LOOKUP_REGEX = 'lookup{[^\{\}]+}';
18     my $LOOKUP_REGEX_SAVE = 'lookup{([^\{\}]+)}';
19    
20 dpavlin 352 =head1 NAME
21    
22 dpavlin 354 WebPAC - base class for WebPAC
23 dpavlin 352
24     =head1 DESCRIPTION
25    
26 dpavlin 354 This module implements methods used by WebPAC.
27 dpavlin 352
28     =head1 METHODS
29    
30     =head2 new
31    
32 dpavlin 354 This will create new instance of WebPAC using configuration specified by C<config_file>.
33 dpavlin 352
34 dpavlin 354 my $webpac = new WebPAC(
35 dpavlin 352 config_file => 'name.conf',
36     [code_page => 'ISO-8859-2',]
37     );
38    
39     Default C<code_page> is C<ISO-8859-2>.
40    
41 dpavlin 353 It will also read configuration files
42     C<global.conf> (used by indexer and Web font-end)
43     and configuration file specified by C<config_file>
44     which describes databases to be indexed.
45    
46 dpavlin 352 =cut
47    
48 dpavlin 363 # mapping between data type and tag which specify
49     # format in XML file
50     my %type2tag = (
51     'isis' => 'isis',
52     # 'excel' => 'column',
53     # 'marc' => 'marc',
54     # 'feed' => 'feed'
55     );
56    
57 dpavlin 352 sub new {
58     my $class = shift;
59     my $self = {@_};
60     bless($self, $class);
61    
62 dpavlin 372 my $log_file = $self->{'log'} || "log.conf";
63     Log::Log4perl->init($log_file);
64    
65     my $log = $self->_get_logger();
66    
67 dpavlin 352 # fill in default values
68     # output codepage
69     $self->{'code_page'} = 'ISO-8859-2' if (! $self->{'code_page'});
70    
71 dpavlin 353 #
72     # read global.conf
73     #
74 dpavlin 372 $log->debug("read 'global.conf'");
75 dpavlin 352
76 dpavlin 372 my $config = new Config::IniFiles( -file => 'global.conf' ) || $log->logcroak("can't open 'global.conf'");
77 dpavlin 352
78     # read global config parametars
79     foreach my $var (qw(
80     dbi_dbd
81     dbi_dsn
82     dbi_user
83     dbi_passwd
84     show_progress
85     my_unac_filter
86 dpavlin 370 output_template
87 dpavlin 352 )) {
88 dpavlin 370 $self->{'global_config'}->{$var} = $config->val('global', $var);
89 dpavlin 352 }
90    
91 dpavlin 353 #
92     # read indexer config file
93     #
94 dpavlin 352
95 dpavlin 372 $self->{indexer_config_file} = new Config::IniFiles( -file => $self->{config_file} ) || $log->logcroak("can't open '",$self->{config_file},"'");
96 dpavlin 352
97 dpavlin 370 # create UTF-8 convertor for import_xml files
98 dpavlin 366 $self->{'utf2cp'} = Text::Iconv->new('UTF-8' ,$self->{'code_page'});
99 dpavlin 370
100     # create Template toolkit instance
101     $self->{'tt'} = Template->new(
102     INCLUDE_PATH => ($self->{'global_config_file'}->{'output_template'} || './output_template'),
103     # FILTERS => {
104     # 'foo' => \&foo_filter,
105     # },
106     EVAL_PERL => 1,
107     );
108    
109 dpavlin 352 return $self;
110     }
111    
112     =head2 open_isis
113    
114     Open CDS/ISIS database using OpenIsis module and read all records to memory.
115    
116     $webpac->open_isis(
117     filename => '/data/ISIS/ISIS',
118     code_page => '852',
119     limit_mfn => '500',
120     lookup => [ ... ],
121     );
122    
123     By default, ISIS code page is assumed to be C<852>.
124    
125 dpavlin 353 If optional parametar C<limit_mfn> is set, it will read just 500 records
126     from database in example above.
127 dpavlin 352
128     C<lookup> argument is an array of lookups to create. Each lookup must have C<key> and
129     C<val>. Optional parametar C<eval> is perl code to evaluate before storing
130     value in index.
131    
132     lookup => [
133     { 'key' => 'd:v900', 'val' => 'v250^a' },
134     { 'eval' => '"v901^a" eq "Podru√®je"',
135     'key' => 'pa:v561^4:v562^4:v461^1',
136     'val' => 'v900' },
137     ]
138    
139 dpavlin 367 Returns number of last record read into memory (size of database, really).
140    
141 dpavlin 352 =cut
142    
143     sub open_isis {
144     my $self = shift;
145     my $arg = {@_};
146    
147 dpavlin 372 my $log = $self->_get_logger();
148    
149     $log->logcroak("need filename") if (! $arg->{'filename'});
150 dpavlin 352 my $code_page = $arg->{'code_page'} || '852';
151    
152 dpavlin 353 use OpenIsis;
153    
154 dpavlin 352 #$self->{'isis_code_page'} = $code_page;
155    
156     # create Text::Iconv object
157     my $cp = Text::Iconv->new($code_page,$self->{'code_page'});
158    
159 dpavlin 372 $log->info("reading ISIS database '",$arg->{'filename'},"'");
160 dpavlin 363
161 dpavlin 352 my $isis_db = OpenIsis::open($arg->{'filename'});
162    
163     my $maxmfn = OpenIsis::maxRowid( $isis_db ) || 1;
164    
165 dpavlin 357 $maxmfn = $self->{limit_mfn} if ($self->{limit_mfn});
166    
167 dpavlin 372 $log->info("processing $maxmfn records...");
168 dpavlin 363
169 dpavlin 352 # read database
170     for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) {
171    
172 dpavlin 418
173     $log->debug("mfn: $mfn\n");
174    
175 dpavlin 352 # read record
176     my $row = OpenIsis::read( $isis_db, $mfn );
177     foreach my $k (keys %{$row}) {
178     if ($k ne "mfn") {
179     foreach my $l (@{$row->{$k}}) {
180     $l = $cp->convert($l);
181     # has subfields?
182     my $val;
183     if ($l =~ m/\^/) {
184     foreach my $t (split(/\^/,$l)) {
185     next if (! $t);
186     $val->{substr($t,0,1)} = substr($t,1);
187     }
188     } else {
189     $val = $l;
190     }
191    
192     push @{$self->{'data'}->{$mfn}->{$k}}, $val;
193     }
194 dpavlin 374 } else {
195     push @{$self->{'data'}->{$mfn}->{'000'}}, $mfn;
196 dpavlin 352 }
197    
198     }
199    
200     # create lookup
201 dpavlin 418 my $rec = $self->{'data'}->{$mfn} || $log->confess("record $mfn empty?");
202 dpavlin 355 $self->create_lookup($rec, @{$arg->{'lookup'}});
203 dpavlin 352
204 dpavlin 389 $self->progress_bar($mfn,$maxmfn);
205    
206 dpavlin 352 }
207    
208 dpavlin 362 $self->{'current_mfn'} = 1;
209 dpavlin 389 $self->{'last_pcnt'} = 0;
210 dpavlin 362
211 dpavlin 418 $log->debug("max mfn: $maxmfn");
212    
213 dpavlin 352 # store max mfn and return it.
214     return $self->{'max_mfn'} = $maxmfn;
215     }
216    
217 dpavlin 362 =head2 fetch_rec
218    
219     Fetch next record from database. It will also display progress bar (once
220     it's implemented, that is).
221    
222     my $rec = $webpac->fetch_rec;
223    
224     =cut
225    
226     sub fetch_rec {
227     my $self = shift;
228    
229 dpavlin 372 my $log = $self->_get_logger();
230 dpavlin 362
231 dpavlin 372 my $mfn = $self->{'current_mfn'}++ || $log->logconfess("it seems that you didn't load database!");
232    
233 dpavlin 362 if ($mfn > $self->{'max_mfn'}) {
234     $self->{'current_mfn'} = $self->{'max_mfn'};
235 dpavlin 373 $log->debug("at EOF");
236 dpavlin 362 return;
237     }
238    
239 dpavlin 389 $self->progress_bar($mfn,$self->{'max_mfn'});
240    
241 dpavlin 362 return $self->{'data'}->{$mfn};
242     }
243    
244 dpavlin 389 =head2 progress_bar
245    
246     Draw progress bar on STDERR.
247    
248     $webpac->progress_bar($current, $max);
249    
250     =cut
251    
252     sub progress_bar {
253     my $self = shift;
254    
255     my ($curr,$max) = @_;
256    
257     my $log = $self->_get_logger();
258    
259     $log->logconfess("no current value!") if (! $curr);
260     $log->logconfess("no maximum value!") if (! $max);
261    
262     if ($curr > $max) {
263     $max = $curr;
264     $log->debug("overflow to $curr");
265     }
266    
267     $self->{'last_pcnt'} ||= 1;
268    
269     $self->{'last_pcnt'} = $curr if ($curr < $self->{'last_pcnt'});
270    
271     my $p = int($curr * 100 / $max);
272     if ($p != $self->{'last_pcnt'}) {
273     printf STDERR ("%5d / %5d [%-51s] %-2d %% \r",$curr,$max,"=" x ($p/2).">", $p );
274     $self->{'last_pcnt'} = $p;
275     }
276 dpavlin 412 print STDERR "\n" if ($p == 100);
277 dpavlin 389 }
278    
279 dpavlin 363 =head2 open_import_xml
280    
281     Read file from C<import_xml/> directory and parse it.
282    
283     $webpac->open_import_xml(type => 'isis');
284    
285     =cut
286    
287     sub open_import_xml {
288     my $self = shift;
289    
290 dpavlin 372 my $log = $self->_get_logger();
291    
292 dpavlin 363 my $arg = {@_};
293 dpavlin 372 $log->logconfess("need type to load file from import_xml/") if (! $arg->{'type'});
294 dpavlin 363
295 dpavlin 366 $self->{'type'} = $arg->{'type'};
296 dpavlin 363
297 dpavlin 366 my $type_base = $arg->{'type'};
298 dpavlin 363 $type_base =~ s/_.*$//g;
299    
300 dpavlin 366 $self->{'tag'} = $type2tag{$type_base};
301    
302 dpavlin 375 $log->info("using type '",$self->{'type'},"' tag <",$self->{'tag'},">");
303 dpavlin 366
304     my $f = "./import_xml/".$self->{'type'}.".xml";
305 dpavlin 372 $log->logconfess("import_xml file '$f' doesn't exist!") if (! -e "$f");
306 dpavlin 363
307 dpavlin 375 $log->info("reading '$f'");
308 dpavlin 363
309 dpavlin 375 $self->{'import_xml_file'} = $f;
310    
311 dpavlin 363 $self->{'import_xml'} = XMLin($f,
312 dpavlin 366 ForceArray => [ $self->{'tag'}, 'config', 'format' ],
313 dpavlin 363 );
314    
315 dpavlin 375 $log->debug("import xml is ",sub { Dumper($self->{'import_xml'}) });
316    
317 dpavlin 363 }
318    
319 dpavlin 355 =head2 create_lookup
320    
321     Create lookup from record using lookup definition.
322    
323 dpavlin 367 $self->create_lookup($rec, @lookups);
324    
325     Called internally by C<open_*> methods.
326    
327 dpavlin 355 =cut
328    
329     sub create_lookup {
330     my $self = shift;
331    
332 dpavlin 372 my $log = $self->_get_logger();
333 dpavlin 355
334 dpavlin 372 my $rec = shift || $log->logconfess("need record to create lookup");
335     $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
336    
337 dpavlin 355 foreach my $i (@_) {
338 dpavlin 418 $log->logconfess("need key") unless defined($i->{'key'});
339     $log->logconfess("need val") unless defined($i->{'val'});
340    
341     if (defined($i->{'eval'})) {
342     # eval first, so we can skip fill_in for key and val
343     my $eval = $self->fill_in($rec,$i->{'eval'}) || next;
344     if ($self->_eval($eval)) {
345     my $key = $self->fill_in($rec,$i->{'key'}) || next;
346     my @val = $self->fill_in($rec,$i->{'val'}) || next;
347 dpavlin 373 $log->debug("stored $key = ",sub { join(" | ",@val) });
348 dpavlin 355 push @{$self->{'lookup'}->{$key}}, @val;
349     }
350     } else {
351 dpavlin 418 my $key = $self->fill_in($rec,$i->{'key'}) || next;
352     my @val = $self->fill_in($rec,$i->{'val'}) || next;
353     $log->debug("stored $key = ",sub { join(" | ",@val) });
354     push @{$self->{'lookup'}->{$key}}, @val;
355 dpavlin 355 }
356     }
357     }
358    
359 dpavlin 356 =head2 get_data
360    
361     Returns value from record.
362    
363 dpavlin 367 my $text = $self->get_data(\$rec,$f,$sf,$i,\$found);
364 dpavlin 356
365     Arguments are:
366     record reference C<$rec>,
367     field C<$f>,
368     optional subfiled C<$sf>,
369     index for repeatable values C<$i>.
370    
371 dpavlin 367 Optinal variable C<$found> will be incremeted if there
372 dpavlin 356 is field.
373    
374     Returns value or empty string.
375    
376     =cut
377    
378     sub get_data {
379     my $self = shift;
380    
381     my ($rec,$f,$sf,$i,$found) = @_;
382 dpavlin 367
383 dpavlin 356 if ($$rec->{$f}) {
384 dpavlin 367 return '' if (! $$rec->{$f}->[$i]);
385 dpavlin 389 no strict 'refs';
386 dpavlin 356 if ($sf && $$rec->{$f}->[$i]->{$sf}) {
387     $$found++ if (defined($$found));
388     return $$rec->{$f}->[$i]->{$sf};
389     } elsif ($$rec->{$f}->[$i]) {
390     $$found++ if (defined($$found));
391 dpavlin 366 # it still might have subfield, just
392     # not specified, so we'll dump all
393     if ($$rec->{$f}->[$i] =~ /HASH/o) {
394     my $out;
395     foreach my $k (keys %{$$rec->{$f}->[$i]}) {
396     $out .= $$rec->{$f}->[$i]->{$k}." ";
397     }
398     return $out;
399     } else {
400     return $$rec->{$f}->[$i];
401     }
402 dpavlin 356 }
403     } else {
404     return '';
405     }
406     }
407    
408 dpavlin 352 =head2 fill_in
409    
410     Workhourse of all: takes record from in-memory structure of database and
411     strings with placeholders and returns string or array of with substituted
412     values from record.
413    
414 dpavlin 367 my $text = $webpac->fill_in($rec,'v250^a');
415 dpavlin 352
416     Optional argument is ordinal number for repeatable fields. By default,
417 dpavlin 353 it's assume to be first repeatable field (fields are perl array, so first
418     element is 0).
419     Following example will read second value from repeatable field.
420 dpavlin 352
421 dpavlin 367 my $text = $webpac->fill_in($rec,'Title: v250^a',1);
422 dpavlin 353
423     This function B<does not> perform parsing of format to inteligenty skip
424     delimiters before fields which aren't used.
425    
426 dpavlin 376 This method will automatically decode UTF-8 string to local code page
427     if needed.
428    
429 dpavlin 352 =cut
430    
431 dpavlin 372 sub fill_in {
432 dpavlin 371 my $self = shift;
433    
434 dpavlin 372 my $log = $self->_get_logger();
435 dpavlin 371
436 dpavlin 372 my $rec = shift || $log->logconfess("need data record");
437     my $format = shift || $log->logconfess("need format to parse");
438 dpavlin 352 # iteration (for repeatable fields)
439     my $i = shift || 0;
440    
441     # FIXME remove for speedup?
442 dpavlin 372 $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
443 dpavlin 352
444 dpavlin 376 if (utf8::is_utf8($format)) {
445     $format = $self->_x($format);
446     }
447    
448 dpavlin 352 my $found = 0;
449    
450 dpavlin 359 my $eval_code;
451     # remove eval{...} from beginning
452     $eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s);
453    
454 dpavlin 352 # do actual replacement of placeholders
455 dpavlin 373 $format =~ s/v(\d+)(?:\^(\w))?/$self->get_data(\$rec,$1,$2,$i,\$found)/ges;
456 dpavlin 352
457 dpavlin 353 if ($found) {
458 dpavlin 373 $log->debug("format: $format");
459 dpavlin 359 if ($eval_code) {
460     my $eval = $self->fill_in($rec,$eval_code,$i);
461 dpavlin 371 return if (! $self->_eval($eval));
462 dpavlin 359 }
463 dpavlin 353 # do we have lookups?
464 dpavlin 373 if ($format =~ /$LOOKUP_REGEX/o) {
465     $log->debug("format '$format' has lookup");
466 dpavlin 353 return $self->lookup($format);
467     } else {
468     return $format;
469     }
470 dpavlin 352 } else {
471     return;
472     }
473     }
474    
475     =head2 lookup
476    
477 dpavlin 355 Perform lookups on format supplied to it.
478 dpavlin 352
479 dpavlin 367 my $text = $self->lookup('[v900]');
480 dpavlin 352
481 dpavlin 355 Lookups can be nested (like C<[d:[a:[v900]]]>).
482    
483 dpavlin 352 =cut
484    
485     sub lookup {
486     my $self = shift;
487    
488 dpavlin 372 my $log = $self->_get_logger();
489 dpavlin 352
490 dpavlin 372 my $tmp = shift || $log->logconfess("need format");
491    
492 dpavlin 373 if ($tmp =~ /$LOOKUP_REGEX/o) {
493 dpavlin 352 my @in = ( $tmp );
494 dpavlin 372
495     $log->debug("lookup for: ",$tmp);
496    
497 dpavlin 352 my @out;
498     while (my $f = shift @in) {
499 dpavlin 373 if ($f =~ /$LOOKUP_REGEX_SAVE/o) {
500 dpavlin 352 my $k = $1;
501     if ($self->{'lookup'}->{$k}) {
502     foreach my $nv (@{$self->{'lookup'}->{$k}}) {
503     my $tmp2 = $f;
504 dpavlin 373 $tmp2 =~ s/lookup{$k}/$nv/g;
505 dpavlin 352 push @in, $tmp2;
506     }
507     } else {
508     undef $f;
509     }
510     } elsif ($f) {
511     push @out, $f;
512     }
513     }
514 dpavlin 373 $log->logconfess("return is array and it's not expected!") unless wantarray;
515 dpavlin 352 return @out;
516     } else {
517     return $tmp;
518     }
519     }
520    
521 dpavlin 356 =head2 parse
522    
523     Perform smart parsing of string, skipping delimiters for fields which aren't
524     defined. It can also eval code in format starting with C<eval{...}> and
525     return output or nothing depending on eval code.
526    
527 dpavlin 367 my $text = $webpac->parse($rec,'eval{"v901^a" eq "Deskriptor"}descriptor: v250^a', $i);
528 dpavlin 356
529     =cut
530    
531     sub parse {
532     my $self = shift;
533    
534 dpavlin 366 my ($rec, $format_utf8, $i) = @_;
535 dpavlin 356
536 dpavlin 366 return if (! $format_utf8);
537    
538 dpavlin 372 my $log = $self->_get_logger();
539 dpavlin 358
540 dpavlin 372 $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
541     $log->logconfess("need utf2cp Text::Iconv object!") if (! $self->{'utf2cp'});
542    
543 dpavlin 358 $i = 0 if (! $i);
544    
545 dpavlin 375 my $format = $self->_x($format_utf8) || $log->logconfess("can't convert '$format_utf8' from UTF-8 to ",$self->{'code_page'});
546 dpavlin 366
547 dpavlin 356 my @out;
548    
549 dpavlin 373 $log->debug("format: $format");
550    
551 dpavlin 356 my $eval_code;
552     # remove eval{...} from beginning
553     $eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s);
554    
555 dpavlin 358 my $prefix;
556     my $all_found=0;
557 dpavlin 356
558 dpavlin 373 while ($format =~ s/^(.*?)v(\d+)(?:\^(\w))?//s) {
559 dpavlin 356
560 dpavlin 358 my $del = $1 || '';
561 dpavlin 359 $prefix ||= $del if ($all_found == 0);
562 dpavlin 358
563     my $found = 0;
564     my $tmp = $self->get_data(\$rec,$2,$3,$i,\$found);
565    
566 dpavlin 356 if ($found) {
567 dpavlin 358 push @out, $del;
568     push @out, $tmp;
569     $all_found += $found;
570 dpavlin 356 }
571     }
572    
573 dpavlin 358 return if (! $all_found);
574 dpavlin 356
575 dpavlin 373 my $out = join('',@out);
576 dpavlin 358
577 dpavlin 373 if ($out) {
578     # add rest of format (suffix)
579     $out .= $format;
580 dpavlin 367
581 dpavlin 373 # add prefix if not there
582     $out = $prefix . $out if ($out !~ m/^\Q$prefix\E/);
583    
584     $log->debug("result: $out");
585     }
586    
587 dpavlin 359 if ($eval_code) {
588     my $eval = $self->fill_in($rec,$eval_code,$i);
589 dpavlin 373 $log->debug("about to eval{",$eval,"} format: $out");
590 dpavlin 371 return if (! $self->_eval($eval));
591 dpavlin 359 }
592    
593 dpavlin 358 return $out;
594 dpavlin 356 }
595    
596 dpavlin 367 =head2 parse_to_arr
597    
598     Similar to C<parse>, but returns array of all repeatable fields
599    
600     my @arr = $webpac->parse_to_arr($rec,'v250^a');
601    
602     =cut
603    
604     sub parse_to_arr {
605     my $self = shift;
606    
607     my ($rec, $format_utf8) = @_;
608    
609 dpavlin 372 my $log = $self->_get_logger();
610    
611     $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
612 dpavlin 367 return if (! $format_utf8);
613    
614     my $i = 0;
615     my @arr;
616    
617     while (my $v = $self->parse($rec,$format_utf8,$i++)) {
618     push @arr, $v;
619     }
620    
621 dpavlin 373 $log->debug("format '$format_utf8' returned ",--$i," elements: ", sub { join(" | ",@arr) }) if (@arr);
622    
623 dpavlin 367 return @arr;
624     }
625    
626 dpavlin 373 =head2 fill_in_to_arr
627    
628     Similar to C<fill_in>, but returns array of all repeatable fields. Usable
629     for fields which have lookups, so they shouldn't be parsed but rather
630     C<fill_id>ed.
631    
632     my @arr = $webpac->fill_in_to_arr($rec,'[v900];;[v250^a]');
633    
634     =cut
635    
636     sub fill_in_to_arr {
637     my $self = shift;
638    
639     my ($rec, $format_utf8) = @_;
640    
641     my $log = $self->_get_logger();
642    
643     $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
644     return if (! $format_utf8);
645    
646     my $i = 0;
647     my @arr;
648    
649     while (my @v = $self->fill_in($rec,$format_utf8,$i++)) {
650     push @arr, @v;
651     }
652    
653     $log->debug("format '$format_utf8' returned ",--$i," elements: ", sub { join(" | ",@arr) }) if (@arr);
654    
655     return @arr;
656     }
657    
658    
659 dpavlin 366 =head2 data_structure
660    
661     Create in-memory data structure which represents layout from C<import_xml>.
662     It is used later to produce output.
663    
664 dpavlin 368 my @ds = $webpac->data_structure($rec);
665 dpavlin 366
666 dpavlin 374 This method will also set C<$webpac->{'currnet_filename'}> if there is
667 dpavlin 398 <filename> tag in C<import_xml> and C<$webpac->{'headline'}> if there is
668     <headline> tag.
669 dpavlin 374
670 dpavlin 366 =cut
671    
672 dpavlin 372 sub data_structure {
673 dpavlin 366 my $self = shift;
674    
675 dpavlin 372 my $log = $self->_get_logger();
676 dpavlin 366
677     my $rec = shift;
678 dpavlin 372 $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
679 dpavlin 366
680 dpavlin 374 undef $self->{'currnet_filename'};
681 dpavlin 398 undef $self->{'headline'};
682 dpavlin 374
683 dpavlin 366 my @sorted_tags;
684     if ($self->{tags_by_order}) {
685     @sorted_tags = @{$self->{tags_by_order}};
686     } else {
687     @sorted_tags = sort { $self->_sort_by_order } keys %{$self->{'import_xml'}->{'indexer'}};
688     $self->{tags_by_order} = \@sorted_tags;
689     }
690    
691 dpavlin 368 my @ds;
692 dpavlin 366
693 dpavlin 373 $log->debug("tags: ",sub { join(", ",@sorted_tags) });
694    
695 dpavlin 366 foreach my $field (@sorted_tags) {
696    
697     my $row;
698    
699     #print "field $field [",$self->{'tag'},"] = ",Dumper($self->{'import_xml'}->{'indexer'}->{$field}->{$self->{'tag'}});
700    
701     foreach my $tag (@{$self->{'import_xml'}->{'indexer'}->{$field}->{$self->{'tag'}}}) {
702 dpavlin 373 my $format = $tag->{'value'} || $tag->{'content'};
703 dpavlin 366
704 dpavlin 373 $log->debug("format: $format");
705    
706     my @v;
707     if ($format =~ /$LOOKUP_REGEX/o) {
708     @v = $self->fill_in_to_arr($rec,$format);
709     } else {
710     @v = $self->parse_to_arr($rec,$format);
711     }
712 dpavlin 367 next if (! @v);
713 dpavlin 366
714 dpavlin 375 # use format?
715     if ($tag->{'format_name'}) {
716     @v = map { $self->apply_format($tag->{'format_name'},$tag->{'format_delimiter'},$_) } @v;
717     }
718    
719 dpavlin 398 if ($field eq 'filename') {
720     $self->{'current_filename'} = join('',@v);
721     $log->debug("filename: ",$self->{'current_filename'});
722     } elsif ($field eq 'headline') {
723     $self->{'headline'} .= join('',@v);
724     $log->debug("headline: ",$self->{'headline'});
725     next; # don't return headline in data_structure!
726     }
727    
728 dpavlin 366 # does tag have type?
729     if ($tag->{'type'}) {
730 dpavlin 367 push @{$row->{$tag->{'type'}}}, @v;
731 dpavlin 366 } else {
732 dpavlin 367 push @{$row->{'display'}}, @v;
733     push @{$row->{'swish'}}, @v;
734 dpavlin 366 }
735 dpavlin 373
736 dpavlin 374
737 dpavlin 366 }
738    
739 dpavlin 368 if ($row) {
740     $row->{'tag'} = $field;
741 dpavlin 375
742     # TODO: name_sigular, name_plural
743     my $name = $self->{'import_xml'}->{'indexer'}->{$field}->{'name'};
744     $row->{'name'} = $name ? $self->_x($name) : $field;
745    
746 dpavlin 368 push @ds, $row;
747 dpavlin 374
748 dpavlin 373 $log->debug("row $field: ",sub { Dumper($row) });
749 dpavlin 368 }
750 dpavlin 366
751     }
752    
753 dpavlin 370 return @ds;
754 dpavlin 366
755     }
756    
757 dpavlin 370 =head2 output
758    
759     Create output from in-memory data structure using Template Toolkit template.
760    
761     my $text = $webpac->output( template => 'text.tt', data => @ds );
762    
763     =cut
764    
765     sub output {
766     my $self = shift;
767    
768     my $args = {@_};
769    
770 dpavlin 372 my $log = $self->_get_logger();
771 dpavlin 370
772 dpavlin 372 $log->logconfess("need template name") if (! $args->{'template'});
773     $log->logconfess("need data array") if (! $args->{'data'});
774    
775 dpavlin 370 my $out;
776    
777     $self->{'tt'}->process(
778     $args->{'template'},
779     $args,
780     \$out
781     ) || confess $self->{'tt'}->error();
782    
783     return $out;
784     }
785    
786 dpavlin 411 =head2 output_file
787    
788     Create output from in-memory data structure using Template Toolkit template
789     to a file.
790    
791     $webpac->output_file(
792     file => 'out.txt',
793     template => 'text.tt',
794     data => @ds
795     );
796    
797     =cut
798    
799     sub output_file {
800     my $self = shift;
801    
802     my $args = {@_};
803    
804     my $log = $self->_get_logger();
805    
806     $log->logconfess("need file name") if (! $args->{'file'});
807    
808     $log->debug("creating file ",$args->{'file'});
809    
810     open(my $fh, ">", $args->{'file'}) || $log->logdie("can't open output file '$self->{'file'}': $!");
811     print $fh $self->output(
812     template => $args->{'template'},
813     data => $args->{'data'},
814     ) || $log->logdie("print: $!");
815     close($fh) || $log->logdie("close: $!");
816     }
817    
818 dpavlin 375 =head2 apply_format
819    
820     Apply format specified in tag with C<format_name="name"> and
821     C<format_delimiter=";;">.
822    
823     my $text = $webpac->apply_format($format_name,$format_delimiter,$data);
824    
825     Formats can contain C<lookup{...}> if you need them.
826    
827     =cut
828    
829     sub apply_format {
830     my $self = shift;
831    
832     my ($name,$delimiter,$data) = @_;
833    
834     my $log = $self->_get_logger();
835    
836     if (! $self->{'import_xml'}->{'format'}->{$name}) {
837     $log->warn("<format name=\"$name\"> is not defined in ",$self->{'import_xml_file'});
838     return $data;
839     }
840    
841     $log->warn("no delimiter for format $name") if (! $delimiter);
842    
843     my $format = $self->_x($self->{'import_xml'}->{'format'}->{$name}->{'content'}) || $log->logdie("can't find format '$name'");
844    
845     my @data = split(/\Q$delimiter\E/, $data);
846    
847     my $out = sprintf($format, @data);
848     $log->debug("using format $name [$format] on $data to produce: $out");
849    
850     if ($out =~ m/$LOOKUP_REGEX/o) {
851     return $self->lookup($out);
852     } else {
853     return $out;
854     }
855    
856     }
857    
858    
859 dpavlin 372 #
860     #
861     #
862    
863     =head1 INTERNAL METHODS
864    
865     Here is a quick list of internal methods, mostly useful to turn debugging
866     on them (see L<LOGGING> below for explanation).
867    
868     =cut
869    
870     =head2 _eval
871    
872     Internal function to eval code without C<strict 'subs'>.
873    
874     =cut
875    
876     sub _eval {
877     my $self = shift;
878    
879     my $code = shift || return;
880    
881     my $log = $self->_get_logger();
882    
883     no strict 'subs';
884     my $ret = eval $code;
885     if ($@) {
886     $log->error("problem with eval code [$code]: $@");
887     }
888    
889     $log->debug("eval: ",$code," [",$ret,"]");
890    
891     return $ret || 0;
892     }
893    
894     =head2 _sort_by_order
895    
896     Sort xml tags data structure accoding to C<order=""> attribute.
897    
898     =cut
899    
900     sub _sort_by_order {
901     my $self = shift;
902    
903     my $va = $self->{'import_xml'}->{'indexer'}->{$a}->{'order'} ||
904     $self->{'import_xml'}->{'indexer'}->{$a};
905     my $vb = $self->{'import_xml'}->{'indexer'}->{$b}->{'order'} ||
906     $self->{'import_xml'}->{'indexer'}->{$b};
907    
908     return $va <=> $vb;
909     }
910    
911 dpavlin 375 =head2 _get_logger
912    
913     Get C<Log::Log4perl> object with a twist: domains are defined for each
914     method
915    
916     my $log = $webpac->_get_logger();
917    
918     =cut
919    
920 dpavlin 372 sub _get_logger {
921     my $self = shift;
922    
923 dpavlin 374 my $name = (caller(1))[3] || caller;
924     return get_logger($name);
925 dpavlin 372 }
926    
927 dpavlin 375 =head2 _x
928    
929     Convert string from UTF-8 to code page defined in C<import_xml>.
930    
931     my $text = $webpac->_x('utf8 text');
932    
933     =cut
934    
935     sub _x {
936     my $self = shift;
937     my $utf8 = shift || return;
938    
939     return $self->{'utf2cp'}->convert($utf8) ||
940     $self->_get_logger()->logwarn("can't convert '$utf8'");
941     }
942    
943 dpavlin 372 #
944     #
945     #
946    
947     =head1 LOGGING
948    
949     Logging in WebPAC is performed by L<Log::Log4perl> with config file
950     C<log.conf>.
951    
952     Methods defined above have different levels of logging, so
953     it's descriptions will be useful to turn (mostry B<debug> logging) on
954     or off to see why WabPAC isn't perforing as you expect it (it might even
955     be a bug!).
956    
957     B<This is different from normal Log4perl behaviour>. To repeat, you can
958     also use method names, and not only classes (which are just few)
959     to filter logging.
960    
961     =cut
962    
963 dpavlin 352 1;

  ViewVC Help
Powered by ViewVC 1.1.26