/[webpac]/trunk2/lib/WebPAC.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk2/lib/WebPAC.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 421 - (show annotations)
Fri Sep 10 22:24:42 2004 UTC (15 years, 5 months ago) by dpavlin
File size: 22721 byte(s)
low_mem option for desktop class-machines

1 package WebPAC;
2
3 use warnings;
4 use strict;
5
6 use Carp;
7 use Text::Iconv;
8 use Config::IniFiles;
9 use XML::Simple;
10 use Template;
11 use Log::Log4perl qw(get_logger :levels);
12
13 use Data::Dumper;
14
15 #my $LOOKUP_REGEX = '\[[^\[\]]+\]';
16 #my $LOOKUP_REGEX_SAVE = '\[([^\[\]]+)\]';
17 my $LOOKUP_REGEX = 'lookup{[^\{\}]+}';
18 my $LOOKUP_REGEX_SAVE = 'lookup{([^\{\}]+)}';
19
20 =head1 NAME
21
22 WebPAC - base class for WebPAC
23
24 =head1 DESCRIPTION
25
26 This module implements methods used by WebPAC.
27
28 =head1 METHODS
29
30 =head2 new
31
32 Create new instance of WebPAC using configuration specified by C<config_file>.
33
34 my $webpac = new WebPAC(
35 config_file => 'name.conf',
36 [code_page => 'ISO-8859-2',]
37 [low_mem => 1,]
38 );
39
40 Default C<code_page> is C<ISO-8859-2>.
41
42 This method will also read configuration files
43 C<global.conf> (used by indexer and Web font-end)
44 and configuration file specified by C<config_file>
45 which describes databases to be indexed.
46
47 C<low_mem> options is double-edged sword. If enabled, WebPAC
48 will run on memory constraint machines (which doesn't have enough
49 physical RAM to create memory structure for whole ISIS database).
50
51 If your machine has 512Mb or more and database is around 10000 records,
52 memory shouldn't be an issue. If you don't have enough physical RAM, you
53 might consider using virtual memory (if your operating system is handling it
54 well, like on FreeBSD or Linux) instead of dropping to L<DBD::Deep> to handle
55 parsed structure of ISIS database.
56
57 However, when WebPAC is running on desktop machines (or laptops :-), it's
58 highly undesireable for system to start swapping. Using C<low_mem> option can
59 reduce WecPAC memory usage to 16Mb for same database with lookup fields and
60 sorted indexes which stay in RAM. Performance will suffer, but memory usage
61 will really be minimal. It might be also more confortable to run WebPAC reniced
62 on those machines.
63
64 =cut
65
66 # mapping between data type and tag which specify
67 # format in XML file
68 my %type2tag = (
69 'isis' => 'isis',
70 # 'excel' => 'column',
71 # 'marc' => 'marc',
72 # 'feed' => 'feed'
73 );
74
75 sub new {
76 my $class = shift;
77 my $self = {@_};
78 bless($self, $class);
79
80 my $log_file = $self->{'log'} || "log.conf";
81 Log::Log4perl->init($log_file);
82
83 my $log = $self->_get_logger();
84
85 # fill in default values
86 # output codepage
87 $self->{'code_page'} = 'ISO-8859-2' if (! $self->{'code_page'});
88
89 #
90 # read global.conf
91 #
92 $log->debug("read 'global.conf'");
93
94 my $config = new Config::IniFiles( -file => 'global.conf' ) || $log->logcroak("can't open 'global.conf'");
95
96 # read global config parametars
97 foreach my $var (qw(
98 dbi_dbd
99 dbi_dsn
100 dbi_user
101 dbi_passwd
102 show_progress
103 my_unac_filter
104 output_template
105 )) {
106 $self->{'global_config'}->{$var} = $config->val('global', $var);
107 }
108
109 #
110 # read indexer config file
111 #
112
113 $self->{indexer_config_file} = new Config::IniFiles( -file => $self->{config_file} ) || $log->logcroak("can't open '",$self->{config_file},"'");
114
115 # create UTF-8 convertor for import_xml files
116 $self->{'utf2cp'} = Text::Iconv->new('UTF-8' ,$self->{'code_page'});
117
118 # create Template toolkit instance
119 $self->{'tt'} = Template->new(
120 INCLUDE_PATH => ($self->{'global_config_file'}->{'output_template'} || './output_template'),
121 # FILTERS => {
122 # 'foo' => \&foo_filter,
123 # },
124 EVAL_PERL => 1,
125 );
126
127 # running with low_mem flag? well, use DBM::Deep then.
128 if ($self->{'low_mem'}) {
129 $log->info("running with low_mem which impacts performance (<64 Mb memory usage)");
130
131 my $db_file = "data.db";
132
133 if (-e $db_file) {
134 unlink $db_file or $log->logdie("can't remove '$db_file' from last run");
135 $log->debug("removed '$db_file' from last run");
136 }
137
138 use DBM::Deep;
139
140 my $db = new DBM::Deep $db_file;
141
142 $log->logdie("DBM::Deep error: $!") unless ($db);
143
144 if ($db->error()) {
145 $log->logdie("can't open '$db_file' under low_mem: ",$db->error());
146 } else {
147 $log->debug("using file $db_file for DBM::Deep");
148 }
149
150 $self->{'db'} = $db;
151 }
152
153 return $self;
154 }
155
156 =head2 open_isis
157
158 Open CDS/ISIS database using OpenIsis module and read all records to memory.
159
160 $webpac->open_isis(
161 filename => '/data/ISIS/ISIS',
162 code_page => '852',
163 limit_mfn => '500',
164 lookup => [ ... ],
165 );
166
167 By default, ISIS code page is assumed to be C<852>.
168
169 If optional parametar C<limit_mfn> is set, it will read just 500 records
170 from database in example above.
171
172 C<lookup> argument is an array of lookups to create. Each lookup must have C<key> and
173 C<val>. Optional parametar C<eval> is perl code to evaluate before storing
174 value in index.
175
176 lookup => [
177 { 'key' => 'd:v900', 'val' => 'v250^a' },
178 { 'eval' => '"v901^a" eq "Podruèje"',
179 'key' => 'pa:v561^4:v562^4:v461^1',
180 'val' => 'v900' },
181 ]
182
183 Returns number of last record read into memory (size of database, really).
184
185 =cut
186
187 sub open_isis {
188 my $self = shift;
189 my $arg = {@_};
190
191 my $log = $self->_get_logger();
192
193 $log->logcroak("need filename") if (! $arg->{'filename'});
194 my $code_page = $arg->{'code_page'} || '852';
195
196 # store data in object
197 $self->{'isis_filename'} = $arg->{'filename'};
198 $self->{'isis_code_page'} = $code_page;
199
200 use OpenIsis;
201
202 #$self->{'isis_code_page'} = $code_page;
203
204 # create Text::Iconv object
205 my $cp = Text::Iconv->new($code_page,$self->{'code_page'});
206
207 $log->info("reading ISIS database '",$arg->{'filename'},"'");
208 $log->debug("isis code page: $code_page");
209
210 my $isis_db = OpenIsis::open($arg->{'filename'});
211
212 my $maxmfn = OpenIsis::maxRowid( $isis_db ) || 1;
213
214 $maxmfn = $self->{limit_mfn} if ($self->{limit_mfn});
215
216 $log->info("processing $maxmfn records...");
217
218 # read database
219 for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) {
220
221
222 $log->debug("mfn: $mfn\n");
223
224 my $rec;
225
226 # read record
227 my $row = OpenIsis::read( $isis_db, $mfn );
228 foreach my $k (keys %{$row}) {
229 if ($k ne "mfn") {
230 foreach my $l (@{$row->{$k}}) {
231 $l = $cp->convert($l);
232 # has subfields?
233 my $val;
234 if ($l =~ m/\^/) {
235 foreach my $t (split(/\^/,$l)) {
236 next if (! $t);
237 $val->{substr($t,0,1)} = substr($t,1);
238 }
239 } else {
240 $val = $l;
241 }
242
243 push @{$rec->{$k}}, $val;
244 }
245 } else {
246 push @{$rec->{'000'}}, $mfn;
247 }
248
249 }
250
251 $log->confess("record $mfn empty?") unless ($rec);
252
253 # store
254 if ($self->{'low_mem'}) {
255 $self->{'db'}->put($mfn, $rec);
256 } else {
257 $self->{'data'}->{$mfn} = $rec;
258 }
259
260 # create lookup
261 $self->create_lookup($rec, @{$arg->{'lookup'}});
262
263 $self->progress_bar($mfn,$maxmfn);
264
265 }
266
267 $self->{'current_mfn'} = 1;
268 $self->{'last_pcnt'} = 0;
269
270 $log->debug("max mfn: $maxmfn");
271
272 # store max mfn and return it.
273 return $self->{'max_mfn'} = $maxmfn;
274 }
275
276 =head2 fetch_rec
277
278 Fetch next record from database. It will also display progress bar (once
279 it's implemented, that is).
280
281 my $rec = $webpac->fetch_rec;
282
283 =cut
284
285 sub fetch_rec {
286 my $self = shift;
287
288 my $log = $self->_get_logger();
289
290 my $mfn = $self->{'current_mfn'}++ || $log->logconfess("it seems that you didn't load database!");
291
292 if ($mfn > $self->{'max_mfn'}) {
293 $self->{'current_mfn'} = $self->{'max_mfn'};
294 $log->debug("at EOF");
295 return;
296 }
297
298 $self->progress_bar($mfn,$self->{'max_mfn'});
299
300 if ($self->{'low_mem'}) {
301 return $self->{'db'}->get($mfn);
302 } else {
303 return $self->{'data'}->{$mfn};
304 }
305 }
306
307 =head2 progress_bar
308
309 Draw progress bar on STDERR.
310
311 $webpac->progress_bar($current, $max);
312
313 =cut
314
315 sub progress_bar {
316 my $self = shift;
317
318 my ($curr,$max) = @_;
319
320 my $log = $self->_get_logger();
321
322 $log->logconfess("no current value!") if (! $curr);
323 $log->logconfess("no maximum value!") if (! $max);
324
325 if ($curr > $max) {
326 $max = $curr;
327 $log->debug("overflow to $curr");
328 }
329
330 $self->{'last_pcnt'} ||= 1;
331
332 $self->{'last_pcnt'} = $curr if ($curr < $self->{'last_pcnt'});
333
334 my $p = int($curr * 100 / $max);
335 if ($p != $self->{'last_pcnt'}) {
336 printf STDERR ("%5d / %5d [%-51s] %-2d %% \r",$curr,$max,"=" x ($p/2).">", $p );
337 $self->{'last_pcnt'} = $p;
338 }
339 print STDERR "\n" if ($p == 100);
340 }
341
342 =head2 open_import_xml
343
344 Read file from C<import_xml/> directory and parse it.
345
346 $webpac->open_import_xml(type => 'isis');
347
348 =cut
349
350 sub open_import_xml {
351 my $self = shift;
352
353 my $log = $self->_get_logger();
354
355 my $arg = {@_};
356 $log->logconfess("need type to load file from import_xml/") if (! $arg->{'type'});
357
358 $self->{'type'} = $arg->{'type'};
359
360 my $type_base = $arg->{'type'};
361 $type_base =~ s/_.*$//g;
362
363 $self->{'tag'} = $type2tag{$type_base};
364
365 $log->info("using type '",$self->{'type'},"' tag <",$self->{'tag'},">");
366
367 my $f = "./import_xml/".$self->{'type'}.".xml";
368 $log->logconfess("import_xml file '$f' doesn't exist!") if (! -e "$f");
369
370 $log->info("reading '$f'");
371
372 $self->{'import_xml_file'} = $f;
373
374 $self->{'import_xml'} = XMLin($f,
375 ForceArray => [ $self->{'tag'}, 'config', 'format' ],
376 );
377
378 $log->debug("import xml is ",sub { Dumper($self->{'import_xml'}) });
379
380 }
381
382 =head2 create_lookup
383
384 Create lookup from record using lookup definition.
385
386 $self->create_lookup($rec, @lookups);
387
388 Called internally by C<open_*> methods.
389
390 =cut
391
392 sub create_lookup {
393 my $self = shift;
394
395 my $log = $self->_get_logger();
396
397 my $rec = shift || $log->logconfess("need record to create lookup");
398 $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
399
400 foreach my $i (@_) {
401 $log->logconfess("need key") unless defined($i->{'key'});
402 $log->logconfess("need val") unless defined($i->{'val'});
403
404 if (defined($i->{'eval'})) {
405 # eval first, so we can skip fill_in for key and val
406 my $eval = $self->fill_in($rec,$i->{'eval'}) || next;
407 if ($self->_eval($eval)) {
408 my $key = $self->fill_in($rec,$i->{'key'}) || next;
409 my @val = $self->fill_in($rec,$i->{'val'}) || next;
410 $log->debug("stored $key = ",sub { join(" | ",@val) });
411 push @{$self->{'lookup'}->{$key}}, @val;
412 }
413 } else {
414 my $key = $self->fill_in($rec,$i->{'key'}) || next;
415 my @val = $self->fill_in($rec,$i->{'val'}) || next;
416 $log->debug("stored $key = ",sub { join(" | ",@val) });
417 push @{$self->{'lookup'}->{$key}}, @val;
418 }
419 }
420 }
421
422 =head2 get_data
423
424 Returns value from record.
425
426 my $text = $self->get_data(\$rec,$f,$sf,$i,\$found);
427
428 Arguments are:
429 record reference C<$rec>,
430 field C<$f>,
431 optional subfiled C<$sf>,
432 index for repeatable values C<$i>.
433
434 Optinal variable C<$found> will be incremeted if there
435 is field.
436
437 Returns value or empty string.
438
439 =cut
440
441 sub get_data {
442 my $self = shift;
443
444 my ($rec,$f,$sf,$i,$found) = @_;
445
446 if ($$rec->{$f}) {
447 return '' if (! $$rec->{$f}->[$i]);
448 no strict 'refs';
449 if ($sf && $$rec->{$f}->[$i]->{$sf}) {
450 $$found++ if (defined($$found));
451 return $$rec->{$f}->[$i]->{$sf};
452 } elsif ($$rec->{$f}->[$i]) {
453 $$found++ if (defined($$found));
454 # it still might have subfield, just
455 # not specified, so we'll dump all
456 if ($$rec->{$f}->[$i] =~ /HASH/o) {
457 my $out;
458 foreach my $k (keys %{$$rec->{$f}->[$i]}) {
459 $out .= $$rec->{$f}->[$i]->{$k}." ";
460 }
461 return $out;
462 } else {
463 return $$rec->{$f}->[$i];
464 }
465 }
466 } else {
467 return '';
468 }
469 }
470
471 =head2 fill_in
472
473 Workhourse of all: takes record from in-memory structure of database and
474 strings with placeholders and returns string or array of with substituted
475 values from record.
476
477 my $text = $webpac->fill_in($rec,'v250^a');
478
479 Optional argument is ordinal number for repeatable fields. By default,
480 it's assume to be first repeatable field (fields are perl array, so first
481 element is 0).
482 Following example will read second value from repeatable field.
483
484 my $text = $webpac->fill_in($rec,'Title: v250^a',1);
485
486 This function B<does not> perform parsing of format to inteligenty skip
487 delimiters before fields which aren't used.
488
489 This method will automatically decode UTF-8 string to local code page
490 if needed.
491
492 =cut
493
494 sub fill_in {
495 my $self = shift;
496
497 my $log = $self->_get_logger();
498
499 my $rec = shift || $log->logconfess("need data record");
500 my $format = shift || $log->logconfess("need format to parse");
501 # iteration (for repeatable fields)
502 my $i = shift || 0;
503
504 # FIXME remove for speedup?
505 $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
506
507 if (utf8::is_utf8($format)) {
508 $format = $self->_x($format);
509 }
510
511 my $found = 0;
512
513 my $eval_code;
514 # remove eval{...} from beginning
515 $eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s);
516
517 # do actual replacement of placeholders
518 $format =~ s/v(\d+)(?:\^(\w))?/$self->get_data(\$rec,$1,$2,$i,\$found)/ges;
519
520 if ($found) {
521 $log->debug("format: $format");
522 if ($eval_code) {
523 my $eval = $self->fill_in($rec,$eval_code,$i);
524 return if (! $self->_eval($eval));
525 }
526 # do we have lookups?
527 if ($format =~ /$LOOKUP_REGEX/o) {
528 $log->debug("format '$format' has lookup");
529 return $self->lookup($format);
530 } else {
531 return $format;
532 }
533 } else {
534 return;
535 }
536 }
537
538 =head2 lookup
539
540 Perform lookups on format supplied to it.
541
542 my $text = $self->lookup('[v900]');
543
544 Lookups can be nested (like C<[d:[a:[v900]]]>).
545
546 =cut
547
548 sub lookup {
549 my $self = shift;
550
551 my $log = $self->_get_logger();
552
553 my $tmp = shift || $log->logconfess("need format");
554
555 if ($tmp =~ /$LOOKUP_REGEX/o) {
556 my @in = ( $tmp );
557
558 $log->debug("lookup for: ",$tmp);
559
560 my @out;
561 while (my $f = shift @in) {
562 if ($f =~ /$LOOKUP_REGEX_SAVE/o) {
563 my $k = $1;
564 if ($self->{'lookup'}->{$k}) {
565 foreach my $nv (@{$self->{'lookup'}->{$k}}) {
566 my $tmp2 = $f;
567 $tmp2 =~ s/lookup{$k}/$nv/g;
568 push @in, $tmp2;
569 }
570 } else {
571 undef $f;
572 }
573 } elsif ($f) {
574 push @out, $f;
575 }
576 }
577 $log->logconfess("return is array and it's not expected!") unless wantarray;
578 return @out;
579 } else {
580 return $tmp;
581 }
582 }
583
584 =head2 parse
585
586 Perform smart parsing of string, skipping delimiters for fields which aren't
587 defined. It can also eval code in format starting with C<eval{...}> and
588 return output or nothing depending on eval code.
589
590 my $text = $webpac->parse($rec,'eval{"v901^a" eq "Deskriptor"}descriptor: v250^a', $i);
591
592 =cut
593
594 sub parse {
595 my $self = shift;
596
597 my ($rec, $format_utf8, $i) = @_;
598
599 return if (! $format_utf8);
600
601 my $log = $self->_get_logger();
602
603 $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
604 $log->logconfess("need utf2cp Text::Iconv object!") if (! $self->{'utf2cp'});
605
606 $i = 0 if (! $i);
607
608 my $format = $self->_x($format_utf8) || $log->logconfess("can't convert '$format_utf8' from UTF-8 to ",$self->{'code_page'});
609
610 my @out;
611
612 $log->debug("format: $format");
613
614 my $eval_code;
615 # remove eval{...} from beginning
616 $eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s);
617
618 my $prefix;
619 my $all_found=0;
620
621 while ($format =~ s/^(.*?)v(\d+)(?:\^(\w))?//s) {
622
623 my $del = $1 || '';
624 $prefix ||= $del if ($all_found == 0);
625
626 my $found = 0;
627 my $tmp = $self->get_data(\$rec,$2,$3,$i,\$found);
628
629 if ($found) {
630 push @out, $del;
631 push @out, $tmp;
632 $all_found += $found;
633 }
634 }
635
636 return if (! $all_found);
637
638 my $out = join('',@out);
639
640 if ($out) {
641 # add rest of format (suffix)
642 $out .= $format;
643
644 # add prefix if not there
645 $out = $prefix . $out if ($out !~ m/^\Q$prefix\E/);
646
647 $log->debug("result: $out");
648 }
649
650 if ($eval_code) {
651 my $eval = $self->fill_in($rec,$eval_code,$i);
652 $log->debug("about to eval{",$eval,"} format: $out");
653 return if (! $self->_eval($eval));
654 }
655
656 return $out;
657 }
658
659 =head2 parse_to_arr
660
661 Similar to C<parse>, but returns array of all repeatable fields
662
663 my @arr = $webpac->parse_to_arr($rec,'v250^a');
664
665 =cut
666
667 sub parse_to_arr {
668 my $self = shift;
669
670 my ($rec, $format_utf8) = @_;
671
672 my $log = $self->_get_logger();
673
674 $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
675 return if (! $format_utf8);
676
677 my $i = 0;
678 my @arr;
679
680 while (my $v = $self->parse($rec,$format_utf8,$i++)) {
681 push @arr, $v;
682 }
683
684 $log->debug("format '$format_utf8' returned ",--$i," elements: ", sub { join(" | ",@arr) }) if (@arr);
685
686 return @arr;
687 }
688
689 =head2 fill_in_to_arr
690
691 Similar to C<fill_in>, but returns array of all repeatable fields. Usable
692 for fields which have lookups, so they shouldn't be parsed but rather
693 C<fill_id>ed.
694
695 my @arr = $webpac->fill_in_to_arr($rec,'[v900];;[v250^a]');
696
697 =cut
698
699 sub fill_in_to_arr {
700 my $self = shift;
701
702 my ($rec, $format_utf8) = @_;
703
704 my $log = $self->_get_logger();
705
706 $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
707 return if (! $format_utf8);
708
709 my $i = 0;
710 my @arr;
711
712 while (my @v = $self->fill_in($rec,$format_utf8,$i++)) {
713 push @arr, @v;
714 }
715
716 $log->debug("format '$format_utf8' returned ",--$i," elements: ", sub { join(" | ",@arr) }) if (@arr);
717
718 return @arr;
719 }
720
721
722 =head2 data_structure
723
724 Create in-memory data structure which represents layout from C<import_xml>.
725 It is used later to produce output.
726
727 my @ds = $webpac->data_structure($rec);
728
729 This method will also set C<$webpac->{'currnet_filename'}> if there is
730 <filename> tag in C<import_xml> and C<$webpac->{'headline'}> if there is
731 <headline> tag.
732
733 =cut
734
735 sub data_structure {
736 my $self = shift;
737
738 my $log = $self->_get_logger();
739
740 my $rec = shift;
741 $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
742
743 undef $self->{'currnet_filename'};
744 undef $self->{'headline'};
745
746 my @sorted_tags;
747 if ($self->{tags_by_order}) {
748 @sorted_tags = @{$self->{tags_by_order}};
749 } else {
750 @sorted_tags = sort { $self->_sort_by_order } keys %{$self->{'import_xml'}->{'indexer'}};
751 $self->{tags_by_order} = \@sorted_tags;
752 }
753
754 my @ds;
755
756 $log->debug("tags: ",sub { join(", ",@sorted_tags) });
757
758 foreach my $field (@sorted_tags) {
759
760 my $row;
761
762 #print "field $field [",$self->{'tag'},"] = ",Dumper($self->{'import_xml'}->{'indexer'}->{$field}->{$self->{'tag'}});
763
764 foreach my $tag (@{$self->{'import_xml'}->{'indexer'}->{$field}->{$self->{'tag'}}}) {
765 my $format = $tag->{'value'} || $tag->{'content'};
766
767 $log->debug("format: $format");
768
769 my @v;
770 if ($format =~ /$LOOKUP_REGEX/o) {
771 @v = $self->fill_in_to_arr($rec,$format);
772 } else {
773 @v = $self->parse_to_arr($rec,$format);
774 }
775 next if (! @v);
776
777 # use format?
778 if ($tag->{'format_name'}) {
779 @v = map { $self->apply_format($tag->{'format_name'},$tag->{'format_delimiter'},$_) } @v;
780 }
781
782 if ($field eq 'filename') {
783 $self->{'current_filename'} = join('',@v);
784 $log->debug("filename: ",$self->{'current_filename'});
785 } elsif ($field eq 'headline') {
786 $self->{'headline'} .= join('',@v);
787 $log->debug("headline: ",$self->{'headline'});
788 next; # don't return headline in data_structure!
789 }
790
791 # does tag have type?
792 if ($tag->{'type'}) {
793 push @{$row->{$tag->{'type'}}}, @v;
794 } else {
795 push @{$row->{'display'}}, @v;
796 push @{$row->{'swish'}}, @v;
797 }
798
799
800 }
801
802 if ($row) {
803 $row->{'tag'} = $field;
804
805 # TODO: name_sigular, name_plural
806 my $name = $self->{'import_xml'}->{'indexer'}->{$field}->{'name'};
807 $row->{'name'} = $name ? $self->_x($name) : $field;
808
809 push @ds, $row;
810
811 $log->debug("row $field: ",sub { Dumper($row) });
812 }
813
814 }
815
816 return @ds;
817
818 }
819
820 =head2 output
821
822 Create output from in-memory data structure using Template Toolkit template.
823
824 my $text = $webpac->output( template => 'text.tt', data => @ds );
825
826 =cut
827
828 sub output {
829 my $self = shift;
830
831 my $args = {@_};
832
833 my $log = $self->_get_logger();
834
835 $log->logconfess("need template name") if (! $args->{'template'});
836 $log->logconfess("need data array") if (! $args->{'data'});
837
838 my $out;
839
840 $self->{'tt'}->process(
841 $args->{'template'},
842 $args,
843 \$out
844 ) || confess $self->{'tt'}->error();
845
846 return $out;
847 }
848
849 =head2 output_file
850
851 Create output from in-memory data structure using Template Toolkit template
852 to a file.
853
854 $webpac->output_file(
855 file => 'out.txt',
856 template => 'text.tt',
857 data => @ds
858 );
859
860 =cut
861
862 sub output_file {
863 my $self = shift;
864
865 my $args = {@_};
866
867 my $log = $self->_get_logger();
868
869 my $file = $args->{'file'} || $log->logconfess("need file name");
870
871 $log->debug("creating file ",$file);
872
873 open(my $fh, ">", $file) || $log->logdie("can't open output file '$file': $!");
874 print $fh $self->output(
875 template => $args->{'template'},
876 data => $args->{'data'},
877 ) || $log->logdie("print: $!");
878 close($fh) || $log->logdie("close: $!");
879 }
880
881 =head2 apply_format
882
883 Apply format specified in tag with C<format_name="name"> and
884 C<format_delimiter=";;">.
885
886 my $text = $webpac->apply_format($format_name,$format_delimiter,$data);
887
888 Formats can contain C<lookup{...}> if you need them.
889
890 =cut
891
892 sub apply_format {
893 my $self = shift;
894
895 my ($name,$delimiter,$data) = @_;
896
897 my $log = $self->_get_logger();
898
899 if (! $self->{'import_xml'}->{'format'}->{$name}) {
900 $log->warn("<format name=\"$name\"> is not defined in ",$self->{'import_xml_file'});
901 return $data;
902 }
903
904 $log->warn("no delimiter for format $name") if (! $delimiter);
905
906 my $format = $self->_x($self->{'import_xml'}->{'format'}->{$name}->{'content'}) || $log->logdie("can't find format '$name'");
907
908 my @data = split(/\Q$delimiter\E/, $data);
909
910 my $out = sprintf($format, @data);
911 $log->debug("using format $name [$format] on $data to produce: $out");
912
913 if ($out =~ m/$LOOKUP_REGEX/o) {
914 return $self->lookup($out);
915 } else {
916 return $out;
917 }
918
919 }
920
921
922 #
923 #
924 #
925
926 =head1 INTERNAL METHODS
927
928 Here is a quick list of internal methods, mostly useful to turn debugging
929 on them (see L<LOGGING> below for explanation).
930
931 =cut
932
933 =head2 _eval
934
935 Internal function to eval code without C<strict 'subs'>.
936
937 =cut
938
939 sub _eval {
940 my $self = shift;
941
942 my $code = shift || return;
943
944 my $log = $self->_get_logger();
945
946 no strict 'subs';
947 my $ret = eval $code;
948 if ($@) {
949 $log->error("problem with eval code [$code]: $@");
950 }
951
952 $log->debug("eval: ",$code," [",$ret,"]");
953
954 return $ret || 0;
955 }
956
957 =head2 _sort_by_order
958
959 Sort xml tags data structure accoding to C<order=""> attribute.
960
961 =cut
962
963 sub _sort_by_order {
964 my $self = shift;
965
966 my $va = $self->{'import_xml'}->{'indexer'}->{$a}->{'order'} ||
967 $self->{'import_xml'}->{'indexer'}->{$a};
968 my $vb = $self->{'import_xml'}->{'indexer'}->{$b}->{'order'} ||
969 $self->{'import_xml'}->{'indexer'}->{$b};
970
971 return $va <=> $vb;
972 }
973
974 =head2 _get_logger
975
976 Get C<Log::Log4perl> object with a twist: domains are defined for each
977 method
978
979 my $log = $webpac->_get_logger();
980
981 =cut
982
983 sub _get_logger {
984 my $self = shift;
985
986 my $name = (caller(1))[3] || caller;
987 return get_logger($name);
988 }
989
990 =head2 _x
991
992 Convert string from UTF-8 to code page defined in C<import_xml>.
993
994 my $text = $webpac->_x('utf8 text');
995
996 =cut
997
998 sub _x {
999 my $self = shift;
1000 my $utf8 = shift || return;
1001
1002 return $self->{'utf2cp'}->convert($utf8) ||
1003 $self->_get_logger()->logwarn("can't convert '$utf8'");
1004 }
1005
1006 #
1007 #
1008 #
1009
1010 =head1 LOGGING
1011
1012 Logging in WebPAC is performed by L<Log::Log4perl> with config file
1013 C<log.conf>.
1014
1015 Methods defined above have different levels of logging, so
1016 it's descriptions will be useful to turn (mostry B<debug> logging) on
1017 or off to see why WabPAC isn't perforing as you expect it (it might even
1018 be a bug!).
1019
1020 B<This is different from normal Log4perl behaviour>. To repeat, you can
1021 also use method names, and not only classes (which are just few)
1022 to filter logging.
1023
1024 =cut
1025
1026 1;

  ViewVC Help
Powered by ViewVC 1.1.26