/[webpac]/trunk2/lib/WebPAC.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk2/lib/WebPAC.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 366 by dpavlin, Thu Jun 17 01:44:25 2004 UTC revision 421 by dpavlin, Fri Sep 10 22:24:42 2004 UTC
# Line 1  Line 1 
1  package WebPAC;  package WebPAC;
2    
3    use warnings;
4    use strict;
5    
6  use Carp;  use Carp;
7  use Text::Iconv;  use Text::Iconv;
8  use Config::IniFiles;  use Config::IniFiles;
9  use XML::Simple;  use XML::Simple;
10    use Template;
11    use Log::Log4perl qw(get_logger :levels);
12    
13  use Data::Dumper;  use Data::Dumper;
14    
15    #my $LOOKUP_REGEX = '\[[^\[\]]+\]';
16    #my $LOOKUP_REGEX_SAVE = '\[([^\[\]]+)\]';
17    my $LOOKUP_REGEX = 'lookup{[^\{\}]+}';
18    my $LOOKUP_REGEX_SAVE = 'lookup{([^\{\}]+)}';
19    
20  =head1 NAME  =head1 NAME
21    
22  WebPAC - base class for WebPAC  WebPAC - base class for WebPAC
# Line 19  This module implements methods used by W Line 29  This module implements methods used by W
29    
30  =head2 new  =head2 new
31    
32  This will create new instance of WebPAC using configuration specified by C<config_file>.  Create new instance of WebPAC using configuration specified by C<config_file>.
33    
34   my $webpac = new WebPAC(   my $webpac = new WebPAC(
35          config_file => 'name.conf',          config_file => 'name.conf',
36          [code_page => 'ISO-8859-2',]          [code_page => 'ISO-8859-2',]
37            [low_mem => 1,]
38   );   );
39    
40  Default C<code_page> is C<ISO-8859-2>.  Default C<code_page> is C<ISO-8859-2>.
41    
42  It will also read configuration files  This method will also read configuration files
43  C<global.conf> (used by indexer and Web font-end)  C<global.conf> (used by indexer and Web font-end)
44  and configuration file specified by C<config_file>  and configuration file specified by C<config_file>
45  which describes databases to be indexed.  which describes databases to be indexed.
46    
47    C<low_mem> options is double-edged sword. If enabled, WebPAC
48    will run on memory constraint machines (which doesn't have enough
49    physical RAM to create memory structure for whole ISIS database).
50    
51    If your machine has 512Mb or more and database is around 10000 records,
52    memory shouldn't be an issue. If you don't have enough physical RAM, you
53    might consider using virtual memory (if your operating system is handling it
54    well, like on FreeBSD or Linux) instead of dropping to L<DBD::Deep> to handle
55    parsed structure of ISIS database.
56    
57    However, when WebPAC is running on desktop machines (or laptops :-), it's
58    highly undesireable for system to start swapping. Using C<low_mem> option can
59    reduce WecPAC memory usage to 16Mb for same database with lookup fields and
60    sorted indexes which stay in RAM. Performance will suffer, but memory usage
61    will really be minimal. It might be also more confortable to run WebPAC reniced
62    on those machines.
63    
64  =cut  =cut
65    
66  # mapping between data type and tag which specify  # mapping between data type and tag which specify
# Line 49  sub new { Line 77  sub new {
77          my $self = {@_};          my $self = {@_};
78          bless($self, $class);          bless($self, $class);
79    
80            my $log_file = $self->{'log'} || "log.conf";
81            Log::Log4perl->init($log_file);
82    
83            my $log = $self->_get_logger();
84    
85          # fill in default values          # fill in default values
86          # output codepage          # output codepage
87          $self->{'code_page'} = 'ISO-8859-2' if (! $self->{'code_page'});          $self->{'code_page'} = 'ISO-8859-2' if (! $self->{'code_page'});
# Line 56  sub new { Line 89  sub new {
89          #          #
90          # read global.conf          # read global.conf
91          #          #
92            $log->debug("read 'global.conf'");
93    
94          $self->{global_config_file} = new Config::IniFiles( -file => 'global.conf' ) || croak "can't open 'global.conf'";          my $config = new Config::IniFiles( -file => 'global.conf' ) || $log->logcroak("can't open 'global.conf'");
95    
96          # read global config parametars          # read global config parametars
97          foreach my $var (qw(          foreach my $var (qw(
# Line 67  sub new { Line 101  sub new {
101                          dbi_passwd                          dbi_passwd
102                          show_progress                          show_progress
103                          my_unac_filter                          my_unac_filter
104                            output_template
105                  )) {                  )) {
106                  $self->{global_config}->{$var} = $self->{global_config_file}->val('global', $var);                  $self->{'global_config'}->{$var} = $config->val('global', $var);
107          }          }
108    
109          #          #
110          # read indexer config file          # read indexer config file
111          #          #
112    
113          $self->{indexer_config_file} = new Config::IniFiles( -file => $self->{config_file} ) || croak "can't open '$self->{config_file}'";          $self->{indexer_config_file} = new Config::IniFiles( -file => $self->{config_file} ) || $log->logcroak("can't open '",$self->{config_file},"'");
114    
115            # create UTF-8 convertor for import_xml files
116          $self->{'utf2cp'} = Text::Iconv->new('UTF-8' ,$self->{'code_page'});          $self->{'utf2cp'} = Text::Iconv->new('UTF-8' ,$self->{'code_page'});
117    
118            # create Template toolkit instance
119            $self->{'tt'} = Template->new(
120                    INCLUDE_PATH => ($self->{'global_config_file'}->{'output_template'} || './output_template'),
121    #               FILTERS => {
122    #                       'foo' => \&foo_filter,
123    #               },
124                    EVAL_PERL => 1,
125            );
126    
127            # running with low_mem flag? well, use DBM::Deep then.
128            if ($self->{'low_mem'}) {
129                    $log->info("running with low_mem which impacts performance (<64 Mb memory usage)");
130    
131                    my $db_file = "data.db";
132    
133                    if (-e $db_file) {
134                            unlink $db_file or $log->logdie("can't remove '$db_file' from last run");
135                            $log->debug("removed '$db_file' from last run");
136                    }
137    
138                    use DBM::Deep;
139    
140                    my $db = new DBM::Deep $db_file;
141    
142                    $log->logdie("DBM::Deep error: $!") unless ($db);
143    
144                    if ($db->error()) {
145                            $log->logdie("can't open '$db_file' under low_mem: ",$db->error());
146                    } else {
147                            $log->debug("using file $db_file for DBM::Deep");
148                    }
149    
150                    $self->{'db'} = $db;
151            }
152    
153          return $self;          return $self;
154  }  }
155    
# Line 97  By default, ISIS code page is assumed to Line 169  By default, ISIS code page is assumed to
169  If optional parametar C<limit_mfn> is set, it will read just 500 records  If optional parametar C<limit_mfn> is set, it will read just 500 records
170  from database in example above.  from database in example above.
171    
 Returns number of last record read into memory (size of database, really).  
   
172  C<lookup> argument is an array of lookups to create. Each lookup must have C<key> and  C<lookup> argument is an array of lookups to create. Each lookup must have C<key> and
173  C<val>. Optional parametar C<eval> is perl code to evaluate before storing  C<val>. Optional parametar C<eval> is perl code to evaluate before storing
174  value in index.  value in index.
# Line 110  value in index. Line 180  value in index.
180      'val' => 'v900' },      'val' => 'v900' },
181   ]   ]
182    
183    Returns number of last record read into memory (size of database, really).
184    
185  =cut  =cut
186    
187  sub open_isis {  sub open_isis {
188          my $self = shift;          my $self = shift;
189          my $arg = {@_};          my $arg = {@_};
190    
191          croak "need filename" if (! $arg->{'filename'});          my $log = $self->_get_logger();
192    
193            $log->logcroak("need filename") if (! $arg->{'filename'});
194          my $code_page = $arg->{'code_page'} || '852';          my $code_page = $arg->{'code_page'} || '852';
195    
196            # store data in object
197            $self->{'isis_filename'} = $arg->{'filename'};
198            $self->{'isis_code_page'} = $code_page;
199    
200          use OpenIsis;          use OpenIsis;
201    
202          #$self->{'isis_code_page'} = $code_page;          #$self->{'isis_code_page'} = $code_page;
# Line 126  sub open_isis { Line 204  sub open_isis {
204          # create Text::Iconv object          # create Text::Iconv object
205          my $cp = Text::Iconv->new($code_page,$self->{'code_page'});          my $cp = Text::Iconv->new($code_page,$self->{'code_page'});
206    
207          print STDERR "reading ISIS database '",$arg->{'filename'},"'\n" if ($self->{'debug'});          $log->info("reading ISIS database '",$arg->{'filename'},"'");
208            $log->debug("isis code page: $code_page");
209    
210          my $isis_db = OpenIsis::open($arg->{'filename'});          my $isis_db = OpenIsis::open($arg->{'filename'});
211    
# Line 134  sub open_isis { Line 213  sub open_isis {
213    
214          $maxmfn = $self->{limit_mfn} if ($self->{limit_mfn});          $maxmfn = $self->{limit_mfn} if ($self->{limit_mfn});
215    
216          print STDERR "processing $maxmfn records...\n" if ($self->{'debug'});          $log->info("processing $maxmfn records...");
217    
218          # read database          # read database
219          for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) {          for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) {
220    
221    
222                    $log->debug("mfn: $mfn\n");
223    
224                    my $rec;
225    
226                  # read record                  # read record
227                  my $row = OpenIsis::read( $isis_db, $mfn );                  my $row = OpenIsis::read( $isis_db, $mfn );
228                  foreach my $k (keys %{$row}) {                  foreach my $k (keys %{$row}) {
# Line 156  sub open_isis { Line 240  sub open_isis {
240                                                  $val = $l;                                                  $val = $l;
241                                          }                                          }
242    
243                                          push @{$self->{'data'}->{$mfn}->{$k}}, $val;                                          push @{$rec->{$k}}, $val;
244                                  }                                  }
245                            } else {
246                                    push @{$rec->{'000'}}, $mfn;
247                          }                          }
248    
249                  }                  }
250    
251                    $log->confess("record $mfn empty?") unless ($rec);
252    
253                    # store
254                    if ($self->{'low_mem'}) {
255                            $self->{'db'}->put($mfn, $rec);
256                    } else {
257                            $self->{'data'}->{$mfn} = $rec;
258                    }
259    
260                  # create lookup                  # create lookup
                 my $rec = $self->{'data'}->{$mfn};  
261                  $self->create_lookup($rec, @{$arg->{'lookup'}});                  $self->create_lookup($rec, @{$arg->{'lookup'}});
262    
263                    $self->progress_bar($mfn,$maxmfn);
264    
265          }          }
266    
267          $self->{'current_mfn'} = 1;          $self->{'current_mfn'} = 1;
268            $self->{'last_pcnt'} = 0;
269    
270            $log->debug("max mfn: $maxmfn");
271    
272          # store max mfn and return it.          # store max mfn and return it.
273          return $self->{'max_mfn'} = $maxmfn;          return $self->{'max_mfn'} = $maxmfn;
# Line 186  it's implemented, that is). Line 285  it's implemented, that is).
285  sub fetch_rec {  sub fetch_rec {
286          my $self = shift;          my $self = shift;
287    
288          my $mfn = $self->{'current_mfn'}++ || confess "it seems that you didn't load database!";          my $log = $self->_get_logger();
289    
290            my $mfn = $self->{'current_mfn'}++ || $log->logconfess("it seems that you didn't load database!");
291    
292          if ($mfn > $self->{'max_mfn'}) {          if ($mfn > $self->{'max_mfn'}) {
293                  $self->{'current_mfn'} = $self->{'max_mfn'};                  $self->{'current_mfn'} = $self->{'max_mfn'};
294                    $log->debug("at EOF");
295                  return;                  return;
296          }          }
297    
298          return $self->{'data'}->{$mfn};          $self->progress_bar($mfn,$self->{'max_mfn'});
299    
300            if ($self->{'low_mem'}) {
301                    return $self->{'db'}->get($mfn);
302            } else {
303                    return $self->{'data'}->{$mfn};
304            }
305    }
306    
307    =head2 progress_bar
308    
309    Draw progress bar on STDERR.
310    
311     $webpac->progress_bar($current, $max);
312    
313    =cut
314    
315    sub progress_bar {
316            my $self = shift;
317    
318            my ($curr,$max) = @_;
319    
320            my $log = $self->_get_logger();
321    
322            $log->logconfess("no current value!") if (! $curr);
323            $log->logconfess("no maximum value!") if (! $max);
324    
325            if ($curr > $max) {
326                    $max = $curr;
327                    $log->debug("overflow to $curr");
328            }
329    
330            $self->{'last_pcnt'} ||= 1;
331    
332            $self->{'last_pcnt'} = $curr if ($curr < $self->{'last_pcnt'});
333    
334            my $p = int($curr * 100 / $max);
335            if ($p != $self->{'last_pcnt'}) {
336                    printf STDERR ("%5d / %5d [%-51s] %-2d %% \r",$curr,$max,"=" x ($p/2).">", $p );
337                    $self->{'last_pcnt'} = $p;
338            }
339            print STDERR "\n" if ($p == 100);
340  }  }
341    
342  =head2 open_import_xml  =head2 open_import_xml
# Line 207  Read file from C<import_xml/> directory Line 350  Read file from C<import_xml/> directory
350  sub open_import_xml {  sub open_import_xml {
351          my $self = shift;          my $self = shift;
352    
353            my $log = $self->_get_logger();
354    
355          my $arg = {@_};          my $arg = {@_};
356          confess "need type to load file from import_xml/" if (! $arg->{'type'});          $log->logconfess("need type to load file from import_xml/") if (! $arg->{'type'});
357    
358          $self->{'type'} = $arg->{'type'};          $self->{'type'} = $arg->{'type'};
359    
# Line 217  sub open_import_xml { Line 362  sub open_import_xml {
362    
363          $self->{'tag'} = $type2tag{$type_base};          $self->{'tag'} = $type2tag{$type_base};
364    
365          print STDERR "using type ",$self->{'type'}," tag ",$self->{'tag'},"\n" if ($self->{'debug'});          $log->info("using type '",$self->{'type'},"' tag <",$self->{'tag'},">");
366    
367          my $f = "./import_xml/".$self->{'type'}.".xml";          my $f = "./import_xml/".$self->{'type'}.".xml";
368          confess "import_xml file '$f' doesn't exist!" if (! -e "$f");          $log->logconfess("import_xml file '$f' doesn't exist!") if (! -e "$f");
369    
370            $log->info("reading '$f'");
371    
372          print STDERR "reading '$f'\n" if ($self->{'debug'});          $self->{'import_xml_file'} = $f;
373    
374          $self->{'import_xml'} = XMLin($f,          $self->{'import_xml'} = XMLin($f,
375                  ForceArray => [ $self->{'tag'}, 'config', 'format' ],                  ForceArray => [ $self->{'tag'}, 'config', 'format' ],
                 ForceContent => 1  
376          );          );
377    
378          print Dumper($self->{'import_xml'});          $log->debug("import xml is ",sub { Dumper($self->{'import_xml'}) });
379    
380  }  }
381    
# Line 237  sub open_import_xml { Line 383  sub open_import_xml {
383    
384  Create lookup from record using lookup definition.  Create lookup from record using lookup definition.
385    
386     $self->create_lookup($rec, @lookups);
387    
388    Called internally by C<open_*> methods.
389    
390  =cut  =cut
391    
392  sub create_lookup {  sub create_lookup {
393          my $self = shift;          my $self = shift;
394    
395          my $rec = shift || confess "need record to create lookup";          my $log = $self->_get_logger();
396          confess("need HASH as first argument!") if ($rec !~ /HASH/o);  
397            my $rec = shift || $log->logconfess("need record to create lookup");
398            $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
399    
400          foreach my $i (@_) {          foreach my $i (@_) {
401                  if ($i->{'eval'}) {                  $log->logconfess("need key") unless defined($i->{'key'});
402                          my $eval = $self->fill_in($rec,$i->{'eval'});                  $log->logconfess("need val") unless defined($i->{'val'});
403                          my $key = $self->fill_in($rec,$i->{'key'});  
404                          my @val = $self->fill_in($rec,$i->{'val'});                  if (defined($i->{'eval'})) {
405                          if ($key && @val && eval $eval) {                          # eval first, so we can skip fill_in for key and val
406                            my $eval = $self->fill_in($rec,$i->{'eval'}) || next;
407                            if ($self->_eval($eval)) {
408                                    my $key = $self->fill_in($rec,$i->{'key'}) || next;
409                                    my @val = $self->fill_in($rec,$i->{'val'}) || next;
410                                    $log->debug("stored $key = ",sub { join(" | ",@val) });
411                                  push @{$self->{'lookup'}->{$key}}, @val;                                  push @{$self->{'lookup'}->{$key}}, @val;
412                          }                          }
413                  } else {                  } else {
414                          my $key = $self->fill_in($rec,$i->{'key'});                          my $key = $self->fill_in($rec,$i->{'key'}) || next;
415                          my @val = $self->fill_in($rec,$i->{'val'});                          my @val = $self->fill_in($rec,$i->{'val'}) || next;
416                          if ($key && @val) {                          $log->debug("stored $key = ",sub { join(" | ",@val) });
417                                  push @{$self->{'lookup'}->{$key}}, @val;                          push @{$self->{'lookup'}->{$key}}, @val;
                         }  
418                  }                  }
419          }          }
420  }  }
# Line 267  sub create_lookup { Line 423  sub create_lookup {
423    
424  Returns value from record.  Returns value from record.
425    
426   $self->get_data(\$rec,$f,$sf,$i,\$found);   my $text = $self->get_data(\$rec,$f,$sf,$i,\$found);
427    
428  Arguments are:  Arguments are:
429  record reference C<$rec>,  record reference C<$rec>,
# Line 275  field C<$f>, Line 431  field C<$f>,
431  optional subfiled C<$sf>,  optional subfiled C<$sf>,
432  index for repeatable values C<$i>.  index for repeatable values C<$i>.
433    
434  Optinal variable C<$found> will be incremeted if thre  Optinal variable C<$found> will be incremeted if there
435  is field.  is field.
436    
437  Returns value or empty string.  Returns value or empty string.
# Line 286  sub get_data { Line 442  sub get_data {
442          my $self = shift;          my $self = shift;
443    
444          my ($rec,$f,$sf,$i,$found) = @_;          my ($rec,$f,$sf,$i,$found) = @_;
445    
446          if ($$rec->{$f}) {          if ($$rec->{$f}) {
447                    return '' if (! $$rec->{$f}->[$i]);
448                    no strict 'refs';
449                  if ($sf && $$rec->{$f}->[$i]->{$sf}) {                  if ($sf && $$rec->{$f}->[$i]->{$sf}) {
450                          $$found++ if (defined($$found));                          $$found++ if (defined($$found));
451                          return $$rec->{$f}->[$i]->{$sf};                          return $$rec->{$f}->[$i]->{$sf};
# Line 315  Workhourse of all: takes record from in- Line 474  Workhourse of all: takes record from in-
474  strings with placeholders and returns string or array of with substituted  strings with placeholders and returns string or array of with substituted
475  values from record.  values from record.
476    
477   $webpac->fill_in($rec,'v250^a');   my $text = $webpac->fill_in($rec,'v250^a');
478    
479  Optional argument is ordinal number for repeatable fields. By default,  Optional argument is ordinal number for repeatable fields. By default,
480  it's assume to be first repeatable field (fields are perl array, so first  it's assume to be first repeatable field (fields are perl array, so first
481  element is 0).  element is 0).
482  Following example will read second value from repeatable field.  Following example will read second value from repeatable field.
483    
484   $webpac->fill_in($rec,'Title: v250^a',1);   my $text = $webpac->fill_in($rec,'Title: v250^a',1);
485    
486  This function B<does not> perform parsing of format to inteligenty skip  This function B<does not> perform parsing of format to inteligenty skip
487  delimiters before fields which aren't used.  delimiters before fields which aren't used.
488    
489    This method will automatically decode UTF-8 string to local code page
490    if needed.
491    
492  =cut  =cut
493    
494  sub fill_in {  sub fill_in {
495          my $self = shift;          my $self = shift;
496    
497          my $rec = shift || confess "need data record";          my $log = $self->_get_logger();
498          my $format = shift || confess "need format to parse";  
499            my $rec = shift || $log->logconfess("need data record");
500            my $format = shift || $log->logconfess("need format to parse");
501          # iteration (for repeatable fields)          # iteration (for repeatable fields)
502          my $i = shift || 0;          my $i = shift || 0;
503    
504          # FIXME remove for speedup?          # FIXME remove for speedup?
505          confess("need HASH as first argument!") if ($rec !~ /HASH/o);          $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
506    
507            if (utf8::is_utf8($format)) {
508                    $format = $self->_x($format);
509            }
510    
511          my $found = 0;          my $found = 0;
512    
# Line 347  sub fill_in { Line 515  sub fill_in {
515          $eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s);          $eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s);
516    
517          # do actual replacement of placeholders          # do actual replacement of placeholders
518          $format =~ s/v(\d+)(?:\^(\w))*/$self->get_data(\$rec,$1,$2,$i,\$found)/ges;          $format =~ s/v(\d+)(?:\^(\w))?/$self->get_data(\$rec,$1,$2,$i,\$found)/ges;
519    
520          if ($found) {          if ($found) {
521                    $log->debug("format: $format");
522                  if ($eval_code) {                  if ($eval_code) {
523                          my $eval = $self->fill_in($rec,$eval_code,$i);                          my $eval = $self->fill_in($rec,$eval_code,$i);
524                          return if (! eval $eval);                          return if (! $self->_eval($eval));
525                  }                  }
526                  # do we have lookups?                  # do we have lookups?
527                  if ($format =~ /\[[^\[\]]+\]/o) {                  if ($format =~ /$LOOKUP_REGEX/o) {
528                            $log->debug("format '$format' has lookup");
529                          return $self->lookup($format);                          return $self->lookup($format);
530                  } else {                  } else {
531                          return $format;                          return $format;
# Line 369  sub fill_in { Line 539  sub fill_in {
539    
540  Perform lookups on format supplied to it.  Perform lookups on format supplied to it.
541    
542   my $txt = $self->lookup('[v900]');   my $text = $self->lookup('[v900]');
543    
544  Lookups can be nested (like C<[d:[a:[v900]]]>).  Lookups can be nested (like C<[d:[a:[v900]]]>).
545    
# Line 378  Lookups can be nested (like C<[d:[a:[v90 Line 548  Lookups can be nested (like C<[d:[a:[v90
548  sub lookup {  sub lookup {
549          my $self = shift;          my $self = shift;
550    
551          my $tmp = shift || confess "need format";          my $log = $self->_get_logger();
552    
553          if ($tmp =~ /\[[^\[\]]+\]/o) {          my $tmp = shift || $log->logconfess("need format");
554    
555            if ($tmp =~ /$LOOKUP_REGEX/o) {
556                  my @in = ( $tmp );                  my @in = ( $tmp );
557  #print "##lookup $tmp\n";  
558                    $log->debug("lookup for: ",$tmp);
559    
560                  my @out;                  my @out;
561                  while (my $f = shift @in) {                  while (my $f = shift @in) {
562                          if ($f =~ /\[([^\[\]]+)\]/) {                          if ($f =~ /$LOOKUP_REGEX_SAVE/o) {
563                                  my $k = $1;                                  my $k = $1;
564                                  if ($self->{'lookup'}->{$k}) {                                  if ($self->{'lookup'}->{$k}) {
 #print "## lookup key = $k\n";  
565                                          foreach my $nv (@{$self->{'lookup'}->{$k}}) {                                          foreach my $nv (@{$self->{'lookup'}->{$k}}) {
566                                                  my $tmp2 = $f;                                                  my $tmp2 = $f;
567                                                  $tmp2 =~ s/\[$k\]/$nv/g;                                                  $tmp2 =~ s/lookup{$k}/$nv/g;
568                                                  push @in, $tmp2;                                                  push @in, $tmp2;
 #print "## lookup in => $tmp2\n";  
569                                          }                                          }
570                                  } else {                                  } else {
571                                          undef $f;                                          undef $f;
572                                  }                                  }
573                          } elsif ($f) {                          } elsif ($f) {
574                                  push @out, $f;                                  push @out, $f;
 #print "## lookup out => $f\n";  
575                          }                          }
576                  }                  }
577                    $log->logconfess("return is array and it's not expected!") unless wantarray;
578                  return @out;                  return @out;
579          } else {          } else {
580                  return $tmp;                  return $tmp;
# Line 415  Perform smart parsing of string, skippin Line 587  Perform smart parsing of string, skippin
587  defined. It can also eval code in format starting with C<eval{...}> and  defined. It can also eval code in format starting with C<eval{...}> and
588  return output or nothing depending on eval code.  return output or nothing depending on eval code.
589    
590   $webpac->parse($rec,'eval{"v901^a" eq "Deskriptor"}descriptor: v250^a', $i);   my $text = $webpac->parse($rec,'eval{"v901^a" eq "Deskriptor"}descriptor: v250^a', $i);
591    
592  =cut  =cut
593    
# Line 426  sub parse { Line 598  sub parse {
598    
599          return if (! $format_utf8);          return if (! $format_utf8);
600    
601          confess("need HASH as first argument!") if ($rec !~ /HASH/o);          my $log = $self->_get_logger();
602          confess("need utf2cp Text::Iconv object!") if (! $self->{'utf2cp'});  
603            $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
604            $log->logconfess("need utf2cp Text::Iconv object!") if (! $self->{'utf2cp'});
605    
606          $i = 0 if (! $i);          $i = 0 if (! $i);
607    
608          my $format = $self->{'utf2cp'}->convert($format_utf8) || confess("can't convert '$format_utf8' from UTF-8 to ",$self->{'code_page'});          my $format = $self->_x($format_utf8) || $log->logconfess("can't convert '$format_utf8' from UTF-8 to ",$self->{'code_page'});
609    
610          my @out;          my @out;
611    
612            $log->debug("format: $format");
613    
614          my $eval_code;          my $eval_code;
615          # remove eval{...} from beginning          # remove eval{...} from beginning
616          $eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s);          $eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s);
# Line 442  sub parse { Line 618  sub parse {
618          my $prefix;          my $prefix;
619          my $all_found=0;          my $all_found=0;
620    
621  #print "## $format\n";          while ($format =~ s/^(.*?)v(\d+)(?:\^(\w))?//s) {
         while ($format =~ s/^(.*?)v(\d+)(?:\^(\w))*//s) {  
 #print "## [ $1 | $2 | $3 ] $format\n";  
622    
623                  my $del = $1 || '';                  my $del = $1 || '';
624                  $prefix ||= $del if ($all_found == 0);                  $prefix ||= $del if ($all_found == 0);
# Line 461  sub parse { Line 635  sub parse {
635    
636          return if (! $all_found);          return if (! $all_found);
637    
638          my $out = join('',@out) . $format;          my $out = join('',@out);
639    
640            if ($out) {
641                    # add rest of format (suffix)
642                    $out .= $format;
643    
644                    # add prefix if not there
645                    $out = $prefix . $out if ($out !~ m/^\Q$prefix\E/);
646    
647                    $log->debug("result: $out");
648            }
649    
         # add prefix if not there  
         $out = $prefix . $out if ($out !~ m/^\Q$prefix\E/);  
           
650          if ($eval_code) {          if ($eval_code) {
651                  my $eval = $self->fill_in($rec,$eval_code,$i);                  my $eval = $self->fill_in($rec,$eval_code,$i);
652                  return if (! eval $eval);                  $log->debug("about to eval{",$eval,"} format: $out");
653                    return if (! $self->_eval($eval));
654          }          }
655    
656          return $out;          return $out;
657  }  }
658    
659  =head2 data_structure  =head2 parse_to_arr
660    
661  Create in-memory data structure which represents layout from C<import_xml>.  Similar to C<parse>, but returns array of all repeatable fields
 It is used later to produce output.  
662    
663   my $ds = $webpac->data_structure($rec);   my @arr = $webpac->parse_to_arr($rec,'v250^a');
664    
665  =cut  =cut
666    
667  # private method _sort_by_order  sub parse_to_arr {
 # sort subrouting using order="" attribute  
 sub _sort_by_order {  
668          my $self = shift;          my $self = shift;
669    
670          my $va = $self->{'import_xml'}->{'indexer'}->{$a}->{'order'} ||          my ($rec, $format_utf8) = @_;
                 $self->{'import_xml'}->{'indexer'}->{$a};  
         my $vb = $self->{'import_xml'}->{'indexer'}->{$b}->{'order'} ||  
                 $self->{'import_xml'}->{'indexer'}->{$b};  
671    
672          return $va <=> $vb;          my $log = $self->_get_logger();
673    
674            $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
675            return if (! $format_utf8);
676    
677            my $i = 0;
678            my @arr;
679    
680            while (my $v = $self->parse($rec,$format_utf8,$i++)) {
681                    push @arr, $v;
682            }
683    
684            $log->debug("format '$format_utf8' returned ",--$i," elements: ", sub { join(" | ",@arr) }) if (@arr);
685    
686            return @arr;
687    }
688    
689    =head2 fill_in_to_arr
690    
691    Similar to C<fill_in>, but returns array of all repeatable fields. Usable
692    for fields which have lookups, so they shouldn't be parsed but rather
693    C<fill_id>ed.
694    
695     my @arr = $webpac->fill_in_to_arr($rec,'[v900];;[v250^a]');
696    
697    =cut
698    
699    sub fill_in_to_arr {
700            my $self = shift;
701    
702            my ($rec, $format_utf8) = @_;
703    
704            my $log = $self->_get_logger();
705    
706            $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
707            return if (! $format_utf8);
708    
709            my $i = 0;
710            my @arr;
711    
712            while (my @v = $self->fill_in($rec,$format_utf8,$i++)) {
713                    push @arr, @v;
714            }
715    
716            $log->debug("format '$format_utf8' returned ",--$i," elements: ", sub { join(" | ",@arr) }) if (@arr);
717    
718            return @arr;
719  }  }
720    
721    
722    =head2 data_structure
723    
724    Create in-memory data structure which represents layout from C<import_xml>.
725    It is used later to produce output.
726    
727     my @ds = $webpac->data_structure($rec);
728    
729    This method will also set C<$webpac->{'currnet_filename'}> if there is
730    <filename> tag in C<import_xml> and C<$webpac->{'headline'}> if there is
731    <headline> tag.
732    
733    =cut
734    
735  sub data_structure {  sub data_structure {
736          my $self = shift;          my $self = shift;
737    
738            my $log = $self->_get_logger();
739    
740          my $rec = shift;          my $rec = shift;
741          confess("need HASH as first argument!") if ($rec !~ /HASH/o);          $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
742    
743            undef $self->{'currnet_filename'};
744            undef $self->{'headline'};
745    
746          my @sorted_tags;          my @sorted_tags;
747          if ($self->{tags_by_order}) {          if ($self->{tags_by_order}) {
# Line 510  sub data_structure { Line 751  sub data_structure {
751                  $self->{tags_by_order} = \@sorted_tags;                  $self->{tags_by_order} = \@sorted_tags;
752          }          }
753    
754          my $ds;          my @ds;
755    
756            $log->debug("tags: ",sub { join(", ",@sorted_tags) });
757    
758          foreach my $field (@sorted_tags) {          foreach my $field (@sorted_tags) {
759    
760                  my $row;                  my $row;
                 my $i = 0;  
761    
762  #print "field $field [",$self->{'tag'},"] = ",Dumper($self->{'import_xml'}->{'indexer'}->{$field}->{$self->{'tag'}});  #print "field $field [",$self->{'tag'},"] = ",Dumper($self->{'import_xml'}->{'indexer'}->{$field}->{$self->{'tag'}});
763    
764                  foreach my $tag (@{$self->{'import_xml'}->{'indexer'}->{$field}->{$self->{'tag'}}}) {                  foreach my $tag (@{$self->{'import_xml'}->{'indexer'}->{$field}->{$self->{'tag'}}}) {
765                            my $format = $tag->{'value'} || $tag->{'content'};
766    
767                            $log->debug("format: $format");
768    
769                          my $v = $self->parse($rec,$tag->{'content'},$i);                          my @v;
770  print "## $i:",$tag->{'content'}," = ",($v || 'null'),"\n";                          if ($format =~ /$LOOKUP_REGEX/o) {
771                                    @v = $self->fill_in_to_arr($rec,$format);
772                            } else {
773                                    @v = $self->parse_to_arr($rec,$format);
774                            }
775                            next if (! @v);
776    
777                          next if (!$v || $v && $v eq '');                          # use format?
778                            if ($tag->{'format_name'}) {
779                                    @v = map { $self->apply_format($tag->{'format_name'},$tag->{'format_delimiter'},$_) } @v;
780                            }
781    
782                            if ($field eq 'filename') {
783                                    $self->{'current_filename'} = join('',@v);
784                                    $log->debug("filename: ",$self->{'current_filename'});
785                            } elsif ($field eq 'headline') {
786                                    $self->{'headline'} .= join('',@v);
787                                    $log->debug("headline: ",$self->{'headline'});
788                                    next; # don't return headline in data_structure!
789                            }
790    
791                          # does tag have type?                          # does tag have type?
792                          if ($tag->{'type'}) {                          if ($tag->{'type'}) {
793                                  push @{$row->{$tag->{'type'}}}, $v;                                  push @{$row->{$tag->{'type'}}}, @v;
794                          } else {                          } else {
795                                  push @{$row->{'display'}}, $v;                                  push @{$row->{'display'}}, @v;
796                                  push @{$row->{'swish'}}, $v;                                  push @{$row->{'swish'}}, @v;
797                          }                          }
798    
799    
800                    }
801    
802                    if ($row) {
803                            $row->{'tag'} = $field;
804    
805                            # TODO: name_sigular, name_plural
806                            my $name = $self->{'import_xml'}->{'indexer'}->{$field}->{'name'};
807                            $row->{'name'} = $name ? $self->_x($name) : $field;
808    
809                            push @ds, $row;
810    
811                            $log->debug("row $field: ",sub { Dumper($row) });
812                  }                  }
813    
814                  push @{$ds->{$field}}, $row if ($row);          }
815    
816            return @ds;
817    
818    }
819    
820    =head2 output
821    
822    Create output from in-memory data structure using Template Toolkit template.
823    
824    my $text = $webpac->output( template => 'text.tt', data => @ds );
825    
826    =cut
827    
828    sub output {
829            my $self = shift;
830    
831            my $args = {@_};
832    
833            my $log = $self->_get_logger();
834    
835            $log->logconfess("need template name") if (! $args->{'template'});
836            $log->logconfess("need data array") if (! $args->{'data'});
837    
838            my $out;
839    
840            $self->{'tt'}->process(
841                    $args->{'template'},
842                    $args,
843                    \$out
844            ) || confess $self->{'tt'}->error();
845    
846            return $out;
847    }
848    
849    =head2 output_file
850    
851    Create output from in-memory data structure using Template Toolkit template
852    to a file.
853    
854     $webpac->output_file(
855            file => 'out.txt',
856            template => 'text.tt',
857            data => @ds
858     );
859    
860    =cut
861    
862    sub output_file {
863            my $self = shift;
864    
865            my $args = {@_};
866    
867            my $log = $self->_get_logger();
868    
869            my $file = $args->{'file'} || $log->logconfess("need file name");
870    
871            $log->debug("creating file ",$file);
872    
873            open(my $fh, ">", $file) || $log->logdie("can't open output file '$file': $!");
874            print $fh $self->output(
875                    template => $args->{'template'},
876                    data => $args->{'data'},
877            ) || $log->logdie("print: $!");
878            close($fh) || $log->logdie("close: $!");
879    }
880    
881    =head2 apply_format
882    
883    Apply format specified in tag with C<format_name="name"> and
884    C<format_delimiter=";;">.
885    
886     my $text = $webpac->apply_format($format_name,$format_delimiter,$data);
887    
888    Formats can contain C<lookup{...}> if you need them.
889    
890    =cut
891    
892    sub apply_format {
893            my $self = shift;
894    
895            my ($name,$delimiter,$data) = @_;
896    
897            my $log = $self->_get_logger();
898    
899            if (! $self->{'import_xml'}->{'format'}->{$name}) {
900                    $log->warn("<format name=\"$name\"> is not defined in ",$self->{'import_xml_file'});
901                    return $data;
902            }
903    
904            $log->warn("no delimiter for format $name") if (! $delimiter);
905    
906            my $format = $self->_x($self->{'import_xml'}->{'format'}->{$name}->{'content'}) || $log->logdie("can't find format '$name'");
907    
908            my @data = split(/\Q$delimiter\E/, $data);
909    
910            my $out = sprintf($format, @data);
911            $log->debug("using format $name [$format] on $data to produce: $out");
912    
913            if ($out =~ m/$LOOKUP_REGEX/o) {
914                    return $self->lookup($out);
915            } else {
916                    return $out;
917            }
918    
919    }
920    
921    
922    #
923    #
924    #
925    
926    =head1 INTERNAL METHODS
927    
928    Here is a quick list of internal methods, mostly useful to turn debugging
929    on them (see L<LOGGING> below for explanation).
930    
931    =cut
932    
933    =head2 _eval
934    
935    Internal function to eval code without C<strict 'subs'>.
936    
937    =cut
938    
939    sub _eval {
940            my $self = shift;
941    
942            my $code = shift || return;
943    
944            my $log = $self->_get_logger();
945    
946            no strict 'subs';
947            my $ret = eval $code;
948            if ($@) {
949                    $log->error("problem with eval code [$code]: $@");
950          }          }
951    
952          print Dumper($ds);          $log->debug("eval: ",$code," [",$ret,"]");
953    
954            return $ret || 0;
955    }
956    
957    =head2 _sort_by_order
958    
959    Sort xml tags data structure accoding to C<order=""> attribute.
960    
961    =cut
962    
963    sub _sort_by_order {
964            my $self = shift;
965    
966            my $va = $self->{'import_xml'}->{'indexer'}->{$a}->{'order'} ||
967                    $self->{'import_xml'}->{'indexer'}->{$a};
968            my $vb = $self->{'import_xml'}->{'indexer'}->{$b}->{'order'} ||
969                    $self->{'import_xml'}->{'indexer'}->{$b};
970    
971            return $va <=> $vb;
972    }
973    
974    =head2 _get_logger
975    
976    Get C<Log::Log4perl> object with a twist: domains are defined for each
977    method
978    
979     my $log = $webpac->_get_logger();
980    
981    =cut
982    
983    sub _get_logger {
984            my $self = shift;
985    
986            my $name = (caller(1))[3] || caller;
987            return get_logger($name);
988  }  }
989    
990    =head2 _x
991    
992    Convert string from UTF-8 to code page defined in C<import_xml>.
993    
994     my $text = $webpac->_x('utf8 text');
995    
996    =cut
997    
998    sub _x {
999            my $self = shift;
1000            my $utf8 = shift || return;
1001    
1002            return $self->{'utf2cp'}->convert($utf8) ||
1003                    $self->_get_logger()->logwarn("can't convert '$utf8'");
1004    }
1005    
1006    #
1007    #
1008    #
1009    
1010    =head1 LOGGING
1011    
1012    Logging in WebPAC is performed by L<Log::Log4perl> with config file
1013    C<log.conf>.
1014    
1015    Methods defined above have different levels of logging, so
1016    it's descriptions will be useful to turn (mostry B<debug> logging) on
1017    or off to see why WabPAC isn't perforing as you expect it (it might even
1018    be a bug!).
1019    
1020    B<This is different from normal Log4perl behaviour>. To repeat, you can
1021    also use method names, and not only classes (which are just few)
1022    to filter logging.
1023    
1024    =cut
1025    
1026  1;  1;

Legend:
Removed from v.366  
changed lines
  Added in v.421

  ViewVC Help
Powered by ViewVC 1.1.26