/[webpac]/trunk2/lib/WebPAC.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk2/lib/WebPAC.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 366 by dpavlin, Thu Jun 17 01:44:25 2004 UTC revision 389 by dpavlin, Tue Jul 20 17:15:48 2004 UTC
# Line 1  Line 1 
1  package WebPAC;  package WebPAC;
2    
3    use warnings;
4    use strict;
5    
6  use Carp;  use Carp;
7  use Text::Iconv;  use Text::Iconv;
8  use Config::IniFiles;  use Config::IniFiles;
9  use XML::Simple;  use XML::Simple;
10    use Template;
11    use Log::Log4perl qw(get_logger :levels);
12    
13  use Data::Dumper;  use Data::Dumper;
14    
15    #my $LOOKUP_REGEX = '\[[^\[\]]+\]';
16    #my $LOOKUP_REGEX_SAVE = '\[([^\[\]]+)\]';
17    my $LOOKUP_REGEX = 'lookup{[^\{\}]+}';
18    my $LOOKUP_REGEX_SAVE = 'lookup{([^\{\}]+)}';
19    
20  =head1 NAME  =head1 NAME
21    
22  WebPAC - base class for WebPAC  WebPAC - base class for WebPAC
# Line 49  sub new { Line 59  sub new {
59          my $self = {@_};          my $self = {@_};
60          bless($self, $class);          bless($self, $class);
61    
62            my $log_file = $self->{'log'} || "log.conf";
63            Log::Log4perl->init($log_file);
64    
65            my $log = $self->_get_logger();
66    
67          # fill in default values          # fill in default values
68          # output codepage          # output codepage
69          $self->{'code_page'} = 'ISO-8859-2' if (! $self->{'code_page'});          $self->{'code_page'} = 'ISO-8859-2' if (! $self->{'code_page'});
# Line 56  sub new { Line 71  sub new {
71          #          #
72          # read global.conf          # read global.conf
73          #          #
74            $log->debug("read 'global.conf'");
75    
76          $self->{global_config_file} = new Config::IniFiles( -file => 'global.conf' ) || croak "can't open 'global.conf'";          my $config = new Config::IniFiles( -file => 'global.conf' ) || $log->logcroak("can't open 'global.conf'");
77    
78          # read global config parametars          # read global config parametars
79          foreach my $var (qw(          foreach my $var (qw(
# Line 67  sub new { Line 83  sub new {
83                          dbi_passwd                          dbi_passwd
84                          show_progress                          show_progress
85                          my_unac_filter                          my_unac_filter
86                            output_template
87                  )) {                  )) {
88                  $self->{global_config}->{$var} = $self->{global_config_file}->val('global', $var);                  $self->{'global_config'}->{$var} = $config->val('global', $var);
89          }          }
90    
91          #          #
92          # read indexer config file          # read indexer config file
93          #          #
94    
95          $self->{indexer_config_file} = new Config::IniFiles( -file => $self->{config_file} ) || croak "can't open '$self->{config_file}'";          $self->{indexer_config_file} = new Config::IniFiles( -file => $self->{config_file} ) || $log->logcroak("can't open '",$self->{config_file},"'");
96    
97            # create UTF-8 convertor for import_xml files
98          $self->{'utf2cp'} = Text::Iconv->new('UTF-8' ,$self->{'code_page'});          $self->{'utf2cp'} = Text::Iconv->new('UTF-8' ,$self->{'code_page'});
99    
100            # create Template toolkit instance
101            $self->{'tt'} = Template->new(
102                    INCLUDE_PATH => ($self->{'global_config_file'}->{'output_template'} || './output_template'),
103    #               FILTERS => {
104    #                       'foo' => \&foo_filter,
105    #               },
106                    EVAL_PERL => 1,
107            );
108    
109          return $self;          return $self;
110  }  }
111    
# Line 97  By default, ISIS code page is assumed to Line 125  By default, ISIS code page is assumed to
125  If optional parametar C<limit_mfn> is set, it will read just 500 records  If optional parametar C<limit_mfn> is set, it will read just 500 records
126  from database in example above.  from database in example above.
127    
 Returns number of last record read into memory (size of database, really).  
   
128  C<lookup> argument is an array of lookups to create. Each lookup must have C<key> and  C<lookup> argument is an array of lookups to create. Each lookup must have C<key> and
129  C<val>. Optional parametar C<eval> is perl code to evaluate before storing  C<val>. Optional parametar C<eval> is perl code to evaluate before storing
130  value in index.  value in index.
# Line 110  value in index. Line 136  value in index.
136      'val' => 'v900' },      'val' => 'v900' },
137   ]   ]
138    
139    Returns number of last record read into memory (size of database, really).
140    
141  =cut  =cut
142    
143  sub open_isis {  sub open_isis {
144          my $self = shift;          my $self = shift;
145          my $arg = {@_};          my $arg = {@_};
146    
147          croak "need filename" if (! $arg->{'filename'});          my $log = $self->_get_logger();
148    
149            $log->logcroak("need filename") if (! $arg->{'filename'});
150          my $code_page = $arg->{'code_page'} || '852';          my $code_page = $arg->{'code_page'} || '852';
151    
152          use OpenIsis;          use OpenIsis;
# Line 126  sub open_isis { Line 156  sub open_isis {
156          # create Text::Iconv object          # create Text::Iconv object
157          my $cp = Text::Iconv->new($code_page,$self->{'code_page'});          my $cp = Text::Iconv->new($code_page,$self->{'code_page'});
158    
159          print STDERR "reading ISIS database '",$arg->{'filename'},"'\n" if ($self->{'debug'});          $log->info("reading ISIS database '",$arg->{'filename'},"'");
160    
161          my $isis_db = OpenIsis::open($arg->{'filename'});          my $isis_db = OpenIsis::open($arg->{'filename'});
162    
# Line 134  sub open_isis { Line 164  sub open_isis {
164    
165          $maxmfn = $self->{limit_mfn} if ($self->{limit_mfn});          $maxmfn = $self->{limit_mfn} if ($self->{limit_mfn});
166    
167          print STDERR "processing $maxmfn records...\n" if ($self->{'debug'});          $log->info("processing $maxmfn records...");
168    
169          # read database          # read database
170          for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) {          for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) {
# Line 158  sub open_isis { Line 188  sub open_isis {
188    
189                                          push @{$self->{'data'}->{$mfn}->{$k}}, $val;                                          push @{$self->{'data'}->{$mfn}->{$k}}, $val;
190                                  }                                  }
191                            } else {
192                                    push @{$self->{'data'}->{$mfn}->{'000'}}, $mfn;
193                          }                          }
194    
195                  }                  }
# Line 166  sub open_isis { Line 198  sub open_isis {
198                  my $rec = $self->{'data'}->{$mfn};                  my $rec = $self->{'data'}->{$mfn};
199                  $self->create_lookup($rec, @{$arg->{'lookup'}});                  $self->create_lookup($rec, @{$arg->{'lookup'}});
200    
201                    $self->progress_bar($mfn,$maxmfn);
202    
203          }          }
204    
205          $self->{'current_mfn'} = 1;          $self->{'current_mfn'} = 1;
206            $self->{'last_pcnt'} = 0;
207    
208          # store max mfn and return it.          # store max mfn and return it.
209          return $self->{'max_mfn'} = $maxmfn;          return $self->{'max_mfn'} = $maxmfn;
# Line 186  it's implemented, that is). Line 221  it's implemented, that is).
221  sub fetch_rec {  sub fetch_rec {
222          my $self = shift;          my $self = shift;
223    
224          my $mfn = $self->{'current_mfn'}++ || confess "it seems that you didn't load database!";          my $log = $self->_get_logger();
225    
226            my $mfn = $self->{'current_mfn'}++ || $log->logconfess("it seems that you didn't load database!");
227    
228          if ($mfn > $self->{'max_mfn'}) {          if ($mfn > $self->{'max_mfn'}) {
229                  $self->{'current_mfn'} = $self->{'max_mfn'};                  $self->{'current_mfn'} = $self->{'max_mfn'};
230                    $log->debug("at EOF");
231                  return;                  return;
232          }          }
233    
234            $self->progress_bar($mfn,$self->{'max_mfn'});
235    
236          return $self->{'data'}->{$mfn};          return $self->{'data'}->{$mfn};
237  }  }
238    
239    =head2 progress_bar
240    
241    Draw progress bar on STDERR.
242    
243     $webpac->progress_bar($current, $max);
244    
245    =cut
246    
247    sub progress_bar {
248            my $self = shift;
249    
250            my ($curr,$max) = @_;
251    
252            my $log = $self->_get_logger();
253    
254            $log->logconfess("no current value!") if (! $curr);
255            $log->logconfess("no maximum value!") if (! $max);
256    
257            if ($curr > $max) {
258                    $max = $curr;
259                    $log->debug("overflow to $curr");
260            }
261    
262            $self->{'last_pcnt'} ||= 1;
263    
264            $self->{'last_pcnt'} = $curr if ($curr < $self->{'last_pcnt'});
265    
266            my $p = int($curr * 100 / $max);
267            if ($p != $self->{'last_pcnt'}) {
268                    printf STDERR ("%5d / %5d [%-51s] %-2d %% \r",$curr,$max,"=" x ($p/2).">", $p );
269                    $self->{'last_pcnt'} = $p;
270            }
271    }
272    
273  =head2 open_import_xml  =head2 open_import_xml
274    
275  Read file from C<import_xml/> directory and parse it.  Read file from C<import_xml/> directory and parse it.
# Line 207  Read file from C<import_xml/> directory Line 281  Read file from C<import_xml/> directory
281  sub open_import_xml {  sub open_import_xml {
282          my $self = shift;          my $self = shift;
283    
284            my $log = $self->_get_logger();
285    
286          my $arg = {@_};          my $arg = {@_};
287          confess "need type to load file from import_xml/" if (! $arg->{'type'});          $log->logconfess("need type to load file from import_xml/") if (! $arg->{'type'});
288    
289          $self->{'type'} = $arg->{'type'};          $self->{'type'} = $arg->{'type'};
290    
# Line 217  sub open_import_xml { Line 293  sub open_import_xml {
293    
294          $self->{'tag'} = $type2tag{$type_base};          $self->{'tag'} = $type2tag{$type_base};
295    
296          print STDERR "using type ",$self->{'type'}," tag ",$self->{'tag'},"\n" if ($self->{'debug'});          $log->info("using type '",$self->{'type'},"' tag <",$self->{'tag'},">");
297    
298          my $f = "./import_xml/".$self->{'type'}.".xml";          my $f = "./import_xml/".$self->{'type'}.".xml";
299          confess "import_xml file '$f' doesn't exist!" if (! -e "$f");          $log->logconfess("import_xml file '$f' doesn't exist!") if (! -e "$f");
300    
301            $log->info("reading '$f'");
302    
303          print STDERR "reading '$f'\n" if ($self->{'debug'});          $self->{'import_xml_file'} = $f;
304    
305          $self->{'import_xml'} = XMLin($f,          $self->{'import_xml'} = XMLin($f,
306                  ForceArray => [ $self->{'tag'}, 'config', 'format' ],                  ForceArray => [ $self->{'tag'}, 'config', 'format' ],
                 ForceContent => 1  
307          );          );
308    
309          print Dumper($self->{'import_xml'});          $log->debug("import xml is ",sub { Dumper($self->{'import_xml'}) });
310    
311  }  }
312    
# Line 237  sub open_import_xml { Line 314  sub open_import_xml {
314    
315  Create lookup from record using lookup definition.  Create lookup from record using lookup definition.
316    
317     $self->create_lookup($rec, @lookups);
318    
319    Called internally by C<open_*> methods.
320    
321  =cut  =cut
322    
323  sub create_lookup {  sub create_lookup {
324          my $self = shift;          my $self = shift;
325    
326          my $rec = shift || confess "need record to create lookup";          my $log = $self->_get_logger();
327          confess("need HASH as first argument!") if ($rec !~ /HASH/o);  
328            my $rec = shift || $log->logconfess("need record to create lookup");
329            $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
330    
331          foreach my $i (@_) {          foreach my $i (@_) {
332                  if ($i->{'eval'}) {                  if ($i->{'eval'}) {
# Line 251  sub create_lookup { Line 334  sub create_lookup {
334                          my $key = $self->fill_in($rec,$i->{'key'});                          my $key = $self->fill_in($rec,$i->{'key'});
335                          my @val = $self->fill_in($rec,$i->{'val'});                          my @val = $self->fill_in($rec,$i->{'val'});
336                          if ($key && @val && eval $eval) {                          if ($key && @val && eval $eval) {
337                                    $log->debug("stored $key = ",sub { join(" | ",@val) });
338                                  push @{$self->{'lookup'}->{$key}}, @val;                                  push @{$self->{'lookup'}->{$key}}, @val;
339                          }                          }
340                  } else {                  } else {
341                          my $key = $self->fill_in($rec,$i->{'key'});                          my $key = $self->fill_in($rec,$i->{'key'});
342                          my @val = $self->fill_in($rec,$i->{'val'});                          my @val = $self->fill_in($rec,$i->{'val'});
343                          if ($key && @val) {                          if ($key && @val) {
344                                    $log->debug("stored $key = ",sub { join(" | ",@val) });
345                                  push @{$self->{'lookup'}->{$key}}, @val;                                  push @{$self->{'lookup'}->{$key}}, @val;
346                          }                          }
347                  }                  }
# Line 267  sub create_lookup { Line 352  sub create_lookup {
352    
353  Returns value from record.  Returns value from record.
354    
355   $self->get_data(\$rec,$f,$sf,$i,\$found);   my $text = $self->get_data(\$rec,$f,$sf,$i,\$found);
356    
357  Arguments are:  Arguments are:
358  record reference C<$rec>,  record reference C<$rec>,
# Line 275  field C<$f>, Line 360  field C<$f>,
360  optional subfiled C<$sf>,  optional subfiled C<$sf>,
361  index for repeatable values C<$i>.  index for repeatable values C<$i>.
362    
363  Optinal variable C<$found> will be incremeted if thre  Optinal variable C<$found> will be incremeted if there
364  is field.  is field.
365    
366  Returns value or empty string.  Returns value or empty string.
# Line 286  sub get_data { Line 371  sub get_data {
371          my $self = shift;          my $self = shift;
372    
373          my ($rec,$f,$sf,$i,$found) = @_;          my ($rec,$f,$sf,$i,$found) = @_;
374    
375          if ($$rec->{$f}) {          if ($$rec->{$f}) {
376                    return '' if (! $$rec->{$f}->[$i]);
377                    no strict 'refs';
378                  if ($sf && $$rec->{$f}->[$i]->{$sf}) {                  if ($sf && $$rec->{$f}->[$i]->{$sf}) {
379                          $$found++ if (defined($$found));                          $$found++ if (defined($$found));
380                          return $$rec->{$f}->[$i]->{$sf};                          return $$rec->{$f}->[$i]->{$sf};
# Line 315  Workhourse of all: takes record from in- Line 403  Workhourse of all: takes record from in-
403  strings with placeholders and returns string or array of with substituted  strings with placeholders and returns string or array of with substituted
404  values from record.  values from record.
405    
406   $webpac->fill_in($rec,'v250^a');   my $text = $webpac->fill_in($rec,'v250^a');
407    
408  Optional argument is ordinal number for repeatable fields. By default,  Optional argument is ordinal number for repeatable fields. By default,
409  it's assume to be first repeatable field (fields are perl array, so first  it's assume to be first repeatable field (fields are perl array, so first
410  element is 0).  element is 0).
411  Following example will read second value from repeatable field.  Following example will read second value from repeatable field.
412    
413   $webpac->fill_in($rec,'Title: v250^a',1);   my $text = $webpac->fill_in($rec,'Title: v250^a',1);
414    
415  This function B<does not> perform parsing of format to inteligenty skip  This function B<does not> perform parsing of format to inteligenty skip
416  delimiters before fields which aren't used.  delimiters before fields which aren't used.
417    
418    This method will automatically decode UTF-8 string to local code page
419    if needed.
420    
421  =cut  =cut
422    
423  sub fill_in {  sub fill_in {
424          my $self = shift;          my $self = shift;
425    
426          my $rec = shift || confess "need data record";          my $log = $self->_get_logger();
427          my $format = shift || confess "need format to parse";  
428            my $rec = shift || $log->logconfess("need data record");
429            my $format = shift || $log->logconfess("need format to parse");
430          # iteration (for repeatable fields)          # iteration (for repeatable fields)
431          my $i = shift || 0;          my $i = shift || 0;
432    
433          # FIXME remove for speedup?          # FIXME remove for speedup?
434          confess("need HASH as first argument!") if ($rec !~ /HASH/o);          $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
435    
436            if (utf8::is_utf8($format)) {
437                    $format = $self->_x($format);
438            }
439    
440          my $found = 0;          my $found = 0;
441    
# Line 347  sub fill_in { Line 444  sub fill_in {
444          $eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s);          $eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s);
445    
446          # do actual replacement of placeholders          # do actual replacement of placeholders
447          $format =~ s/v(\d+)(?:\^(\w))*/$self->get_data(\$rec,$1,$2,$i,\$found)/ges;          $format =~ s/v(\d+)(?:\^(\w))?/$self->get_data(\$rec,$1,$2,$i,\$found)/ges;
448    
449          if ($found) {          if ($found) {
450                    $log->debug("format: $format");
451                  if ($eval_code) {                  if ($eval_code) {
452                          my $eval = $self->fill_in($rec,$eval_code,$i);                          my $eval = $self->fill_in($rec,$eval_code,$i);
453                          return if (! eval $eval);                          return if (! $self->_eval($eval));
454                  }                  }
455                  # do we have lookups?                  # do we have lookups?
456                  if ($format =~ /\[[^\[\]]+\]/o) {                  if ($format =~ /$LOOKUP_REGEX/o) {
457                            $log->debug("format '$format' has lookup");
458                          return $self->lookup($format);                          return $self->lookup($format);
459                  } else {                  } else {
460                          return $format;                          return $format;
# Line 369  sub fill_in { Line 468  sub fill_in {
468    
469  Perform lookups on format supplied to it.  Perform lookups on format supplied to it.
470    
471   my $txt = $self->lookup('[v900]');   my $text = $self->lookup('[v900]');
472    
473  Lookups can be nested (like C<[d:[a:[v900]]]>).  Lookups can be nested (like C<[d:[a:[v900]]]>).
474    
# Line 378  Lookups can be nested (like C<[d:[a:[v90 Line 477  Lookups can be nested (like C<[d:[a:[v90
477  sub lookup {  sub lookup {
478          my $self = shift;          my $self = shift;
479    
480          my $tmp = shift || confess "need format";          my $log = $self->_get_logger();
481    
482            my $tmp = shift || $log->logconfess("need format");
483    
484          if ($tmp =~ /\[[^\[\]]+\]/o) {          if ($tmp =~ /$LOOKUP_REGEX/o) {
485                  my @in = ( $tmp );                  my @in = ( $tmp );
486  #print "##lookup $tmp\n";  
487                    $log->debug("lookup for: ",$tmp);
488    
489                  my @out;                  my @out;
490                  while (my $f = shift @in) {                  while (my $f = shift @in) {
491                          if ($f =~ /\[([^\[\]]+)\]/) {                          if ($f =~ /$LOOKUP_REGEX_SAVE/o) {
492                                  my $k = $1;                                  my $k = $1;
493                                  if ($self->{'lookup'}->{$k}) {                                  if ($self->{'lookup'}->{$k}) {
 #print "## lookup key = $k\n";  
494                                          foreach my $nv (@{$self->{'lookup'}->{$k}}) {                                          foreach my $nv (@{$self->{'lookup'}->{$k}}) {
495                                                  my $tmp2 = $f;                                                  my $tmp2 = $f;
496                                                  $tmp2 =~ s/\[$k\]/$nv/g;                                                  $tmp2 =~ s/lookup{$k}/$nv/g;
497                                                  push @in, $tmp2;                                                  push @in, $tmp2;
 #print "## lookup in => $tmp2\n";  
498                                          }                                          }
499                                  } else {                                  } else {
500                                          undef $f;                                          undef $f;
501                                  }                                  }
502                          } elsif ($f) {                          } elsif ($f) {
503                                  push @out, $f;                                  push @out, $f;
 #print "## lookup out => $f\n";  
504                          }                          }
505                  }                  }
506                    $log->logconfess("return is array and it's not expected!") unless wantarray;
507                  return @out;                  return @out;
508          } else {          } else {
509                  return $tmp;                  return $tmp;
# Line 415  Perform smart parsing of string, skippin Line 516  Perform smart parsing of string, skippin
516  defined. It can also eval code in format starting with C<eval{...}> and  defined. It can also eval code in format starting with C<eval{...}> and
517  return output or nothing depending on eval code.  return output or nothing depending on eval code.
518    
519   $webpac->parse($rec,'eval{"v901^a" eq "Deskriptor"}descriptor: v250^a', $i);   my $text = $webpac->parse($rec,'eval{"v901^a" eq "Deskriptor"}descriptor: v250^a', $i);
520    
521  =cut  =cut
522    
# Line 426  sub parse { Line 527  sub parse {
527    
528          return if (! $format_utf8);          return if (! $format_utf8);
529    
530          confess("need HASH as first argument!") if ($rec !~ /HASH/o);          my $log = $self->_get_logger();
531          confess("need utf2cp Text::Iconv object!") if (! $self->{'utf2cp'});  
532            $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
533            $log->logconfess("need utf2cp Text::Iconv object!") if (! $self->{'utf2cp'});
534    
535          $i = 0 if (! $i);          $i = 0 if (! $i);
536    
537          my $format = $self->{'utf2cp'}->convert($format_utf8) || confess("can't convert '$format_utf8' from UTF-8 to ",$self->{'code_page'});          my $format = $self->_x($format_utf8) || $log->logconfess("can't convert '$format_utf8' from UTF-8 to ",$self->{'code_page'});
538    
539          my @out;          my @out;
540    
541            $log->debug("format: $format");
542    
543          my $eval_code;          my $eval_code;
544          # remove eval{...} from beginning          # remove eval{...} from beginning
545          $eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s);          $eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s);
# Line 442  sub parse { Line 547  sub parse {
547          my $prefix;          my $prefix;
548          my $all_found=0;          my $all_found=0;
549    
550  #print "## $format\n";          while ($format =~ s/^(.*?)v(\d+)(?:\^(\w))?//s) {
         while ($format =~ s/^(.*?)v(\d+)(?:\^(\w))*//s) {  
 #print "## [ $1 | $2 | $3 ] $format\n";  
551    
552                  my $del = $1 || '';                  my $del = $1 || '';
553                  $prefix ||= $del if ($all_found == 0);                  $prefix ||= $del if ($all_found == 0);
# Line 461  sub parse { Line 564  sub parse {
564    
565          return if (! $all_found);          return if (! $all_found);
566    
567          my $out = join('',@out) . $format;          my $out = join('',@out);
568    
569            if ($out) {
570                    # add rest of format (suffix)
571                    $out .= $format;
572    
573                    # add prefix if not there
574                    $out = $prefix . $out if ($out !~ m/^\Q$prefix\E/);
575    
576                    $log->debug("result: $out");
577            }
578    
         # add prefix if not there  
         $out = $prefix . $out if ($out !~ m/^\Q$prefix\E/);  
           
579          if ($eval_code) {          if ($eval_code) {
580                  my $eval = $self->fill_in($rec,$eval_code,$i);                  my $eval = $self->fill_in($rec,$eval_code,$i);
581                  return if (! eval $eval);                  $log->debug("about to eval{",$eval,"} format: $out");
582                    return if (! $self->_eval($eval));
583          }          }
584    
585          return $out;          return $out;
586  }  }
587    
588  =head2 data_structure  =head2 parse_to_arr
589    
590  Create in-memory data structure which represents layout from C<import_xml>.  Similar to C<parse>, but returns array of all repeatable fields
 It is used later to produce output.  
591    
592   my $ds = $webpac->data_structure($rec);   my @arr = $webpac->parse_to_arr($rec,'v250^a');
593    
594  =cut  =cut
595    
596  # private method _sort_by_order  sub parse_to_arr {
 # sort subrouting using order="" attribute  
 sub _sort_by_order {  
597          my $self = shift;          my $self = shift;
598    
599          my $va = $self->{'import_xml'}->{'indexer'}->{$a}->{'order'} ||          my ($rec, $format_utf8) = @_;
                 $self->{'import_xml'}->{'indexer'}->{$a};  
         my $vb = $self->{'import_xml'}->{'indexer'}->{$b}->{'order'} ||  
                 $self->{'import_xml'}->{'indexer'}->{$b};  
600    
601          return $va <=> $vb;          my $log = $self->_get_logger();
602    
603            $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
604            return if (! $format_utf8);
605    
606            my $i = 0;
607            my @arr;
608    
609            while (my $v = $self->parse($rec,$format_utf8,$i++)) {
610                    push @arr, $v;
611            }
612    
613            $log->debug("format '$format_utf8' returned ",--$i," elements: ", sub { join(" | ",@arr) }) if (@arr);
614    
615            return @arr;
616  }  }
617    
618    =head2 fill_in_to_arr
619    
620    Similar to C<fill_in>, but returns array of all repeatable fields. Usable
621    for fields which have lookups, so they shouldn't be parsed but rather
622    C<fill_id>ed.
623    
624     my @arr = $webpac->fill_in_to_arr($rec,'[v900];;[v250^a]');
625    
626    =cut
627    
628    sub fill_in_to_arr {
629            my $self = shift;
630    
631            my ($rec, $format_utf8) = @_;
632    
633            my $log = $self->_get_logger();
634    
635            $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
636            return if (! $format_utf8);
637    
638            my $i = 0;
639            my @arr;
640    
641            while (my @v = $self->fill_in($rec,$format_utf8,$i++)) {
642                    push @arr, @v;
643            }
644    
645            $log->debug("format '$format_utf8' returned ",--$i," elements: ", sub { join(" | ",@arr) }) if (@arr);
646    
647            return @arr;
648    }
649    
650    
651    =head2 data_structure
652    
653    Create in-memory data structure which represents layout from C<import_xml>.
654    It is used later to produce output.
655    
656     my @ds = $webpac->data_structure($rec);
657    
658    This method will also set C<$webpac->{'currnet_filename'}> if there is
659    <filename> tag in C<import_xml>.
660    
661    =cut
662    
663  sub data_structure {  sub data_structure {
664          my $self = shift;          my $self = shift;
665    
666            my $log = $self->_get_logger();
667    
668          my $rec = shift;          my $rec = shift;
669          confess("need HASH as first argument!") if ($rec !~ /HASH/o);          $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
670    
671            undef $self->{'currnet_filename'};
672    
673          my @sorted_tags;          my @sorted_tags;
674          if ($self->{tags_by_order}) {          if ($self->{tags_by_order}) {
# Line 510  sub data_structure { Line 678  sub data_structure {
678                  $self->{tags_by_order} = \@sorted_tags;                  $self->{tags_by_order} = \@sorted_tags;
679          }          }
680    
681          my $ds;          my @ds;
682    
683            $log->debug("tags: ",sub { join(", ",@sorted_tags) });
684    
685          foreach my $field (@sorted_tags) {          foreach my $field (@sorted_tags) {
686    
687                  my $row;                  my $row;
                 my $i = 0;  
688    
689  #print "field $field [",$self->{'tag'},"] = ",Dumper($self->{'import_xml'}->{'indexer'}->{$field}->{$self->{'tag'}});  #print "field $field [",$self->{'tag'},"] = ",Dumper($self->{'import_xml'}->{'indexer'}->{$field}->{$self->{'tag'}});
690    
691                  foreach my $tag (@{$self->{'import_xml'}->{'indexer'}->{$field}->{$self->{'tag'}}}) {                  foreach my $tag (@{$self->{'import_xml'}->{'indexer'}->{$field}->{$self->{'tag'}}}) {
692                            my $format = $tag->{'value'} || $tag->{'content'};
693    
694                            $log->debug("format: $format");
695    
696                          my $v = $self->parse($rec,$tag->{'content'},$i);                          my @v;
697  print "## $i:",$tag->{'content'}," = ",($v || 'null'),"\n";                          if ($format =~ /$LOOKUP_REGEX/o) {
698                                    @v = $self->fill_in_to_arr($rec,$format);
699                            } else {
700                                    @v = $self->parse_to_arr($rec,$format);
701                            }
702                            next if (! @v);
703    
704                          next if (!$v || $v && $v eq '');                          # use format?
705                            if ($tag->{'format_name'}) {
706                                    @v = map { $self->apply_format($tag->{'format_name'},$tag->{'format_delimiter'},$_) } @v;
707                            }
708    
709                          # does tag have type?                          # does tag have type?
710                          if ($tag->{'type'}) {                          if ($tag->{'type'}) {
711                                  push @{$row->{$tag->{'type'}}}, $v;                                  push @{$row->{$tag->{'type'}}}, @v;
712                          } else {                          } else {
713                                  push @{$row->{'display'}}, $v;                                  push @{$row->{'display'}}, @v;
714                                  push @{$row->{'swish'}}, $v;                                  push @{$row->{'swish'}}, @v;
715                          }                          }
716    
717                            if ($field eq 'filename') {
718                                    $self->{'current_filename'} = join('',@v);
719                                    $log->debug("filename: ",$self->{'current_filename'});
720                            }
721    
722                  }                  }
723    
724                  push @{$ds->{$field}}, $row if ($row);                  if ($row) {
725                            $row->{'tag'} = $field;
726    
727                            # TODO: name_sigular, name_plural
728                            my $name = $self->{'import_xml'}->{'indexer'}->{$field}->{'name'};
729                            $row->{'name'} = $name ? $self->_x($name) : $field;
730    
731                            push @ds, $row;
732    
733                            $log->debug("row $field: ",sub { Dumper($row) });
734                    }
735    
736            }
737    
738            return @ds;
739    
740    }
741    
742    =head2 output
743    
744    Create output from in-memory data structure using Template Toolkit template.
745    
746    my $text = $webpac->output( template => 'text.tt', data => @ds );
747    
748    =cut
749    
750    sub output {
751            my $self = shift;
752    
753            my $args = {@_};
754    
755            my $log = $self->_get_logger();
756    
757            $log->logconfess("need template name") if (! $args->{'template'});
758            $log->logconfess("need data array") if (! $args->{'data'});
759    
760            my $out;
761    
762            $self->{'tt'}->process(
763                    $args->{'template'},
764                    $args,
765                    \$out
766            ) || confess $self->{'tt'}->error();
767    
768            return $out;
769    }
770    
771    =head2 apply_format
772    
773    Apply format specified in tag with C<format_name="name"> and
774    C<format_delimiter=";;">.
775    
776     my $text = $webpac->apply_format($format_name,$format_delimiter,$data);
777    
778    Formats can contain C<lookup{...}> if you need them.
779    
780    =cut
781    
782    sub apply_format {
783            my $self = shift;
784    
785            my ($name,$delimiter,$data) = @_;
786    
787            my $log = $self->_get_logger();
788    
789            if (! $self->{'import_xml'}->{'format'}->{$name}) {
790                    $log->warn("<format name=\"$name\"> is not defined in ",$self->{'import_xml_file'});
791                    return $data;
792            }
793    
794            $log->warn("no delimiter for format $name") if (! $delimiter);
795    
796            my $format = $self->_x($self->{'import_xml'}->{'format'}->{$name}->{'content'}) || $log->logdie("can't find format '$name'");
797    
798            my @data = split(/\Q$delimiter\E/, $data);
799    
800            my $out = sprintf($format, @data);
801            $log->debug("using format $name [$format] on $data to produce: $out");
802    
803            if ($out =~ m/$LOOKUP_REGEX/o) {
804                    return $self->lookup($out);
805            } else {
806                    return $out;
807            }
808    
809    }
810    
811    
812    #
813    #
814    #
815    
816    =head1 INTERNAL METHODS
817    
818    Here is a quick list of internal methods, mostly useful to turn debugging
819    on them (see L<LOGGING> below for explanation).
820    
821    =cut
822    
823    =head2 _eval
824    
825    Internal function to eval code without C<strict 'subs'>.
826    
827    =cut
828    
829    sub _eval {
830            my $self = shift;
831    
832            my $code = shift || return;
833    
834            my $log = $self->_get_logger();
835    
836            no strict 'subs';
837            my $ret = eval $code;
838            if ($@) {
839                    $log->error("problem with eval code [$code]: $@");
840          }          }
841    
842          print Dumper($ds);          $log->debug("eval: ",$code," [",$ret,"]");
843    
844            return $ret || 0;
845    }
846    
847    =head2 _sort_by_order
848    
849    Sort xml tags data structure accoding to C<order=""> attribute.
850    
851    =cut
852    
853    sub _sort_by_order {
854            my $self = shift;
855    
856            my $va = $self->{'import_xml'}->{'indexer'}->{$a}->{'order'} ||
857                    $self->{'import_xml'}->{'indexer'}->{$a};
858            my $vb = $self->{'import_xml'}->{'indexer'}->{$b}->{'order'} ||
859                    $self->{'import_xml'}->{'indexer'}->{$b};
860    
861            return $va <=> $vb;
862  }  }
863    
864    =head2 _get_logger
865    
866    Get C<Log::Log4perl> object with a twist: domains are defined for each
867    method
868    
869     my $log = $webpac->_get_logger();
870    
871    =cut
872    
873    sub _get_logger {
874            my $self = shift;
875    
876            my $name = (caller(1))[3] || caller;
877            return get_logger($name);
878    }
879    
880    =head2 _x
881    
882    Convert string from UTF-8 to code page defined in C<import_xml>.
883    
884     my $text = $webpac->_x('utf8 text');
885    
886    =cut
887    
888    sub _x {
889            my $self = shift;
890            my $utf8 = shift || return;
891    
892            return $self->{'utf2cp'}->convert($utf8) ||
893                    $self->_get_logger()->logwarn("can't convert '$utf8'");
894    }
895    
896    #
897    #
898    #
899    
900    =head1 LOGGING
901    
902    Logging in WebPAC is performed by L<Log::Log4perl> with config file
903    C<log.conf>.
904    
905    Methods defined above have different levels of logging, so
906    it's descriptions will be useful to turn (mostry B<debug> logging) on
907    or off to see why WabPAC isn't perforing as you expect it (it might even
908    be a bug!).
909    
910    B<This is different from normal Log4perl behaviour>. To repeat, you can
911    also use method names, and not only classes (which are just few)
912    to filter logging.
913    
914    =cut
915    
916  1;  1;

Legend:
Removed from v.366  
changed lines
  Added in v.389

  ViewVC Help
Powered by ViewVC 1.1.26