/[webpac]/trunk2/lib/WebPAC.pm

This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!

Diff of /trunk2/lib/WebPAC.pm

Parent Directory | Revision Log | View Patch Patch

-revision 355 by dpavlin,
Wed Jun 16 11:41:50 2004 UTC
+revision 371 by dpavlin,
Thu Jun 17 20:44:45 2004 UTC
 Line 1
  package WebPAC;
+ use warnings;
+ use strict;
  use Carp;
  use Text::Iconv;
  use Config::IniFiles;
+ use XML::Simple;
+ use Template;
+ use Data::Dumper;
  =head1 NAME
-Line 32 
 which describes databases to be indexed.
+Line 39 
 which describes databases to be indexed.
  =cut
+ # mapping between data type and tag which specify
+ # format in XML file
+ my %type2tag = (
+         'isis' => 'isis',
+ #       'excel' => 'column',
+ #       'marc' => 'marc',
+ #       'feed' => 'feed'
+ );
  sub new {
          my $class = shift;
          my $self = {@_};
-Line 45 
 sub new {
+Line 61 
 sub new {
          # read global.conf
          #
-         $self->{global_config_file} = new Config::IniFiles( -file => 'global.conf' ) || croak "can't open 'global.conf'";
+         my $config = new Config::IniFiles( -file => 'global.conf' ) || croak "can't open 'global.conf'";
          # read global config parametars
          foreach my $var (qw(
-Line 55 
 sub new {
+Line 71 
 sub new {
                          dbi_passwd
                          show_progress
                          my_unac_filter
+                         output_template
                  )) {
-                 $self->{global_config}->{$var} = $self->{global_config_file}->val('global', $var);
+                 $self->{'global_config'}->{$var} = $config->val('global', $var);
          }
          #
-Line 65 
 sub new {
+Line 82 
 sub new {
          $self->{indexer_config_file} = new Config::IniFiles( -file => $self->{config_file} ) || croak "can't open '$self->{config_file}'";
-         # read global config parametars
+         # create UTF-8 convertor for import_xml files
-         foreach my $var (qw(
+         $self->{'utf2cp'} = Text::Iconv->new('UTF-8' ,$self->{'code_page'});
-                         dbi_dbd
-                         dbi_dsn
+         # create Template toolkit instance
-                         dbi_user
+         $self->{'tt'} = Template->new(
-                         dbi_passwd
+                 INCLUDE_PATH => ($self->{'global_config_file'}->{'output_template'} || './output_template'),
-                         show_progress
+ #               FILTERS => {
-                         my_unac_filter
+ #                       'foo' => \&foo_filter,
-                 )) {
+ #               },
-                 $self->{global_config}->{$var} = $self->{global_config_file}->val('global', $var);
+                 EVAL_PERL => 1,
-         }
+         );
          return $self;
  }
-Line 96 
 By default, ISIS code page is assumed to
+Line 113 
 By default, ISIS code page is assumed to
  If optional parameter C<limit_mfn> is set, it will read just 500 records
  from database in example above.
- Returns number of last record read into memory (size of database, really).
  C<lookup> argument is an array of lookups to create. Each lookup must have C<key> and
  C<val>. Optional parameter C<eval> is perl code to evaluate before storing
  value in index.
-Line 109 
 value in index.
+Line 124 
 value in index.
      'val' => 'v900' },
   ]
+ Returns number of last record read into memory (size of database, really).
  =cut
  sub open_isis {
-Line 125 
 sub open_isis {
+Line 142 
 sub open_isis {
          # create Text::Iconv object
          my $cp = Text::Iconv->new($code_page,$self->{'code_page'});
+         print STDERR "reading ISIS database '",$arg->{'filename'},"'\n" if ($self->{'debug'});
          my $isis_db = OpenIsis::open($arg->{'filename'});
          my $maxmfn = OpenIsis::maxRowid( $isis_db ) || 1;
+         $maxmfn = $self->{limit_mfn} if ($self->{limit_mfn});
+         print STDERR "processing $maxmfn records...\n" if ($self->{'debug'});
          # read database
          for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) {
-Line 161 
 sub open_isis {
+Line 184 
 sub open_isis {
          }
+         $self->{'current_mfn'} = 1;
          # store max mfn and return it.
          return $self->{'max_mfn'} = $maxmfn;
  }
+ =head2 fetch_rec
+ Fetch next record from database. It will also display progress bar (once
+ it's implemented, that is).
+  my $rec = $webpac->fetch_rec;
+ =cut
+ # Return the record at the current cursor position and advance the cursor.
+ # Returns nothing once every record up to max_mfn has been handed out, and
+ # keeps returning nothing on any further call.
+ # Dies (confess) when current_mfn is not set, i.e. no database was loaded.
+ sub fetch_rec {
+         my $self = shift;
+         # open_isis sets current_mfn to 1, so a false value means nothing is loaded
+         my $mfn = $self->{'current_mfn'} || confess "it seems that you didn't load database!";
+         if ($mfn > $self->{'max_mfn'}) {
+                 # park the cursor just past the last record; resetting it to
+                 # max_mfn would make the next call return the last record again
+                 $self->{'current_mfn'} = $self->{'max_mfn'} + 1;
+                 return;
+         }
+         $self->{'current_mfn'} = $mfn + 1;
+         return $self->{'data'}->{$mfn};
+ }
+ =head2 open_import_xml
+ Read file from C<import_xml/> directory and parse it.
+  $webpac->open_import_xml(type => 'isis');
+ =cut
+ # Load ./import_xml/<type>.xml into $self->{'import_xml'}.
+ # Named argument: type (required). The part of type before the first
+ # underscore selects the XML tag name through %type2tag.
+ sub open_import_xml {
+         my ($self, %arg) = @_;
+         my $type = $arg{'type'};
+         confess "need type to load file from import_xml/" if (! $type);
+         $self->{'type'} = $type;
+         # strip everything from the first underscore on to get the base type
+         (my $type_base = $type) =~ s/_.*$//g;
+         $self->{'tag'} = $type2tag{$type_base};
+         my $debug = $self->{'debug'};
+         print STDERR "using type '",$self->{'type'},"' tag <",$self->{'tag'},">\n" if ($debug);
+         my $f = "./import_xml/".$self->{'type'}.".xml";
+         if (! -e "$f") {
+                 confess "import_xml file '$f' doesn't exist!";
+         }
+         print STDERR "reading '$f'\n" if ($debug);
+         # the tag for this type plus 'config' and 'format' are always arrays
+         my @force_array = ( $self->{'tag'}, 'config', 'format' );
+         $self->{'import_xml'} = XMLin($f,
+                 ForceArray => \@force_array,
+                 ForceContent => 1
+         );
+ }
  =head2 create_lookup
  Create lookup from record using lookup definition.
+  $self->create_lookup($rec, @lookups);
+ Called internally by C<open_*> methods.
  =cut
  sub create_lookup {
-Line 195 
 sub create_lookup {
+Line 281 
 sub create_lookup {
          }
  }
+ =head2 get_data
+ Returns value from record.
+  my $text = $self->get_data(\$rec,$f,$sf,$i,\$found);
+ Arguments are:
+ record reference C<$rec>,
+ field C<$f>,
+ optional subfield C<$sf>,
+ index for repeatable values C<$i>.
+ Optional variable C<$found> will be incremented if the
+ field is found.
+ Returns value or empty string.
+ =cut
+ # Return the value of field $f (repetition $i) from the record that $rec
+ # points to, or an empty string when the field or repetition is missing.
+ #   $rec   - reference to the record hash reference
+ #   $f     - field number
+ #   $sf    - optional subfield; when it is missing from the field, all
+ #            subfields are returned instead
+ #   $i     - index of the repetition (a false value means the first one)
+ #   $found - optional reference to a counter, incremented when a value exists
+ sub get_data {
+         my $self = shift;
+         my ($rec,$f,$sf,$i,$found) = @_;
+         # an undefined index already behaved like 0; normalise it to avoid a warning
+         $i ||= 0;
+         return '' if (! $$rec->{$f});
+         my $value = $$rec->{$f}->[$i];
+         return '' if (! $value);
+         # $found is optional, so only dereference it when a reference was passed
+         $$found++ if (ref($found) && defined($$found));
+         # real reference check: a plain string that merely contains "HASH"
+         # must not be treated as a set of subfields
+         return $value if (! UNIVERSAL::isa($value, 'HASH'));
+         # requested subfield is present
+         return $value->{$sf} if ($sf && $value->{$sf});
+         # it still might have subfields, just not the specified one (or none
+         # was specified), so dump all of them; keys are sorted to keep the
+         # output stable between runs
+         return join('', map { $value->{$_} . " " } sort keys %{$value});
+ }
  =head2 fill_in
  Workhorse of all: takes record from in-memory structure of database and
  strings with placeholders and returns string or array with substituted
  values from record.
-  $webpac->fill_in($rec,'v250^a');
+  my $text = $webpac->fill_in($rec,'v250^a');
  Optional argument is ordinal number for repeatable fields. By default,
  it's assumed to be first repeatable field (fields are perl array, so first
  element is 0).
  Following example will read second value from repeatable field.
-  $webpac->fill_in($rec,'Title: v250^a',1);
+  my $text = $webpac->fill_in($rec,'Title: v250^a',1);
  This function B<does not> perform parsing of format to intelligently skip
  delimiters before fields which aren't used.
  =cut
+ # internal helper: run a string of perl code and hand back its result.
+ # Returns nothing when no code is given; a failure inside the evaluated
+ # code is reported on STDERR and does not propagate to the caller.
+ sub _eval {
+         my $self = shift;
+         my $code = shift;
+         return if (! $code);
+         # barewords are tolerated in the evaluated snippets
+         no strict 'subs';
+         my $result = eval $code;
+         print STDERR "problem with eval code [$code]: $@\n" if ($@);
+         return $result;
+ }
  sub fill_in {
          my $self = shift;
-Line 228 
 sub fill_in {
+Line 375 
 sub fill_in {
          my $found = 0;
-         # get field with subfield
+         my $eval_code;
-         sub get_sf {
+         # remove eval{...} from beginning
-                 my ($found,$rec,$f,$sf,$i) = @_;
+         $eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s);
-                 if ($$rec->{$f} && $$rec->{$f}->[$i]->{$sf}) {
-                         $$found++;
-                         return $$rec->{$f}->[$i]->{$sf};
-                 } else {
-                         return '';
-                 }
-         }
-         # get field (without subfield)
-         sub get_nosf {
-                 my ($found,$rec,$f,$i) = @_;
-                 if ($$rec->{$f} && $$rec->{$f}->[$i]) {
-                         $$found++;
-                         return $$rec->{$f}->[$i];
-                 } else {
-                         return '';
-                 }
-         }
          # do actual replacement of placeholders
-         $format =~ s/v(\d+)\^(\w)/get_sf(\$found,\$rec,$1,$2,$i)/ges;
+         $format =~ s/v(\d+)(?:\^(\w))*/$self->get_data(\$rec,$1,$2,$i,\$found)/ges;
-         $format =~ s/v(\d+)/get_nosf(\$found,\$rec,$1,$i)/ges;
          if ($found) {
+                 if ($eval_code) {
+                         my $eval = $self->fill_in($rec,$eval_code,$i);
+                         return if (! $self->_eval($eval));
+                 }
                  # do we have lookups?
                  if ($format =~ /\[[^\[\]]+\]/o) {
+ print "## probable lookup: $format\n";
                          return $self->lookup($format);
                  } else {
                          return $format;
-Line 270 
 sub fill_in {
+Line 403 
 sub fill_in {
  Perform lookups on format supplied to it.
-  my $txt = $self->lookup('[v900]');
+  my $text = $self->lookup('[v900]');
  Lookups can be nested (like C<[d:[a:[v900]]]>).
-Line 283 
 sub lookup {
+Line 416 
 sub lookup {
          if ($tmp =~ /\[[^\[\]]+\]/o) {
                  my @in = ( $tmp );
- #print "##lookup $tmp\n";
+ print "## lookup $tmp\n";
                  my @out;
                  while (my $f = shift @in) {
                          if ($f =~ /\[([^\[\]]+)\]/) {
                                  my $k = $1;
                                  if ($self->{'lookup'}->{$k}) {
- #print "## lookup key = $k\n";
                                          foreach my $nv (@{$self->{'lookup'}->{$k}}) {
                                                  my $tmp2 = $f;
                                                  $tmp2 =~ s/\[$k\]/$nv/g;
                                                  push @in, $tmp2;
- #print "## lookup in => $tmp2\n";
                                          }
                                  } else {
                                          undef $f;
                                  }
                          } elsif ($f) {
                                  push @out, $f;
- #print "## lookup out => $f\n";
                          }
                  }
                  return @out;
-Line 310 
 sub lookup {
+Line 440 
 sub lookup {
          }
  }
+ =head2 parse
+ Perform smart parsing of string, skipping delimiters for fields which aren't
+ defined. It can also eval code in format starting with C<eval{...}> and
+ return output or nothing depending on eval code.
+  my $text = $webpac->parse($rec,'eval{"v901^a" eq "Deskriptor"}descriptor: v250^a', $i);
+ =cut
+ # Build the output string for one repetition of the fields named in a format.
+ #   $rec         - record (hash reference)
+ #   $format_utf8 - format string with vNNN / vNNN^x placeholders, optionally
+ #                  starting with eval{...}
+ #   $i           - index of the repetition to use (defaults to 0)
+ # Returns nothing when the format is empty, when none of the placeholders
+ # produced a value, or when the eval{...} code evaluates to false; otherwise
+ # returns the assembled string. Dies (confess) on a non-HASH record, on a
+ # missing utf2cp converter and when the format conversion yields a false value.
+ sub parse {
+         my $self = shift;
+         my ($rec, $format_utf8, $i) = @_;
+         return if (! $format_utf8);
+         confess("need HASH as first argument!") if ($rec !~ /HASH/o);
+         confess("need utf2cp Text::Iconv object!") if (! $self->{'utf2cp'});
+         $i = 0 if (! $i);
+         # the format is run through the utf2cp converter before any matching
+         my $format = $self->{'utf2cp'}->convert($format_utf8) || confess("can't convert '$format_utf8' from UTF-8 to ",$self->{'code_page'});
+         my @out;
+         my $eval_code;
+         # remove eval{...} from beginning
+         $eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s);
+         # first non-empty delimiter seen before any field was found
+         my $prefix;
+         # number of placeholders that produced a value
+         my $all_found=0;
+         # each pass strips the text up to and including the next placeholder:
+         # $1 = delimiter before it, $2 = field number, $3 = optional subfield
+         while ($format =~ s/^(.*?)v(\d+)(?:\^(\w))*//s) {
+                 my $del = $1 || '';
+                 $prefix ||= $del if ($all_found == 0);
+                 my $found = 0;
+                 my $tmp = $self->get_data(\$rec,$2,$3,$i,\$found);
+                 # delimiter and value are kept only when the field exists
+                 if ($found) {
+                         push @out, $del;
+                         push @out, $tmp;
+                         $all_found += $found;
+                 }
+         }
+         return if (! $all_found);
+         # whatever is left in $format is the text after the last placeholder
+         my $out = join('',@out) . $format;
+         # add prefix if not there
+         $out = $prefix . $out if ($out !~ m/^\Q$prefix\E/);
+         # the eval code has its own placeholders filled in by fill_in and is
+         # then evaluated; a false result suppresses the output
+         if ($eval_code) {
+                 my $eval = $self->fill_in($rec,$eval_code,$i);
+                 return if (! $self->_eval($eval));
+         }
+         return $out;
+ }
+ =head2 parse_to_arr
+ Similar to C<parse>, but returns array of all repeatable fields
+  my @arr = $webpac->parse_to_arr($rec,'v250^a');
+ =cut
+ # Collect the result of parse() for repetition 0, 1, 2, ... of the given
+ # format and return them as a list. Collection stops at the first
+ # repetition for which parse() returns a false value.
+ # Dies (confess) when $rec is not a HASH; returns nothing for an empty format.
+ sub parse_to_arr {
+         my $self = shift;
+         my ($rec, $format_utf8) = @_;
+         confess("need HASH as first argument!") if ($rec !~ /HASH/o);
+         return if (! $format_utf8);
+         my @arr;
+         for (my $i = 0; ; $i++) {
+                 my $v = $self->parse($rec,$format_utf8,$i);
+                 last if (! $v);
+                 push @arr, $v;
+         }
+         return @arr;
+ }
+ =head2 data_structure
+ Create in-memory data structure which represents layout from C<import_xml>.
+ It is used later to produce output.
+  my @ds = $webpac->data_structure($rec);
+ =cut
+ # private method _sort_by_order
+ # comparison routine for sort(): compares the tags in $a and $b numerically
+ # by their order="" attribute, falling back to the indexer entry itself
+ # when the attribute is missing or false
+ sub _sort_by_order {
+         my $self = shift;
+         my $indexer = $self->{'import_xml'}->{'indexer'};
+         my $order_of = sub {
+                 my $tag = shift;
+                 return $indexer->{$tag}->{'order'} || $indexer->{$tag};
+         };
+         my $va = $order_of->($a);
+         my $vb = $order_of->($b);
+         return $va <=> $vb;
+ }
+ # Build the list of output rows for one record. Every indexer field from
+ # import_xml that yields at least one value becomes a hash with the key
+ # 'tag' (field name) plus one array per value type; values of tags without
+ # a type go to both 'display' and 'swish'.
+ # Dies (confess) when $rec is not a HASH.
+ sub data_structure {
+         my $self = shift;
+         my $rec = shift;
+         confess("need HASH as first argument!") if ($rec !~ /HASH/o);
+         # tags are sorted on first use only; the order is kept in $self
+         if (! $self->{tags_by_order}) {
+                 my @sorted = sort { $self->_sort_by_order } keys %{$self->{'import_xml'}->{'indexer'}};
+                 $self->{tags_by_order} = \@sorted;
+         }
+         my @ds;
+         foreach my $field (@{$self->{tags_by_order}}) {
+                 my $field_def = $self->{'import_xml'}->{'indexer'}->{$field};
+                 my %row;
+                 foreach my $tag (@{$field_def->{$self->{'tag'}}}) {
+                         my @v = $self->parse_to_arr($rec,$tag->{'content'});
+                         next if (! @v);
+                         # a typed tag feeds only its own type, an untyped one feeds both defaults
+                         my @targets = $tag->{'type'} ? ($tag->{'type'}) : ('display', 'swish');
+                         foreach my $target (@targets) {
+                                 push @{$row{$target}}, @v;
+                         }
+                 }
+                 # fields without any value are left out of the result
+                 next if (! %row);
+                 $row{'tag'} = $field;
+                 push @ds, \%row;
+         }
+         return @ds;
+ }
+ =head2 output
+ Create output from in-memory data structure using Template Toolkit template.
+ my $text = $webpac->output( template => 'text.tt', data => \@ds );
+ =cut
+ # Render the named arguments through the Template Toolkit object in
+ # $self->{'tt'} and return the produced text.
+ # Named arguments: template (required), data (required); the whole argument
+ # hash is handed to the template as its variables.
+ # Dies (confess) on a missing argument or when processing fails.
+ sub output {
+         my ($self, %args) = @_;
+         confess("need template name") if (! $args{'template'});
+         confess("need data array") if (! $args{'data'});
+         my $out;
+         my $ok = $self->{'tt'}->process($args{'template'}, \%args, \$out);
+         confess $self->{'tt'}->error() if (! $ok);
+         return $out;
+ }
 ;

 Legend:



Removed from v.355
 


changed lines


 
Added in v.371
 Legend:



Removed from v.355
 


changed lines


 
Added in v.371
-Removed from v.355
+Added in v.371

	ViewVC Help
Powered by ViewVC 1.1.26