/[webpac]/trunk2/lib/WebPAC.pm

This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!

Diff of /trunk2/lib/WebPAC.pm

Parent Directory | Revision Log | View Patch Patch

-revision 352 by dpavlin,
Tue Jun 15 22:40:07 2004 UTC
+revision 363 by dpavlin,
Wed Jun 16 20:05:19 2004 UTC
 Line 1
- package WebPac;
+ package WebPAC;
  use Carp;
+ use Text::Iconv;
+ use Config::IniFiles;
+ use XML::Simple;
+ use Data::Dumper;
  =head1 NAME
- WebPac - base class for WebPac
+ WebPAC - base class for WebPAC
  =head1 DESCRIPTION
- This class does basic thing for WebPac.
+ This module implements methods used by WebPAC.
  =head1 METHODS
  =head2 new
- This will create new instance of WebPac using configuration specified by C<config_file>.
+ This will create new instance of WebPAC using configuration specified by C<config_file>.
-  my $webpac = new WebPac(
+  my $webpac = new WebPAC(
          config_file => 'name.conf',
          [code_page => 'ISO-8859-2',]
   );
  Default C<code_page> is C<ISO-8859-2>.
+ It will also read configuration files
+ C<global.conf> (used by indexer and Web front-end)
+ and configuration file specified by C<config_file>
+ which describes databases to be indexed.
  =cut
+ # maps base data type (type name up to first underscore) to XML tag which
+ # open_import_xml forces into array when parsing import_xml file
+ my %type2tag = (
+         'isis' => 'isis',
+ #       'excel' => 'column',
+ #       'marc' => 'marc',
+ #       'feed' => 'feed'
+ );
  sub new {
          my $class = shift;
          my $self = {@_};
-Line 34 
 sub new {
+Line 53 
 sub new {
          # output codepage
          $self->{'code_page'} = 'ISO-8859-2' if (! $self->{'code_page'});
-         return $self;
+         #
- }
+         # read global.conf
+         #
- =head2 read_global_config
- Read global configuration (used by indexer and Web font-end)
- =cut
- sub read_global_config {
-         my $self = shift;
          $self->{global_config_file} = new Config::IniFiles( -file => 'global.conf' ) || croak "can't open 'global.conf'";
-Line 60 
 sub read_global_config {
+Line 71 
 sub read_global_config {
                  $self->{global_config}->{$var} = $self->{global_config_file}->val('global', $var);
          }
-         return $self;
+         #
- }
+         # read indexer config file
+         #
- =head2 read_indexer_config
- Read indexer configuration (specify databases, types etc.)
- =cut
- sub read_indexer_config {
-         my $self = shift;
          $self->{indexer_config_file} = new Config::IniFiles( -file => $self->{config_file} ) || croak "can't open '$self->{config_file}'";
-         # read global config parametars
-         foreach my $var (qw(
-                         dbi_dbd
-                         dbi_dsn
-                         dbi_user
-                         dbi_passwd
-                         show_progress
-                         my_unac_filter
-                 )) {
-                 $self->{global_config}->{$var} = $self->{global_config_file}->val('global', $var);
-         }
          return $self;
  }
-Line 102 
 Open CDS/ISIS database using OpenIsis mo
+Line 93 
 Open CDS/ISIS database using OpenIsis mo
  By default, ISIS code page is assumed to be C<852>.
- If C<limit_mfn> is set, it will read just 500 records from
+ If optional parameter C<limit_mfn> is set, it will read just 500 records
- database in example above.
+ from database in example above.
  Returns number of last record read into memory (size of database, really).
-Line 127 
 sub open_isis {
+Line 118 
 sub open_isis {
          croak "need filename" if (! $arg->{'filename'});
          my $code_page = $arg->{'code_page'} || '852';
+         use OpenIsis;
          #$self->{'isis_code_page'} = $code_page;
          # create Text::Iconv object
          my $cp = Text::Iconv->new($code_page,$self->{'code_page'});
+         print STDERR "reading ISIS database '",$arg->{'filename'},"'\n" if ($self->{'debug'});
          my $isis_db = OpenIsis::open($arg->{'filename'});
          my $maxmfn = OpenIsis::maxRowid( $isis_db ) || 1;
+         $maxmfn = $self->{limit_mfn} if ($self->{limit_mfn});
+         print STDERR "processing $maxmfn records...\n" if ($self->{'debug'});
          # read database
          for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) {
-Line 163 
 sub open_isis {
+Line 162 
 sub open_isis {
                  }
                  # create lookup
+                 my $rec = $self->{'data'}->{$mfn};
+                 $self->create_lookup($rec, @{$arg->{'lookup'}});
-                 foreach my $i (@{$arg->{lookup}}) {
-                         my $rec = $self->{'data'}->{$mfn};
-                         if ($i->{'eval'}) {
-                                 my $eval = $self->fill_in($rec,$i->{'eval'});
-                                 my $key = $self->fill_in($rec,$i->{'key'});
-                                 my @val = $self->fill_in($rec,$i->{'val'});
-                                 if ($key && @val && eval $eval) {
-                                         push @{$self->{'lookup'}->{$key}}, @val;
-                                 }
-                         } else {
-                                 my $key = $self->fill_in($rec,$i->{'key'});
-                                 my @val = $self->fill_in($rec,$i->{'val'});
-                                 if ($key && @val) {
-                                         push @{$self->{'lookup'}->{$key}}, @val;
-                                 }
-                         }
-                 }
          }
+         $self->{'current_mfn'} = 1;
          # store max mfn and return it.
          return $self->{'max_mfn'} = $maxmfn;
  }
+ =head2 fetch_rec
+ Fetch next record from database. It will also display progress bar (once
+ it's implemented, that is).
+  my $rec = $webpac->fetch_rec;
+ =cut
+ sub fetch_rec {
+         my $self = shift;
+         # current_mfn is set to 1 by open_isis, so false value means that
+         # database wasn't loaded
+         my $mfn = $self->{'current_mfn'} || confess "it seems that you didn't load database!";
+         # past the last record: leave cursor where it is, so that every
+         # further call returns nothing (post-increment followed by reset to
+         # max_mfn made the next call return the last record once more)
+         return if ($mfn > $self->{'max_mfn'});
+         # advance cursor only when record is really handed out
+         $self->{'current_mfn'} = $mfn + 1;
+         return $self->{'data'}->{$mfn};
+ }
+ =head2 open_import_xml
+ Read file from C<import_xml/> directory and parse it.
+  $webpac->open_import_xml(type => 'isis');
+ =cut
+ sub open_import_xml {
+         my $self = shift;
+         my $arg = {@_};
+         confess "need type to load file from import_xml/" if (! $arg->{'type'});
+         my $type = $arg->{'type'};
+         # base type is part of type before first underscore
+         my $type_base = $type;
+         $type_base =~ s/_.*$//g;
+         my $f = "./import_xml/$type.xml";
+         confess "import_xml file '$f' doesn't exist!" if (! -e "$f");
+         print STDERR "reading '$f'\n" if ($self->{'debug'});
+         # tag for this type is forced into array only if type is known in
+         # %type2tag, so that undef never ends up in ForceArray list
+         my @force_array = grep { defined } ($type2tag{$type_base}, 'config', 'format');
+         $self->{'import_xml'} = XMLin($f,
+                 ForceArray => \@force_array,
+                 ForceContent => 1
+         );
+         # dump of parsed structure is debugging aid: it goes to STDERR and
+         # only when debug is turned on, like other diagnostic messages
+         print STDERR Dumper($self->{'import_xml'}) if ($self->{'debug'});
+         # returns parsed structure (which is also kept in $self->{'import_xml'})
+         return $self->{'import_xml'};
+ }
+ =head2 create_lookup
+ Create lookup from record using lookup definition.
+ =cut
+ sub create_lookup {
+         my $self = shift;
+         my $rec = shift || confess "need record to create lookup";
+         confess("need HASH as first argument!") if ($rec !~ /HASH/o);
+         # remaining arguments are lookup definitions: hashes with key, val
+         # and optional eval
+         foreach my $def (@_) {
+                 my $has_eval = $def->{'eval'};
+                 # condition (if any) is filled in before key and val
+                 my $cond;
+                 $cond = $self->fill_in($rec,$def->{'eval'}) if ($has_eval);
+                 my $key = $self->fill_in($rec,$def->{'key'});
+                 my @val = $self->fill_in($rec,$def->{'val'});
+                 # nothing to store without key and at least one value
+                 next unless ($key && @val);
+                 # definition with eval is stored only if filled-in code
+                 # evaluates to true
+                 next if ($has_eval && ! eval $cond);
+                 push @{$self->{'lookup'}->{$key}}, @val;
+         }
+ }
+ =head2 get_data
+ Returns value from record.
+  $self->get_data(\$rec,$f,$sf,$i,\$found);
+ Arguments are:
+ record reference C<$rec>,
+ field C<$f>,
+ optional subfield C<$sf>,
+ index for repeatable values C<$i>.
+ Optional variable C<$found> will be incremented if a value
+ is found.
+ Returns value or empty string.
+ =cut
+ sub get_data {
+         my $self = shift;
+         my ($rec,$f,$sf,$i,$found) = @_;
+         # first repeatable value if index isn't specified
+         $i ||= 0;
+         # $rec is reference to record (hash of fields); each field is array
+         # of repeatable values. Values are copied step by step so that test
+         # for missing value can't autovivify it inside record.
+         my $field = $$rec->{$f};
+         return '' if (ref($field) ne 'ARRAY');
+         my $val = $field->[$i];
+         if ($sf) {
+                 # subfield exists only in values which are hashes; missing
+                 # subfield is empty string (and not whole hash of subfields)
+                 return '' if (ref($val) ne 'HASH');
+                 $val = $val->{$sf};
+         }
+         # always return empty string (never undef) when there is no value
+         return '' if (! defined($val) || $val eq '');
+         # count value only if caller supplied reference to initialized counter
+         $$found++ if (ref($found) && defined($$found));
+         return $val;
+ }
  =head2 fill_in
  Workhorse of all: takes record from in-memory structure of database and
-Line 196 
 values from record.
+Line 303 
 values from record.
   $webpac->fill_in($rec,'v250^a');
  Optional argument is ordinal number for repeatable fields. By default,
- it's assume to be first repeatable field.
+ it's assumed to be the first repeatable field (fields are Perl arrays, so the
+ first element is 0).
+ Following example will read second value from repeatable field.
+  $webpac->fill_in($rec,'Title: v250^a',1);
+ This function B<does not> perform parsing of format to intelligently skip
+ delimiters before fields which aren't used.
  =cut
-Line 209 
 sub fill_in {
+Line 323 
 sub fill_in {
          my $i = shift || 0;
          # FIXME remove for speedup?
-         if ($rec !~ /HASH/) {
+         confess("need HASH as first argument!") if ($rec !~ /HASH/o);
-                 confess("need HASH as first argument!");
-         }
          my $found = 0;
-         # get field with subfield
+         my $eval_code;
-         sub get_sf {
+         # remove eval{...} from beginning
-                 my ($found,$rec,$f,$sf,$i) = @_;
+         $eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s);
-                 if ($$rec->{$f} && $$rec->{$f}->[$i]->{$sf}) {
-                         $$found++;
-                         return $$rec->{$f}->[$i]->{$sf};
-                 } else {
-                         return '';
-                 }
-         }
-         # get field (without subfield)
-         sub get_nosf {
-                 my ($found,$rec,$f,$i) = @_;
-                 if ($$rec->{$f} && $$rec->{$f}->[$i]) {
-                         $$found++;
-                         return $$rec->{$f}->[$i];
-                 } else {
-                         return '';
-                 }
-         }
          # do actual replacement of placeholders
-         $format =~ s/v(\d+)\^(\w)/get_sf(\$found,\$rec,$1,$2,$i)/ges;
+         $format =~ s/v(\d+)(?:\^(\w))*/$self->get_data(\$rec,$1,$2,$i,\$found)/ges;
-         $format =~ s/v(\d+)/get_nosf(\$found,\$rec,$1,$i)/ges;
          if ($found) {
-                 return $format;
+                 if ($eval_code) {
+                         my $eval = $self->fill_in($rec,$eval_code,$i);
+                         return if (! eval $eval);
+                 }
+                 # do we have lookups?
+                 if ($format =~ /\[[^\[\]]+\]/o) {
+                         return $self->lookup($format);
+                 } else {
+                         return $format;
+                 }
          } else {
                  return;
          }
-Line 250 
 sub fill_in {
+Line 352 
 sub fill_in {
  =head2 lookup
- This function will perform lookups on format supplied to it.
+ Perform lookups on format supplied to it.
   my $txt = $self->lookup('[v900]');
+ Lookups can be nested (like C<[d:[a:[v900]]]>).
  =cut
  sub lookup {
-Line 261 
 sub lookup {
+Line 365 
 sub lookup {
          my $tmp = shift || confess "need format";
-         if ($tmp =~ /\[[^\[\]]+\]/) {
+         if ($tmp =~ /\[[^\[\]]+\]/o) {
                  my @in = ( $tmp );
- print "##lookup $tmp\n";
+ #print "##lookup $tmp\n";
                  my @out;
                  while (my $f = shift @in) {
                          if ($f =~ /\[([^\[\]]+)\]/) {
                                  my $k = $1;
                                  if ($self->{'lookup'}->{$k}) {
- print "## lookup key = $k\n";
+ #print "## lookup key = $k\n";
                                          foreach my $nv (@{$self->{'lookup'}->{$k}}) {
                                                  my $tmp2 = $f;
                                                  $tmp2 =~ s/\[$k\]/$nv/g;
                                                  push @in, $tmp2;
- print "## lookup in => $tmp2\n";
+ #print "## lookup in => $tmp2\n";
                                          }
                                  } else {
                                          undef $f;
                                  }
                          } elsif ($f) {
                                  push @out, $f;
- print "## lookup out => $f\n";
+ #print "## lookup out => $f\n";
                          }
                  }
                  return @out;
-Line 290 
 print "## lookup out => $f\n";
+Line 394 
 print "## lookup out => $f\n";
          }
  }
+ =head2 parse
+ Perform smart parsing of string, skipping delimiters for fields which aren't
+ defined. It can also eval code in format starting with C<eval{...}> and
+ return output or nothing depending on eval code.
+  $webpac->parse($rec,'eval{"v901^a" eq "Deskriptor"}descriptor: v250^a', $i);
+ =cut
+ sub parse {
+         my $self = shift;
+         my ($rec, $format, $i) = @_;
+         confess("need HASH as first argument!") if ($rec !~ /HASH/o);
+         $i = 0 if (! $i);
+         # code from eval{...} at the beginning of format (if there is any)
+         my $eval_code;
+         $eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s);
+         my $prefix;
+         my $all_found=0;
+         my @out;
+         # consume format field by field: text in front of each field is its
+         # delimiter
+         while ($format =~ s/^(.*?)v(\d+)(?:\^(\w))*//s) {
+                 my ($del, $fld, $sfld) = ($1 || '', $2, $3);
+                 # delimiter seen before any field was found becomes prefix
+                 if ($all_found == 0) {
+                         $prefix ||= $del;
+                 }
+                 my $found = 0;
+                 my $tmp = $self->get_data(\$rec,$fld,$sfld,$i,\$found);
+                 # delimiter is kept only together with field which has value
+                 next if (! $found);
+                 push @out, $del, $tmp;
+                 $all_found += $found;
+         }
+         # no field had value, so there is nothing to return
+         return if (! $all_found);
+         # rest of format is text after last field
+         my $out = join('',@out) . $format;
+         # add prefix if not there
+         $out = $prefix . $out if ($out !~ m/^\Q$prefix\E/);
+         # output is returned only if filled-in eval code evaluates to true
+         if ($eval_code) {
+                 my $eval = $self->fill_in($rec,$eval_code,$i);
+                 return if (! eval $eval);
+         }
+         return $out;
+ }
 ;

 Legend:



Removed from v.352
 


changed lines


 
Added in v.363
 Legend:



Removed from v.352
 


changed lines


 
Added in v.363
-Removed from v.352
+Added in v.363

	ViewVC Help
Powered by ViewVC 1.1.26