/[webpac]/trunk2/lib/WebPAC.pm

This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!

Diff of /trunk2/lib/WebPAC.pm

Parent Directory | Revision Log | View Patch Patch

-revision 352 by dpavlin,
Tue Jun 15 22:40:07 2004 UTC
+revision 357 by dpavlin,
Wed Jun 16 13:39:17 2004 UTC
 Line 1
- package WebPac;
+ package WebPAC;
  use Carp;
+ use Text::Iconv;
+ use Config::IniFiles;
  =head1 NAME
- WebPac - base class for WebPac
+ WebPAC - base class for WebPAC
  =head1 DESCRIPTION
- This class does basic thing for WebPac.
+ This module implements methods used by WebPAC.
  =head1 METHODS
  =head2 new
- This will create new instance of WebPac using configuration specified by C<config_file>.
+ This will create new instance of WebPAC using configuration specified by C<config_file>.
-  my $webpac = new WebPac(
+  my $webpac = new WebPAC(
          config_file => 'name.conf',
          [code_page => 'ISO-8859-2',]
   );
  Default C<code_page> is C<ISO-8859-2>.
+ It will also read configuration files
+ C<global.conf> (used by indexer and Web front-end)
+ and configuration file specified by C<config_file>
+ which describes databases to be indexed.
  =cut
  sub new {
-Line 34 
 sub new {
+Line 41 
 sub new {
          # output codepage
          $self->{'code_page'} = 'ISO-8859-2' if (! $self->{'code_page'});
-         return $self;
+         #
- }
+         # read global.conf
+         #
- =head2 read_global_config
- Read global configuration (used by indexer and Web font-end)
- =cut
- sub read_global_config {
-         my $self = shift;
          $self->{global_config_file} = new Config::IniFiles( -file => 'global.conf' ) || croak "can't open 'global.conf'";
-Line 60 
 sub read_global_config {
+Line 59 
 sub read_global_config {
                  $self->{global_config}->{$var} = $self->{global_config_file}->val('global', $var);
          }
-         return $self;
+         #
- }
+         # read indexer config file
+         #
- =head2 read_indexer_config
- Read indexer configuration (specify databases, types etc.)
- =cut
- sub read_indexer_config {
-         my $self = shift;
          $self->{indexer_config_file} = new Config::IniFiles( -file => $self->{config_file} ) || croak "can't open '$self->{config_file}'";
-Line 102 
 Open CDS/ISIS database using OpenIsis mo
+Line 93 
 Open CDS/ISIS database using OpenIsis mo
  By default, ISIS code page is assumed to be C<852>.
- If C<limit_mfn> is set, it will read just 500 records from
+ If optional parameter C<limit_mfn> is set, it will read just 500 records
- database in example above.
+ from database in example above.
  Returns number of last record read into memory (size of database, really).
-Line 127 
 sub open_isis {
+Line 118 
 sub open_isis {
          croak "need filename" if (! $arg->{'filename'});
          my $code_page = $arg->{'code_page'} || '852';
+         use OpenIsis;
          #$self->{'isis_code_page'} = $code_page;
          # create Text::Iconv object
-Line 136 
 sub open_isis {
+Line 129 
 sub open_isis {
          my $maxmfn = OpenIsis::maxRowid( $isis_db ) || 1;
+         $maxmfn = $self->{limit_mfn} if ($self->{limit_mfn});
          # read database
          for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) {
-Line 163 
 sub open_isis {
+Line 158 
 sub open_isis {
                  }
                  # create lookup
+                 my $rec = $self->{'data'}->{$mfn};
+                 $self->create_lookup($rec, @{$arg->{'lookup'}});
-                 foreach my $i (@{$arg->{lookup}}) {
-                         my $rec = $self->{'data'}->{$mfn};
-                         if ($i->{'eval'}) {
-                                 my $eval = $self->fill_in($rec,$i->{'eval'});
-                                 my $key = $self->fill_in($rec,$i->{'key'});
-                                 my @val = $self->fill_in($rec,$i->{'val'});
-                                 if ($key && @val && eval $eval) {
-                                         push @{$self->{'lookup'}->{$key}}, @val;
-                                 }
-                         } else {
-                                 my $key = $self->fill_in($rec,$i->{'key'});
-                                 my @val = $self->fill_in($rec,$i->{'val'});
-                                 if ($key && @val) {
-                                         push @{$self->{'lookup'}->{$key}}, @val;
-                                 }
-                         }
-                 }
          }
          # store max mfn and return it.
          return $self->{'max_mfn'} = $maxmfn;
  }
+ =head2 create_lookup
+ Create lookup from record using lookup definition.
+ =cut
+ sub create_lookup {    # add entries to $self->{'lookup'} for one record, using the lookup definitions passed in @_
+         my $self = shift;
+         my $rec = shift || confess "need record to create lookup";    # the record is mandatory
+         confess("need HASH as first argument!") if ($rec !~ /HASH/o);    # a stringified hash reference contains "HASH"
+         foreach my $i (@_) {    # each remaining argument is one definition hash with 'key', 'val' and optional 'eval'
+                 if ($i->{'eval'}) {    # conditional definition
+                         my $eval = $self->fill_in($rec,$i->{'eval'});    # placeholders in the condition are filled in from the record first
+                         my $key = $self->fill_in($rec,$i->{'key'});
+                         my @val = $self->fill_in($rec,$i->{'val'});    # list context: fill_in may hand back several values
+                         if ($key && @val && eval $eval) {    # string eval of the filled-in condition; NOTE(review): this executes whatever code the definition holds
+                                 push @{$self->{'lookup'}->{$key}}, @val;    # append; the per-key array is autovivified on first push
+                         }
+                 } else {    # unconditional definition
+                         my $key = $self->fill_in($rec,$i->{'key'});
+                         my @val = $self->fill_in($rec,$i->{'val'});
+                         if ($key && @val) {    # store only when both key and at least one value were produced
+                                 push @{$self->{'lookup'}->{$key}}, @val;
+                         }
+                 }
+         }
+ }
+ =head2 get_data
+ Returns value from record.
+  $self->get_data(\$rec,$f,$sf,$i,\$found);
+ Arguments are:
+ record reference C<$rec>,
+ field C<$f>,
+ optional subfield C<$sf>,
+ index for repeatable values C<$i>.
+ Optional variable C<$found> will be incremented if there
+ is a field.
+ Returns value or empty string.
+ =cut
+ sub get_data {    # fetch repetition $i of field $f (or its subfield $sf) from the record
+         my $self = shift;
+         my ($rec,$f,$sf,$i,$found) = @_;    # $rec is a reference to the record reference; $found is a reference to a counter
+         if ($$rec->{$f}) {    # field is present in the record
+                 if ($sf && $$rec->{$f}->[$i]->{$sf}) {    # a subfield was requested and it holds a true value
+                         $$found++ if (defined($$found));    # bump the caller's counter only when it is defined
+                         return $$rec->{$f}->[$i]->{$sf};
+                 } elsif ($$rec->{$f}->[$i]) {    # otherwise fall back to the whole repetition, if it is true
+                         $$found++ if (defined($$found));
+                         return $$rec->{$f}->[$i];
+                 }    # NOTE(review): if the field exists but neither branch matched, no explicit return is reached, so the result is not the '' the POD promises - confirm callers cope
+         } else {    # field is absent
+                 return '';
+         }
+ }
  =head2 fill_in
  Workhorse of all: takes record from in-memory structure of database and
-Line 196 
 values from record.
+Line 242 
 values from record.
   $webpac->fill_in($rec,'v250^a');
  Optional argument is ordinal number for repeatable fields. By default,
- it's assume to be first repeatable field.
+ it's assumed to be the first repeatable field (fields are a Perl array, so the first
+ element is 0).
+ Following example will read second value from repeatable field.
+  $webpac->fill_in($rec,'Title: v250^a',1);
+ This function B<does not> perform parsing of format to intelligently skip
+ delimiters before fields which aren't used.
  =cut
-Line 209 
 sub fill_in {
+Line 262 
 sub fill_in {
          my $i = shift || 0;
          # FIXME remove for speedup?
-         if ($rec !~ /HASH/) {
+         confess("need HASH as first argument!") if ($rec !~ /HASH/o);
-                 confess("need HASH as first argument!");
-         }
          my $found = 0;
-         # get field with subfield
+         # do actual replacement of placeholders
-         sub get_sf {
+         $format =~ s/v(\d+)(?:\^(\w))*/$self->get_data(\$rec,$1,$2,$i,\$found)/ges;
-                 my ($found,$rec,$f,$sf,$i) = @_;
-                 if ($$rec->{$f} && $$rec->{$f}->[$i]->{$sf}) {
-                         $$found++;
-                         return $$rec->{$f}->[$i]->{$sf};
-                 } else {
-                         return '';
-                 }
-         }
-         # get field (without subfield)
+         if ($found) {
-         sub get_nosf {
+                 # do we have lookups?
-                 my ($found,$rec,$f,$i) = @_;
+                 if ($format =~ /\[[^\[\]]+\]/o) {
-                 if ($$rec->{$f} && $$rec->{$f}->[$i]) {
+                         return $self->lookup($format);
-                         $$found++;
-                         return $$rec->{$f}->[$i];
                  } else {
-                         return '';
+                         return $format;
                  }
-         }
-         # do actual replacement of placeholders
-         $format =~ s/v(\d+)\^(\w)/get_sf(\$found,\$rec,$1,$2,$i)/ges;
-         $format =~ s/v(\d+)/get_nosf(\$found,\$rec,$1,$i)/ges;
-         if ($found) {
-                 return $format;
          } else {
                  return;
          }
-Line 250 
 sub fill_in {
+Line 283 
 sub fill_in {
  =head2 lookup
- This function will perform lookups on format supplied to it.
+ Perform lookups on format supplied to it.
   my $txt = $self->lookup('[v900]');
+ Lookups can be nested (like C<[d:[a:[v900]]]>).
  =cut
  sub lookup {
-Line 261 
 sub lookup {
+Line 296 
 sub lookup {
          my $tmp = shift || confess "need format";
-         if ($tmp =~ /\[[^\[\]]+\]/) {
+         if ($tmp =~ /\[[^\[\]]+\]/o) {
                  my @in = ( $tmp );
- print "##lookup $tmp\n";
+ #print "##lookup $tmp\n";
                  my @out;
                  while (my $f = shift @in) {
                          if ($f =~ /\[([^\[\]]+)\]/) {
                                  my $k = $1;
                                  if ($self->{'lookup'}->{$k}) {
- print "## lookup key = $k\n";
+ #print "## lookup key = $k\n";
                                          foreach my $nv (@{$self->{'lookup'}->{$k}}) {
                                                  my $tmp2 = $f;
                                                  $tmp2 =~ s/\[$k\]/$nv/g;
                                                  push @in, $tmp2;
- print "## lookup in => $tmp2\n";
+ #print "## lookup in => $tmp2\n";
                                          }
                                  } else {
                                          undef $f;
                                  }
                          } elsif ($f) {
                                  push @out, $f;
- print "## lookup out => $f\n";
+ #print "## lookup out => $f\n";
                          }
                  }
                  return @out;
-Line 290 
 print "## lookup out => $f\n";
+Line 325 
 print "## lookup out => $f\n";
          }
  }
+ =head2 parse
+ Perform smart parsing of string, skipping delimiters for fields which aren't
+ defined. It can also eval code in format starting with C<eval{...}> and
+ return output or nothing depending on eval code.
+  $webpac->parse($rec,'eval{"v901^a" eq "Deskriptor"}descriptor: v250^a', $i);
+ =cut
+ sub parse {    # unfinished in this revision: strips eval{...} and a leading prefix from $format, then only dumps @out
+         my $self = shift;
+         my ($rec, $format, $i) = @_;
+         my @out;    # stays empty here: the substitution that would fill it is commented out below
+         my $eval_code;
+         # remove eval{...} from beginning
+         $eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s);    # the captured code is only stored; nothing later in this sub reads $eval_code
+         my $prefix = '';
+         $prefix = $1 if ($format =~ s/^(.+)(v\d+(?:\^\w)*)/$2/s);    # greedy (.+): text before the last vNNN[^x] placeholder moves into $prefix, which is not used afterwards
+         sub f_sf_del {    # helper: push value and delimiter onto the caller's output array when get_data reports a hit
+                 my ($self,$rec,$out,$f,$sf,$del,$i) = @_;    # $out is a reference to an array reference (see @{$$out} below)
+                 my $found=0;
+                 my $tmp = $self->get_data($rec,$f,$sf,$i,\$found);    # get_data increments $found through the reference when it finds data
+                 if ($found) {
+                         push @{$$out}, $tmp;
+                         push @{$$out}, $del;
+                 }
+                 return '';    # always empty; the commented-out s///e below shows it was meant as a replacement callback
+         }
+         #$format =~ s/(.*)v(\d+)(?:\^(\w))*/f_sf_del($self,\$rec,\@out,$2,$3,$1,$i/ges;
+         print Dumper(@out);    # NOTE(review): Dumper comes from Data::Dumper, which is not among the use lines visible in this diff - confirm it is loaded
+ }
 ;

 Legend:



Removed from v.352
 


changed lines


 
Added in v.357
 Legend:



Removed from v.352
 


changed lines


 
Added in v.357
-Removed from v.352
+Added in v.357

	ViewVC Help
Powered by ViewVC 1.1.26