--- trunk2/lib/WebPac.pm 2004/06/15 22:40:07 352 +++ trunk2/lib/WebPAC.pm 2004/06/16 16:50:30 362 @@ -1,28 +1,37 @@ -package WebPac; +package WebPAC; use Carp; +use Text::Iconv; +use Config::IniFiles; + +use Data::Dumper; =head1 NAME -WebPac - base class for WebPac +WebPAC - base class for WebPAC =head1 DESCRIPTION -This class does basic thing for WebPac. +This module implements methods used by WebPAC. =head1 METHODS =head2 new -This will create new instance of WebPac using configuration specified by C. +This will create new instance of WebPAC using configuration specified by C. - my $webpac = new WebPac( + my $webpac = new WebPAC( config_file => 'name.conf', [code_page => 'ISO-8859-2',] ); Default C is C. +It will also read configuration files +C (used by indexer and Web font-end) +and configuration file specified by C +which describes databases to be indexed. + =cut sub new { @@ -34,17 +43,9 @@ # output codepage $self->{'code_page'} = 'ISO-8859-2' if (! $self->{'code_page'}); - return $self; -} - -=head2 read_global_config - -Read global configuration (used by indexer and Web font-end) - -=cut - -sub read_global_config { - my $self = shift; + # + # read global.conf + # $self->{global_config_file} = new Config::IniFiles( -file => 'global.conf' ) || croak "can't open 'global.conf'"; @@ -60,17 +61,9 @@ $self->{global_config}->{$var} = $self->{global_config_file}->val('global', $var); } - return $self; -} - -=head2 read_indexer_config - -Read indexer configuration (specify databases, types etc.) - -=cut - -sub read_indexer_config { - my $self = shift; + # + # read indexer config file + # $self->{indexer_config_file} = new Config::IniFiles( -file => $self->{config_file} ) || croak "can't open '$self->{config_file}'"; @@ -102,8 +95,8 @@ By default, ISIS code page is assumed to be C<852>. -If C is set, it will read just 500 records from -database in example above. +If optional parametar C is set, it will read just 500 records +from database in example above. Returns number of last record read into memory (size of database, really). @@ -127,6 +120,8 @@ croak "need filename" if (! $arg->{'filename'}); my $code_page = $arg->{'code_page'} || '852'; + use OpenIsis; + #$self->{'isis_code_page'} = $code_page; # create Text::Iconv object @@ -136,6 +131,8 @@ my $maxmfn = OpenIsis::maxRowid( $isis_db ) || 1; + $maxmfn = $self->{limit_mfn} if ($self->{limit_mfn}); + # read database for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) { @@ -163,30 +160,105 @@ } # create lookup + my $rec = $self->{'data'}->{$mfn}; + $self->create_lookup($rec, @{$arg->{'lookup'}}); - foreach my $i (@{$arg->{lookup}}) { - my $rec = $self->{'data'}->{$mfn}; - if ($i->{'eval'}) { - my $eval = $self->fill_in($rec,$i->{'eval'}); - my $key = $self->fill_in($rec,$i->{'key'}); - my @val = $self->fill_in($rec,$i->{'val'}); - if ($key && @val && eval $eval) { - push @{$self->{'lookup'}->{$key}}, @val; - } - } else { - my $key = $self->fill_in($rec,$i->{'key'}); - my @val = $self->fill_in($rec,$i->{'val'}); - if ($key && @val) { - push @{$self->{'lookup'}->{$key}}, @val; - } - } - } } + $self->{'current_mfn'} = 1; + # store max mfn and return it. return $self->{'max_mfn'} = $maxmfn; } +=head2 fetch_rec + +Fetch next record from database. It will also display progress bar (once +it's implemented, that is). + + my $rec = $webpac->fetch_rec; + +=cut + +sub fetch_rec { + my $self = shift; + + my $mfn = $self->{'current_mfn'}++ || confess "it seems that you didn't load database!"; + + if ($mfn > $self->{'max_mfn'}) { + $self->{'current_mfn'} = $self->{'max_mfn'}; + return; + } + + return $self->{'data'}->{$mfn}; +} + +=head2 create_lookup + +Create lookup from record using lookup definition. + +=cut + +sub create_lookup { + my $self = shift; + + my $rec = shift || confess "need record to create lookup"; + confess("need HASH as first argument!") if ($rec !~ /HASH/o); + + foreach my $i (@_) { + if ($i->{'eval'}) { + my $eval = $self->fill_in($rec,$i->{'eval'}); + my $key = $self->fill_in($rec,$i->{'key'}); + my @val = $self->fill_in($rec,$i->{'val'}); + if ($key && @val && eval $eval) { + push @{$self->{'lookup'}->{$key}}, @val; + } + } else { + my $key = $self->fill_in($rec,$i->{'key'}); + my @val = $self->fill_in($rec,$i->{'val'}); + if ($key && @val) { + push @{$self->{'lookup'}->{$key}}, @val; + } + } + } +} + +=head2 get_data + +Returns value from record. + + $self->get_data(\$rec,$f,$sf,$i,\$found); + +Arguments are: +record reference C<$rec>, +field C<$f>, +optional subfiled C<$sf>, +index for repeatable values C<$i>. + +Optinal variable C<$found> will be incremeted if thre +is field. + +Returns value or empty string. + +=cut + +sub get_data { + my $self = shift; + + my ($rec,$f,$sf,$i,$found) = @_; + if ($$rec->{$f}) { + if ($sf && $$rec->{$f}->[$i]->{$sf}) { + $$found++ if (defined($$found)); + return $$rec->{$f}->[$i]->{$sf}; + } elsif ($$rec->{$f}->[$i]) { + $$found++ if (defined($$found)); + return $$rec->{$f}->[$i]; + } + } else { + return ''; + } +} + =head2 fill_in Workhourse of all: takes record from in-memory structure of database and @@ -196,7 +268,14 @@ $webpac->fill_in($rec,'v250^a'); Optional argument is ordinal number for repeatable fields. By default, -it's assume to be first repeatable field. +it's assume to be first repeatable field (fields are perl array, so first +element is 0). +Following example will read second value from repeatable field. + + $webpac->fill_in($rec,'Title: v250^a',1); + +This function B perform parsing of format to inteligenty skip +delimiters before fields which aren't used. =cut @@ -209,40 +288,28 @@ my $i = shift || 0; # FIXME remove for speedup? - if ($rec !~ /HASH/) { - confess("need HASH as first argument!"); - } + confess("need HASH as first argument!") if ($rec !~ /HASH/o); my $found = 0; - # get field with subfield - sub get_sf { - my ($found,$rec,$f,$sf,$i) = @_; - if ($$rec->{$f} && $$rec->{$f}->[$i]->{$sf}) { - $$found++; - return $$rec->{$f}->[$i]->{$sf}; - } else { - return ''; - } - } - - # get field (without subfield) - sub get_nosf { - my ($found,$rec,$f,$i) = @_; - if ($$rec->{$f} && $$rec->{$f}->[$i]) { - $$found++; - return $$rec->{$f}->[$i]; - } else { - return ''; - } - } + my $eval_code; + # remove eval{...} from beginning + $eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s); # do actual replacement of placeholders - $format =~ s/v(\d+)\^(\w)/get_sf(\$found,\$rec,$1,$2,$i)/ges; - $format =~ s/v(\d+)/get_nosf(\$found,\$rec,$1,$i)/ges; + $format =~ s/v(\d+)(?:\^(\w))*/$self->get_data(\$rec,$1,$2,$i,\$found)/ges; - if ($found) { - return $format; + if ($found) { + if ($eval_code) { + my $eval = $self->fill_in($rec,$eval_code,$i); + return if (! eval $eval); + } + # do we have lookups? + if ($format =~ /\[[^\[\]]+\]/o) { + return $self->lookup($format); + } else { + return $format; + } } else { return; } @@ -250,10 +317,12 @@ =head2 lookup -This function will perform lookups on format supplied to it. +Perform lookups on format supplied to it. my $txt = $self->lookup('[v900]'); +Lookups can be nested (like C<[d:[a:[v900]]]>). + =cut sub lookup { @@ -261,27 +330,27 @@ my $tmp = shift || confess "need format"; - if ($tmp =~ /\[[^\[\]]+\]/) { + if ($tmp =~ /\[[^\[\]]+\]/o) { my @in = ( $tmp ); -print "##lookup $tmp\n"; +#print "##lookup $tmp\n"; my @out; while (my $f = shift @in) { if ($f =~ /\[([^\[\]]+)\]/) { my $k = $1; if ($self->{'lookup'}->{$k}) { -print "## lookup key = $k\n"; +#print "## lookup key = $k\n"; foreach my $nv (@{$self->{'lookup'}->{$k}}) { my $tmp2 = $f; $tmp2 =~ s/\[$k\]/$nv/g; push @in, $tmp2; -print "## lookup in => $tmp2\n"; +#print "## lookup in => $tmp2\n"; } } else { undef $f; } } elsif ($f) { push @out, $f; -print "## lookup out => $f\n"; +#print "## lookup out => $f\n"; } } return @out; @@ -290,4 +359,64 @@ } } +=head2 parse + +Perform smart parsing of string, skipping delimiters for fields which aren't +defined. It can also eval code in format starting with C and +return output or nothing depending on eval code. + + $webpac->parse($rec,'eval{"v901^a" eq "Deskriptor"}descriptor: v250^a', $i); + +=cut + +sub parse { + my $self = shift; + + my ($rec, $format, $i) = @_; + + confess("need HASH as first argument!") if ($rec !~ /HASH/o); + + $i = 0 if (! $i); + + my @out; + + my $eval_code; + # remove eval{...} from beginning + $eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s); + + my $prefix; + my $all_found=0; + +#print "## $format\n"; + while ($format =~ s/^(.*?)v(\d+)(?:\^(\w))*//s) { +#print "## [ $1 | $2 | $3 ] $format\n"; + + my $del = $1 || ''; + $prefix ||= $del if ($all_found == 0); + + my $found = 0; + my $tmp = $self->get_data(\$rec,$2,$3,$i,\$found); + + if ($found) { + push @out, $del; + push @out, $tmp; + $all_found += $found; + } + } + + return if (! $all_found); + + my $out = join('',@out) . $format; + + # add prefix if not there + $out = $prefix . $out if ($out !~ m/^\Q$prefix\E/); + + if ($eval_code) { + my $eval = $self->fill_in($rec,$eval_code,$i); + return if (! eval $eval); + } + + return $out; +} + 1;