--- trunk2/lib/WebPAC.pm 2004/06/16 11:41:50 355 +++ trunk2/lib/WebPAC.pm 2004/06/16 20:05:19 363 @@ -3,6 +3,9 @@ use Carp; use Text::Iconv; use Config::IniFiles; +use XML::Simple; + +use Data::Dumper; =head1 NAME @@ -32,6 +35,15 @@ =cut +# mapping between data type and tag which specify +# format in XML file +my %type2tag = ( + 'isis' => 'isis', +# 'excel' => 'column', +# 'marc' => 'marc', +# 'feed' => 'feed' +); + sub new { my $class = shift; my $self = {@_}; @@ -65,18 +77,6 @@ $self->{indexer_config_file} = new Config::IniFiles( -file => $self->{config_file} ) || croak "can't open '$self->{config_file}'"; - # read global config parametars - foreach my $var (qw( - dbi_dbd - dbi_dsn - dbi_user - dbi_passwd - show_progress - my_unac_filter - )) { - $self->{global_config}->{$var} = $self->{global_config_file}->val('global', $var); - } - return $self; } @@ -125,10 +125,16 @@ # create Text::Iconv object my $cp = Text::Iconv->new($code_page,$self->{'code_page'}); + print STDERR "reading ISIS database '",$arg->{'filename'},"'\n" if ($self->{'debug'}); + my $isis_db = OpenIsis::open($arg->{'filename'}); my $maxmfn = OpenIsis::maxRowid( $isis_db ) || 1; + $maxmfn = $self->{limit_mfn} if ($self->{limit_mfn}); + + print STDERR "processing $maxmfn records...\n" if ($self->{'debug'}); + # read database for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) { @@ -161,10 +167,67 @@ } + $self->{'current_mfn'} = 1; + # store max mfn and return it. return $self->{'max_mfn'} = $maxmfn; } +=head2 fetch_rec + +Fetch next record from database. It will also display progress bar (once +it's implemented, that is). + + my $rec = $webpac->fetch_rec; + +=cut + +sub fetch_rec { + my $self = shift; + + my $mfn = $self->{'current_mfn'}++ || confess "it seems that you didn't load database!"; + + if ($mfn > $self->{'max_mfn'}) { + $self->{'current_mfn'} = $self->{'max_mfn'}; + return; + } + + return $self->{'data'}->{$mfn}; +} + +=head2 open_import_xml + +Read file from C directory and parse it. + + $webpac->open_import_xml(type => 'isis'); + +=cut + +sub open_import_xml { + my $self = shift; + + my $arg = {@_}; + confess "need type to load file from import_xml/" if (! $arg->{'type'}); + + my $type = $arg->{'type'}; + + my $type_base = $type; + $type_base =~ s/_.*$//g; + + my $f = "./import_xml/$type.xml"; + confess "import_xml file '$f' doesn't exist!" if (! -e "$f"); + + print STDERR "reading '$f'\n" if ($self->{'debug'}); + + $self->{'import_xml'} = XMLin($f, + ForceArray => [ $type2tag{$type_base}, 'config', 'format' ], + ForceContent => 1 + ); + + print Dumper($self->{'import_xml'}); + +} + =head2 create_lookup Create lookup from record using lookup definition. @@ -195,6 +258,42 @@ } } +=head2 get_data + +Returns value from record. + + $self->get_data(\$rec,$f,$sf,$i,\$found); + +Arguments are: +record reference C<$rec>, +field C<$f>, +optional subfiled C<$sf>, +index for repeatable values C<$i>. + +Optinal variable C<$found> will be incremeted if thre +is field. + +Returns value or empty string. + +=cut + +sub get_data { + my $self = shift; + + my ($rec,$f,$sf,$i,$found) = @_; + if ($$rec->{$f}) { + if ($sf && $$rec->{$f}->[$i]->{$sf}) { + $$found++ if (defined($$found)); + return $$rec->{$f}->[$i]->{$sf}; + } elsif ($$rec->{$f}->[$i]) { + $$found++ if (defined($$found)); + return $$rec->{$f}->[$i]; + } + } else { + return ''; + } +} + =head2 fill_in Workhourse of all: takes record from in-memory structure of database and @@ -228,33 +327,18 @@ my $found = 0; - # get field with subfield - sub get_sf { - my ($found,$rec,$f,$sf,$i) = @_; - if ($$rec->{$f} && $$rec->{$f}->[$i]->{$sf}) { - $$found++; - return $$rec->{$f}->[$i]->{$sf}; - } else { - return ''; - } - } - - # get field (without subfield) - sub get_nosf { - my ($found,$rec,$f,$i) = @_; - if ($$rec->{$f} && $$rec->{$f}->[$i]) { - $$found++; - return $$rec->{$f}->[$i]; - } else { - return ''; - } - } + my $eval_code; + # remove eval{...} from beginning + $eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s); # do actual replacement of placeholders - $format =~ s/v(\d+)\^(\w)/get_sf(\$found,\$rec,$1,$2,$i)/ges; - $format =~ s/v(\d+)/get_nosf(\$found,\$rec,$1,$i)/ges; + $format =~ s/v(\d+)(?:\^(\w))*/$self->get_data(\$rec,$1,$2,$i,\$found)/ges; if ($found) { + if ($eval_code) { + my $eval = $self->fill_in($rec,$eval_code,$i); + return if (! eval $eval); + } # do we have lookups? if ($format =~ /\[[^\[\]]+\]/o) { return $self->lookup($format); @@ -310,4 +394,64 @@ } } +=head2 parse + +Perform smart parsing of string, skipping delimiters for fields which aren't +defined. It can also eval code in format starting with C and +return output or nothing depending on eval code. + + $webpac->parse($rec,'eval{"v901^a" eq "Deskriptor"}descriptor: v250^a', $i); + +=cut + +sub parse { + my $self = shift; + + my ($rec, $format, $i) = @_; + + confess("need HASH as first argument!") if ($rec !~ /HASH/o); + + $i = 0 if (! $i); + + my @out; + + my $eval_code; + # remove eval{...} from beginning + $eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s); + + my $prefix; + my $all_found=0; + +#print "## $format\n"; + while ($format =~ s/^(.*?)v(\d+)(?:\^(\w))*//s) { +#print "## [ $1 | $2 | $3 ] $format\n"; + + my $del = $1 || ''; + $prefix ||= $del if ($all_found == 0); + + my $found = 0; + my $tmp = $self->get_data(\$rec,$2,$3,$i,\$found); + + if ($found) { + push @out, $del; + push @out, $tmp; + $all_found += $found; + } + } + + return if (! $all_found); + + my $out = join('',@out) . $format; + + # add prefix if not there + $out = $prefix . $out if ($out !~ m/^\Q$prefix\E/); + + if ($eval_code) { + my $eval = $self->fill_in($rec,$eval_code,$i); + return if (! eval $eval); + } + + return $out; +} + 1;