--- trunk2/lib/WebPAC.pm 2004/06/16 14:31:33 358 +++ trunk2/lib/WebPAC.pm 2004/06/17 01:44:25 366 @@ -3,6 +3,7 @@ use Carp; use Text::Iconv; use Config::IniFiles; +use XML::Simple; use Data::Dumper; @@ -34,6 +35,15 @@ =cut +# mapping between data type and tag which specify +# format in XML file +my %type2tag = ( + 'isis' => 'isis', +# 'excel' => 'column', +# 'marc' => 'marc', +# 'feed' => 'feed' +); + sub new { my $class = shift; my $self = {@_}; @@ -67,18 +77,7 @@ $self->{indexer_config_file} = new Config::IniFiles( -file => $self->{config_file} ) || croak "can't open '$self->{config_file}'"; - # read global config parametars - foreach my $var (qw( - dbi_dbd - dbi_dsn - dbi_user - dbi_passwd - show_progress - my_unac_filter - )) { - $self->{global_config}->{$var} = $self->{global_config_file}->val('global', $var); - } - + $self->{'utf2cp'} = Text::Iconv->new('UTF-8' ,$self->{'code_page'}); return $self; } @@ -127,12 +126,16 @@ # create Text::Iconv object my $cp = Text::Iconv->new($code_page,$self->{'code_page'}); + print STDERR "reading ISIS database '",$arg->{'filename'},"'\n" if ($self->{'debug'}); + my $isis_db = OpenIsis::open($arg->{'filename'}); my $maxmfn = OpenIsis::maxRowid( $isis_db ) || 1; $maxmfn = $self->{limit_mfn} if ($self->{limit_mfn}); + print STDERR "processing $maxmfn records...\n" if ($self->{'debug'}); + # read database for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) { @@ -165,10 +168,71 @@ } + $self->{'current_mfn'} = 1; + # store max mfn and return it. return $self->{'max_mfn'} = $maxmfn; } +=head2 fetch_rec + +Fetch next record from database. It will also display progress bar (once +it's implemented, that is). + + my $rec = $webpac->fetch_rec; + +=cut + +sub fetch_rec { + my $self = shift; + + my $mfn = $self->{'current_mfn'}++ || confess "it seems that you didn't load database!"; + + if ($mfn > $self->{'max_mfn'}) { + $self->{'current_mfn'} = $self->{'max_mfn'}; + return; + } + + return $self->{'data'}->{$mfn}; +} + +=head2 open_import_xml + +Read file from C directory and parse it. + + $webpac->open_import_xml(type => 'isis'); + +=cut + +sub open_import_xml { + my $self = shift; + + my $arg = {@_}; + confess "need type to load file from import_xml/" if (! $arg->{'type'}); + + $self->{'type'} = $arg->{'type'}; + + my $type_base = $arg->{'type'}; + $type_base =~ s/_.*$//g; + + $self->{'tag'} = $type2tag{$type_base}; + + print STDERR "using type ",$self->{'type'}," tag ",$self->{'tag'},"\n" if ($self->{'debug'}); + + my $f = "./import_xml/".$self->{'type'}.".xml"; + confess "import_xml file '$f' doesn't exist!" if (! -e "$f"); + + print STDERR "reading '$f'\n" if ($self->{'debug'}); + + $self->{'import_xml'} = XMLin($f, + ForceArray => [ $self->{'tag'}, 'config', 'format' ], + ForceContent => 1 + ); + + print Dumper($self->{'import_xml'}); + +} + =head2 create_lookup Create lookup from record using lookup definition. @@ -228,7 +292,17 @@ return $$rec->{$f}->[$i]->{$sf}; } elsif ($$rec->{$f}->[$i]) { $$found++ if (defined($$found)); - return $$rec->{$f}->[$i]; + # it still might have subfield, just + # not specified, so we'll dump all + if ($$rec->{$f}->[$i] =~ /HASH/o) { + my $out; + foreach my $k (keys %{$$rec->{$f}->[$i]}) { + $out .= $$rec->{$f}->[$i]->{$k}." "; + } + return $out; + } else { + return $$rec->{$f}->[$i]; + } } } else { return ''; @@ -268,10 +342,18 @@ my $found = 0; + my $eval_code; + # remove eval{...} from beginning + $eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s); + # do actual replacement of placeholders $format =~ s/v(\d+)(?:\^(\w))*/$self->get_data(\$rec,$1,$2,$i,\$found)/ges; if ($found) { + if ($eval_code) { + my $eval = $self->fill_in($rec,$eval_code,$i); + return if (! eval $eval); + } # do we have lookups? if ($format =~ /\[[^\[\]]+\]/o) { return $self->lookup($format); @@ -340,12 +422,17 @@ sub parse { my $self = shift; - my ($rec, $format, $i) = @_; + my ($rec, $format_utf8, $i) = @_; + + return if (! $format_utf8); confess("need HASH as first argument!") if ($rec !~ /HASH/o); + confess("need utf2cp Text::Iconv object!") if (! $self->{'utf2cp'}); $i = 0 if (! $i); + my $format = $self->{'utf2cp'}->convert($format_utf8) || confess("can't convert '$format_utf8' from UTF-8 to ",$self->{'code_page'}); + my @out; my $eval_code; @@ -355,12 +442,12 @@ my $prefix; my $all_found=0; -print "## $format\n"; +#print "## $format\n"; while ($format =~ s/^(.*?)v(\d+)(?:\^(\w))*//s) { -print "## [ $1 | $2 | $3 ] $format\n"; +#print "## [ $1 | $2 | $3 ] $format\n"; my $del = $1 || ''; - $prefix ||= $del; + $prefix ||= $del if ($all_found == 0); my $found = 0; my $tmp = $self->get_data(\$rec,$2,$3,$i,\$found); @@ -374,14 +461,86 @@ return if (! $all_found); - print Dumper($prefix, \@out); - my $out = join('',@out) . $format; # add prefix if not there $out = $prefix . $out if ($out !~ m/^\Q$prefix\E/); + if ($eval_code) { + my $eval = $self->fill_in($rec,$eval_code,$i); + return if (! eval $eval); + } + return $out; } +=head2 data_structure + +Create in-memory data structure which represents layout from C. +It is used later to produce output. + + my $ds = $webpac->data_structure($rec); + +=cut + +# private method _sort_by_order +# sort subrouting using order="" attribute +sub _sort_by_order { + my $self = shift; + + my $va = $self->{'import_xml'}->{'indexer'}->{$a}->{'order'} || + $self->{'import_xml'}->{'indexer'}->{$a}; + my $vb = $self->{'import_xml'}->{'indexer'}->{$b}->{'order'} || + $self->{'import_xml'}->{'indexer'}->{$b}; + + return $va <=> $vb; +} + +sub data_structure { + my $self = shift; + + my $rec = shift; + confess("need HASH as first argument!") if ($rec !~ /HASH/o); + + my @sorted_tags; + if ($self->{tags_by_order}) { + @sorted_tags = @{$self->{tags_by_order}}; + } else { + @sorted_tags = sort { $self->_sort_by_order } keys %{$self->{'import_xml'}->{'indexer'}}; + $self->{tags_by_order} = \@sorted_tags; + } + + my $ds; + + foreach my $field (@sorted_tags) { + + my $row; + my $i = 0; + +#print "field $field [",$self->{'tag'},"] = ",Dumper($self->{'import_xml'}->{'indexer'}->{$field}->{$self->{'tag'}}); + + foreach my $tag (@{$self->{'import_xml'}->{'indexer'}->{$field}->{$self->{'tag'}}}) { + + my $v = $self->parse($rec,$tag->{'content'},$i); +print "## $i:",$tag->{'content'}," = ",($v || 'null'),"\n"; + + next if (!$v || $v && $v eq ''); + + # does tag have type? + if ($tag->{'type'}) { + push @{$row->{$tag->{'type'}}}, $v; + } else { + push @{$row->{'display'}}, $v; + push @{$row->{'swish'}}, $v; + } + } + + push @{$ds->{$field}}, $row if ($row); + + } + + print Dumper($ds); + +} + 1;