--- trunk2/lib/WebPAC.pm 2004/06/16 16:50:30 362 +++ trunk2/lib/WebPAC.pm 2004/06/17 01:44:25 366 @@ -3,6 +3,7 @@ use Carp; use Text::Iconv; use Config::IniFiles; +use XML::Simple; use Data::Dumper; @@ -34,6 +35,15 @@ =cut +# mapping between data type and tag which specify +# format in XML file +my %type2tag = ( + 'isis' => 'isis', +# 'excel' => 'column', +# 'marc' => 'marc', +# 'feed' => 'feed' +); + sub new { my $class = shift; my $self = {@_}; @@ -67,18 +77,7 @@ $self->{indexer_config_file} = new Config::IniFiles( -file => $self->{config_file} ) || croak "can't open '$self->{config_file}'"; - # read global config parametars - foreach my $var (qw( - dbi_dbd - dbi_dsn - dbi_user - dbi_passwd - show_progress - my_unac_filter - )) { - $self->{global_config}->{$var} = $self->{global_config_file}->val('global', $var); - } - + $self->{'utf2cp'} = Text::Iconv->new('UTF-8' ,$self->{'code_page'}); return $self; } @@ -127,12 +126,16 @@ # create Text::Iconv object my $cp = Text::Iconv->new($code_page,$self->{'code_page'}); + print STDERR "reading ISIS database '",$arg->{'filename'},"'\n" if ($self->{'debug'}); + my $isis_db = OpenIsis::open($arg->{'filename'}); my $maxmfn = OpenIsis::maxRowid( $isis_db ) || 1; $maxmfn = $self->{limit_mfn} if ($self->{limit_mfn}); + print STDERR "processing $maxmfn records...\n" if ($self->{'debug'}); + # read database for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) { @@ -193,6 +196,43 @@ return $self->{'data'}->{$mfn}; } +=head2 open_import_xml + +Read file from C directory and parse it. + + $webpac->open_import_xml(type => 'isis'); + +=cut + +sub open_import_xml { + my $self = shift; + + my $arg = {@_}; + confess "need type to load file from import_xml/" if (! $arg->{'type'}); + + $self->{'type'} = $arg->{'type'}; + + my $type_base = $arg->{'type'}; + $type_base =~ s/_.*$//g; + + $self->{'tag'} = $type2tag{$type_base}; + + print STDERR "using type ",$self->{'type'}," tag ",$self->{'tag'},"\n" if ($self->{'debug'}); + + my $f = "./import_xml/".$self->{'type'}.".xml"; + confess "import_xml file '$f' doesn't exist!" if (! -e "$f"); + + print STDERR "reading '$f'\n" if ($self->{'debug'}); + + $self->{'import_xml'} = XMLin($f, + ForceArray => [ $self->{'tag'}, 'config', 'format' ], + ForceContent => 1 + ); + + print Dumper($self->{'import_xml'}); + +} + =head2 create_lookup Create lookup from record using lookup definition. @@ -252,7 +292,17 @@ return $$rec->{$f}->[$i]->{$sf}; } elsif ($$rec->{$f}->[$i]) { $$found++ if (defined($$found)); - return $$rec->{$f}->[$i]; + # it still might have subfield, just + # not specified, so we'll dump all + if ($$rec->{$f}->[$i] =~ /HASH/o) { + my $out; + foreach my $k (keys %{$$rec->{$f}->[$i]}) { + $out .= $$rec->{$f}->[$i]->{$k}." "; + } + return $out; + } else { + return $$rec->{$f}->[$i]; + } } } else { return ''; @@ -372,12 +422,17 @@ sub parse { my $self = shift; - my ($rec, $format, $i) = @_; + my ($rec, $format_utf8, $i) = @_; + + return if (! $format_utf8); confess("need HASH as first argument!") if ($rec !~ /HASH/o); + confess("need utf2cp Text::Iconv object!") if (! $self->{'utf2cp'}); $i = 0 if (! $i); + my $format = $self->{'utf2cp'}->convert($format_utf8) || confess("can't convert '$format_utf8' from UTF-8 to ",$self->{'code_page'}); + my @out; my $eval_code; @@ -419,4 +474,73 @@ return $out; } +=head2 data_structure + +Create in-memory data structure which represents layout from C. +It is used later to produce output. + + my $ds = $webpac->data_structure($rec); + +=cut + +# private method _sort_by_order +# sort subrouting using order="" attribute +sub _sort_by_order { + my $self = shift; + + my $va = $self->{'import_xml'}->{'indexer'}->{$a}->{'order'} || + $self->{'import_xml'}->{'indexer'}->{$a}; + my $vb = $self->{'import_xml'}->{'indexer'}->{$b}->{'order'} || + $self->{'import_xml'}->{'indexer'}->{$b}; + + return $va <=> $vb; +} + +sub data_structure { + my $self = shift; + + my $rec = shift; + confess("need HASH as first argument!") if ($rec !~ /HASH/o); + + my @sorted_tags; + if ($self->{tags_by_order}) { + @sorted_tags = @{$self->{tags_by_order}}; + } else { + @sorted_tags = sort { $self->_sort_by_order } keys %{$self->{'import_xml'}->{'indexer'}}; + $self->{tags_by_order} = \@sorted_tags; + } + + my $ds; + + foreach my $field (@sorted_tags) { + + my $row; + my $i = 0; + +#print "field $field [",$self->{'tag'},"] = ",Dumper($self->{'import_xml'}->{'indexer'}->{$field}->{$self->{'tag'}}); + + foreach my $tag (@{$self->{'import_xml'}->{'indexer'}->{$field}->{$self->{'tag'}}}) { + + my $v = $self->parse($rec,$tag->{'content'},$i); +print "## $i:",$tag->{'content'}," = ",($v || 'null'),"\n"; + + next if (!$v || $v && $v eq ''); + + # does tag have type? + if ($tag->{'type'}) { + push @{$row->{$tag->{'type'}}}, $v; + } else { + push @{$row->{'display'}}, $v; + push @{$row->{'swish'}}, $v; + } + } + + push @{$ds->{$field}}, $row if ($row); + + } + + print Dumper($ds); + +} + 1;