--- trunk2/lib/WebPAC.pm 2004/06/16 14:31:33 358
+++ trunk2/lib/WebPAC.pm 2004/06/17 01:44:25 366
@@ -3,6 +3,7 @@
use Carp;
use Text::Iconv;
use Config::IniFiles;
+use XML::Simple;
use Data::Dumper;
@@ -34,6 +35,15 @@
=cut
+# mapping between data type and tag which specify
+# format in XML file
+my %type2tag = (
+ 'isis' => 'isis',
+# 'excel' => 'column',
+# 'marc' => 'marc',
+# 'feed' => 'feed'
+);
+
sub new {
my $class = shift;
my $self = {@_};
@@ -67,18 +77,7 @@
$self->{indexer_config_file} = new Config::IniFiles( -file => $self->{config_file} ) || croak "can't open '$self->{config_file}'";
- # read global config parametars
- foreach my $var (qw(
- dbi_dbd
- dbi_dsn
- dbi_user
- dbi_passwd
- show_progress
- my_unac_filter
- )) {
- $self->{global_config}->{$var} = $self->{global_config_file}->val('global', $var);
- }
-
+ $self->{'utf2cp'} = Text::Iconv->new('UTF-8' ,$self->{'code_page'});
return $self;
}
@@ -127,12 +126,16 @@
# create Text::Iconv object
my $cp = Text::Iconv->new($code_page,$self->{'code_page'});
+ print STDERR "reading ISIS database '",$arg->{'filename'},"'\n" if ($self->{'debug'});
+
my $isis_db = OpenIsis::open($arg->{'filename'});
my $maxmfn = OpenIsis::maxRowid( $isis_db ) || 1;
$maxmfn = $self->{limit_mfn} if ($self->{limit_mfn});
+ print STDERR "processing $maxmfn records...\n" if ($self->{'debug'});
+
# read database
for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) {
@@ -165,10 +168,71 @@
}
+ $self->{'current_mfn'} = 1;
+
# store max mfn and return it.
return $self->{'max_mfn'} = $maxmfn;
}
+=head2 fetch_rec
+
+Fetch next record from database. It will also display progress bar (once
+it's implemented, that is).
+
+ my $rec = $webpac->fetch_rec;
+
+=cut
+
+sub fetch_rec {
+ my $self = shift;
+
+ my $mfn = $self->{'current_mfn'}++ || confess "it seems that you didn't load database!";
+
+ if ($mfn > $self->{'max_mfn'}) {
+ $self->{'current_mfn'} = $self->{'max_mfn'};
+ return;
+ }
+
+ return $self->{'data'}->{$mfn};
+}
+
+=head2 open_import_xml
+
+Read file from C directory and parse it.
+
+ $webpac->open_import_xml(type => 'isis');
+
+=cut
+
+sub open_import_xml {
+ my $self = shift;
+
+ my $arg = {@_};
+ confess "need type to load file from import_xml/" if (! $arg->{'type'});
+
+ $self->{'type'} = $arg->{'type'};
+
+ my $type_base = $arg->{'type'};
+ $type_base =~ s/_.*$//g;
+
+ $self->{'tag'} = $type2tag{$type_base};
+
+ print STDERR "using type ",$self->{'type'}," tag ",$self->{'tag'},"\n" if ($self->{'debug'});
+
+ my $f = "./import_xml/".$self->{'type'}.".xml";
+ confess "import_xml file '$f' doesn't exist!" if (! -e "$f");
+
+ print STDERR "reading '$f'\n" if ($self->{'debug'});
+
+ $self->{'import_xml'} = XMLin($f,
+ ForceArray => [ $self->{'tag'}, 'config', 'format' ],
+ ForceContent => 1
+ );
+
+ print Dumper($self->{'import_xml'});
+
+}
+
=head2 create_lookup
Create lookup from record using lookup definition.
@@ -228,7 +292,17 @@
return $$rec->{$f}->[$i]->{$sf};
} elsif ($$rec->{$f}->[$i]) {
$$found++ if (defined($$found));
- return $$rec->{$f}->[$i];
+ # it still might have subfield, just
+ # not specified, so we'll dump all
+ if ($$rec->{$f}->[$i] =~ /HASH/o) {
+ my $out;
+ foreach my $k (keys %{$$rec->{$f}->[$i]}) {
+ $out .= $$rec->{$f}->[$i]->{$k}." ";
+ }
+ return $out;
+ } else {
+ return $$rec->{$f}->[$i];
+ }
}
} else {
return '';
@@ -268,10 +342,18 @@
my $found = 0;
+ my $eval_code;
+ # remove eval{...} from beginning
+ $eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s);
+
# do actual replacement of placeholders
$format =~ s/v(\d+)(?:\^(\w))*/$self->get_data(\$rec,$1,$2,$i,\$found)/ges;
if ($found) {
+ if ($eval_code) {
+ my $eval = $self->fill_in($rec,$eval_code,$i);
+ return if (! eval $eval);
+ }
# do we have lookups?
if ($format =~ /\[[^\[\]]+\]/o) {
return $self->lookup($format);
@@ -340,12 +422,17 @@
sub parse {
my $self = shift;
- my ($rec, $format, $i) = @_;
+ my ($rec, $format_utf8, $i) = @_;
+
+ return if (! $format_utf8);
confess("need HASH as first argument!") if ($rec !~ /HASH/o);
+ confess("need utf2cp Text::Iconv object!") if (! $self->{'utf2cp'});
$i = 0 if (! $i);
+ my $format = $self->{'utf2cp'}->convert($format_utf8) || confess("can't convert '$format_utf8' from UTF-8 to ",$self->{'code_page'});
+
my @out;
my $eval_code;
@@ -355,12 +442,12 @@
my $prefix;
my $all_found=0;
-print "## $format\n";
+#print "## $format\n";
while ($format =~ s/^(.*?)v(\d+)(?:\^(\w))*//s) {
-print "## [ $1 | $2 | $3 ] $format\n";
+#print "## [ $1 | $2 | $3 ] $format\n";
my $del = $1 || '';
- $prefix ||= $del;
+ $prefix ||= $del if ($all_found == 0);
my $found = 0;
my $tmp = $self->get_data(\$rec,$2,$3,$i,\$found);
@@ -374,14 +461,86 @@
return if (! $all_found);
- print Dumper($prefix, \@out);
-
my $out = join('',@out) . $format;
# add prefix if not there
$out = $prefix . $out if ($out !~ m/^\Q$prefix\E/);
+ if ($eval_code) {
+ my $eval = $self->fill_in($rec,$eval_code,$i);
+ return if (! eval $eval);
+ }
+
return $out;
}
+=head2 data_structure
+
+Create in-memory data structure which represents layout from C.
+It is used later to produce output.
+
+ my $ds = $webpac->data_structure($rec);
+
+=cut
+
+# private method _sort_by_order
+# sort subrouting using order="" attribute
+sub _sort_by_order {
+ my $self = shift;
+
+ my $va = $self->{'import_xml'}->{'indexer'}->{$a}->{'order'} ||
+ $self->{'import_xml'}->{'indexer'}->{$a};
+ my $vb = $self->{'import_xml'}->{'indexer'}->{$b}->{'order'} ||
+ $self->{'import_xml'}->{'indexer'}->{$b};
+
+ return $va <=> $vb;
+}
+
+sub data_structure {
+ my $self = shift;
+
+ my $rec = shift;
+ confess("need HASH as first argument!") if ($rec !~ /HASH/o);
+
+ my @sorted_tags;
+ if ($self->{tags_by_order}) {
+ @sorted_tags = @{$self->{tags_by_order}};
+ } else {
+ @sorted_tags = sort { $self->_sort_by_order } keys %{$self->{'import_xml'}->{'indexer'}};
+ $self->{tags_by_order} = \@sorted_tags;
+ }
+
+ my $ds;
+
+ foreach my $field (@sorted_tags) {
+
+ my $row;
+ my $i = 0;
+
+#print "field $field [",$self->{'tag'},"] = ",Dumper($self->{'import_xml'}->{'indexer'}->{$field}->{$self->{'tag'}});
+
+ foreach my $tag (@{$self->{'import_xml'}->{'indexer'}->{$field}->{$self->{'tag'}}}) {
+
+ my $v = $self->parse($rec,$tag->{'content'},$i);
+print "## $i:",$tag->{'content'}," = ",($v || 'null'),"\n";
+
+ next if (!$v || $v && $v eq '');
+
+ # does tag have type?
+ if ($tag->{'type'}) {
+ push @{$row->{$tag->{'type'}}}, $v;
+ } else {
+ push @{$row->{'display'}}, $v;
+ push @{$row->{'swish'}}, $v;
+ }
+ }
+
+ push @{$ds->{$field}}, $row if ($row);
+
+ }
+
+ print Dumper($ds);
+
+}
+
1;