--- trunk2/lib/WebPAC.pm 2004/06/16 11:41:50 355
+++ trunk2/lib/WebPAC.pm 2004/06/16 20:05:19 363
@@ -3,6 +3,9 @@
use Carp;
use Text::Iconv;
use Config::IniFiles;
+use XML::Simple;
+
+use Data::Dumper;
=head1 NAME
@@ -32,6 +35,15 @@
=cut
+# mapping between data type and tag which specify
+# format in XML file
+my %type2tag = (
+ 'isis' => 'isis',
+# 'excel' => 'column',
+# 'marc' => 'marc',
+# 'feed' => 'feed'
+);
+
sub new {
my $class = shift;
my $self = {@_};
@@ -65,18 +77,6 @@
$self->{indexer_config_file} = new Config::IniFiles( -file => $self->{config_file} ) || croak "can't open '$self->{config_file}'";
- # read global config parametars
- foreach my $var (qw(
- dbi_dbd
- dbi_dsn
- dbi_user
- dbi_passwd
- show_progress
- my_unac_filter
- )) {
- $self->{global_config}->{$var} = $self->{global_config_file}->val('global', $var);
- }
-
return $self;
}
@@ -125,10 +125,16 @@
# create Text::Iconv object
my $cp = Text::Iconv->new($code_page,$self->{'code_page'});
+ print STDERR "reading ISIS database '",$arg->{'filename'},"'\n" if ($self->{'debug'});
+
my $isis_db = OpenIsis::open($arg->{'filename'});
my $maxmfn = OpenIsis::maxRowid( $isis_db ) || 1;
+ $maxmfn = $self->{limit_mfn} if ($self->{limit_mfn});
+
+ print STDERR "processing $maxmfn records...\n" if ($self->{'debug'});
+
# read database
for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) {
@@ -161,10 +167,67 @@
}
+ $self->{'current_mfn'} = 1;
+
# store max mfn and return it.
return $self->{'max_mfn'} = $maxmfn;
}
+=head2 fetch_rec
+
+Fetch next record from database. It will also display progress bar (once
+it's implemented, that is).
+
+ my $rec = $webpac->fetch_rec;
+
+=cut
+
+sub fetch_rec {
+ my $self = shift;
+
+ my $mfn = $self->{'current_mfn'}++ || confess "it seems that you didn't load database!";
+
+ if ($mfn > $self->{'max_mfn'}) {
+ $self->{'current_mfn'} = $self->{'max_mfn'};
+ return;
+ }
+
+ return $self->{'data'}->{$mfn};
+}
+
+=head2 open_import_xml
+
+Read file from C directory and parse it.
+
+ $webpac->open_import_xml(type => 'isis');
+
+=cut
+
+sub open_import_xml {
+ my $self = shift;
+
+ my $arg = {@_};
+ confess "need type to load file from import_xml/" if (! $arg->{'type'});
+
+ my $type = $arg->{'type'};
+
+ my $type_base = $type;
+ $type_base =~ s/_.*$//g;
+
+ my $f = "./import_xml/$type.xml";
+ confess "import_xml file '$f' doesn't exist!" if (! -e "$f");
+
+ print STDERR "reading '$f'\n" if ($self->{'debug'});
+
+ $self->{'import_xml'} = XMLin($f,
+ ForceArray => [ $type2tag{$type_base}, 'config', 'format' ],
+ ForceContent => 1
+ );
+
+ print Dumper($self->{'import_xml'});
+
+}
+
=head2 create_lookup
Create lookup from record using lookup definition.
@@ -195,6 +258,42 @@
}
}
+=head2 get_data
+
+Returns value from record.
+
+ $self->get_data(\$rec,$f,$sf,$i,\$found);
+
+Arguments are:
+record reference C<$rec>,
+field C<$f>,
+optional subfiled C<$sf>,
+index for repeatable values C<$i>.
+
+Optinal variable C<$found> will be incremeted if thre
+is field.
+
+Returns value or empty string.
+
+=cut
+
+sub get_data {
+ my $self = shift;
+
+ my ($rec,$f,$sf,$i,$found) = @_;
+ if ($$rec->{$f}) {
+ if ($sf && $$rec->{$f}->[$i]->{$sf}) {
+ $$found++ if (defined($$found));
+ return $$rec->{$f}->[$i]->{$sf};
+ } elsif ($$rec->{$f}->[$i]) {
+ $$found++ if (defined($$found));
+ return $$rec->{$f}->[$i];
+ }
+ } else {
+ return '';
+ }
+}
+
=head2 fill_in
Workhourse of all: takes record from in-memory structure of database and
@@ -228,33 +327,18 @@
my $found = 0;
- # get field with subfield
- sub get_sf {
- my ($found,$rec,$f,$sf,$i) = @_;
- if ($$rec->{$f} && $$rec->{$f}->[$i]->{$sf}) {
- $$found++;
- return $$rec->{$f}->[$i]->{$sf};
- } else {
- return '';
- }
- }
-
- # get field (without subfield)
- sub get_nosf {
- my ($found,$rec,$f,$i) = @_;
- if ($$rec->{$f} && $$rec->{$f}->[$i]) {
- $$found++;
- return $$rec->{$f}->[$i];
- } else {
- return '';
- }
- }
+ my $eval_code;
+ # remove eval{...} from beginning
+ $eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s);
# do actual replacement of placeholders
- $format =~ s/v(\d+)\^(\w)/get_sf(\$found,\$rec,$1,$2,$i)/ges;
- $format =~ s/v(\d+)/get_nosf(\$found,\$rec,$1,$i)/ges;
+ $format =~ s/v(\d+)(?:\^(\w))*/$self->get_data(\$rec,$1,$2,$i,\$found)/ges;
if ($found) {
+ if ($eval_code) {
+ my $eval = $self->fill_in($rec,$eval_code,$i);
+ return if (! eval $eval);
+ }
# do we have lookups?
if ($format =~ /\[[^\[\]]+\]/o) {
return $self->lookup($format);
@@ -310,4 +394,64 @@
}
}
+=head2 parse
+
+Perform smart parsing of string, skipping delimiters for fields which aren't
+defined. It can also eval code in format starting with C and
+return output or nothing depending on eval code.
+
+ $webpac->parse($rec,'eval{"v901^a" eq "Deskriptor"}descriptor: v250^a', $i);
+
+=cut
+
+sub parse {
+ my $self = shift;
+
+ my ($rec, $format, $i) = @_;
+
+ confess("need HASH as first argument!") if ($rec !~ /HASH/o);
+
+ $i = 0 if (! $i);
+
+ my @out;
+
+ my $eval_code;
+ # remove eval{...} from beginning
+ $eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s);
+
+ my $prefix;
+ my $all_found=0;
+
+#print "## $format\n";
+ while ($format =~ s/^(.*?)v(\d+)(?:\^(\w))*//s) {
+#print "## [ $1 | $2 | $3 ] $format\n";
+
+ my $del = $1 || '';
+ $prefix ||= $del if ($all_found == 0);
+
+ my $found = 0;
+ my $tmp = $self->get_data(\$rec,$2,$3,$i,\$found);
+
+ if ($found) {
+ push @out, $del;
+ push @out, $tmp;
+ $all_found += $found;
+ }
+ }
+
+ return if (! $all_found);
+
+ my $out = join('',@out) . $format;
+
+ # add prefix if not there
+ $out = $prefix . $out if ($out !~ m/^\Q$prefix\E/);
+
+ if ($eval_code) {
+ my $eval = $self->fill_in($rec,$eval_code,$i);
+ return if (! eval $eval);
+ }
+
+ return $out;
+}
+
1;