--- trunk/all2xml.pl 2003/06/25 12:09:27 56 +++ trunk/all2xml.pl 2003/07/04 20:11:48 62 @@ -18,7 +18,8 @@ my $config; -use index_DBI; # there is no other, right now ;-) +#use index_DBI; # default DBI module for index +use index_DBI_cache; # faster DBI module using memory cache my $index; my %opts; @@ -33,7 +34,7 @@ my $path; # this is name of database -Text::Iconv->raise_error(1); # Conversion errors raise exceptions +Text::Iconv->raise_error(0); # Conversion errors don't raise exceptions # this is encoding of all files on disk, including import_xml/*.xml file and # filter/*.pm files! It will be used to store strings in perl internally! @@ -55,7 +56,8 @@ # format in XML file my %type2tag = ( 'isis' => 'isis', - 'excel' => 'column' + 'excel' => 'column', + 'marc' => 'marc', ); sub data2xml { @@ -65,6 +67,9 @@ my $type = shift @_; my $row = shift @_; my $add_xml = shift @_; + # needed to read values from configuration file + my $cfg = shift @_; + my $database = shift @_; my $xml; @@ -169,6 +174,25 @@ } } + # now try to parse variables from configuration file + foreach my $x (@{$config->{indexer}->{$field}->{'config'}}) { + + my $delimiter = x($x->{delimiter}) || ' '; + my $val = $cfg->val($database, x($x->{content})); + + my ($s,$d,$i) = (1,1,0); # swish, display default + $s = 0 if (lc($x->{type}) eq "display"); + $d = 0 if (lc($x->{type}) eq "swish"); + ($s,$d,$i) = (0,0,1) if (lc($x->{type}) eq "index"); + + if ($val) { + $display_data .= $delimiter.$val if ($d); + $swish_data .= $val if ($s); + $index->insert($field, $val, $path) if ($i); + } + + } + if ($display_data) { @@ -240,14 +264,18 @@ print STDERR "reading ./import_xml/$type.xml\n"; - $config=XMLin("./import_xml/$type.xml", forcearray => [ $type2tag{$type} ], forcecontent => 1); + # extract just type basic + my $type_base = $type; + $type_base =~ s/_.+$//g; + + $config=XMLin("./import_xml/$type.xml", forcearray => [ $type2tag{$type_base}, 'config' ], forcecontent => 1); # output current progress indicator 
my $last_p = 0; sub progress { #return if (! $opts{q}); # FIXME my $current = shift; - my $total = shift; + my $total = shift || 1; my $p = int($current * 100 / $total); if ($p != $last_p) { printf STDERR ("%5d / %5d [%-51s] %-2d %% \r",$current,$total,"=" x ($p/2).">", $p ); @@ -258,7 +286,8 @@ # now read database print STDERR "using: $type...\n"; - if ($type eq "isis") { + if ($type_base eq "isis") { + my $isis_db = $cfg -> val($database, 'isis_db') || die "$database doesn't have 'isis_db' defined!"; $import2cp = Text::Iconv->new($config->{isis_codepage},$codepage); @@ -278,7 +307,7 @@ my $swishpath = $path."#".int($row->{mfn}); - if (my $xml = data2xml($type,$row,$add_xml)) { + if (my $xml = data2xml($type_base,$row,$add_xml,$cfg,$database)) { $xml = $cp2utf->convert($xml); use bytes; # as opposed to chars print "Path-Name: $swishpath\n"; @@ -289,7 +318,7 @@ } print STDERR "\n"; - } elsif ($type eq "excel") { + } elsif ($type_base eq "excel") { use Spreadsheet::ParseExcel; use Spreadsheet::ParseExcel::Utility qw(int2col); @@ -335,7 +364,49 @@ next if (! $row); - if (my $xml = data2xml($type,$row,$add_xml)) { + if (my $xml = data2xml($type_base,$row,$add_xml,$cfg,$database)) { + $xml = $cp2utf->convert($xml); + use bytes; # as opposed to chars + print "Path-Name: $swishpath\n"; + print "Content-Length: ".(length($xml)+1)."\n"; + print "Document-Type: XML\n\n$xml\n"; + } + } + } elsif ($type_base eq "marc") { + ## XXX + use MARC; + + $import2cp = Text::Iconv->new($config->{marc_codepage},$codepage); + my $marc_file = $cfg -> val($database, 'marc_file') || die "$database doesn't have 'marc_file' defined!"; + + # optional argument is format + my $format = x($config->{format}) || 'usmarc'; + + my %id_stored; # to avoid duplicates + + print STDERR "Reading MARC file '$marc_file'\n"; + + my $marc = new MARC; + my $nr = $marc->openmarc({ + file=>$marc_file, format=>$format + }) || die "Can't open MARC file '$marc_file'"; + + my $i=0; # record nr. 
+ my $inc=1; + my $max_i=1000; + + my $rec; + + while ($marc->nextmarc(1)) { + + # XXX + progress($i, $max_i); + $i += $inc; + $inc = -$inc if ($i > $max_i || $i < 0); + + my $swishpath = $database."#".$i; + + if (my $xml = data2xml($type_base,$marc,$add_xml,$cfg,$database)) { $xml = $cp2utf->convert($xml); use bytes; # as opposed to chars print "Path-Name: $swishpath\n";