--- branches/cpi/all2xml.pl 2006/04/13 19:47:32 731
+++ branches/cpi/all2xml.pl 2008/08/03 06:33:56 776
@@ -1,16 +1,16 @@
#!/usr/bin/perl -w
use strict;
-use Biblio::Isis;
+use Biblio::Isis 0.23;
use Getopt::Std;
use Data::Dumper;
use XML::Simple;
use Text::Iconv;
use Config::IniFiles;
use Encode;
-#use GDBM_File;
+use GDBM_File;
use Fcntl; # for O_RDWR
-use TDB_File;
+#use TDB_File;
use Carp;
$|=1;
@@ -76,6 +76,9 @@
my $last_field_name; # cache to prevent repeated fields
+my $broken_cdata = XMLin('<xml><![CDATA[<a>]]></xml>') eq '<a>>'; # probe: does CDATA come back with a stray trailing '>'?
+warn "XML::Simple on this system seems broken with <![CDATA[ ]]>.\n" if ($broken_cdata);
+
sub data2xml {
use xmlify;
@@ -207,6 +210,7 @@
delete $x->{value};
delete $x->{delimiter};
$x->{content} = $v;
+ $d =~ s#>$## if ($d && $broken_cdata);
$x->{delimiter} = $d;
}
return $x;
@@ -488,7 +492,7 @@
if ($val) {
$display_data .= $delimiter.$val if ($d);
$swish_data .= " ".$val if ($s);
- $index->insert($field, $val, $path) if ($i);
+ $index->insert($field, $val, $val, $path) if ($i);
}
if ($iterate_by_page) {
@@ -669,12 +673,12 @@
# create new lookup file
my $lookup_file = $cfg -> val($database, 'lookup_newfile'); # optional
if ($lookup_file) {
- #tie %lhash, 'GDBM_File', $lookup_file, &GDBM_NEWDB, 0644;
if (! -e $lookup_file) {
open(LOOKUP, "> $lookup_file") || die "can't create $lookup_file': $!";
close(LOOKUP);
}
- tie %lhash, 'TDB_File', $lookup_file, TDB_CLEAR_IF_FIRST, O_RDWR, 0644;
+ tie %lhash, 'GDBM_File', $lookup_file, &GDBM_NEWDB, 0644;
+ #tie %lhash, 'TDB_File', $lookup_file, TDB_CLEAR_IF_FIRST, O_RDWR, 0644;
print STDERR "creating lookup file '$lookup_file'\n";
# delete memory cache for lookup file
delete $cache->{lhash};
@@ -683,8 +687,8 @@
# open existing lookup file
$lookup_file = $cfg -> val($database, 'lookup_open'); # optional
if ($lookup_file) {
- #tie %lhash, 'GDBM_File', $lookup_file, &GDBM_READER, 0644;
- tie %lhash, 'TDB_File', $lookup_file, TDB_DEFAULT, O_RDWR, 0644;
+ tie %lhash, 'GDBM_File', $lookup_file, &GDBM_READER, 0644;
+ #tie %lhash, 'TDB_File', $lookup_file, TDB_DEFAULT, O_RDWR, 0644;
print STDERR "opening lookup file '$lookup_file'\n";
}
@@ -705,6 +709,13 @@
my $tag = $cfg->val($database, 'import_xml_tag') || $type2tag{$type_base} || die "can't find which tag to use for type $type";
$config=XMLin($import_xml_file, ForceArray => [ $tag, 'config', 'format' ], ForceContent => 1 );
+ # check for broken XML::Simple: drop the stray trailing '>' from every format's content
+ if ( $broken_cdata ) {
+     for my $format ( values %{ $config->{format} } ) {
+         $format->{content} =~ s#>$## if defined $format->{content}; # formats without text have no content
+     }
+ }
+
# helper for progress bar
sub fmt_time {
my $t = shift || 0;
@@ -774,7 +785,10 @@
my $isis_db = $cfg -> val($database, 'isis_db') || die "$database doesn't have 'isis_db' defined!";
$import2cp = Text::Iconv->new($config->{isis_codepage},$codepage);
- my $db = new Biblio::Isis( isisdb => $isis_db );
+ my $db = Biblio::Isis->new( # direct class-method call instead of indirect-object "new Class"
+ isisdb => $isis_db,
+ join_subfields_with => ' ; ',
+ );
if (! $db) {
print STDERR "FATAL: can't read ISIS database: $isis_db, skipping...\n";