# Patch for branches/cpi/all2xml.pl (header stamps: 2006/04/13 "731" -> 2008/08/03 "776";
# presumably repository revision numbers -- confirm against the VCS log).
#
# NOTE: this copy of the unified diff has had its line breaks collapsed; the "@@" hunk
# headers and the leading "+" / "-" / context markers appear inline in the text below.
#
# Changes visible in the hunks:
#   * "use Biblio::Isis" now requests version 0.23; "use GDBM_File" is enabled and
#     "use TDB_File" is commented out.
#   * A $broken_cdata flag is computed once from an XMLin() probe and a warning is printed
#     when it is true.  When the flag is set, a trailing ">" is removed from the delimiter
#     $d and from every $config->{format}->{...}->{content} after the import XML is read.
#   * $index->insert() is called with $val passed twice ($field, $val, $val, $path).
#   * The %lhash lookup tie switches from TDB_File to GDBM_File: &GDBM_NEWDB when creating
#     the lookup file, &GDBM_READER when opening an existing one; the TDB_File calls are
#     kept as comments.
#   * The Biblio::Isis constructor additionally receives join_subfields_with => ' ; '.
#
# NOTE(review): the probe reads XMLin(']]>') eq '>' and the warning ends "broken with ." --
#   this looks like markup (probably a CDATA example) was stripped from this copy; as
#   written the argument contains no "<...>" element, which XML::Simple would presumably
#   treat as a file name.  Verify against the original commit before applying.
# NOTE(review): the die message "can't create $lookup_file': $!" has a closing quote with
#   no matching opening quote.
# NOTE(review): open(LOOKUP, "> $lookup_file") is the two-argument form with a bareword
#   handle; the fix-up over keys %{ $config->{format} } uses map in void context; and
#   "new Biblio::Isis(...)" is indirect-object syntax.  None of these are changed here.
--- branches/cpi/all2xml.pl 2006/04/13 19:47:32 731 +++ branches/cpi/all2xml.pl 2008/08/03 06:33:56 776 @@ -1,16 +1,16 @@ #!/usr/bin/perl -w use strict; -use Biblio::Isis; +use Biblio::Isis 0.23; use Getopt::Std; use Data::Dumper; use XML::Simple; use Text::Iconv; use Config::IniFiles; use Encode; -#use GDBM_File; +use GDBM_File; use Fcntl; # for O_RDWR -use TDB_File; +#use TDB_File; use Carp; $|=1; @@ -76,6 +76,9 @@ my $last_field_name; # cache to prevent repeated fields +my $broken_cdata = XMLin(']]>') eq '>'; +warn "XML::Simple on this system seems broken with .\n" if ($broken_cdata); + sub data2xml { use xmlify; @@ -207,6 +210,7 @@ delete $x->{value}; delete $x->{delimiter}; $x->{content} = $v; + $d =~ s#>$## if ($d && $broken_cdata); $x->{delimiter} = $d; } return $x; @@ -488,7 +492,7 @@ if ($val) { $display_data .= $delimiter.$val if ($d); $swish_data .= " ".$val if ($s); - $index->insert($field, $val, $path) if ($i); + $index->insert($field, $val, $val, $path) if ($i); } if ($iterate_by_page) { @@ -669,12 +673,12 @@ # create new lookup file my $lookup_file = $cfg -> val($database, 'lookup_newfile'); # optional if ($lookup_file) { - #tie %lhash, 'GDBM_File', $lookup_file, &GDBM_NEWDB, 0644; if (! 
-e $lookup_file) { open(LOOKUP, "> $lookup_file") || die "can't create $lookup_file': $!"; close(LOOKUP); } - tie %lhash, 'TDB_File', $lookup_file, TDB_CLEAR_IF_FIRST, O_RDWR, 0644; + tie %lhash, 'GDBM_File', $lookup_file, &GDBM_NEWDB, 0644; + #tie %lhash, 'TDB_File', $lookup_file, TDB_CLEAR_IF_FIRST, O_RDWR, 0644; print STDERR "creating lookup file '$lookup_file'\n"; # delete memory cache for lookup file delete $cache->{lhash}; @@ -683,8 +687,8 @@ # open existing lookup file $lookup_file = $cfg -> val($database, 'lookup_open'); # optional if ($lookup_file) { - #tie %lhash, 'GDBM_File', $lookup_file, &GDBM_READER, 0644; - tie %lhash, 'TDB_File', $lookup_file, TDB_DEFAULT, O_RDWR, 0644; + tie %lhash, 'GDBM_File', $lookup_file, &GDBM_READER, 0644; + #tie %lhash, 'TDB_File', $lookup_file, TDB_DEFAULT, O_RDWR, 0644; print STDERR "opening lookup file '$lookup_file'\n"; } @@ -705,6 +709,13 @@ my $tag = $cfg->val($database, 'import_xml_tag') || $type2tag{$type_base} || die "can't find which tag to use for type $type"; $config=XMLin($import_xml_file, ForceArray => [ $tag, 'config', 'format' ], ForceContent => 1 ); + # check for broken XML::Simple + if ( $broken_cdata ) { + map { + $config->{format}->{$_}->{content} =~ s#>$##; + } keys %{ $config->{format} }; + } + # helper for progress bar sub fmt_time { my $t = shift || 0; @@ -774,7 +785,10 @@ my $isis_db = $cfg -> val($database, 'isis_db') || die "$database doesn't have 'isis_db' defined!"; $import2cp = Text::Iconv->new($config->{isis_codepage},$codepage); - my $db = new Biblio::Isis( isisdb => $isis_db ); + my $db = new Biblio::Isis( + isisdb => $isis_db, + join_subfields_with => ' ; ', + ); if (! $db) { print STDERR "FATAL: can't read ISIS database: $isis_db, skipping...\n";