--- branches/tehnika/all2xml.pl 2004/03/14 19:44:57 286 +++ branches/tehnika/all2xml.pl 2004/09/21 16:53:44 458 @@ -5,7 +5,6 @@ use Getopt::Std; use Data::Dumper; use XML::Simple; -use Text::Unaccent 1.02; # 1.01 won't compile on my platform, use Text::Iconv; use Config::IniFiles; use Encode; @@ -17,6 +16,7 @@ my $config_file = $0; $config_file =~ s/\.pl$/.conf/; +$config_file = $ARGV[0] if ($ARGV[0] && -f $ARGV[0]); die "FATAL: can't find configuration file '$config_file'" if (! -e $config_file); my $config; @@ -245,7 +245,7 @@ # what will separate last line from this one? if ($display_data && $x->{append}) { - $line_delimiter = ' '; + $line_delimiter = $delimiter; } elsif ($display_data) { $line_delimiter = '
'; } @@ -525,7 +525,7 @@ $swish_data =~ s/ +/ /g; $swish_data =~ s/ +$//g; - $xml .= xmlify($field."_swish", unac_string($codepage,$swish_data)); + $xml .= xmlify($field."_swish", my_unac_string($codepage,$swish_data)); } my $swish_exact_data = $cache->{swish_exact_data}->{$field}->[$page]; @@ -535,7 +535,7 @@ # add delimiters before and after word. # That is required to produce exact match - $xml .= xmlify($field."_swish_exact", unac_string($codepage,$swish_exact_data)); + $xml .= xmlify($field."_swish_exact", my_unac_string($codepage,$swish_exact_data)); } my $idel = $cache->{index_delimiter}->{$field}; @@ -568,7 +568,7 @@ $swish_data =~ s/ +/ /g; $swish_data =~ s/ +$//g; - $xml .= xmlify($field."_swish", unac_string($codepage,$swish_data)); + $xml .= xmlify($field."_swish", my_unac_string($codepage,$swish_data)); } if ($swish_exact_data) { @@ -577,7 +577,7 @@ # add delimiters before and after word. # That is required to produce exact match - $xml .= xmlify($field."_swish_exact", unac_string($codepage,$swish_exact_data)); + $xml .= xmlify($field."_swish_exact", my_unac_string($codepage,$swish_exact_data)); } } } @@ -614,9 +614,18 @@ my $show_progress = $cfg_global->val('global', 'show_progress'); -my $unac_filter = $cfg_global->val('global', 'unac_filter'); -if ($unac_filter) { - require $unac_filter; +my $my_unac_filter = $cfg_global->val('global', 'my_unac_filter'); +if ($my_unac_filter) { + print STDERR "using $my_unac_filter to filter characters for search\n"; + require $my_unac_filter; +} else { + print STDERR "### fallback to default my_unac_string!\n"; + eval q{ + sub main::my_unac_string($$) { + my ($charset, $string) = (@_); + return $string; + } + }; } foreach my $database ($cfg->Sections) { @@ -628,6 +637,10 @@ my $lookup_file = $cfg -> val($database, 'lookup_newfile'); # optional if ($lookup_file) { #tie %lhash, 'GDBM_File', $lookup_file, &GDBM_NEWDB, 0644; + if (! 
-e $lookup_file) { + open(LOOKUP, "> $lookup_file") || die "can't create $lookup_file': $!"; + close(LOOKUP); + } tie %lhash, 'TDB_File', $lookup_file, TDB_CLEAR_IF_FIRST, O_RDWR, 0644; print STDERR "creating lookup file '$lookup_file'\n"; # delete memory cache for lookup file @@ -918,6 +931,10 @@ all2xml.pl - read various file formats and dump XML for SWISH-E +=head1 SYNOPSIS + + $ all2xml.pl [test.conf] + =head1 DESCRIPTION This command will read ISIS data file using OpenIsis perl module, MARC @@ -926,6 +943,9 @@ this script B from isis files (isis already has something like that). Output of this script is tailor-made for SWISH-E. +If no configuration file is specified, it will use the default one called +C<all2xml.conf>. + =head1 BUGS Documentation is really lacking. However, in true Open Source spirit, source