--- trunk/all2xml.pl 2004/03/14 14:59:43 279 +++ trunk2/all2xml.pl 2004/06/10 19:22:40 337 @@ -5,7 +5,6 @@ use Getopt::Std; use Data::Dumper; use XML::Simple; -use Text::Unaccent 1.02; # 1.01 won't compile on my platform, use Text::Iconv; use Config::IniFiles; use Encode; @@ -17,6 +16,7 @@ my $config_file = $0; $config_file =~ s/\.pl$/.conf/; +$config_file = $ARGV[0] if ($ARGV[0] && -f $ARGV[0]); die "FATAL: can't find configuration file '$config_file'" if (! -e $config_file); my $config; @@ -168,6 +168,8 @@ ($s,$se,$d,$i) = (0,1,0,0); } elsif (lc($type) =~ /^lookup/) { ($s,$se,$d,$i,$il) = (0,1,0,0,1); + } elsif ($type) { + print STDERR "WARNING: unknown type: $type\n"; } return ($s,$se,$d,$i,$il); } @@ -245,7 +247,7 @@ # what will separate last line from this one? if ($display_data && $x->{append}) { - $line_delimiter = ' '; + $line_delimiter = $delimiter; } elsif ($display_data) { $line_delimiter = '
'; } @@ -525,7 +527,7 @@ $swish_data =~ s/ +/ /g; $swish_data =~ s/ +$//g; - $xml .= xmlify($field."_swish", unac_string($codepage,$swish_data)); + $xml .= xmlify($field."_swish", my_unac_string($codepage,$swish_data)); } my $swish_exact_data = $cache->{swish_exact_data}->{$field}->[$page]; @@ -535,7 +537,7 @@ # add delimiters before and after word. # That is required to produce exact match - $xml .= xmlify($field."_swish_exact", unac_string($codepage,$swish_exact_data)); + $xml .= xmlify($field."_swish_exact", my_unac_string($codepage,$swish_exact_data)); } my $idel = $cache->{index_delimiter}->{$field}; @@ -568,7 +570,7 @@ $swish_data =~ s/ +/ /g; $swish_data =~ s/ +$//g; - $xml .= xmlify($field."_swish", unac_string($codepage,$swish_data)); + $xml .= xmlify($field."_swish", my_unac_string($codepage,$swish_data)); } if ($swish_exact_data) { @@ -577,7 +579,7 @@ # add delimiters before and after word. # That is required to produce exact match - $xml .= xmlify($field."_swish_exact", unac_string($codepage,$swish_exact_data)); + $xml .= xmlify($field."_swish_exact", my_unac_string($codepage,$swish_exact_data)); } } } @@ -614,9 +616,18 @@ my $show_progress = $cfg_global->val('global', 'show_progress'); -my $unac_filter = $cfg_global->val('global', 'unac_filter'); -if ($unac_filter) { - require $unac_filter; +my $my_unac_filter = $cfg_global->val('global', 'my_unac_filter'); +if ($my_unac_filter) { + print STDERR "using $my_unac_filter to filter characters for search\n"; + require $my_unac_filter; +} else { + print STDERR "### fallback to default my_unac_string!\n"; + eval q{ + sub main::my_unac_string($$) { + my ($charset, $string) = (@_); + return $string; + } + }; } foreach my $database ($cfg->Sections) { @@ -918,6 +929,10 @@ all2xml.pl - read various file formats and dump XML for SWISH-E +=head1 SYNOPSIS + + $ all2xml.pl [test.conf] + =head1 DESCRIPTION This command will read ISIS data file using OpenIsis perl module, MARC @@ -926,6 +941,9 @@ this script B from isis 
files (isis allready has something like that). Output of this script is tailor-made for SWISH-E. +If no configuration file is specified, it will use default one called +C<all2xml.conf>. + =head1 BUGS Documentation is really lacking. However, in true Open Source spirit, source