--- trunk/all2xml.pl 2003/11/16 22:42:41 153 +++ trunk/all2xml.pl 2003/11/23 15:42:16 170 @@ -61,6 +61,8 @@ 'feed' => 'feed' ); +my $cache; # for caching + sub data2xml { use xmlify; @@ -90,12 +92,21 @@ return $va <=> $vb; } - foreach my $field (sort by_order keys %{$config->{indexer}}) { + my @sorted_tags; + if ($cache->{tags_by_order}->{$type}) { + @sorted_tags = @{$cache->{tags_by_order}->{$type}}; + } else { + @sorted_tags = sort by_order keys %{$config->{indexer}}; + $cache->{tags_by_order}->{$type} = \@sorted_tags; + } + + foreach my $field (@sorted_tags) { $field=x($field); $field_usage{$field}++; my $swish_data = ""; + my $swish_exact_data = ""; my $display_data = ""; my $line_delimiter; @@ -109,9 +120,10 @@ my $repeat_off = 0; # repeatable offset - my ($s,$d,$i) = (1,1,0); # swish, display default + my ($s,$se,$d,$i) = (1,0,1,0); # swish, swish_exact, display, index defaults $s = 0 if (lc($x->{type}) eq "display"); $d = 0 if (lc($x->{type}) eq "swish"); + $se = 1 if (lc($x->{type}) eq "swish_exact"); ($s,$d,$i) = (0,0,1) if (lc($x->{type}) eq "index"); # what will separate last line from this one? @@ -165,16 +177,21 @@ # filter="name" ; filter this field through # filter/[name].pm my $filter = $x->{filter}; - if ($filter) { + if ($filter && !$cache->{filter_loaded}->{$filter}) { require "filter/".$filter.".pm"; + $cache->{filter_loaded}->{$filter}++; } # type="swish" ; field for swish - if ($s && $swish) { - if ($filter) { + if ($swish) { + if ($filter && ($s || $se)) { no strict 'refs'; - $swish_data .= join(" ",&$filter($swish)); + my $tmp = join(" ",&$filter($swish)); + $swish_data .= $tmp if ($s); + $swish_exact_data .= $tmp if ($se); + } else { - $swish_data .= $swish; + $swish_data .= $swish if ($s); + $swish_exact_data .= $swish if ($se); } } @@ -231,6 +248,8 @@ my ($s,$d,$i) = (1,1,0); # swish, display default $s = 0 if (lc($x->{type}) eq "display"); $d = 0 if (lc($x->{type}) eq "swish"); + # no support for swish exact in config. 
+ # IMHO, it's useless ($s,$d,$i) = (0,0,1) if (lc($x->{type}) eq "index"); if ($val) { @@ -273,6 +292,15 @@ $xml .= xmlify($field."_swish", unac_string($codepage,$swish_data)); } + if ($swish_exact_data) { + $swish_exact_data =~ s/ +/ /g; + $swish_exact_data =~ s/ +$//g; + + # add delimiters before and after word. + # That is required to produce exact match + $xml .= xmlify($field."_swish_exact", unac_string($codepage,'xxbxx '.$swish_exact_data.' xxexx')); + } + } @@ -308,6 +336,11 @@ my $show_progress = $cfg_global->val('global', 'show_progress'); +my $unac_filter = $cfg_global->val('global', 'unac_filter'); +if ($unac_filter) { + require $unac_filter; +} + foreach my $database ($cfg->Sections) { my $type = lc($cfg -> val($database, 'type')) || die "$database doesn't have 'type' defined";