--- trunk/all2xml.pl 2003/11/22 22:04:05 164 +++ trunk/all2xml.pl 2003/11/24 01:19:15 177 @@ -9,6 +9,9 @@ use Text::Iconv; use Config::IniFiles; use Encode; +#use GDBM_File; +use Fcntl; # for O_RDWR +use TDB_File; $|=1; @@ -61,6 +64,14 @@ 'feed' => 'feed' ); +my $cache; # for cacheing + +# lookup hash (tied to file) +my %lhash; +# this option will cache all lookup entries in memory. +# if you are tight on memory, turn this off +my $use_lhash_cache = 1; + sub data2xml { use xmlify; @@ -90,7 +101,18 @@ return $va <=> $vb; } - foreach my $field (sort by_order keys %{$config->{indexer}}) { + my @sorted_tags; + if ($cache->{tags_by_order}->{$type}) { + @sorted_tags = @{$cache->{tags_by_order}->{$type}}; + } else { + @sorted_tags = sort by_order keys %{$config->{indexer}}; + $cache->{tags_by_order}->{$type} = \@sorted_tags; + } + + # lookup key + my $lookup_key; + + foreach my $field (@sorted_tags) { $field=x($field); $field_usage{$field}++; @@ -110,11 +132,15 @@ my $repeat_off = 0; # repeatable offset - my ($s,$se,$d,$i) = (1,0,1,0); # swish, display default + # swish, swish_exact, display, index, index_lookup + # swish and display defaults + my ($s,$se,$d,$i,$il) = (1,0,1,0,0); $s = 0 if (lc($x->{type}) eq "display"); $d = 0 if (lc($x->{type}) eq "swish"); $se = 1 if (lc($x->{type}) eq "swish_exact"); ($s,$d,$i) = (0,0,1) if (lc($x->{type}) eq "index"); + $il = 1 if (lc($x->{type}) =~ /^lookup/); + # what will separate last line from this one? if ($display_data && $x->{append} && $x->{append} eq "1") { @@ -128,7 +154,6 @@ # placeholder for all repeatable entries for index my @index_data; - my $index_filter; sub mkformat { my $x = shift || die "mkformat needs tag reference"; @@ -163,12 +188,35 @@ print STDERR "loop (more than 1000 repeatable fields) deteced in $row, $format\n"; last; } - + + # is this field is lookup? + if ($display && $x->{lookup}) { + if ($use_lhash_cache) { + if (!defined($cache->{lhash}->{$display})) { + my $new_display = $lhash{$display}; + if ($new_display) { +#print STDERR "lookup cache store '$display' = '$new_display'\n"; + $display = $new_display; + $cache->{lhash}->{$display} = $new_display; + } else { + print STDERR "WARNING: lookup for '$display' didn't find anything.\n"; + $display = ""; + $cache->{lhash}->{$display} = ""; + } + } else { + $display = $cache->{lhash}->{$display}; + } + } else { + $display = $lhash{$display} || ""; + } + } + # filter="name" ; filter this field through # filter/[name].pm my $filter = $x->{filter}; - if ($filter) { + if ($filter && !$cache->{filter_loaded}->{$filter}) { require "filter/".$filter.".pm"; + $cache->{filter_loaded}->{$filter}++; } # type="swish" ; field for swish if ($swish) { @@ -208,24 +256,39 @@ # type="index" ; insert into index if ($i && $display) { - push @index_data, $display; - $index_filter = $filter if ($filter); + if ($filter) { + no strict 'refs'; + $display = &$filter($display); + } + if ($x->{append} && @index_data) { + $index_data[$#index_data].=$display; + } else { + push @index_data, $display; + } } - } - # fill data in index - if (@index_data) { - if ($index_filter) { - no strict 'refs'; - foreach my $d (@index_data) { - $index->insert($field, &$index_filter($d), $path); - } - } else { - foreach my $d (@index_data) { - $index->insert($field, $d, $path); + # store fields in lookup + if ($il && $display) { + if (lc($x->{type}) eq "lookup_key") { + if ($lookup_key) { + print STDERR "WARNING: try to redefine lookup_key (keys shouldn't be repeatable fields!)"; + } else { + $lookup_key = $display; + } + } elsif (lc($x->{type}) eq "lookup_val") { + if ($lookup_key) { + $lhash{$lookup_key} = $display; + } else { + print STDERR "WARNING: no lookup_key defined for '$display'?"; + } } } } + + # fill data in index + foreach my $d (@index_data) { + $index->insert($field, $d, $path); + } } # now try to parse variables from configuration file @@ -335,6 +398,22 @@ my $type = lc($cfg -> val($database, 'type')) || die "$database doesn't have 'type' defined"; my $add_xml = $cfg -> val($database, 'xml'); # optional + # create new lookup file + my $lookup_file = $cfg -> val($database, 'lookup_newfile'); # optional + if ($lookup_file) { + #tie %lhash, 'GDBM_File', $lookup_file, &GDBM_NEWDB, 0644; + tie %lhash, 'TDB_File', $lookup_file, TDB_CLEAR_IF_FIRST, O_RDWR, 0644; + print STDERR "creating lookup file '$lookup_file'\n"; + } + + # open existing lookup file + $lookup_file = $cfg -> val($database, 'lookup_open'); # optional + if ($lookup_file) { + #tie %lhash, 'GDBM_File', $lookup_file, &GDBM_READER, 0644; + tie %lhash, 'TDB_File', $lookup_file, TDB_DEFAULT, O_RDWR, 0644; + print STDERR "opening lookup file '$lookup_file'\n"; + } + print STDERR "reading ./import_xml/$type.xml\n"; # extract just type basic @@ -585,6 +664,8 @@ fakeprogress($i); } + # close lookup + untie %lhash if (%lhash); } }