--- trunk2/all2all.pl 2004/11/01 22:15:41 579 +++ trunk2/all2all.pl 2005/07/13 23:38:04 708 @@ -15,7 +15,8 @@ use lib './lib'; use WebPAC; -use WebPAC::jsFind; +#use WebPAC::jsFind; +use WebPAC::HyperEstraier; use WebPAC::Index; use WebPAC::Tree; @@ -23,7 +24,8 @@ # my $code_page = 'ISO-8859-2'; my ($limit_mfn, $start_mfn, $debug, $low_mem); -my $index_path = './out/index'; +#my $index_path = './out/index'; +my $index_path = './out/casket'; my $result = GetOptions( "code_page=s" => \$code_page, @@ -104,12 +106,15 @@ my $log = $webpac->_get_logger() || die "can't get logger"; -$log->debug("creating WebPAC::jsFind object"); +#$log->debug("creating WebPAC::jsFind object"); +# +#my $index = new WebPAC::jsFind( +# index_path => $index_path, +# keys => 62, +#) || die; -my $index = new WebPAC::jsFind( - index_path => $index_path, - keys => 62, -) || die; +$log->debug("creating HyperEstraier object"); +my $index = new WebPAC::HyperEstraier( index_path => $index_path ) || die; my $thes; @@ -128,6 +133,7 @@ { 'eval' => 'length("v251") == 2 && "v800" =~ m/EUROVOC/ || "v800" =~ m/CROVOC/ && "v251" =~ m/^(H|HD|L|Z|P)$/', 'key' => 'root:v251', 'val' => 'v900' }, { 'eval' => '"v251"', 'key' => 'code:v900', 'val' => 'v561^4:v251' }, { 'eval' => '"v561^4" && "v562^4"', 'key' => 'code:v900', 'val' => 'v561^4:v562^4' }, + { 'key' => 'crovoc:v900', 'val' => 'filter{CROVOC}v800' }, ], ); @@ -178,7 +184,7 @@ # strip all non word characters from beginning or end # of word - my $words = join(" ",@{$ds->{'swish'}}); + my $words = unac(join(" ",@{$ds->{'swish'}})); $words =~ s/^\W+//; $words =~ s/\W*\s+\W*/ /g; $words =~ s/\W+$//; @@ -195,7 +201,7 @@ #path => $f, path => $webpac->mfn, headline => $h, - words => unac($words), + words => $words, ); }