--- trunk2/all2all.pl	2004/06/16 11:29:37	353
+++ trunk2/all2all.pl	2004/09/08 15:30:07	415
@@ -2,7 +2,7 @@
 
 =head1 NAME
 
-all2all.pl - basic script for all WebPac needs
+all2all.pl - basic script for all WebPAC needs
 
 =cut
 
@@ -11,45 +11,132 @@
 use Carp;
 
 use lib './lib';
-use WebPac;
+use WebPAC;
+use WebPAC::jsFind;
+use WebPAC::Index;
 
-my $webpac = new WebPac(
+my $webpac = new WebPAC(
 	code_page => 'ISO-8859-2',
+	limit_mfn => 500,	# presumably limits how many records (MFNs) are read -- TODO confirm in WebPAC
+#	debug => 1,
 ) || die;
+# logger comes from the WebPAC object itself (underscore-prefixed, i.e. conventionally private, accessor)
+my $log = $webpac->_get_logger();
+# jsFind index stored under ./out/index; entries are added per record with $index->insert
+my $index = new WebPAC::jsFind(
+	index_path => './out/index',
+	keys => 10,
+) || die;
+# sorted indexes (thesaurus): hash ref of tag => WebPAC::Index, populated lazily in the record loop
+my $thes;
+
 $|=1;
 
-print "reading database\n";
 my $maxmfn = $webpac->open_isis(
-	filename => shift @ARGV || '/data/hidra/THS-500/THS',
+	filename => shift @ARGV || '/data/hidra/THS/THS',	# first command-line argument, else this default path
 	lookup => [
 	{ 'key' => 'd:v900', 'val' => 'v250^a' },
 #	{ 'eval' => '"v901^a" eq "Područje"', 'key' => 'pa:v561^4:v562^4:v461^1', 'val' => 'v900' },
 #	{ 'eval '=> '"v901^a" eq "Mikrotezaurus"', 'key' => 'a:v561^4:v562^4:v461^1', 'val' => 'v900' },
 #	{ 'eval' => '"v901^a" eq "Deskriptor"', 'key' => 'a:v561^4:v562^4:v461^1', 'val' => 'v900' },
 	{ 'key' => 'a:v561^4:v562^4:v461^1', 'val' => 'v900' },
+	{ 'key' => '900_mfn:v900', 'val' => 'v000' },	# lookup keyed on v900; v000 is presumably the record MFN -- TODO confirm
 	],
 );
 
-print "rows: $maxmfn\n\n";
+$log->info("rows: $maxmfn");	# value returned by open_isis, reported as the row count
+# NOTE(review): selects the import definition named 'isis_hidra_ths'; exact semantics live in WebPAC
+$webpac->open_import_xml(type => 'isis_hidra_ths');
 
-for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) {
-	my $rec = $webpac->{'data'}->{$mfn} || die "no record with mfn $mfn";
+while (my $rec = $webpac->fetch_rec) {	# loop ends when fetch_rec returns a false value
 
-	print "-- ",$webpac->fill_in($rec,'v250^a (v901^a)'),"\n";
+	my @ds = $webpac->data_structure($rec);	# list of hash refs; keys used below: tag, swish, index
 
-	my @t = $webpac->fill_in($rec,'v553^1;;v553^a');
-	print " Uži pojam: ",join("\t\n",@t),"\n" if (@t);
+	if (0 && $log->is_debug) {	# "0 &&" keeps this dump switched off even at debug level
+		$log->debug("rec = ",Dumper($rec));
+		$log->debug("ds = ",Dumper(\@ds));
+	}
+	# nothing produced for this record: skip output and indexing
+	next if (! @ds);
+
+	my $filename = $webpac->{'current_filename'};
+	# with a current_filename render the HTML template into that file, otherwise print the text template
+	if ($filename) {
+		$webpac->output_file(
+			file => $filename,
+			template => 'html.tt',
+			data => \@ds,
+			headline => $webpac->{'headline'},
+		);
+	} else {
+		print $webpac->output(
+			template => 'text.tt',
+			data => \@ds,
+			headline => $webpac->{'headline'},
+		);
+	}
+
+	my $headline = $webpac->{'headline'};
+	# index path: file name with the first "out/" removed; '' when no file was written (no undef in s///)
+	my $f = defined $filename ? $filename : '';
+	$f =~ s!out/!!;
+
+	# save into index: one jsFind entry per data-structure element that carries 'swish' words
+	foreach my $ds (@ds) {
+		next if (! $ds->{'swish'});
+
+		$index->insert(
+			index_name => $ds->{'tag'},
+			path => $f,
+			headline => $headline,
+			words => join(" ",@{$ds->{'swish'}})
+		);
+	}
+
+	# save into sorted index (thesaurus): one WebPAC::Index per tag, created on first use
+	foreach my $ds (@ds) {
+		next if (! $ds->{'index'});
+
+		$thes->{$ds->{'tag'}} ||= new WebPAC::Index;
+
+		foreach my $h (@{$ds->{'index'}}) {	# each 'index' value becomes its own headline entry
+			$thes->{$ds->{'tag'}}->insert(
+				path => $f,
+				headline => $h,
+			);
+		}
+	}
 
-	@t = $webpac->fill_in($rec,'[a:v251::];;[d:[a:v251::]]');
-	print " Područje: ",join("\t\n",@t),"\n" if (@t);
+#	print Dumper(\@ds);
 
-	@t = $webpac->fill_in($rec,'[a:v561^4:v251:];;[d:[a:v561^4:v251:]]');
-	print " Mikrotezaurus: ",join("\t\n",@t),"\n" if (@t);
+}
+
+foreach my $t (keys %{$thes}) {	# $t is the tag under which a sorted index was collected
 
-	@t = $webpac->fill_in($rec,'[a:v561^4:v562^4:v900];;[d:[a:v561^4:v562^4:v900]]');
-	print " Deskriptor: ",join("\t\n",@t),"\n" if (@t);
+	my @e = $thes->{$t}->elements;
+	if (! @e) {	# nothing to write for this tag: warn and move on
+		$log->logwarn("no elements in sorted index $t?");
+		next;
+	}
+	# one output file per tag, rendered with the index.tt template
+	my $file = "./out/bfilter/$t.txt";
+	$log->info("saving sorted index $t to '$file' [".scalar(@e)." elements]");
+
+	$webpac->output_file(
+		file => $file,
+		template => 'index.tt',
+		data => \@e,
+		index_name => $t,
+	);
 }
 
-print "## lookup ",Dumper($webpac->{'lookup'});
-print "## data ",Dumper($webpac->{'data'});
+if (0 && $log->is_debug) {	# disabled by the constant 0; drop "0 &&" to dump these structures at debug level
+	$log->debug("lookup hash: ",Dumper($webpac->{'lookup'}));
+	$log->debug("data hash: ",Dumper($webpac->{'data'}));
+	foreach my $t (keys %{$thes}) {
+		$log->debug("thesaurus $t hash: ",Dumper($thes->{$t}));
+	}
+}
+# close the jsFind index created near the top of the script (presumably flushes it to disk -- TODO confirm)
+$index->close;
+