13 |
use lib './lib'; |
use lib './lib'; |
14 |
use WebPAC; |
use WebPAC; |
15 |
use WebPAC::jsFind; |
use WebPAC::jsFind; |
16 |
|
use WebPAC::Index; |
17 |
|
|
18 |
my $webpac = new WebPAC( |
my $webpac = new WebPAC( |
19 |
code_page => 'ISO-8859-2', |
code_page => 'ISO-8859-2', |
20 |
# limit_mfn => 100, |
limit_mfn => 500, |
21 |
# debug => 1, |
# debug => 1, |
22 |
) || die; |
) || die; |
23 |
|
|
25 |
|
|
26 |
my $index = new WebPAC::jsFind( |
my $index = new WebPAC::jsFind( |
27 |
index_path => './out/index', |
index_path => './out/index', |
28 |
keys => 80, |
keys => 10, |
29 |
) || die; |
) || die; |
30 |
|
|
31 |
|
my $thes; |
32 |
|
|
33 |
$|=1; |
$|=1; |
34 |
|
|
35 |
my $maxmfn = $webpac->open_isis( |
my $maxmfn = $webpac->open_isis( |
52 |
|
|
53 |
my @ds = $webpac->data_structure($rec); |
my @ds = $webpac->data_structure($rec); |
54 |
|
|
55 |
if ($log->is_debug) { |
if (0 && $log->is_debug) { |
56 |
$log->debug("rec = ",Dumper($rec)); |
$log->debug("rec = ",Dumper($rec)); |
57 |
$log->debug("ds = ",Dumper(\@ds)); |
$log->debug("ds = ",Dumper(\@ds)); |
58 |
} |
} |
62 |
my $filename = $webpac->{'current_filename'}; |
my $filename = $webpac->{'current_filename'}; |
63 |
|
|
64 |
if ($filename) { |
if ($filename) { |
65 |
open(OUT,"> $filename") || $log->logdie("can't open output '$filename': $!"); |
$webpac->output_file( |
66 |
print OUT $webpac->output( |
file => $filename, |
67 |
template => 'html.tt', |
template => 'html.tt', |
68 |
data => \@ds, |
data => \@ds, |
69 |
headline => $webpac->{'headline'}, |
headline => $webpac->{'headline'}, |
70 |
); |
); |
|
close(OUT); |
|
71 |
} else { |
} else { |
72 |
print $webpac->output( |
print $webpac->output( |
73 |
template => 'text.tt', |
template => 'text.tt', |
93 |
); |
); |
94 |
} |
} |
95 |
|
|
96 |
|
# save into sorted index (thesaurus) |
97 |
|
foreach my $ds (@ds) { |
98 |
|
next if (! $ds->{'index'}); |
99 |
|
|
100 |
|
$thes->{$ds->{'tag'}} ||= new WebPAC::Index; |
101 |
|
|
102 |
|
foreach my $h (@{$ds->{'index'}}) { |
103 |
|
$thes->{$ds->{'tag'}}->insert( |
104 |
|
path => $f, |
105 |
|
headline => $h, |
106 |
|
); |
107 |
|
} |
108 |
|
} |
109 |
|
|
110 |
# print Dumper(\@ds); |
# print Dumper(\@ds); |
111 |
|
|
112 |
} |
} |
113 |
|
|
114 |
if ($log->is_debug) { |
foreach my $t (keys %{$thes}) { |
115 |
|
|
116 |
|
my @e = $thes->{$t}->elements; |
117 |
|
if (! @e) { |
118 |
|
$log->logwarn("no elements in sorted index $t?"); |
119 |
|
next; |
120 |
|
} |
121 |
|
|
122 |
|
my $file = "./out/bfilter/$t.txt"; |
123 |
|
$log->info("saving sorted index $t to '$file' [".scalar(@e)." elements]"); |
124 |
|
|
125 |
|
$webpac->output_file( |
126 |
|
file => $file, |
127 |
|
template => 'index.tt', |
128 |
|
data => \@e, |
129 |
|
index_name => $t, |
130 |
|
); |
131 |
|
} |
132 |
|
|
133 |
|
if (0 && $log->is_debug) { |
134 |
$log->debug("lookup hash: ",Dumper($webpac->{'lookup'})); |
$log->debug("lookup hash: ",Dumper($webpac->{'lookup'})); |
135 |
$log->debug("data hash: ",Dumper($webpac->{'data'})); |
$log->debug("data hash: ",Dumper($webpac->{'data'})); |
136 |
|
foreach my $t (keys %{$thes}) { |
137 |
|
$log->debug("thesaurus $t hash: ",Dumper($thes->{$t})); |
138 |
|
} |
139 |
} |
} |
140 |
|
|
141 |
$index->close; |
$index->close; |