/[webpac]/trunk2/all2all.pl
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk2/all2all.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 490 - (show annotations)
Sat Oct 9 21:44:25 2004 UTC (19 years, 5 months ago) by dpavlin
File MIME type: text/plain
File size: 7980 byte(s)
use locale so that \W (non-word character) won't eat local characters

1 #!/usr/bin/perl -w
2
3 =head1 NAME
4
5 all2all.pl - basic script for all WebPAC needs
6
7 =cut
8
9 use strict;
10 use locale;
11 use YAML;
12 use Carp;
13 use Getopt::Long;
14
15 use lib './lib';
16 use WebPAC;
17 use WebPAC::jsFind;
18 use WebPAC::Index;
19 use WebPAC::Tree;
20
21 # options which can be changed via command line
22 #
23 my $code_page = 'ISO-8859-2';
24 my ($limit_mfn, $start_mfn, $debug, $low_mem);
25 my $index_path = './out/index';
26
27 my $result = GetOptions(
28 "code_page=s" => \$code_page,
29 "limit_mfn=i" => \$limit_mfn,
30 "start_mfn=i" => \$start_mfn,
31 "debug!" => \$debug,
32 "low_mem!" => \$low_mem,
33 );
34
35 # create WebPAC object
36 #
37 my $webpac = new WebPAC(
38 code_page => $code_page,
39 limit_mfn => $limit_mfn,
40 start_mfn => $start_mfn,
41 debug => $debug,
42 low_mem => $low_mem,
43 ) || die;
44
45 my $log = $webpac->_get_logger() || die "can't get logger";
46
47 $log->debug("creating WebPAC::jsFind object");
48
49 my $index = new WebPAC::jsFind(
50 index_path => $index_path,
51 keys => 10,
52 ) || die;
53
54 my $thes;
55
56 $|=1;
57
58 my $maxmfn = $webpac->open_isis(
59 filename => shift @ARGV || '/data/hidra/THS/THS',
60 lookup => [
61 { 'key' => 'd:v900', 'val' => 'v250^a' },
62 # { 'eval' => '"v901^a" eq "Podruèje"', 'key' => 'pa:v561^4:v562^4:v461^1', 'val' => 'v900' },
63 # { 'eval '=> '"v901^a" eq "Mikrotezaurus"', 'key' => 'a:v561^4:v562^4:v461^1', 'val' => 'v900' },
64 # { 'eval' => '"v901^a" eq "Deskriptor"', 'key' => 'a:v561^4:v562^4:v461^1', 'val' => 'v900' },
65 { 'key' => 'a:v561^4:v562^4:v461^1', 'val' => 'v900' },
66 { 'key' => '900_mfn:v900', 'val' => 'v000' },
67 # tree structure
68 { 'eval' => 'length("v251") == 2 && "v800" =~ m/EUROVOC/ || "v800" =~ m/CROVOC/ && "v251" =~ m/^(H|HD|L|Z|P)$/', 'key' => 'root:v251', 'val' => 'v900' },
69 { 'eval' => '"v251"', 'key' => 'code:v900', 'val' => 'v561^4:v251' },
70 { 'eval' => '"v561^4" && "v562^4"', 'key' => 'code:v900', 'val' => 'v561^4:v562^4' },
71 ],
72 );
73
74 $log->debug("isis file ",$webpac->{'isis_filename'}," opened");
75
76 $log->info("rows: $maxmfn");
77
78 $webpac->open_import_xml(type => 'isis_hidra_ths');
79
80 if(1) { # XXX
81
82 while (my $rec = $webpac->fetch_rec) {
83
84 my @ds = $webpac->data_structure($rec);
85
86 if (0 && $log->is_debug) {
87 $log->debug("rec = ",Dump($rec));
88 $log->debug("ds = ",Dump(\@ds));
89 }
90
91 next if (! @ds);
92
93 my $filename = $webpac->{'current_filename'} || $log->logdie("no current_filename in webpac object");
94
95 if ($filename) {
96 $webpac->output_file(
97 file => $filename,
98 template => 'html.tt',
99 data => \@ds,
100 headline => $webpac->{'headline'},
101 );
102 } else {
103 print $webpac->output(
104 template => 'text.tt',
105 data => \@ds,
106 headline => $webpac->{'headline'},
107 );
108 }
109
110 my $headline = $webpac->{'headline'};
111
112 my $f = $filename;
113 $f =~ s!out/!!;
114
115 # save into index
116 foreach my $ds (@ds) {
117 next if (! $ds->{'swish'});
118
119 # strip all non word characters from beginning or end
120 # of word
121 my $words = join(" ",@{$ds->{'swish'}});
122 $words =~ s/^\W+//;
123 $words =~ s/\W*\s+\W*/ /g;
124 $words =~ s/\W+$//;
125
126 $index->insert(
127 index_name => $ds->{'tag'},
128 #path => $f,
129 path => $webpac->mfn,
130 headline => $headline,
131 words => $words,
132 );
133 }
134
135 # save into sorted index (thesaurus)
136 foreach my $ds (@ds) {
137 next if (! $ds->{'index'});
138
139 $thes->{$ds->{'tag'}} ||= new WebPAC::Index;
140
141 foreach my $h (@{$ds->{'index'}}) {
142 $thes->{$ds->{'tag'}}->insert(
143 headline => $h,
144 mfn => $webpac->mfn,
145 );
146 }
147 }
148
149 # print Dump(\@ds);
150
151 }
152
153 foreach my $t (keys %{$thes}) {
154
155 my @e = $thes->{$t}->elements;
156 if (! @e) {
157 $log->logwarn("no elements in sorted index $t?");
158 next;
159 }
160
161 my $file = "./out/bfilter/$t.txt";
162 $log->info("saving sorted index $t to '$file' [".scalar(@e)." elements]");
163
164 $webpac->output_file(
165 file => $file,
166 template => 'index.tt',
167 data => \@e,
168 index_name => $t,
169 );
170 }
171
172 if (0 && $log->is_debug) {
173 $log->debug("lookup hash: ",Dump($webpac->{'lookup'}));
174 $log->debug("data hash: ",Dump($webpac->{'data'}));
175 foreach my $t (keys %{$thes}) {
176 $log->debug("thesaurus $t hash: ",Dump($thes->{$t}));
177 }
178 }
179
180 } # XXX if(0)
181
182 $log->debug("lookup hash: ",Dump($webpac->{'lookup'}));
183
184 $log->info("creating tree");
185
186 #
187 # define tree structure
188 #
189
190 my $l = $webpac->{'lookup'} || $log->logconfess("can't find lookup");
191
192 my @tree = ({
193 # level 0
194 code_arr => sub { sort keys %{$l} },
195 filter_code => sub {
196 my $t = shift;
197 return $t if ($t =~ s/root://);
198 },
199 lookup_v900 => sub { shift @{$l->{"root:".$_[0]}} },
200 lookup_term => sub { shift @{$l->{"d:".$_[1]}} },
201 lookup_mfn => sub { shift @{$l->{"900_mfn:".$_[1]}} },
202 have_children => sub { return $l->{"a:".$_[0]."::"} },
203 have_children_at_level => sub {
204 return unless (defined($l->{"code:".$_[1]}));
205 my $code = shift @{$l->{"code:".$_[1]}};
206 print STDERR "## $_[1] -> $code\n";
207 return undef unless($code);
208 return(9, $l->{"a:$code:"} ) if (defined($l->{"a:$code:"}));
209 },
210 style => 'display: none',
211 },{
212 # 1
213 code_arr => sub { @{$_[0]} },
214 filter_code => sub { shift }, # nop
215 lookup_v900 => sub { shift @{$l->{"code:".$_[0]}} },
216 lookup_term => sub { shift @{$l->{"d:".$_[0]}} },
217 lookup_mfn => sub { shift @{$l->{"900_mfn:".$_[0]}} },
218 have_children => sub { return $l->{"a:".$_[1].":"} },
219 style => 'display: none',
220 },{
221 # 2
222 code_arr => sub { @{$_[0]} },
223 filter_code => sub { shift },
224 lookup_v900 => sub { shift @{$l->{"code:".$_[0]}} },
225 lookup_term => sub { shift @{$l->{"d:".$_[0]}} },
226 lookup_mfn => sub { shift @{$l->{"900_mfn:".$_[0]}} },
227 have_children => sub { return $l->{"a:".$_[1].":".$_[0]} },
228 #style => 'display: none',
229 },{
230 # 3 u¾i pojam
231 code_arr => sub { @{$_[0]} },
232 filter_code => sub { shift },
233 lookup_v900 => sub { shift @{$l->{"code:".$_[0]}} },
234 lookup_term => sub { shift @{$l->{"d:".$_[0]}} },
235 lookup_mfn => sub { shift @{$l->{"900_mfn:".$_[0]}} },
236 have_children => sub { return $l->{"a:".$_[1].":".$_[0]} },
237 },{
238 # 4
239 code_arr => sub { @{$_[0]} },
240 filter_code => sub { shift },
241 lookup_v900 => sub { shift @{$l->{"code:".$_[0]}} },
242 lookup_term => sub { shift @{$l->{"d:".$_[0]}} },
243 lookup_mfn => sub { shift @{$l->{"900_mfn:".$_[0]}} },
244 have_children => sub { return $l->{"a:".$_[1].":".$_[0]} },
245 },{
246 # 5
247 code_arr => sub { @{$_[0]} },
248 filter_code => sub { shift },
249 lookup_v900 => sub { shift @{$l->{"code:".$_[0]}} },
250 lookup_term => sub { shift @{$l->{"d:".$_[0]}} },
251 lookup_mfn => sub { shift @{$l->{"900_mfn:".$_[0]}} },
252 have_children => sub { return $l->{"a:".$_[1].":".$_[0]} },
253 },{
254 # 6
255 code_arr => sub { @{$_[0]} },
256 filter_code => sub { shift },
257 lookup_v900 => sub { shift @{$l->{"code:".$_[0]}} },
258 lookup_term => sub { shift @{$l->{"d:".$_[0]}} },
259 lookup_mfn => sub { shift @{$l->{"900_mfn:".$_[0]}} },
260 have_children => sub { return $l->{"a:".$_[1].":".$_[0]} },
261 },{
262 # 7
263 code_arr => sub { @{$_[0]} },
264 filter_code => sub { shift },
265 lookup_v900 => sub { shift @{$l->{"code:".$_[0]}} },
266 lookup_term => sub { shift @{$l->{"d:".$_[0]}} },
267 lookup_mfn => sub { shift @{$l->{"900_mfn:".$_[0]}} },
268 have_children => sub { return $l->{"a:".$_[1].":".$_[0]} },
269 },{
270 # 8
271 code_arr => sub { @{$_[0]} },
272 filter_code => sub { shift },
273 lookup_v900 => sub { shift @{$l->{"code:".$_[0]}} },
274 lookup_term => sub { shift @{$l->{"d:".$_[0]}} },
275 lookup_mfn => sub { shift @{$l->{"900_mfn:".$_[0]}} },
276 # have_children => sub { return $l->{"a:".$_[1].":".$_[0]} },
277 have_children => sub { 0 },
278 },{
279 # 9 - level which is never reached except explicitly
280 code_arr => sub { @{$_[0]} },
281 filter_code => sub { shift },
282 lookup_v900 => sub { shift @{$l->{"code:".$_[0]}} },
283 lookup_term => sub { shift @{$l->{"d:".$_[0]}} },
284 lookup_mfn => sub { shift @{$l->{"900_mfn:".$_[0]}} },
285 have_children => sub { 0 },
286 have_children_at_level => sub { defined($l->{"a:".$_[1].":".$_[0]}) && return (9,$l->{"a:".$_[1].":".$_[0]}) },
287 },{
288 });
289
290 my $tree = new WebPAC::Tree(
291 dir => './out',
292 html => 'browse.html',
293 template => './output_template/tree.tt',
294 js => 'tree-ids.js',
295 tree => \@tree,
296 );
297
298 $log->info("closing index");
299 $index->close;
300
301 $log->info("elapsed time: ",$webpac->fmt_time(time() - $webpac->{'start_t'}));

Properties

Name Value
cvs2svn:cvs-rev 1.64
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26