/[webpac]/trunk2/lib/WebPAC.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk2/lib/WebPAC.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 358 - (show annotations)
Wed Jun 16 14:31:33 2004 UTC (15 years, 7 months ago) by dpavlin
File size: 8049 byte(s)
format seems to work

1 package WebPAC;
2
3 use Carp;
4 use Text::Iconv;
5 use Config::IniFiles;
6
7 use Data::Dumper;
8
9 =head1 NAME
10
11 WebPAC - base class for WebPAC
12
13 =head1 DESCRIPTION
14
15 This module implements methods used by WebPAC.
16
17 =head1 METHODS
18
19 =head2 new
20
21 This will create new instance of WebPAC using configuration specified by C<config_file>.
22
23 my $webpac = new WebPAC(
24 config_file => 'name.conf',
25 [code_page => 'ISO-8859-2',]
26 );
27
28 Default C<code_page> is C<ISO-8859-2>.
29
30 It will also read configuration files
31 C<global.conf> (used by indexer and Web font-end)
32 and configuration file specified by C<config_file>
33 which describes databases to be indexed.
34
35 =cut
36
37 sub new {
38 my $class = shift;
39 my $self = {@_};
40 bless($self, $class);
41
42 # fill in default values
43 # output codepage
44 $self->{'code_page'} = 'ISO-8859-2' if (! $self->{'code_page'});
45
46 #
47 # read global.conf
48 #
49
50 $self->{global_config_file} = new Config::IniFiles( -file => 'global.conf' ) || croak "can't open 'global.conf'";
51
52 # read global config parametars
53 foreach my $var (qw(
54 dbi_dbd
55 dbi_dsn
56 dbi_user
57 dbi_passwd
58 show_progress
59 my_unac_filter
60 )) {
61 $self->{global_config}->{$var} = $self->{global_config_file}->val('global', $var);
62 }
63
64 #
65 # read indexer config file
66 #
67
68 $self->{indexer_config_file} = new Config::IniFiles( -file => $self->{config_file} ) || croak "can't open '$self->{config_file}'";
69
70 # read global config parametars
71 foreach my $var (qw(
72 dbi_dbd
73 dbi_dsn
74 dbi_user
75 dbi_passwd
76 show_progress
77 my_unac_filter
78 )) {
79 $self->{global_config}->{$var} = $self->{global_config_file}->val('global', $var);
80 }
81
82 return $self;
83 }
84
85 =head2 open_isis
86
87 Open CDS/ISIS database using OpenIsis module and read all records to memory.
88
89 $webpac->open_isis(
90 filename => '/data/ISIS/ISIS',
91 code_page => '852',
92 limit_mfn => '500',
93 lookup => [ ... ],
94 );
95
96 By default, ISIS code page is assumed to be C<852>.
97
98 If optional parametar C<limit_mfn> is set, it will read just 500 records
99 from database in example above.
100
101 Returns number of last record read into memory (size of database, really).
102
103 C<lookup> argument is an array of lookups to create. Each lookup must have C<key> and
104 C<val>. Optional parametar C<eval> is perl code to evaluate before storing
105 value in index.
106
107 lookup => [
108 { 'key' => 'd:v900', 'val' => 'v250^a' },
109 { 'eval' => '"v901^a" eq "Podruèje"',
110 'key' => 'pa:v561^4:v562^4:v461^1',
111 'val' => 'v900' },
112 ]
113
114 =cut
115
116 sub open_isis {
117 my $self = shift;
118 my $arg = {@_};
119
120 croak "need filename" if (! $arg->{'filename'});
121 my $code_page = $arg->{'code_page'} || '852';
122
123 use OpenIsis;
124
125 #$self->{'isis_code_page'} = $code_page;
126
127 # create Text::Iconv object
128 my $cp = Text::Iconv->new($code_page,$self->{'code_page'});
129
130 my $isis_db = OpenIsis::open($arg->{'filename'});
131
132 my $maxmfn = OpenIsis::maxRowid( $isis_db ) || 1;
133
134 $maxmfn = $self->{limit_mfn} if ($self->{limit_mfn});
135
136 # read database
137 for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) {
138
139 # read record
140 my $row = OpenIsis::read( $isis_db, $mfn );
141 foreach my $k (keys %{$row}) {
142 if ($k ne "mfn") {
143 foreach my $l (@{$row->{$k}}) {
144 $l = $cp->convert($l);
145 # has subfields?
146 my $val;
147 if ($l =~ m/\^/) {
148 foreach my $t (split(/\^/,$l)) {
149 next if (! $t);
150 $val->{substr($t,0,1)} = substr($t,1);
151 }
152 } else {
153 $val = $l;
154 }
155
156 push @{$self->{'data'}->{$mfn}->{$k}}, $val;
157 }
158 }
159
160 }
161
162 # create lookup
163 my $rec = $self->{'data'}->{$mfn};
164 $self->create_lookup($rec, @{$arg->{'lookup'}});
165
166 }
167
168 # store max mfn and return it.
169 return $self->{'max_mfn'} = $maxmfn;
170 }
171
172 =head2 create_lookup
173
174 Create lookup from record using lookup definition.
175
176 =cut
177
178 sub create_lookup {
179 my $self = shift;
180
181 my $rec = shift || confess "need record to create lookup";
182 confess("need HASH as first argument!") if ($rec !~ /HASH/o);
183
184 foreach my $i (@_) {
185 if ($i->{'eval'}) {
186 my $eval = $self->fill_in($rec,$i->{'eval'});
187 my $key = $self->fill_in($rec,$i->{'key'});
188 my @val = $self->fill_in($rec,$i->{'val'});
189 if ($key && @val && eval $eval) {
190 push @{$self->{'lookup'}->{$key}}, @val;
191 }
192 } else {
193 my $key = $self->fill_in($rec,$i->{'key'});
194 my @val = $self->fill_in($rec,$i->{'val'});
195 if ($key && @val) {
196 push @{$self->{'lookup'}->{$key}}, @val;
197 }
198 }
199 }
200 }
201
202 =head2 get_data
203
204 Returns value from record.
205
206 $self->get_data(\$rec,$f,$sf,$i,\$found);
207
208 Arguments are:
209 record reference C<$rec>,
210 field C<$f>,
211 optional subfiled C<$sf>,
212 index for repeatable values C<$i>.
213
214 Optinal variable C<$found> will be incremeted if thre
215 is field.
216
217 Returns value or empty string.
218
219 =cut
220
221 sub get_data {
222 my $self = shift;
223
224 my ($rec,$f,$sf,$i,$found) = @_;
225 if ($$rec->{$f}) {
226 if ($sf && $$rec->{$f}->[$i]->{$sf}) {
227 $$found++ if (defined($$found));
228 return $$rec->{$f}->[$i]->{$sf};
229 } elsif ($$rec->{$f}->[$i]) {
230 $$found++ if (defined($$found));
231 return $$rec->{$f}->[$i];
232 }
233 } else {
234 return '';
235 }
236 }
237
238 =head2 fill_in
239
240 Workhourse of all: takes record from in-memory structure of database and
241 strings with placeholders and returns string or array of with substituted
242 values from record.
243
244 $webpac->fill_in($rec,'v250^a');
245
246 Optional argument is ordinal number for repeatable fields. By default,
247 it's assume to be first repeatable field (fields are perl array, so first
248 element is 0).
249 Following example will read second value from repeatable field.
250
251 $webpac->fill_in($rec,'Title: v250^a',1);
252
253 This function B<does not> perform parsing of format to inteligenty skip
254 delimiters before fields which aren't used.
255
256 =cut
257
258 sub fill_in {
259 my $self = shift;
260
261 my $rec = shift || confess "need data record";
262 my $format = shift || confess "need format to parse";
263 # iteration (for repeatable fields)
264 my $i = shift || 0;
265
266 # FIXME remove for speedup?
267 confess("need HASH as first argument!") if ($rec !~ /HASH/o);
268
269 my $found = 0;
270
271 # do actual replacement of placeholders
272 $format =~ s/v(\d+)(?:\^(\w))*/$self->get_data(\$rec,$1,$2,$i,\$found)/ges;
273
274 if ($found) {
275 # do we have lookups?
276 if ($format =~ /\[[^\[\]]+\]/o) {
277 return $self->lookup($format);
278 } else {
279 return $format;
280 }
281 } else {
282 return;
283 }
284 }
285
286 =head2 lookup
287
288 Perform lookups on format supplied to it.
289
290 my $txt = $self->lookup('[v900]');
291
292 Lookups can be nested (like C<[d:[a:[v900]]]>).
293
294 =cut
295
296 sub lookup {
297 my $self = shift;
298
299 my $tmp = shift || confess "need format";
300
301 if ($tmp =~ /\[[^\[\]]+\]/o) {
302 my @in = ( $tmp );
303 #print "##lookup $tmp\n";
304 my @out;
305 while (my $f = shift @in) {
306 if ($f =~ /\[([^\[\]]+)\]/) {
307 my $k = $1;
308 if ($self->{'lookup'}->{$k}) {
309 #print "## lookup key = $k\n";
310 foreach my $nv (@{$self->{'lookup'}->{$k}}) {
311 my $tmp2 = $f;
312 $tmp2 =~ s/\[$k\]/$nv/g;
313 push @in, $tmp2;
314 #print "## lookup in => $tmp2\n";
315 }
316 } else {
317 undef $f;
318 }
319 } elsif ($f) {
320 push @out, $f;
321 #print "## lookup out => $f\n";
322 }
323 }
324 return @out;
325 } else {
326 return $tmp;
327 }
328 }
329
330 =head2 parse
331
332 Perform smart parsing of string, skipping delimiters for fields which aren't
333 defined. It can also eval code in format starting with C<eval{...}> and
334 return output or nothing depending on eval code.
335
336 $webpac->parse($rec,'eval{"v901^a" eq "Deskriptor"}descriptor: v250^a', $i);
337
338 =cut
339
340 sub parse {
341 my $self = shift;
342
343 my ($rec, $format, $i) = @_;
344
345 confess("need HASH as first argument!") if ($rec !~ /HASH/o);
346
347 $i = 0 if (! $i);
348
349 my @out;
350
351 my $eval_code;
352 # remove eval{...} from beginning
353 $eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s);
354
355 my $prefix;
356 my $all_found=0;
357
358 print "## $format\n";
359 while ($format =~ s/^(.*?)v(\d+)(?:\^(\w))*//s) {
360 print "## [ $1 | $2 | $3 ] $format\n";
361
362 my $del = $1 || '';
363 $prefix ||= $del;
364
365 my $found = 0;
366 my $tmp = $self->get_data(\$rec,$2,$3,$i,\$found);
367
368 if ($found) {
369 push @out, $del;
370 push @out, $tmp;
371 $all_found += $found;
372 }
373 }
374
375 return if (! $all_found);
376
377 print Dumper($prefix, \@out);
378
379 my $out = join('',@out) . $format;
380
381 # add prefix if not there
382 $out = $prefix . $out if ($out !~ m/^\Q$prefix\E/);
383
384 return $out;
385 }
386
387 1;

  ViewVC Help
Powered by ViewVC 1.1.26