/[webpac]/trunk2/lib/WebPAC.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk2/lib/WebPAC.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 363 - (show annotations)
Wed Jun 16 20:05:19 2004 UTC (19 years, 9 months ago) by dpavlin
File size: 9574 byte(s)
open_import_xml, debug option to new

1 package WebPAC;
2
3 use Carp;
4 use Text::Iconv;
5 use Config::IniFiles;
6 use XML::Simple;
7
8 use Data::Dumper;
9
10 =head1 NAME
11
12 WebPAC - base class for WebPAC
13
14 =head1 DESCRIPTION
15
16 This module implements methods used by WebPAC.
17
18 =head1 METHODS
19
20 =head2 new
21
22 This will create new instance of WebPAC using configuration specified by C<config_file>.
23
24 my $webpac = new WebPAC(
25 config_file => 'name.conf',
26 [code_page => 'ISO-8859-2',]
27 );
28
29 Default C<code_page> is C<ISO-8859-2>.
30
31 It will also read configuration files
32 C<global.conf> (used by indexer and Web font-end)
33 and configuration file specified by C<config_file>
34 which describes databases to be indexed.
35
36 =cut
37
38 # mapping between data type and tag which specify
39 # format in XML file
40 my %type2tag = (
41 'isis' => 'isis',
42 # 'excel' => 'column',
43 # 'marc' => 'marc',
44 # 'feed' => 'feed'
45 );
46
47 sub new {
48 my $class = shift;
49 my $self = {@_};
50 bless($self, $class);
51
52 # fill in default values
53 # output codepage
54 $self->{'code_page'} = 'ISO-8859-2' if (! $self->{'code_page'});
55
56 #
57 # read global.conf
58 #
59
60 $self->{global_config_file} = new Config::IniFiles( -file => 'global.conf' ) || croak "can't open 'global.conf'";
61
62 # read global config parametars
63 foreach my $var (qw(
64 dbi_dbd
65 dbi_dsn
66 dbi_user
67 dbi_passwd
68 show_progress
69 my_unac_filter
70 )) {
71 $self->{global_config}->{$var} = $self->{global_config_file}->val('global', $var);
72 }
73
74 #
75 # read indexer config file
76 #
77
78 $self->{indexer_config_file} = new Config::IniFiles( -file => $self->{config_file} ) || croak "can't open '$self->{config_file}'";
79
80 return $self;
81 }
82
83 =head2 open_isis
84
85 Open CDS/ISIS database using OpenIsis module and read all records to memory.
86
87 $webpac->open_isis(
88 filename => '/data/ISIS/ISIS',
89 code_page => '852',
90 limit_mfn => '500',
91 lookup => [ ... ],
92 );
93
94 By default, ISIS code page is assumed to be C<852>.
95
96 If optional parametar C<limit_mfn> is set, it will read just 500 records
97 from database in example above.
98
99 Returns number of last record read into memory (size of database, really).
100
101 C<lookup> argument is an array of lookups to create. Each lookup must have C<key> and
102 C<val>. Optional parametar C<eval> is perl code to evaluate before storing
103 value in index.
104
105 lookup => [
106 { 'key' => 'd:v900', 'val' => 'v250^a' },
107 { 'eval' => '"v901^a" eq "Podruèje"',
108 'key' => 'pa:v561^4:v562^4:v461^1',
109 'val' => 'v900' },
110 ]
111
112 =cut
113
114 sub open_isis {
115 my $self = shift;
116 my $arg = {@_};
117
118 croak "need filename" if (! $arg->{'filename'});
119 my $code_page = $arg->{'code_page'} || '852';
120
121 use OpenIsis;
122
123 #$self->{'isis_code_page'} = $code_page;
124
125 # create Text::Iconv object
126 my $cp = Text::Iconv->new($code_page,$self->{'code_page'});
127
128 print STDERR "reading ISIS database '",$arg->{'filename'},"'\n" if ($self->{'debug'});
129
130 my $isis_db = OpenIsis::open($arg->{'filename'});
131
132 my $maxmfn = OpenIsis::maxRowid( $isis_db ) || 1;
133
134 $maxmfn = $self->{limit_mfn} if ($self->{limit_mfn});
135
136 print STDERR "processing $maxmfn records...\n" if ($self->{'debug'});
137
138 # read database
139 for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) {
140
141 # read record
142 my $row = OpenIsis::read( $isis_db, $mfn );
143 foreach my $k (keys %{$row}) {
144 if ($k ne "mfn") {
145 foreach my $l (@{$row->{$k}}) {
146 $l = $cp->convert($l);
147 # has subfields?
148 my $val;
149 if ($l =~ m/\^/) {
150 foreach my $t (split(/\^/,$l)) {
151 next if (! $t);
152 $val->{substr($t,0,1)} = substr($t,1);
153 }
154 } else {
155 $val = $l;
156 }
157
158 push @{$self->{'data'}->{$mfn}->{$k}}, $val;
159 }
160 }
161
162 }
163
164 # create lookup
165 my $rec = $self->{'data'}->{$mfn};
166 $self->create_lookup($rec, @{$arg->{'lookup'}});
167
168 }
169
170 $self->{'current_mfn'} = 1;
171
172 # store max mfn and return it.
173 return $self->{'max_mfn'} = $maxmfn;
174 }
175
176 =head2 fetch_rec
177
178 Fetch next record from database. It will also display progress bar (once
179 it's implemented, that is).
180
181 my $rec = $webpac->fetch_rec;
182
183 =cut
184
185 sub fetch_rec {
186 my $self = shift;
187
188 my $mfn = $self->{'current_mfn'}++ || confess "it seems that you didn't load database!";
189
190 if ($mfn > $self->{'max_mfn'}) {
191 $self->{'current_mfn'} = $self->{'max_mfn'};
192 return;
193 }
194
195 return $self->{'data'}->{$mfn};
196 }
197
198 =head2 open_import_xml
199
200 Read file from C<import_xml/> directory and parse it.
201
202 $webpac->open_import_xml(type => 'isis');
203
204 =cut
205
206 sub open_import_xml {
207 my $self = shift;
208
209 my $arg = {@_};
210 confess "need type to load file from import_xml/" if (! $arg->{'type'});
211
212 my $type = $arg->{'type'};
213
214 my $type_base = $type;
215 $type_base =~ s/_.*$//g;
216
217 my $f = "./import_xml/$type.xml";
218 confess "import_xml file '$f' doesn't exist!" if (! -e "$f");
219
220 print STDERR "reading '$f'\n" if ($self->{'debug'});
221
222 $self->{'import_xml'} = XMLin($f,
223 ForceArray => [ $type2tag{$type_base}, 'config', 'format' ],
224 ForceContent => 1
225 );
226
227 print Dumper($self->{'import_xml'});
228
229 }
230
231 =head2 create_lookup
232
233 Create lookup from record using lookup definition.
234
235 =cut
236
237 sub create_lookup {
238 my $self = shift;
239
240 my $rec = shift || confess "need record to create lookup";
241 confess("need HASH as first argument!") if ($rec !~ /HASH/o);
242
243 foreach my $i (@_) {
244 if ($i->{'eval'}) {
245 my $eval = $self->fill_in($rec,$i->{'eval'});
246 my $key = $self->fill_in($rec,$i->{'key'});
247 my @val = $self->fill_in($rec,$i->{'val'});
248 if ($key && @val && eval $eval) {
249 push @{$self->{'lookup'}->{$key}}, @val;
250 }
251 } else {
252 my $key = $self->fill_in($rec,$i->{'key'});
253 my @val = $self->fill_in($rec,$i->{'val'});
254 if ($key && @val) {
255 push @{$self->{'lookup'}->{$key}}, @val;
256 }
257 }
258 }
259 }
260
261 =head2 get_data
262
263 Returns value from record.
264
265 $self->get_data(\$rec,$f,$sf,$i,\$found);
266
267 Arguments are:
268 record reference C<$rec>,
269 field C<$f>,
270 optional subfiled C<$sf>,
271 index for repeatable values C<$i>.
272
273 Optinal variable C<$found> will be incremeted if thre
274 is field.
275
276 Returns value or empty string.
277
278 =cut
279
280 sub get_data {
281 my $self = shift;
282
283 my ($rec,$f,$sf,$i,$found) = @_;
284 if ($$rec->{$f}) {
285 if ($sf && $$rec->{$f}->[$i]->{$sf}) {
286 $$found++ if (defined($$found));
287 return $$rec->{$f}->[$i]->{$sf};
288 } elsif ($$rec->{$f}->[$i]) {
289 $$found++ if (defined($$found));
290 return $$rec->{$f}->[$i];
291 }
292 } else {
293 return '';
294 }
295 }
296
297 =head2 fill_in
298
299 Workhourse of all: takes record from in-memory structure of database and
300 strings with placeholders and returns string or array of with substituted
301 values from record.
302
303 $webpac->fill_in($rec,'v250^a');
304
305 Optional argument is ordinal number for repeatable fields. By default,
306 it's assume to be first repeatable field (fields are perl array, so first
307 element is 0).
308 Following example will read second value from repeatable field.
309
310 $webpac->fill_in($rec,'Title: v250^a',1);
311
312 This function B<does not> perform parsing of format to inteligenty skip
313 delimiters before fields which aren't used.
314
315 =cut
316
317 sub fill_in {
318 my $self = shift;
319
320 my $rec = shift || confess "need data record";
321 my $format = shift || confess "need format to parse";
322 # iteration (for repeatable fields)
323 my $i = shift || 0;
324
325 # FIXME remove for speedup?
326 confess("need HASH as first argument!") if ($rec !~ /HASH/o);
327
328 my $found = 0;
329
330 my $eval_code;
331 # remove eval{...} from beginning
332 $eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s);
333
334 # do actual replacement of placeholders
335 $format =~ s/v(\d+)(?:\^(\w))*/$self->get_data(\$rec,$1,$2,$i,\$found)/ges;
336
337 if ($found) {
338 if ($eval_code) {
339 my $eval = $self->fill_in($rec,$eval_code,$i);
340 return if (! eval $eval);
341 }
342 # do we have lookups?
343 if ($format =~ /\[[^\[\]]+\]/o) {
344 return $self->lookup($format);
345 } else {
346 return $format;
347 }
348 } else {
349 return;
350 }
351 }
352
353 =head2 lookup
354
355 Perform lookups on format supplied to it.
356
357 my $txt = $self->lookup('[v900]');
358
359 Lookups can be nested (like C<[d:[a:[v900]]]>).
360
361 =cut
362
363 sub lookup {
364 my $self = shift;
365
366 my $tmp = shift || confess "need format";
367
368 if ($tmp =~ /\[[^\[\]]+\]/o) {
369 my @in = ( $tmp );
370 #print "##lookup $tmp\n";
371 my @out;
372 while (my $f = shift @in) {
373 if ($f =~ /\[([^\[\]]+)\]/) {
374 my $k = $1;
375 if ($self->{'lookup'}->{$k}) {
376 #print "## lookup key = $k\n";
377 foreach my $nv (@{$self->{'lookup'}->{$k}}) {
378 my $tmp2 = $f;
379 $tmp2 =~ s/\[$k\]/$nv/g;
380 push @in, $tmp2;
381 #print "## lookup in => $tmp2\n";
382 }
383 } else {
384 undef $f;
385 }
386 } elsif ($f) {
387 push @out, $f;
388 #print "## lookup out => $f\n";
389 }
390 }
391 return @out;
392 } else {
393 return $tmp;
394 }
395 }
396
397 =head2 parse
398
399 Perform smart parsing of string, skipping delimiters for fields which aren't
400 defined. It can also eval code in format starting with C<eval{...}> and
401 return output or nothing depending on eval code.
402
403 $webpac->parse($rec,'eval{"v901^a" eq "Deskriptor"}descriptor: v250^a', $i);
404
405 =cut
406
407 sub parse {
408 my $self = shift;
409
410 my ($rec, $format, $i) = @_;
411
412 confess("need HASH as first argument!") if ($rec !~ /HASH/o);
413
414 $i = 0 if (! $i);
415
416 my @out;
417
418 my $eval_code;
419 # remove eval{...} from beginning
420 $eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s);
421
422 my $prefix;
423 my $all_found=0;
424
425 #print "## $format\n";
426 while ($format =~ s/^(.*?)v(\d+)(?:\^(\w))*//s) {
427 #print "## [ $1 | $2 | $3 ] $format\n";
428
429 my $del = $1 || '';
430 $prefix ||= $del if ($all_found == 0);
431
432 my $found = 0;
433 my $tmp = $self->get_data(\$rec,$2,$3,$i,\$found);
434
435 if ($found) {
436 push @out, $del;
437 push @out, $tmp;
438 $all_found += $found;
439 }
440 }
441
442 return if (! $all_found);
443
444 my $out = join('',@out) . $format;
445
446 # add prefix if not there
447 $out = $prefix . $out if ($out !~ m/^\Q$prefix\E/);
448
449 if ($eval_code) {
450 my $eval = $self->fill_in($rec,$eval_code,$i);
451 return if (! eval $eval);
452 }
453
454 return $out;
455 }
456
457 1;

  ViewVC Help
Powered by ViewVC 1.1.26