/[webpac]/trunk2/lib/WebPAC.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk2/lib/WebPAC.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 362 - (hide annotations)
Wed Jun 16 16:50:30 2004 UTC (19 years, 9 months ago) by dpavlin
File size: 8795 byte(s)
fetch_rec method

1 dpavlin 354 package WebPAC;
2 dpavlin 352
3     use Carp;
4 dpavlin 353 use Text::Iconv;
5     use Config::IniFiles;
6 dpavlin 352
7 dpavlin 358 use Data::Dumper;
8    
9 dpavlin 352 =head1 NAME
10    
11 dpavlin 354 WebPAC - base class for WebPAC
12 dpavlin 352
13     =head1 DESCRIPTION
14    
15 dpavlin 354 This module implements methods used by WebPAC.
16 dpavlin 352
17     =head1 METHODS
18    
19     =head2 new
20    
21 dpavlin 354 This will create new instance of WebPAC using configuration specified by C<config_file>.
22 dpavlin 352
23 dpavlin 354 my $webpac = new WebPAC(
24 dpavlin 352 config_file => 'name.conf',
25     [code_page => 'ISO-8859-2',]
26     );
27    
28     Default C<code_page> is C<ISO-8859-2>.
29    
30 dpavlin 353 It will also read configuration files
31     C<global.conf> (used by indexer and Web font-end)
32     and configuration file specified by C<config_file>
33     which describes databases to be indexed.
34    
35 dpavlin 352 =cut
36    
37     sub new {
38     my $class = shift;
39     my $self = {@_};
40     bless($self, $class);
41    
42     # fill in default values
43     # output codepage
44     $self->{'code_page'} = 'ISO-8859-2' if (! $self->{'code_page'});
45    
46 dpavlin 353 #
47     # read global.conf
48     #
49 dpavlin 352
50     $self->{global_config_file} = new Config::IniFiles( -file => 'global.conf' ) || croak "can't open 'global.conf'";
51    
52     # read global config parametars
53     foreach my $var (qw(
54     dbi_dbd
55     dbi_dsn
56     dbi_user
57     dbi_passwd
58     show_progress
59     my_unac_filter
60     )) {
61     $self->{global_config}->{$var} = $self->{global_config_file}->val('global', $var);
62     }
63    
64 dpavlin 353 #
65     # read indexer config file
66     #
67 dpavlin 352
68     $self->{indexer_config_file} = new Config::IniFiles( -file => $self->{config_file} ) || croak "can't open '$self->{config_file}'";
69    
70     # read global config parametars
71     foreach my $var (qw(
72     dbi_dbd
73     dbi_dsn
74     dbi_user
75     dbi_passwd
76     show_progress
77     my_unac_filter
78     )) {
79     $self->{global_config}->{$var} = $self->{global_config_file}->val('global', $var);
80     }
81    
82     return $self;
83     }
84    
85     =head2 open_isis
86    
87     Open CDS/ISIS database using OpenIsis module and read all records to memory.
88    
89     $webpac->open_isis(
90     filename => '/data/ISIS/ISIS',
91     code_page => '852',
92     limit_mfn => '500',
93     lookup => [ ... ],
94     );
95    
96     By default, ISIS code page is assumed to be C<852>.
97    
98 dpavlin 353 If optional parametar C<limit_mfn> is set, it will read just 500 records
99     from database in example above.
100 dpavlin 352
101     Returns number of last record read into memory (size of database, really).
102    
103     C<lookup> argument is an array of lookups to create. Each lookup must have C<key> and
104     C<val>. Optional parametar C<eval> is perl code to evaluate before storing
105     value in index.
106    
107     lookup => [
108     { 'key' => 'd:v900', 'val' => 'v250^a' },
109     { 'eval' => '"v901^a" eq "Podruèje"',
110     'key' => 'pa:v561^4:v562^4:v461^1',
111     'val' => 'v900' },
112     ]
113    
114     =cut
115    
116     sub open_isis {
117     my $self = shift;
118     my $arg = {@_};
119    
120     croak "need filename" if (! $arg->{'filename'});
121     my $code_page = $arg->{'code_page'} || '852';
122    
123 dpavlin 353 use OpenIsis;
124    
125 dpavlin 352 #$self->{'isis_code_page'} = $code_page;
126    
127     # create Text::Iconv object
128     my $cp = Text::Iconv->new($code_page,$self->{'code_page'});
129    
130     my $isis_db = OpenIsis::open($arg->{'filename'});
131    
132     my $maxmfn = OpenIsis::maxRowid( $isis_db ) || 1;
133    
134 dpavlin 357 $maxmfn = $self->{limit_mfn} if ($self->{limit_mfn});
135    
136 dpavlin 352 # read database
137     for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) {
138    
139     # read record
140     my $row = OpenIsis::read( $isis_db, $mfn );
141     foreach my $k (keys %{$row}) {
142     if ($k ne "mfn") {
143     foreach my $l (@{$row->{$k}}) {
144     $l = $cp->convert($l);
145     # has subfields?
146     my $val;
147     if ($l =~ m/\^/) {
148     foreach my $t (split(/\^/,$l)) {
149     next if (! $t);
150     $val->{substr($t,0,1)} = substr($t,1);
151     }
152     } else {
153     $val = $l;
154     }
155    
156     push @{$self->{'data'}->{$mfn}->{$k}}, $val;
157     }
158     }
159    
160     }
161    
162     # create lookup
163 dpavlin 355 my $rec = $self->{'data'}->{$mfn};
164     $self->create_lookup($rec, @{$arg->{'lookup'}});
165 dpavlin 352
166     }
167    
168 dpavlin 362 $self->{'current_mfn'} = 1;
169    
170 dpavlin 352 # store max mfn and return it.
171     return $self->{'max_mfn'} = $maxmfn;
172     }
173    
174 dpavlin 362 =head2 fetch_rec
175    
176     Fetch next record from database. It will also display progress bar (once
177     it's implemented, that is).
178    
179     my $rec = $webpac->fetch_rec;
180    
181     =cut
182    
183     sub fetch_rec {
184     my $self = shift;
185    
186     my $mfn = $self->{'current_mfn'}++ || confess "it seems that you didn't load database!";
187    
188     if ($mfn > $self->{'max_mfn'}) {
189     $self->{'current_mfn'} = $self->{'max_mfn'};
190     return;
191     }
192    
193     return $self->{'data'}->{$mfn};
194     }
195    
196 dpavlin 355 =head2 create_lookup
197    
198     Create lookup from record using lookup definition.
199    
200     =cut
201    
202     sub create_lookup {
203     my $self = shift;
204    
205     my $rec = shift || confess "need record to create lookup";
206     confess("need HASH as first argument!") if ($rec !~ /HASH/o);
207    
208     foreach my $i (@_) {
209     if ($i->{'eval'}) {
210     my $eval = $self->fill_in($rec,$i->{'eval'});
211     my $key = $self->fill_in($rec,$i->{'key'});
212     my @val = $self->fill_in($rec,$i->{'val'});
213     if ($key && @val && eval $eval) {
214     push @{$self->{'lookup'}->{$key}}, @val;
215     }
216     } else {
217     my $key = $self->fill_in($rec,$i->{'key'});
218     my @val = $self->fill_in($rec,$i->{'val'});
219     if ($key && @val) {
220     push @{$self->{'lookup'}->{$key}}, @val;
221     }
222     }
223     }
224     }
225    
226 dpavlin 356 =head2 get_data
227    
228     Returns value from record.
229    
230     $self->get_data(\$rec,$f,$sf,$i,\$found);
231    
232     Arguments are:
233     record reference C<$rec>,
234     field C<$f>,
235     optional subfiled C<$sf>,
236     index for repeatable values C<$i>.
237    
238     Optinal variable C<$found> will be incremeted if thre
239     is field.
240    
241     Returns value or empty string.
242    
243     =cut
244    
245     sub get_data {
246     my $self = shift;
247    
248     my ($rec,$f,$sf,$i,$found) = @_;
249     if ($$rec->{$f}) {
250     if ($sf && $$rec->{$f}->[$i]->{$sf}) {
251     $$found++ if (defined($$found));
252     return $$rec->{$f}->[$i]->{$sf};
253     } elsif ($$rec->{$f}->[$i]) {
254     $$found++ if (defined($$found));
255     return $$rec->{$f}->[$i];
256     }
257     } else {
258     return '';
259     }
260     }
261    
262 dpavlin 352 =head2 fill_in
263    
264     Workhourse of all: takes record from in-memory structure of database and
265     strings with placeholders and returns string or array of with substituted
266     values from record.
267    
268     $webpac->fill_in($rec,'v250^a');
269    
270     Optional argument is ordinal number for repeatable fields. By default,
271 dpavlin 353 it's assume to be first repeatable field (fields are perl array, so first
272     element is 0).
273     Following example will read second value from repeatable field.
274 dpavlin 352
275 dpavlin 353 $webpac->fill_in($rec,'Title: v250^a',1);
276    
277     This function B<does not> perform parsing of format to inteligenty skip
278     delimiters before fields which aren't used.
279    
280 dpavlin 352 =cut
281    
282     sub fill_in {
283     my $self = shift;
284    
285     my $rec = shift || confess "need data record";
286     my $format = shift || confess "need format to parse";
287     # iteration (for repeatable fields)
288     my $i = shift || 0;
289    
290     # FIXME remove for speedup?
291 dpavlin 355 confess("need HASH as first argument!") if ($rec !~ /HASH/o);
292 dpavlin 352
293     my $found = 0;
294    
295 dpavlin 359 my $eval_code;
296     # remove eval{...} from beginning
297     $eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s);
298    
299 dpavlin 352 # do actual replacement of placeholders
300 dpavlin 356 $format =~ s/v(\d+)(?:\^(\w))*/$self->get_data(\$rec,$1,$2,$i,\$found)/ges;
301 dpavlin 352
302 dpavlin 353 if ($found) {
303 dpavlin 359 if ($eval_code) {
304     my $eval = $self->fill_in($rec,$eval_code,$i);
305     return if (! eval $eval);
306     }
307 dpavlin 353 # do we have lookups?
308     if ($format =~ /\[[^\[\]]+\]/o) {
309     return $self->lookup($format);
310     } else {
311     return $format;
312     }
313 dpavlin 352 } else {
314     return;
315     }
316     }
317    
318     =head2 lookup
319    
320 dpavlin 355 Perform lookups on format supplied to it.
321 dpavlin 352
322     my $txt = $self->lookup('[v900]');
323    
324 dpavlin 355 Lookups can be nested (like C<[d:[a:[v900]]]>).
325    
326 dpavlin 352 =cut
327    
328     sub lookup {
329     my $self = shift;
330    
331     my $tmp = shift || confess "need format";
332    
333 dpavlin 353 if ($tmp =~ /\[[^\[\]]+\]/o) {
334 dpavlin 352 my @in = ( $tmp );
335 dpavlin 353 #print "##lookup $tmp\n";
336 dpavlin 352 my @out;
337     while (my $f = shift @in) {
338     if ($f =~ /\[([^\[\]]+)\]/) {
339     my $k = $1;
340     if ($self->{'lookup'}->{$k}) {
341 dpavlin 353 #print "## lookup key = $k\n";
342 dpavlin 352 foreach my $nv (@{$self->{'lookup'}->{$k}}) {
343     my $tmp2 = $f;
344     $tmp2 =~ s/\[$k\]/$nv/g;
345     push @in, $tmp2;
346 dpavlin 353 #print "## lookup in => $tmp2\n";
347 dpavlin 352 }
348     } else {
349     undef $f;
350     }
351     } elsif ($f) {
352     push @out, $f;
353 dpavlin 353 #print "## lookup out => $f\n";
354 dpavlin 352 }
355     }
356     return @out;
357     } else {
358     return $tmp;
359     }
360     }
361    
362 dpavlin 356 =head2 parse
363    
364     Perform smart parsing of string, skipping delimiters for fields which aren't
365     defined. It can also eval code in format starting with C<eval{...}> and
366     return output or nothing depending on eval code.
367    
368     $webpac->parse($rec,'eval{"v901^a" eq "Deskriptor"}descriptor: v250^a', $i);
369    
370     =cut
371    
372     sub parse {
373     my $self = shift;
374    
375     my ($rec, $format, $i) = @_;
376    
377 dpavlin 358 confess("need HASH as first argument!") if ($rec !~ /HASH/o);
378    
379     $i = 0 if (! $i);
380    
381 dpavlin 356 my @out;
382    
383     my $eval_code;
384     # remove eval{...} from beginning
385     $eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s);
386    
387 dpavlin 358 my $prefix;
388     my $all_found=0;
389 dpavlin 356
390 dpavlin 359 #print "## $format\n";
391 dpavlin 358 while ($format =~ s/^(.*?)v(\d+)(?:\^(\w))*//s) {
392 dpavlin 359 #print "## [ $1 | $2 | $3 ] $format\n";
393 dpavlin 356
394 dpavlin 358 my $del = $1 || '';
395 dpavlin 359 $prefix ||= $del if ($all_found == 0);
396 dpavlin 358
397     my $found = 0;
398     my $tmp = $self->get_data(\$rec,$2,$3,$i,\$found);
399    
400 dpavlin 356 if ($found) {
401 dpavlin 358 push @out, $del;
402     push @out, $tmp;
403     $all_found += $found;
404 dpavlin 356 }
405     }
406    
407 dpavlin 358 return if (! $all_found);
408 dpavlin 356
409 dpavlin 358 my $out = join('',@out) . $format;
410    
411     # add prefix if not there
412     $out = $prefix . $out if ($out !~ m/^\Q$prefix\E/);
413    
414 dpavlin 359 if ($eval_code) {
415     my $eval = $self->fill_in($rec,$eval_code,$i);
416     return if (! eval $eval);
417     }
418    
419 dpavlin 358 return $out;
420 dpavlin 356 }
421    
422 dpavlin 352 1;

  ViewVC Help
Powered by ViewVC 1.1.26