/[webpac]/trunk2/lib/WebPAC.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk2/lib/WebPAC.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 356 - (hide annotations)
Wed Jun 16 13:41:54 2004 UTC (19 years, 9 months ago) by dpavlin
File size: 7745 byte(s)
begin of parse

1 dpavlin 354 package WebPAC;
2 dpavlin 352
3     use Carp;
4 dpavlin 353 use Text::Iconv;
5     use Config::IniFiles;
6 dpavlin 352
7     =head1 NAME
8    
9 dpavlin 354 WebPAC - base class for WebPAC
10 dpavlin 352
11     =head1 DESCRIPTION
12    
13 dpavlin 354 This module implements methods used by WebPAC.
14 dpavlin 352
15     =head1 METHODS
16    
17     =head2 new
18    
19 dpavlin 354 This will create new instance of WebPAC using configuration specified by C<config_file>.
20 dpavlin 352
21 dpavlin 354 my $webpac = new WebPAC(
22 dpavlin 352 config_file => 'name.conf',
23     [code_page => 'ISO-8859-2',]
24     );
25    
26     Default C<code_page> is C<ISO-8859-2>.
27    
28 dpavlin 353 It will also read configuration files
29     C<global.conf> (used by indexer and Web font-end)
30     and configuration file specified by C<config_file>
31     which describes databases to be indexed.
32    
33 dpavlin 352 =cut
34    
35     sub new {
36     my $class = shift;
37     my $self = {@_};
38     bless($self, $class);
39    
40     # fill in default values
41     # output codepage
42     $self->{'code_page'} = 'ISO-8859-2' if (! $self->{'code_page'});
43    
44 dpavlin 353 #
45     # read global.conf
46     #
47 dpavlin 352
48     $self->{global_config_file} = new Config::IniFiles( -file => 'global.conf' ) || croak "can't open 'global.conf'";
49    
50     # read global config parametars
51     foreach my $var (qw(
52     dbi_dbd
53     dbi_dsn
54     dbi_user
55     dbi_passwd
56     show_progress
57     my_unac_filter
58     )) {
59     $self->{global_config}->{$var} = $self->{global_config_file}->val('global', $var);
60     }
61    
62 dpavlin 353 #
63     # read indexer config file
64     #
65 dpavlin 352
66     $self->{indexer_config_file} = new Config::IniFiles( -file => $self->{config_file} ) || croak "can't open '$self->{config_file}'";
67    
68     # read global config parametars
69     foreach my $var (qw(
70     dbi_dbd
71     dbi_dsn
72     dbi_user
73     dbi_passwd
74     show_progress
75     my_unac_filter
76     )) {
77     $self->{global_config}->{$var} = $self->{global_config_file}->val('global', $var);
78     }
79    
80     return $self;
81     }
82    
83     =head2 open_isis
84    
85     Open CDS/ISIS database using OpenIsis module and read all records to memory.
86    
87     $webpac->open_isis(
88     filename => '/data/ISIS/ISIS',
89     code_page => '852',
90     limit_mfn => '500',
91     lookup => [ ... ],
92     );
93    
94     By default, ISIS code page is assumed to be C<852>.
95    
96 dpavlin 353 If optional parametar C<limit_mfn> is set, it will read just 500 records
97     from database in example above.
98 dpavlin 352
99     Returns number of last record read into memory (size of database, really).
100    
101     C<lookup> argument is an array of lookups to create. Each lookup must have C<key> and
102     C<val>. Optional parametar C<eval> is perl code to evaluate before storing
103     value in index.
104    
105     lookup => [
106     { 'key' => 'd:v900', 'val' => 'v250^a' },
107     { 'eval' => '"v901^a" eq "Podruèje"',
108     'key' => 'pa:v561^4:v562^4:v461^1',
109     'val' => 'v900' },
110     ]
111    
112     =cut
113    
114     sub open_isis {
115     my $self = shift;
116     my $arg = {@_};
117    
118     croak "need filename" if (! $arg->{'filename'});
119     my $code_page = $arg->{'code_page'} || '852';
120    
121 dpavlin 353 use OpenIsis;
122    
123 dpavlin 352 #$self->{'isis_code_page'} = $code_page;
124    
125     # create Text::Iconv object
126     my $cp = Text::Iconv->new($code_page,$self->{'code_page'});
127    
128     my $isis_db = OpenIsis::open($arg->{'filename'});
129    
130     my $maxmfn = OpenIsis::maxRowid( $isis_db ) || 1;
131    
132     # read database
133     for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) {
134    
135     # read record
136     my $row = OpenIsis::read( $isis_db, $mfn );
137     foreach my $k (keys %{$row}) {
138     if ($k ne "mfn") {
139     foreach my $l (@{$row->{$k}}) {
140     $l = $cp->convert($l);
141     # has subfields?
142     my $val;
143     if ($l =~ m/\^/) {
144     foreach my $t (split(/\^/,$l)) {
145     next if (! $t);
146     $val->{substr($t,0,1)} = substr($t,1);
147     }
148     } else {
149     $val = $l;
150     }
151    
152     push @{$self->{'data'}->{$mfn}->{$k}}, $val;
153     }
154     }
155    
156     }
157    
158     # create lookup
159 dpavlin 355 my $rec = $self->{'data'}->{$mfn};
160     $self->create_lookup($rec, @{$arg->{'lookup'}});
161 dpavlin 352
162     }
163    
164     # store max mfn and return it.
165     return $self->{'max_mfn'} = $maxmfn;
166     }
167    
168 dpavlin 355 =head2 create_lookup
169    
170     Create lookup from record using lookup definition.
171    
172     =cut
173    
174     sub create_lookup {
175     my $self = shift;
176    
177     my $rec = shift || confess "need record to create lookup";
178     confess("need HASH as first argument!") if ($rec !~ /HASH/o);
179    
180     foreach my $i (@_) {
181     if ($i->{'eval'}) {
182     my $eval = $self->fill_in($rec,$i->{'eval'});
183     my $key = $self->fill_in($rec,$i->{'key'});
184     my @val = $self->fill_in($rec,$i->{'val'});
185     if ($key && @val && eval $eval) {
186     push @{$self->{'lookup'}->{$key}}, @val;
187     }
188     } else {
189     my $key = $self->fill_in($rec,$i->{'key'});
190     my @val = $self->fill_in($rec,$i->{'val'});
191     if ($key && @val) {
192     push @{$self->{'lookup'}->{$key}}, @val;
193     }
194     }
195     }
196     }
197    
198 dpavlin 356 =head2 get_data
199    
200     Returns value from record.
201    
202     $self->get_data(\$rec,$f,$sf,$i,\$found);
203    
204     Arguments are:
205     record reference C<$rec>,
206     field C<$f>,
207     optional subfiled C<$sf>,
208     index for repeatable values C<$i>.
209    
210     Optinal variable C<$found> will be incremeted if thre
211     is field.
212    
213     Returns value or empty string.
214    
215     =cut
216    
217     sub get_data {
218     my $self = shift;
219    
220     my ($rec,$f,$sf,$i,$found) = @_;
221     if ($$rec->{$f}) {
222     if ($sf && $$rec->{$f}->[$i]->{$sf}) {
223     $$found++ if (defined($$found));
224     return $$rec->{$f}->[$i]->{$sf};
225     } elsif ($$rec->{$f}->[$i]) {
226     $$found++ if (defined($$found));
227     return $$rec->{$f}->[$i];
228     }
229     } else {
230     return '';
231     }
232     }
233    
234 dpavlin 352 =head2 fill_in
235    
236     Workhourse of all: takes record from in-memory structure of database and
237     strings with placeholders and returns string or array of with substituted
238     values from record.
239    
240     $webpac->fill_in($rec,'v250^a');
241    
242     Optional argument is ordinal number for repeatable fields. By default,
243 dpavlin 353 it's assume to be first repeatable field (fields are perl array, so first
244     element is 0).
245     Following example will read second value from repeatable field.
246 dpavlin 352
247 dpavlin 353 $webpac->fill_in($rec,'Title: v250^a',1);
248    
249     This function B<does not> perform parsing of format to inteligenty skip
250     delimiters before fields which aren't used.
251    
252 dpavlin 352 =cut
253    
254     sub fill_in {
255     my $self = shift;
256    
257     my $rec = shift || confess "need data record";
258     my $format = shift || confess "need format to parse";
259     # iteration (for repeatable fields)
260     my $i = shift || 0;
261    
262     # FIXME remove for speedup?
263 dpavlin 355 confess("need HASH as first argument!") if ($rec !~ /HASH/o);
264 dpavlin 352
265     my $found = 0;
266    
267     # do actual replacement of placeholders
268 dpavlin 356 $format =~ s/v(\d+)(?:\^(\w))*/$self->get_data(\$rec,$1,$2,$i,\$found)/ges;
269 dpavlin 352
270 dpavlin 353 if ($found) {
271     # do we have lookups?
272     if ($format =~ /\[[^\[\]]+\]/o) {
273     return $self->lookup($format);
274     } else {
275     return $format;
276     }
277 dpavlin 352 } else {
278     return;
279     }
280     }
281    
282     =head2 lookup
283    
284 dpavlin 355 Perform lookups on format supplied to it.
285 dpavlin 352
286     my $txt = $self->lookup('[v900]');
287    
288 dpavlin 355 Lookups can be nested (like C<[d:[a:[v900]]]>).
289    
290 dpavlin 352 =cut
291    
292     sub lookup {
293     my $self = shift;
294    
295     my $tmp = shift || confess "need format";
296    
297 dpavlin 353 if ($tmp =~ /\[[^\[\]]+\]/o) {
298 dpavlin 352 my @in = ( $tmp );
299 dpavlin 353 #print "##lookup $tmp\n";
300 dpavlin 352 my @out;
301     while (my $f = shift @in) {
302     if ($f =~ /\[([^\[\]]+)\]/) {
303     my $k = $1;
304     if ($self->{'lookup'}->{$k}) {
305 dpavlin 353 #print "## lookup key = $k\n";
306 dpavlin 352 foreach my $nv (@{$self->{'lookup'}->{$k}}) {
307     my $tmp2 = $f;
308     $tmp2 =~ s/\[$k\]/$nv/g;
309     push @in, $tmp2;
310 dpavlin 353 #print "## lookup in => $tmp2\n";
311 dpavlin 352 }
312     } else {
313     undef $f;
314     }
315     } elsif ($f) {
316     push @out, $f;
317 dpavlin 353 #print "## lookup out => $f\n";
318 dpavlin 352 }
319     }
320     return @out;
321     } else {
322     return $tmp;
323     }
324     }
325    
326 dpavlin 356 =head2 parse
327    
328     Perform smart parsing of string, skipping delimiters for fields which aren't
329     defined. It can also eval code in format starting with C<eval{...}> and
330     return output or nothing depending on eval code.
331    
332     $webpac->parse($rec,'eval{"v901^a" eq "Deskriptor"}descriptor: v250^a', $i);
333    
334     =cut
335    
336     sub parse {
337     my $self = shift;
338    
339     my ($rec, $format, $i) = @_;
340    
341     my @out;
342    
343     my $eval_code;
344     # remove eval{...} from beginning
345     $eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s);
346    
347     my $prefix = '';
348     $prefix = $1 if ($format =~ s/^(.+)(v\d+(?:\^\w)*)/$2/s);
349    
350     sub f_sf_del {
351     my ($self,$rec,$out,$f,$sf,$del,$i) = @_;
352    
353     my $found=0;
354     my $tmp = $self->get_data($rec,$f,$sf,$i,\$found);
355     if ($found) {
356     push @{$$out}, $tmp;
357     push @{$$out}, $del;
358     }
359     return '';
360     }
361    
362     #$format =~ s/(.*)v(\d+)(?:\^(\w))*/f_sf_del($self,\$rec,\@out,$2,$3,$1,$i/ges;
363    
364     print Dumper(@out);
365    
366     }
367    
368 dpavlin 352 1;

  ViewVC Help
Powered by ViewVC 1.1.26