/[webpac]/trunk2/lib/WebPAC.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk2/lib/WebPAC.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 357 - (hide annotations)
Wed Jun 16 13:39:17 2004 UTC (19 years, 9 months ago) by dpavlin
File size: 7801 byte(s)
implement limit_mfn

1 dpavlin 354 package WebPAC;
2 dpavlin 352
3     use Carp;
4 dpavlin 353 use Text::Iconv;
5     use Config::IniFiles;
6 dpavlin 352
7     =head1 NAME
8    
9 dpavlin 354 WebPAC - base class for WebPAC
10 dpavlin 352
11     =head1 DESCRIPTION
12    
13 dpavlin 354 This module implements methods used by WebPAC.
14 dpavlin 352
15     =head1 METHODS
16    
17     =head2 new
18    
19 dpavlin 354 This will create new instance of WebPAC using configuration specified by C<config_file>.
20 dpavlin 352
21 dpavlin 354 my $webpac = new WebPAC(
22 dpavlin 352 config_file => 'name.conf',
23     [code_page => 'ISO-8859-2',]
24     );
25    
26     Default C<code_page> is C<ISO-8859-2>.
27    
28 dpavlin 353 It will also read configuration files
29     C<global.conf> (used by indexer and Web font-end)
30     and configuration file specified by C<config_file>
31     which describes databases to be indexed.
32    
33 dpavlin 352 =cut
34    
35     sub new {
36     my $class = shift;
37     my $self = {@_};
38     bless($self, $class);
39    
40     # fill in default values
41     # output codepage
42     $self->{'code_page'} = 'ISO-8859-2' if (! $self->{'code_page'});
43    
44 dpavlin 353 #
45     # read global.conf
46     #
47 dpavlin 352
48     $self->{global_config_file} = new Config::IniFiles( -file => 'global.conf' ) || croak "can't open 'global.conf'";
49    
50     # read global config parametars
51     foreach my $var (qw(
52     dbi_dbd
53     dbi_dsn
54     dbi_user
55     dbi_passwd
56     show_progress
57     my_unac_filter
58     )) {
59     $self->{global_config}->{$var} = $self->{global_config_file}->val('global', $var);
60     }
61    
62 dpavlin 353 #
63     # read indexer config file
64     #
65 dpavlin 352
66     $self->{indexer_config_file} = new Config::IniFiles( -file => $self->{config_file} ) || croak "can't open '$self->{config_file}'";
67    
68     # read global config parametars
69     foreach my $var (qw(
70     dbi_dbd
71     dbi_dsn
72     dbi_user
73     dbi_passwd
74     show_progress
75     my_unac_filter
76     )) {
77     $self->{global_config}->{$var} = $self->{global_config_file}->val('global', $var);
78     }
79    
80     return $self;
81     }
82    
83     =head2 open_isis
84    
85     Open CDS/ISIS database using OpenIsis module and read all records to memory.
86    
87     $webpac->open_isis(
88     filename => '/data/ISIS/ISIS',
89     code_page => '852',
90     limit_mfn => '500',
91     lookup => [ ... ],
92     );
93    
94     By default, ISIS code page is assumed to be C<852>.
95    
96 dpavlin 353 If optional parametar C<limit_mfn> is set, it will read just 500 records
97     from database in example above.
98 dpavlin 352
99     Returns number of last record read into memory (size of database, really).
100    
101     C<lookup> argument is an array of lookups to create. Each lookup must have C<key> and
102     C<val>. Optional parametar C<eval> is perl code to evaluate before storing
103     value in index.
104    
105     lookup => [
106     { 'key' => 'd:v900', 'val' => 'v250^a' },
107     { 'eval' => '"v901^a" eq "Podruèje"',
108     'key' => 'pa:v561^4:v562^4:v461^1',
109     'val' => 'v900' },
110     ]
111    
112     =cut
113    
114     sub open_isis {
115     my $self = shift;
116     my $arg = {@_};
117    
118     croak "need filename" if (! $arg->{'filename'});
119     my $code_page = $arg->{'code_page'} || '852';
120    
121 dpavlin 353 use OpenIsis;
122    
123 dpavlin 352 #$self->{'isis_code_page'} = $code_page;
124    
125     # create Text::Iconv object
126     my $cp = Text::Iconv->new($code_page,$self->{'code_page'});
127    
128     my $isis_db = OpenIsis::open($arg->{'filename'});
129    
130     my $maxmfn = OpenIsis::maxRowid( $isis_db ) || 1;
131    
132 dpavlin 357 $maxmfn = $self->{limit_mfn} if ($self->{limit_mfn});
133    
134 dpavlin 352 # read database
135     for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) {
136    
137     # read record
138     my $row = OpenIsis::read( $isis_db, $mfn );
139     foreach my $k (keys %{$row}) {
140     if ($k ne "mfn") {
141     foreach my $l (@{$row->{$k}}) {
142     $l = $cp->convert($l);
143     # has subfields?
144     my $val;
145     if ($l =~ m/\^/) {
146     foreach my $t (split(/\^/,$l)) {
147     next if (! $t);
148     $val->{substr($t,0,1)} = substr($t,1);
149     }
150     } else {
151     $val = $l;
152     }
153    
154     push @{$self->{'data'}->{$mfn}->{$k}}, $val;
155     }
156     }
157    
158     }
159    
160     # create lookup
161 dpavlin 355 my $rec = $self->{'data'}->{$mfn};
162     $self->create_lookup($rec, @{$arg->{'lookup'}});
163 dpavlin 352
164     }
165    
166     # store max mfn and return it.
167     return $self->{'max_mfn'} = $maxmfn;
168     }
169    
170 dpavlin 355 =head2 create_lookup
171    
172     Create lookup from record using lookup definition.
173    
174     =cut
175    
176     sub create_lookup {
177     my $self = shift;
178    
179     my $rec = shift || confess "need record to create lookup";
180     confess("need HASH as first argument!") if ($rec !~ /HASH/o);
181    
182     foreach my $i (@_) {
183     if ($i->{'eval'}) {
184     my $eval = $self->fill_in($rec,$i->{'eval'});
185     my $key = $self->fill_in($rec,$i->{'key'});
186     my @val = $self->fill_in($rec,$i->{'val'});
187     if ($key && @val && eval $eval) {
188     push @{$self->{'lookup'}->{$key}}, @val;
189     }
190     } else {
191     my $key = $self->fill_in($rec,$i->{'key'});
192     my @val = $self->fill_in($rec,$i->{'val'});
193     if ($key && @val) {
194     push @{$self->{'lookup'}->{$key}}, @val;
195     }
196     }
197     }
198     }
199    
200 dpavlin 356 =head2 get_data
201    
202     Returns value from record.
203    
204     $self->get_data(\$rec,$f,$sf,$i,\$found);
205    
206     Arguments are:
207     record reference C<$rec>,
208     field C<$f>,
209     optional subfiled C<$sf>,
210     index for repeatable values C<$i>.
211    
212     Optinal variable C<$found> will be incremeted if thre
213     is field.
214    
215     Returns value or empty string.
216    
217     =cut
218    
219     sub get_data {
220     my $self = shift;
221    
222     my ($rec,$f,$sf,$i,$found) = @_;
223     if ($$rec->{$f}) {
224     if ($sf && $$rec->{$f}->[$i]->{$sf}) {
225     $$found++ if (defined($$found));
226     return $$rec->{$f}->[$i]->{$sf};
227     } elsif ($$rec->{$f}->[$i]) {
228     $$found++ if (defined($$found));
229     return $$rec->{$f}->[$i];
230     }
231     } else {
232     return '';
233     }
234     }
235    
236 dpavlin 352 =head2 fill_in
237    
238     Workhourse of all: takes record from in-memory structure of database and
239     strings with placeholders and returns string or array of with substituted
240     values from record.
241    
242     $webpac->fill_in($rec,'v250^a');
243    
244     Optional argument is ordinal number for repeatable fields. By default,
245 dpavlin 353 it's assume to be first repeatable field (fields are perl array, so first
246     element is 0).
247     Following example will read second value from repeatable field.
248 dpavlin 352
249 dpavlin 353 $webpac->fill_in($rec,'Title: v250^a',1);
250    
251     This function B<does not> perform parsing of format to inteligenty skip
252     delimiters before fields which aren't used.
253    
254 dpavlin 352 =cut
255    
256     sub fill_in {
257     my $self = shift;
258    
259     my $rec = shift || confess "need data record";
260     my $format = shift || confess "need format to parse";
261     # iteration (for repeatable fields)
262     my $i = shift || 0;
263    
264     # FIXME remove for speedup?
265 dpavlin 355 confess("need HASH as first argument!") if ($rec !~ /HASH/o);
266 dpavlin 352
267     my $found = 0;
268    
269     # do actual replacement of placeholders
270 dpavlin 356 $format =~ s/v(\d+)(?:\^(\w))*/$self->get_data(\$rec,$1,$2,$i,\$found)/ges;
271 dpavlin 352
272 dpavlin 353 if ($found) {
273     # do we have lookups?
274     if ($format =~ /\[[^\[\]]+\]/o) {
275     return $self->lookup($format);
276     } else {
277     return $format;
278     }
279 dpavlin 352 } else {
280     return;
281     }
282     }
283    
284     =head2 lookup
285    
286 dpavlin 355 Perform lookups on format supplied to it.
287 dpavlin 352
288     my $txt = $self->lookup('[v900]');
289    
290 dpavlin 355 Lookups can be nested (like C<[d:[a:[v900]]]>).
291    
292 dpavlin 352 =cut
293    
294     sub lookup {
295     my $self = shift;
296    
297     my $tmp = shift || confess "need format";
298    
299 dpavlin 353 if ($tmp =~ /\[[^\[\]]+\]/o) {
300 dpavlin 352 my @in = ( $tmp );
301 dpavlin 353 #print "##lookup $tmp\n";
302 dpavlin 352 my @out;
303     while (my $f = shift @in) {
304     if ($f =~ /\[([^\[\]]+)\]/) {
305     my $k = $1;
306     if ($self->{'lookup'}->{$k}) {
307 dpavlin 353 #print "## lookup key = $k\n";
308 dpavlin 352 foreach my $nv (@{$self->{'lookup'}->{$k}}) {
309     my $tmp2 = $f;
310     $tmp2 =~ s/\[$k\]/$nv/g;
311     push @in, $tmp2;
312 dpavlin 353 #print "## lookup in => $tmp2\n";
313 dpavlin 352 }
314     } else {
315     undef $f;
316     }
317     } elsif ($f) {
318     push @out, $f;
319 dpavlin 353 #print "## lookup out => $f\n";
320 dpavlin 352 }
321     }
322     return @out;
323     } else {
324     return $tmp;
325     }
326     }
327    
328 dpavlin 356 =head2 parse
329    
330     Perform smart parsing of string, skipping delimiters for fields which aren't
331     defined. It can also eval code in format starting with C<eval{...}> and
332     return output or nothing depending on eval code.
333    
334     $webpac->parse($rec,'eval{"v901^a" eq "Deskriptor"}descriptor: v250^a', $i);
335    
336     =cut
337    
338     sub parse {
339     my $self = shift;
340    
341     my ($rec, $format, $i) = @_;
342    
343     my @out;
344    
345     my $eval_code;
346     # remove eval{...} from beginning
347     $eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s);
348    
349     my $prefix = '';
350     $prefix = $1 if ($format =~ s/^(.+)(v\d+(?:\^\w)*)/$2/s);
351    
352     sub f_sf_del {
353     my ($self,$rec,$out,$f,$sf,$del,$i) = @_;
354    
355     my $found=0;
356     my $tmp = $self->get_data($rec,$f,$sf,$i,\$found);
357     if ($found) {
358     push @{$$out}, $tmp;
359     push @{$$out}, $del;
360     }
361     return '';
362     }
363    
364     #$format =~ s/(.*)v(\d+)(?:\^(\w))*/f_sf_del($self,\$rec,\@out,$2,$3,$1,$i/ges;
365    
366     print Dumper(@out);
367    
368     }
369    
370 dpavlin 352 1;

  ViewVC Help
Powered by ViewVC 1.1.26