/[webpac]/trunk2/lib/WebPAC.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk2/lib/WebPAC.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 356 - (show annotations)
Wed Jun 16 13:41:54 2004 UTC (15 years, 7 months ago) by dpavlin
File size: 7745 byte(s)
begin of parse

1 package WebPAC;
2
3 use Carp;
4 use Text::Iconv;
5 use Config::IniFiles;
6
7 =head1 NAME
8
9 WebPAC - base class for WebPAC
10
11 =head1 DESCRIPTION
12
13 This module implements methods used by WebPAC.
14
15 =head1 METHODS
16
17 =head2 new
18
19 This will create new instance of WebPAC using configuration specified by C<config_file>.
20
21 my $webpac = new WebPAC(
22 config_file => 'name.conf',
23 [code_page => 'ISO-8859-2',]
24 );
25
26 Default C<code_page> is C<ISO-8859-2>.
27
28 It will also read configuration files
29 C<global.conf> (used by indexer and Web font-end)
30 and configuration file specified by C<config_file>
31 which describes databases to be indexed.
32
33 =cut
34
35 sub new {
36 my $class = shift;
37 my $self = {@_};
38 bless($self, $class);
39
40 # fill in default values
41 # output codepage
42 $self->{'code_page'} = 'ISO-8859-2' if (! $self->{'code_page'});
43
44 #
45 # read global.conf
46 #
47
48 $self->{global_config_file} = new Config::IniFiles( -file => 'global.conf' ) || croak "can't open 'global.conf'";
49
50 # read global config parametars
51 foreach my $var (qw(
52 dbi_dbd
53 dbi_dsn
54 dbi_user
55 dbi_passwd
56 show_progress
57 my_unac_filter
58 )) {
59 $self->{global_config}->{$var} = $self->{global_config_file}->val('global', $var);
60 }
61
62 #
63 # read indexer config file
64 #
65
66 $self->{indexer_config_file} = new Config::IniFiles( -file => $self->{config_file} ) || croak "can't open '$self->{config_file}'";
67
68 # read global config parametars
69 foreach my $var (qw(
70 dbi_dbd
71 dbi_dsn
72 dbi_user
73 dbi_passwd
74 show_progress
75 my_unac_filter
76 )) {
77 $self->{global_config}->{$var} = $self->{global_config_file}->val('global', $var);
78 }
79
80 return $self;
81 }
82
83 =head2 open_isis
84
85 Open CDS/ISIS database using OpenIsis module and read all records to memory.
86
87 $webpac->open_isis(
88 filename => '/data/ISIS/ISIS',
89 code_page => '852',
90 limit_mfn => '500',
91 lookup => [ ... ],
92 );
93
94 By default, ISIS code page is assumed to be C<852>.
95
96 If optional parametar C<limit_mfn> is set, it will read just 500 records
97 from database in example above.
98
99 Returns number of last record read into memory (size of database, really).
100
101 C<lookup> argument is an array of lookups to create. Each lookup must have C<key> and
102 C<val>. Optional parametar C<eval> is perl code to evaluate before storing
103 value in index.
104
105 lookup => [
106 { 'key' => 'd:v900', 'val' => 'v250^a' },
107 { 'eval' => '"v901^a" eq "Podruèje"',
108 'key' => 'pa:v561^4:v562^4:v461^1',
109 'val' => 'v900' },
110 ]
111
112 =cut
113
114 sub open_isis {
115 my $self = shift;
116 my $arg = {@_};
117
118 croak "need filename" if (! $arg->{'filename'});
119 my $code_page = $arg->{'code_page'} || '852';
120
121 use OpenIsis;
122
123 #$self->{'isis_code_page'} = $code_page;
124
125 # create Text::Iconv object
126 my $cp = Text::Iconv->new($code_page,$self->{'code_page'});
127
128 my $isis_db = OpenIsis::open($arg->{'filename'});
129
130 my $maxmfn = OpenIsis::maxRowid( $isis_db ) || 1;
131
132 # read database
133 for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) {
134
135 # read record
136 my $row = OpenIsis::read( $isis_db, $mfn );
137 foreach my $k (keys %{$row}) {
138 if ($k ne "mfn") {
139 foreach my $l (@{$row->{$k}}) {
140 $l = $cp->convert($l);
141 # has subfields?
142 my $val;
143 if ($l =~ m/\^/) {
144 foreach my $t (split(/\^/,$l)) {
145 next if (! $t);
146 $val->{substr($t,0,1)} = substr($t,1);
147 }
148 } else {
149 $val = $l;
150 }
151
152 push @{$self->{'data'}->{$mfn}->{$k}}, $val;
153 }
154 }
155
156 }
157
158 # create lookup
159 my $rec = $self->{'data'}->{$mfn};
160 $self->create_lookup($rec, @{$arg->{'lookup'}});
161
162 }
163
164 # store max mfn and return it.
165 return $self->{'max_mfn'} = $maxmfn;
166 }
167
168 =head2 create_lookup
169
170 Create lookup from record using lookup definition.
171
172 =cut
173
174 sub create_lookup {
175 my $self = shift;
176
177 my $rec = shift || confess "need record to create lookup";
178 confess("need HASH as first argument!") if ($rec !~ /HASH/o);
179
180 foreach my $i (@_) {
181 if ($i->{'eval'}) {
182 my $eval = $self->fill_in($rec,$i->{'eval'});
183 my $key = $self->fill_in($rec,$i->{'key'});
184 my @val = $self->fill_in($rec,$i->{'val'});
185 if ($key && @val && eval $eval) {
186 push @{$self->{'lookup'}->{$key}}, @val;
187 }
188 } else {
189 my $key = $self->fill_in($rec,$i->{'key'});
190 my @val = $self->fill_in($rec,$i->{'val'});
191 if ($key && @val) {
192 push @{$self->{'lookup'}->{$key}}, @val;
193 }
194 }
195 }
196 }
197
198 =head2 get_data
199
200 Returns value from record.
201
202 $self->get_data(\$rec,$f,$sf,$i,\$found);
203
204 Arguments are:
205 record reference C<$rec>,
206 field C<$f>,
207 optional subfiled C<$sf>,
208 index for repeatable values C<$i>.
209
210 Optinal variable C<$found> will be incremeted if thre
211 is field.
212
213 Returns value or empty string.
214
215 =cut
216
217 sub get_data {
218 my $self = shift;
219
220 my ($rec,$f,$sf,$i,$found) = @_;
221 if ($$rec->{$f}) {
222 if ($sf && $$rec->{$f}->[$i]->{$sf}) {
223 $$found++ if (defined($$found));
224 return $$rec->{$f}->[$i]->{$sf};
225 } elsif ($$rec->{$f}->[$i]) {
226 $$found++ if (defined($$found));
227 return $$rec->{$f}->[$i];
228 }
229 } else {
230 return '';
231 }
232 }
233
234 =head2 fill_in
235
236 Workhourse of all: takes record from in-memory structure of database and
237 strings with placeholders and returns string or array of with substituted
238 values from record.
239
240 $webpac->fill_in($rec,'v250^a');
241
242 Optional argument is ordinal number for repeatable fields. By default,
243 it's assume to be first repeatable field (fields are perl array, so first
244 element is 0).
245 Following example will read second value from repeatable field.
246
247 $webpac->fill_in($rec,'Title: v250^a',1);
248
249 This function B<does not> perform parsing of format to inteligenty skip
250 delimiters before fields which aren't used.
251
252 =cut
253
254 sub fill_in {
255 my $self = shift;
256
257 my $rec = shift || confess "need data record";
258 my $format = shift || confess "need format to parse";
259 # iteration (for repeatable fields)
260 my $i = shift || 0;
261
262 # FIXME remove for speedup?
263 confess("need HASH as first argument!") if ($rec !~ /HASH/o);
264
265 my $found = 0;
266
267 # do actual replacement of placeholders
268 $format =~ s/v(\d+)(?:\^(\w))*/$self->get_data(\$rec,$1,$2,$i,\$found)/ges;
269
270 if ($found) {
271 # do we have lookups?
272 if ($format =~ /\[[^\[\]]+\]/o) {
273 return $self->lookup($format);
274 } else {
275 return $format;
276 }
277 } else {
278 return;
279 }
280 }
281
282 =head2 lookup
283
284 Perform lookups on format supplied to it.
285
286 my $txt = $self->lookup('[v900]');
287
288 Lookups can be nested (like C<[d:[a:[v900]]]>).
289
290 =cut
291
292 sub lookup {
293 my $self = shift;
294
295 my $tmp = shift || confess "need format";
296
297 if ($tmp =~ /\[[^\[\]]+\]/o) {
298 my @in = ( $tmp );
299 #print "##lookup $tmp\n";
300 my @out;
301 while (my $f = shift @in) {
302 if ($f =~ /\[([^\[\]]+)\]/) {
303 my $k = $1;
304 if ($self->{'lookup'}->{$k}) {
305 #print "## lookup key = $k\n";
306 foreach my $nv (@{$self->{'lookup'}->{$k}}) {
307 my $tmp2 = $f;
308 $tmp2 =~ s/\[$k\]/$nv/g;
309 push @in, $tmp2;
310 #print "## lookup in => $tmp2\n";
311 }
312 } else {
313 undef $f;
314 }
315 } elsif ($f) {
316 push @out, $f;
317 #print "## lookup out => $f\n";
318 }
319 }
320 return @out;
321 } else {
322 return $tmp;
323 }
324 }
325
326 =head2 parse
327
328 Perform smart parsing of string, skipping delimiters for fields which aren't
329 defined. It can also eval code in format starting with C<eval{...}> and
330 return output or nothing depending on eval code.
331
332 $webpac->parse($rec,'eval{"v901^a" eq "Deskriptor"}descriptor: v250^a', $i);
333
334 =cut
335
336 sub parse {
337 my $self = shift;
338
339 my ($rec, $format, $i) = @_;
340
341 my @out;
342
343 my $eval_code;
344 # remove eval{...} from beginning
345 $eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s);
346
347 my $prefix = '';
348 $prefix = $1 if ($format =~ s/^(.+)(v\d+(?:\^\w)*)/$2/s);
349
350 sub f_sf_del {
351 my ($self,$rec,$out,$f,$sf,$del,$i) = @_;
352
353 my $found=0;
354 my $tmp = $self->get_data($rec,$f,$sf,$i,\$found);
355 if ($found) {
356 push @{$$out}, $tmp;
357 push @{$$out}, $del;
358 }
359 return '';
360 }
361
362 #$format =~ s/(.*)v(\d+)(?:\^(\w))*/f_sf_del($self,\$rec,\@out,$2,$3,$1,$i/ges;
363
364 print Dumper(@out);
365
366 }
367
368 1;

  ViewVC Help
Powered by ViewVC 1.1.26