/[webpac]/trunk2/lib/WebPAC.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk2/lib/WebPAC.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 355 - (show annotations)
Wed Jun 16 11:41:50 2004 UTC (15 years, 7 months ago) by dpavlin
File size: 6675 byte(s)
added create_lookup

1 package WebPAC;
2
3 use Carp;
4 use Text::Iconv;
5 use Config::IniFiles;
6
7 =head1 NAME
8
9 WebPAC - base class for WebPAC
10
11 =head1 DESCRIPTION
12
13 This module implements methods used by WebPAC.
14
15 =head1 METHODS
16
17 =head2 new
18
19 This will create new instance of WebPAC using configuration specified by C<config_file>.
20
21 my $webpac = new WebPAC(
22 config_file => 'name.conf',
23 [code_page => 'ISO-8859-2',]
24 );
25
26 Default C<code_page> is C<ISO-8859-2>.
27
28 It will also read configuration files
29 C<global.conf> (used by indexer and Web font-end)
30 and configuration file specified by C<config_file>
31 which describes databases to be indexed.
32
33 =cut
34
35 sub new {
36 my $class = shift;
37 my $self = {@_};
38 bless($self, $class);
39
40 # fill in default values
41 # output codepage
42 $self->{'code_page'} = 'ISO-8859-2' if (! $self->{'code_page'});
43
44 #
45 # read global.conf
46 #
47
48 $self->{global_config_file} = new Config::IniFiles( -file => 'global.conf' ) || croak "can't open 'global.conf'";
49
50 # read global config parametars
51 foreach my $var (qw(
52 dbi_dbd
53 dbi_dsn
54 dbi_user
55 dbi_passwd
56 show_progress
57 my_unac_filter
58 )) {
59 $self->{global_config}->{$var} = $self->{global_config_file}->val('global', $var);
60 }
61
62 #
63 # read indexer config file
64 #
65
66 $self->{indexer_config_file} = new Config::IniFiles( -file => $self->{config_file} ) || croak "can't open '$self->{config_file}'";
67
68 # read global config parametars
69 foreach my $var (qw(
70 dbi_dbd
71 dbi_dsn
72 dbi_user
73 dbi_passwd
74 show_progress
75 my_unac_filter
76 )) {
77 $self->{global_config}->{$var} = $self->{global_config_file}->val('global', $var);
78 }
79
80 return $self;
81 }
82
83 =head2 open_isis
84
85 Open CDS/ISIS database using OpenIsis module and read all records to memory.
86
87 $webpac->open_isis(
88 filename => '/data/ISIS/ISIS',
89 code_page => '852',
90 limit_mfn => '500',
91 lookup => [ ... ],
92 );
93
94 By default, ISIS code page is assumed to be C<852>.
95
96 If optional parametar C<limit_mfn> is set, it will read just 500 records
97 from database in example above.
98
99 Returns number of last record read into memory (size of database, really).
100
101 C<lookup> argument is an array of lookups to create. Each lookup must have C<key> and
102 C<val>. Optional parametar C<eval> is perl code to evaluate before storing
103 value in index.
104
105 lookup => [
106 { 'key' => 'd:v900', 'val' => 'v250^a' },
107 { 'eval' => '"v901^a" eq "Podruèje"',
108 'key' => 'pa:v561^4:v562^4:v461^1',
109 'val' => 'v900' },
110 ]
111
112 =cut
113
114 sub open_isis {
115 my $self = shift;
116 my $arg = {@_};
117
118 croak "need filename" if (! $arg->{'filename'});
119 my $code_page = $arg->{'code_page'} || '852';
120
121 use OpenIsis;
122
123 #$self->{'isis_code_page'} = $code_page;
124
125 # create Text::Iconv object
126 my $cp = Text::Iconv->new($code_page,$self->{'code_page'});
127
128 my $isis_db = OpenIsis::open($arg->{'filename'});
129
130 my $maxmfn = OpenIsis::maxRowid( $isis_db ) || 1;
131
132 # read database
133 for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) {
134
135 # read record
136 my $row = OpenIsis::read( $isis_db, $mfn );
137 foreach my $k (keys %{$row}) {
138 if ($k ne "mfn") {
139 foreach my $l (@{$row->{$k}}) {
140 $l = $cp->convert($l);
141 # has subfields?
142 my $val;
143 if ($l =~ m/\^/) {
144 foreach my $t (split(/\^/,$l)) {
145 next if (! $t);
146 $val->{substr($t,0,1)} = substr($t,1);
147 }
148 } else {
149 $val = $l;
150 }
151
152 push @{$self->{'data'}->{$mfn}->{$k}}, $val;
153 }
154 }
155
156 }
157
158 # create lookup
159 my $rec = $self->{'data'}->{$mfn};
160 $self->create_lookup($rec, @{$arg->{'lookup'}});
161
162 }
163
164 # store max mfn and return it.
165 return $self->{'max_mfn'} = $maxmfn;
166 }
167
168 =head2 create_lookup
169
170 Create lookup from record using lookup definition.
171
172 =cut
173
174 sub create_lookup {
175 my $self = shift;
176
177 my $rec = shift || confess "need record to create lookup";
178 confess("need HASH as first argument!") if ($rec !~ /HASH/o);
179
180 foreach my $i (@_) {
181 if ($i->{'eval'}) {
182 my $eval = $self->fill_in($rec,$i->{'eval'});
183 my $key = $self->fill_in($rec,$i->{'key'});
184 my @val = $self->fill_in($rec,$i->{'val'});
185 if ($key && @val && eval $eval) {
186 push @{$self->{'lookup'}->{$key}}, @val;
187 }
188 } else {
189 my $key = $self->fill_in($rec,$i->{'key'});
190 my @val = $self->fill_in($rec,$i->{'val'});
191 if ($key && @val) {
192 push @{$self->{'lookup'}->{$key}}, @val;
193 }
194 }
195 }
196 }
197
198 =head2 fill_in
199
200 Workhourse of all: takes record from in-memory structure of database and
201 strings with placeholders and returns string or array of with substituted
202 values from record.
203
204 $webpac->fill_in($rec,'v250^a');
205
206 Optional argument is ordinal number for repeatable fields. By default,
207 it's assume to be first repeatable field (fields are perl array, so first
208 element is 0).
209 Following example will read second value from repeatable field.
210
211 $webpac->fill_in($rec,'Title: v250^a',1);
212
213 This function B<does not> perform parsing of format to inteligenty skip
214 delimiters before fields which aren't used.
215
216 =cut
217
218 sub fill_in {
219 my $self = shift;
220
221 my $rec = shift || confess "need data record";
222 my $format = shift || confess "need format to parse";
223 # iteration (for repeatable fields)
224 my $i = shift || 0;
225
226 # FIXME remove for speedup?
227 confess("need HASH as first argument!") if ($rec !~ /HASH/o);
228
229 my $found = 0;
230
231 # get field with subfield
232 sub get_sf {
233 my ($found,$rec,$f,$sf,$i) = @_;
234 if ($$rec->{$f} && $$rec->{$f}->[$i]->{$sf}) {
235 $$found++;
236 return $$rec->{$f}->[$i]->{$sf};
237 } else {
238 return '';
239 }
240 }
241
242 # get field (without subfield)
243 sub get_nosf {
244 my ($found,$rec,$f,$i) = @_;
245 if ($$rec->{$f} && $$rec->{$f}->[$i]) {
246 $$found++;
247 return $$rec->{$f}->[$i];
248 } else {
249 return '';
250 }
251 }
252
253 # do actual replacement of placeholders
254 $format =~ s/v(\d+)\^(\w)/get_sf(\$found,\$rec,$1,$2,$i)/ges;
255 $format =~ s/v(\d+)/get_nosf(\$found,\$rec,$1,$i)/ges;
256
257 if ($found) {
258 # do we have lookups?
259 if ($format =~ /\[[^\[\]]+\]/o) {
260 return $self->lookup($format);
261 } else {
262 return $format;
263 }
264 } else {
265 return;
266 }
267 }
268
269 =head2 lookup
270
271 Perform lookups on format supplied to it.
272
273 my $txt = $self->lookup('[v900]');
274
275 Lookups can be nested (like C<[d:[a:[v900]]]>).
276
277 =cut
278
279 sub lookup {
280 my $self = shift;
281
282 my $tmp = shift || confess "need format";
283
284 if ($tmp =~ /\[[^\[\]]+\]/o) {
285 my @in = ( $tmp );
286 #print "##lookup $tmp\n";
287 my @out;
288 while (my $f = shift @in) {
289 if ($f =~ /\[([^\[\]]+)\]/) {
290 my $k = $1;
291 if ($self->{'lookup'}->{$k}) {
292 #print "## lookup key = $k\n";
293 foreach my $nv (@{$self->{'lookup'}->{$k}}) {
294 my $tmp2 = $f;
295 $tmp2 =~ s/\[$k\]/$nv/g;
296 push @in, $tmp2;
297 #print "## lookup in => $tmp2\n";
298 }
299 } else {
300 undef $f;
301 }
302 } elsif ($f) {
303 push @out, $f;
304 #print "## lookup out => $f\n";
305 }
306 }
307 return @out;
308 } else {
309 return $tmp;
310 }
311 }
312
313 1;

  ViewVC Help
Powered by ViewVC 1.1.26