/[webpac]/trunk2/lib/WebPAC.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk2/lib/WebPAC.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 353 - (show annotations)
Wed Jun 16 11:29:37 2004 UTC (15 years, 7 months ago) by dpavlin
Original Path: trunk2/lib/WebPac.pm
File size: 6386 byte(s)
varous clenups

1 package WebPac;
2
3 use Carp;
4 use Text::Iconv;
5 use Config::IniFiles;
6
7 =head1 NAME
8
9 WebPac - base class for WebPac
10
11 =head1 DESCRIPTION
12
13 This module implements methods used by WebPac.
14
15 =head1 METHODS
16
17 =head2 new
18
19 This will create new instance of WebPac using configuration specified by C<config_file>.
20
21 my $webpac = new WebPac(
22 config_file => 'name.conf',
23 [code_page => 'ISO-8859-2',]
24 );
25
26 Default C<code_page> is C<ISO-8859-2>.
27
28 It will also read configuration files
29 C<global.conf> (used by indexer and Web font-end)
30 and configuration file specified by C<config_file>
31 which describes databases to be indexed.
32
33 =cut
34
35 sub new {
36 my $class = shift;
37 my $self = {@_};
38 bless($self, $class);
39
40 # fill in default values
41 # output codepage
42 $self->{'code_page'} = 'ISO-8859-2' if (! $self->{'code_page'});
43
44 #
45 # read global.conf
46 #
47
48 $self->{global_config_file} = new Config::IniFiles( -file => 'global.conf' ) || croak "can't open 'global.conf'";
49
50 # read global config parametars
51 foreach my $var (qw(
52 dbi_dbd
53 dbi_dsn
54 dbi_user
55 dbi_passwd
56 show_progress
57 my_unac_filter
58 )) {
59 $self->{global_config}->{$var} = $self->{global_config_file}->val('global', $var);
60 }
61
62 #
63 # read indexer config file
64 #
65
66 $self->{indexer_config_file} = new Config::IniFiles( -file => $self->{config_file} ) || croak "can't open '$self->{config_file}'";
67
68 # read global config parametars
69 foreach my $var (qw(
70 dbi_dbd
71 dbi_dsn
72 dbi_user
73 dbi_passwd
74 show_progress
75 my_unac_filter
76 )) {
77 $self->{global_config}->{$var} = $self->{global_config_file}->val('global', $var);
78 }
79
80 return $self;
81 }
82
83 =head2 open_isis
84
85 Open CDS/ISIS database using OpenIsis module and read all records to memory.
86
87 $webpac->open_isis(
88 filename => '/data/ISIS/ISIS',
89 code_page => '852',
90 limit_mfn => '500',
91 lookup => [ ... ],
92 );
93
94 By default, ISIS code page is assumed to be C<852>.
95
96 If optional parametar C<limit_mfn> is set, it will read just 500 records
97 from database in example above.
98
99 Returns number of last record read into memory (size of database, really).
100
101 C<lookup> argument is an array of lookups to create. Each lookup must have C<key> and
102 C<val>. Optional parametar C<eval> is perl code to evaluate before storing
103 value in index.
104
105 lookup => [
106 { 'key' => 'd:v900', 'val' => 'v250^a' },
107 { 'eval' => '"v901^a" eq "Podruèje"',
108 'key' => 'pa:v561^4:v562^4:v461^1',
109 'val' => 'v900' },
110 ]
111
112 =cut
113
114 sub open_isis {
115 my $self = shift;
116 my $arg = {@_};
117
118 croak "need filename" if (! $arg->{'filename'});
119 my $code_page = $arg->{'code_page'} || '852';
120
121 use OpenIsis;
122
123 #$self->{'isis_code_page'} = $code_page;
124
125 # create Text::Iconv object
126 my $cp = Text::Iconv->new($code_page,$self->{'code_page'});
127
128 my $isis_db = OpenIsis::open($arg->{'filename'});
129
130 my $maxmfn = OpenIsis::maxRowid( $isis_db ) || 1;
131
132 # read database
133 for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) {
134
135 # read record
136 my $row = OpenIsis::read( $isis_db, $mfn );
137 foreach my $k (keys %{$row}) {
138 if ($k ne "mfn") {
139 foreach my $l (@{$row->{$k}}) {
140 $l = $cp->convert($l);
141 # has subfields?
142 my $val;
143 if ($l =~ m/\^/) {
144 foreach my $t (split(/\^/,$l)) {
145 next if (! $t);
146 $val->{substr($t,0,1)} = substr($t,1);
147 }
148 } else {
149 $val = $l;
150 }
151
152 push @{$self->{'data'}->{$mfn}->{$k}}, $val;
153 }
154 }
155
156 }
157
158 # create lookup
159
160 foreach my $i (@{$arg->{lookup}}) {
161 my $rec = $self->{'data'}->{$mfn};
162 if ($i->{'eval'}) {
163 my $eval = $self->fill_in($rec,$i->{'eval'});
164 my $key = $self->fill_in($rec,$i->{'key'});
165 my @val = $self->fill_in($rec,$i->{'val'});
166 if ($key && @val && eval $eval) {
167 push @{$self->{'lookup'}->{$key}}, @val;
168 }
169 } else {
170 my $key = $self->fill_in($rec,$i->{'key'});
171 my @val = $self->fill_in($rec,$i->{'val'});
172 if ($key && @val) {
173 push @{$self->{'lookup'}->{$key}}, @val;
174 }
175 }
176 }
177 }
178
179 # store max mfn and return it.
180 return $self->{'max_mfn'} = $maxmfn;
181 }
182
183 =head2 fill_in
184
185 Workhourse of all: takes record from in-memory structure of database and
186 strings with placeholders and returns string or array of with substituted
187 values from record.
188
189 $webpac->fill_in($rec,'v250^a');
190
191 Optional argument is ordinal number for repeatable fields. By default,
192 it's assume to be first repeatable field (fields are perl array, so first
193 element is 0).
194 Following example will read second value from repeatable field.
195
196 $webpac->fill_in($rec,'Title: v250^a',1);
197
198 This function B<does not> perform parsing of format to inteligenty skip
199 delimiters before fields which aren't used.
200
201 =cut
202
203 sub fill_in {
204 my $self = shift;
205
206 my $rec = shift || confess "need data record";
207 my $format = shift || confess "need format to parse";
208 # iteration (for repeatable fields)
209 my $i = shift || 0;
210
211 # FIXME remove for speedup?
212 if ($rec !~ /HASH/o) {
213 confess("need HASH as first argument!");
214 }
215
216 my $found = 0;
217
218 # get field with subfield
219 sub get_sf {
220 my ($found,$rec,$f,$sf,$i) = @_;
221 if ($$rec->{$f} && $$rec->{$f}->[$i]->{$sf}) {
222 $$found++;
223 return $$rec->{$f}->[$i]->{$sf};
224 } else {
225 return '';
226 }
227 }
228
229 # get field (without subfield)
230 sub get_nosf {
231 my ($found,$rec,$f,$i) = @_;
232 if ($$rec->{$f} && $$rec->{$f}->[$i]) {
233 $$found++;
234 return $$rec->{$f}->[$i];
235 } else {
236 return '';
237 }
238 }
239
240 # do actual replacement of placeholders
241 $format =~ s/v(\d+)\^(\w)/get_sf(\$found,\$rec,$1,$2,$i)/ges;
242 $format =~ s/v(\d+)/get_nosf(\$found,\$rec,$1,$i)/ges;
243
244 if ($found) {
245 # do we have lookups?
246 if ($format =~ /\[[^\[\]]+\]/o) {
247 return $self->lookup($format);
248 } else {
249 return $format;
250 }
251 } else {
252 return;
253 }
254 }
255
256 =head2 lookup
257
258 This function will perform lookups on format supplied to it.
259
260 my $txt = $self->lookup('[v900]');
261
262 =cut
263
264 sub lookup {
265 my $self = shift;
266
267 my $tmp = shift || confess "need format";
268
269 if ($tmp =~ /\[[^\[\]]+\]/o) {
270 my @in = ( $tmp );
271 #print "##lookup $tmp\n";
272 my @out;
273 while (my $f = shift @in) {
274 if ($f =~ /\[([^\[\]]+)\]/) {
275 my $k = $1;
276 if ($self->{'lookup'}->{$k}) {
277 #print "## lookup key = $k\n";
278 foreach my $nv (@{$self->{'lookup'}->{$k}}) {
279 my $tmp2 = $f;
280 $tmp2 =~ s/\[$k\]/$nv/g;
281 push @in, $tmp2;
282 #print "## lookup in => $tmp2\n";
283 }
284 } else {
285 undef $f;
286 }
287 } elsif ($f) {
288 push @out, $f;
289 #print "## lookup out => $f\n";
290 }
291 }
292 return @out;
293 } else {
294 return $tmp;
295 }
296 }
297
298 1;

  ViewVC Help
Powered by ViewVC 1.1.26