/[webpac]/trunk2/lib/WebPAC.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk2/lib/WebPAC.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 352 - (show annotations)
Tue Jun 15 22:40:07 2004 UTC (15 years, 7 months ago) by dpavlin
Original Path: trunk2/lib/WebPac.pm
File size: 5991 byte(s)
Object-orineted design re-implementation: simple field substitution and
lookups are working well.
Added some documentation about new features.

1 package WebPac;
2
3 use Carp;
4
5 =head1 NAME
6
7 WebPac - base class for WebPac
8
9 =head1 DESCRIPTION
10
11 This class does basic thing for WebPac.
12
13 =head1 METHODS
14
15 =head2 new
16
17 This will create new instance of WebPac using configuration specified by C<config_file>.
18
19 my $webpac = new WebPac(
20 config_file => 'name.conf',
21 [code_page => 'ISO-8859-2',]
22 );
23
24 Default C<code_page> is C<ISO-8859-2>.
25
26 =cut
27
28 sub new {
29 my $class = shift;
30 my $self = {@_};
31 bless($self, $class);
32
33 # fill in default values
34 # output codepage
35 $self->{'code_page'} = 'ISO-8859-2' if (! $self->{'code_page'});
36
37 return $self;
38 }
39
40 =head2 read_global_config
41
42 Read global configuration (used by indexer and Web font-end)
43
44 =cut
45
46 sub read_global_config {
47 my $self = shift;
48
49 $self->{global_config_file} = new Config::IniFiles( -file => 'global.conf' ) || croak "can't open 'global.conf'";
50
51 # read global config parametars
52 foreach my $var (qw(
53 dbi_dbd
54 dbi_dsn
55 dbi_user
56 dbi_passwd
57 show_progress
58 my_unac_filter
59 )) {
60 $self->{global_config}->{$var} = $self->{global_config_file}->val('global', $var);
61 }
62
63 return $self;
64 }
65
66 =head2 read_indexer_config
67
68 Read indexer configuration (specify databases, types etc.)
69
70 =cut
71
72 sub read_indexer_config {
73 my $self = shift;
74
75 $self->{indexer_config_file} = new Config::IniFiles( -file => $self->{config_file} ) || croak "can't open '$self->{config_file}'";
76
77 # read global config parametars
78 foreach my $var (qw(
79 dbi_dbd
80 dbi_dsn
81 dbi_user
82 dbi_passwd
83 show_progress
84 my_unac_filter
85 )) {
86 $self->{global_config}->{$var} = $self->{global_config_file}->val('global', $var);
87 }
88
89 return $self;
90 }
91
92 =head2 open_isis
93
94 Open CDS/ISIS database using OpenIsis module and read all records to memory.
95
96 $webpac->open_isis(
97 filename => '/data/ISIS/ISIS',
98 code_page => '852',
99 limit_mfn => '500',
100 lookup => [ ... ],
101 );
102
103 By default, ISIS code page is assumed to be C<852>.
104
105 If C<limit_mfn> is set, it will read just 500 records from
106 database in example above.
107
108 Returns number of last record read into memory (size of database, really).
109
110 C<lookup> argument is an array of lookups to create. Each lookup must have C<key> and
111 C<val>. Optional parametar C<eval> is perl code to evaluate before storing
112 value in index.
113
114 lookup => [
115 { 'key' => 'd:v900', 'val' => 'v250^a' },
116 { 'eval' => '"v901^a" eq "Podruèje"',
117 'key' => 'pa:v561^4:v562^4:v461^1',
118 'val' => 'v900' },
119 ]
120
121 =cut
122
123 sub open_isis {
124 my $self = shift;
125 my $arg = {@_};
126
127 croak "need filename" if (! $arg->{'filename'});
128 my $code_page = $arg->{'code_page'} || '852';
129
130 #$self->{'isis_code_page'} = $code_page;
131
132 # create Text::Iconv object
133 my $cp = Text::Iconv->new($code_page,$self->{'code_page'});
134
135 my $isis_db = OpenIsis::open($arg->{'filename'});
136
137 my $maxmfn = OpenIsis::maxRowid( $isis_db ) || 1;
138
139 # read database
140 for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) {
141
142 # read record
143 my $row = OpenIsis::read( $isis_db, $mfn );
144 foreach my $k (keys %{$row}) {
145 if ($k ne "mfn") {
146 foreach my $l (@{$row->{$k}}) {
147 $l = $cp->convert($l);
148 # has subfields?
149 my $val;
150 if ($l =~ m/\^/) {
151 foreach my $t (split(/\^/,$l)) {
152 next if (! $t);
153 $val->{substr($t,0,1)} = substr($t,1);
154 }
155 } else {
156 $val = $l;
157 }
158
159 push @{$self->{'data'}->{$mfn}->{$k}}, $val;
160 }
161 }
162
163 }
164
165 # create lookup
166
167 foreach my $i (@{$arg->{lookup}}) {
168 my $rec = $self->{'data'}->{$mfn};
169 if ($i->{'eval'}) {
170 my $eval = $self->fill_in($rec,$i->{'eval'});
171 my $key = $self->fill_in($rec,$i->{'key'});
172 my @val = $self->fill_in($rec,$i->{'val'});
173 if ($key && @val && eval $eval) {
174 push @{$self->{'lookup'}->{$key}}, @val;
175 }
176 } else {
177 my $key = $self->fill_in($rec,$i->{'key'});
178 my @val = $self->fill_in($rec,$i->{'val'});
179 if ($key && @val) {
180 push @{$self->{'lookup'}->{$key}}, @val;
181 }
182 }
183 }
184 }
185
186 # store max mfn and return it.
187 return $self->{'max_mfn'} = $maxmfn;
188 }
189
190 =head2 fill_in
191
192 Workhourse of all: takes record from in-memory structure of database and
193 strings with placeholders and returns string or array of with substituted
194 values from record.
195
196 $webpac->fill_in($rec,'v250^a');
197
198 Optional argument is ordinal number for repeatable fields. By default,
199 it's assume to be first repeatable field.
200
201 =cut
202
203 sub fill_in {
204 my $self = shift;
205
206 my $rec = shift || confess "need data record";
207 my $format = shift || confess "need format to parse";
208 # iteration (for repeatable fields)
209 my $i = shift || 0;
210
211 # FIXME remove for speedup?
212 if ($rec !~ /HASH/) {
213 confess("need HASH as first argument!");
214 }
215
216 my $found = 0;
217
218 # get field with subfield
219 sub get_sf {
220 my ($found,$rec,$f,$sf,$i) = @_;
221 if ($$rec->{$f} && $$rec->{$f}->[$i]->{$sf}) {
222 $$found++;
223 return $$rec->{$f}->[$i]->{$sf};
224 } else {
225 return '';
226 }
227 }
228
229 # get field (without subfield)
230 sub get_nosf {
231 my ($found,$rec,$f,$i) = @_;
232 if ($$rec->{$f} && $$rec->{$f}->[$i]) {
233 $$found++;
234 return $$rec->{$f}->[$i];
235 } else {
236 return '';
237 }
238 }
239
240 # do actual replacement of placeholders
241 $format =~ s/v(\d+)\^(\w)/get_sf(\$found,\$rec,$1,$2,$i)/ges;
242 $format =~ s/v(\d+)/get_nosf(\$found,\$rec,$1,$i)/ges;
243
244 if ($found) {
245 return $format;
246 } else {
247 return;
248 }
249 }
250
251 =head2 lookup
252
253 This function will perform lookups on format supplied to it.
254
255 my $txt = $self->lookup('[v900]');
256
257 =cut
258
259 sub lookup {
260 my $self = shift;
261
262 my $tmp = shift || confess "need format";
263
264 if ($tmp =~ /\[[^\[\]]+\]/) {
265 my @in = ( $tmp );
266 print "##lookup $tmp\n";
267 my @out;
268 while (my $f = shift @in) {
269 if ($f =~ /\[([^\[\]]+)\]/) {
270 my $k = $1;
271 if ($self->{'lookup'}->{$k}) {
272 print "## lookup key = $k\n";
273 foreach my $nv (@{$self->{'lookup'}->{$k}}) {
274 my $tmp2 = $f;
275 $tmp2 =~ s/\[$k\]/$nv/g;
276 push @in, $tmp2;
277 print "## lookup in => $tmp2\n";
278 }
279 } else {
280 undef $f;
281 }
282 } elsif ($f) {
283 push @out, $f;
284 print "## lookup out => $f\n";
285 }
286 }
287 return @out;
288 } else {
289 return $tmp;
290 }
291 }
292
293 1;

  ViewVC Help
Powered by ViewVC 1.1.26