/[webpac]/trunk2/lib/WebPAC.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk2/lib/WebPAC.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 353 - (hide annotations)
Wed Jun 16 11:29:37 2004 UTC (19 years, 9 months ago) by dpavlin
Original Path: trunk2/lib/WebPac.pm
File size: 6386 byte(s)
varous clenups

1 dpavlin 352 package WebPac;
2    
3     use Carp;
4 dpavlin 353 use Text::Iconv;
5     use Config::IniFiles;
6 dpavlin 352
7     =head1 NAME
8    
9     WebPac - base class for WebPac
10    
11     =head1 DESCRIPTION
12    
13 dpavlin 353 This module implements methods used by WebPac.
14 dpavlin 352
15     =head1 METHODS
16    
17     =head2 new
18    
19     This will create new instance of WebPac using configuration specified by C<config_file>.
20    
21     my $webpac = new WebPac(
22     config_file => 'name.conf',
23     [code_page => 'ISO-8859-2',]
24     );
25    
26     Default C<code_page> is C<ISO-8859-2>.
27    
28 dpavlin 353 It will also read configuration files
29     C<global.conf> (used by indexer and Web font-end)
30     and configuration file specified by C<config_file>
31     which describes databases to be indexed.
32    
33 dpavlin 352 =cut
34    
35     sub new {
36     my $class = shift;
37     my $self = {@_};
38     bless($self, $class);
39    
40     # fill in default values
41     # output codepage
42     $self->{'code_page'} = 'ISO-8859-2' if (! $self->{'code_page'});
43    
44 dpavlin 353 #
45     # read global.conf
46     #
47 dpavlin 352
48     $self->{global_config_file} = new Config::IniFiles( -file => 'global.conf' ) || croak "can't open 'global.conf'";
49    
50     # read global config parametars
51     foreach my $var (qw(
52     dbi_dbd
53     dbi_dsn
54     dbi_user
55     dbi_passwd
56     show_progress
57     my_unac_filter
58     )) {
59     $self->{global_config}->{$var} = $self->{global_config_file}->val('global', $var);
60     }
61    
62 dpavlin 353 #
63     # read indexer config file
64     #
65 dpavlin 352
66     $self->{indexer_config_file} = new Config::IniFiles( -file => $self->{config_file} ) || croak "can't open '$self->{config_file}'";
67    
68     # read global config parametars
69     foreach my $var (qw(
70     dbi_dbd
71     dbi_dsn
72     dbi_user
73     dbi_passwd
74     show_progress
75     my_unac_filter
76     )) {
77     $self->{global_config}->{$var} = $self->{global_config_file}->val('global', $var);
78     }
79    
80     return $self;
81     }
82    
83     =head2 open_isis
84    
85     Open CDS/ISIS database using OpenIsis module and read all records to memory.
86    
87     $webpac->open_isis(
88     filename => '/data/ISIS/ISIS',
89     code_page => '852',
90     limit_mfn => '500',
91     lookup => [ ... ],
92     );
93    
94     By default, ISIS code page is assumed to be C<852>.
95    
96 dpavlin 353 If optional parametar C<limit_mfn> is set, it will read just 500 records
97     from database in example above.
98 dpavlin 352
99     Returns number of last record read into memory (size of database, really).
100    
101     C<lookup> argument is an array of lookups to create. Each lookup must have C<key> and
102     C<val>. Optional parametar C<eval> is perl code to evaluate before storing
103     value in index.
104    
105     lookup => [
106     { 'key' => 'd:v900', 'val' => 'v250^a' },
107     { 'eval' => '"v901^a" eq "Podruèje"',
108     'key' => 'pa:v561^4:v562^4:v461^1',
109     'val' => 'v900' },
110     ]
111    
112     =cut
113    
114     sub open_isis {
115     my $self = shift;
116     my $arg = {@_};
117    
118     croak "need filename" if (! $arg->{'filename'});
119     my $code_page = $arg->{'code_page'} || '852';
120    
121 dpavlin 353 use OpenIsis;
122    
123 dpavlin 352 #$self->{'isis_code_page'} = $code_page;
124    
125     # create Text::Iconv object
126     my $cp = Text::Iconv->new($code_page,$self->{'code_page'});
127    
128     my $isis_db = OpenIsis::open($arg->{'filename'});
129    
130     my $maxmfn = OpenIsis::maxRowid( $isis_db ) || 1;
131    
132     # read database
133     for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) {
134    
135     # read record
136     my $row = OpenIsis::read( $isis_db, $mfn );
137     foreach my $k (keys %{$row}) {
138     if ($k ne "mfn") {
139     foreach my $l (@{$row->{$k}}) {
140     $l = $cp->convert($l);
141     # has subfields?
142     my $val;
143     if ($l =~ m/\^/) {
144     foreach my $t (split(/\^/,$l)) {
145     next if (! $t);
146     $val->{substr($t,0,1)} = substr($t,1);
147     }
148     } else {
149     $val = $l;
150     }
151    
152     push @{$self->{'data'}->{$mfn}->{$k}}, $val;
153     }
154     }
155    
156     }
157    
158     # create lookup
159    
160     foreach my $i (@{$arg->{lookup}}) {
161     my $rec = $self->{'data'}->{$mfn};
162     if ($i->{'eval'}) {
163     my $eval = $self->fill_in($rec,$i->{'eval'});
164     my $key = $self->fill_in($rec,$i->{'key'});
165     my @val = $self->fill_in($rec,$i->{'val'});
166     if ($key && @val && eval $eval) {
167     push @{$self->{'lookup'}->{$key}}, @val;
168     }
169     } else {
170     my $key = $self->fill_in($rec,$i->{'key'});
171     my @val = $self->fill_in($rec,$i->{'val'});
172     if ($key && @val) {
173     push @{$self->{'lookup'}->{$key}}, @val;
174     }
175     }
176     }
177     }
178    
179     # store max mfn and return it.
180     return $self->{'max_mfn'} = $maxmfn;
181     }
182    
183     =head2 fill_in
184    
185     Workhourse of all: takes record from in-memory structure of database and
186     strings with placeholders and returns string or array of with substituted
187     values from record.
188    
189     $webpac->fill_in($rec,'v250^a');
190    
191     Optional argument is ordinal number for repeatable fields. By default,
192 dpavlin 353 it's assume to be first repeatable field (fields are perl array, so first
193     element is 0).
194     Following example will read second value from repeatable field.
195 dpavlin 352
196 dpavlin 353 $webpac->fill_in($rec,'Title: v250^a',1);
197    
198     This function B<does not> perform parsing of format to inteligenty skip
199     delimiters before fields which aren't used.
200    
201 dpavlin 352 =cut
202    
203     sub fill_in {
204     my $self = shift;
205    
206     my $rec = shift || confess "need data record";
207     my $format = shift || confess "need format to parse";
208     # iteration (for repeatable fields)
209     my $i = shift || 0;
210    
211     # FIXME remove for speedup?
212 dpavlin 353 if ($rec !~ /HASH/o) {
213 dpavlin 352 confess("need HASH as first argument!");
214     }
215    
216     my $found = 0;
217    
218     # get field with subfield
219     sub get_sf {
220     my ($found,$rec,$f,$sf,$i) = @_;
221     if ($$rec->{$f} && $$rec->{$f}->[$i]->{$sf}) {
222     $$found++;
223     return $$rec->{$f}->[$i]->{$sf};
224     } else {
225     return '';
226     }
227     }
228    
229     # get field (without subfield)
230     sub get_nosf {
231     my ($found,$rec,$f,$i) = @_;
232     if ($$rec->{$f} && $$rec->{$f}->[$i]) {
233     $$found++;
234     return $$rec->{$f}->[$i];
235     } else {
236     return '';
237     }
238     }
239    
240     # do actual replacement of placeholders
241     $format =~ s/v(\d+)\^(\w)/get_sf(\$found,\$rec,$1,$2,$i)/ges;
242     $format =~ s/v(\d+)/get_nosf(\$found,\$rec,$1,$i)/ges;
243    
244 dpavlin 353 if ($found) {
245     # do we have lookups?
246     if ($format =~ /\[[^\[\]]+\]/o) {
247     return $self->lookup($format);
248     } else {
249     return $format;
250     }
251 dpavlin 352 } else {
252     return;
253     }
254     }
255    
256     =head2 lookup
257    
258     This function will perform lookups on format supplied to it.
259    
260     my $txt = $self->lookup('[v900]');
261    
262     =cut
263    
264     sub lookup {
265     my $self = shift;
266    
267     my $tmp = shift || confess "need format";
268    
269 dpavlin 353 if ($tmp =~ /\[[^\[\]]+\]/o) {
270 dpavlin 352 my @in = ( $tmp );
271 dpavlin 353 #print "##lookup $tmp\n";
272 dpavlin 352 my @out;
273     while (my $f = shift @in) {
274     if ($f =~ /\[([^\[\]]+)\]/) {
275     my $k = $1;
276     if ($self->{'lookup'}->{$k}) {
277 dpavlin 353 #print "## lookup key = $k\n";
278 dpavlin 352 foreach my $nv (@{$self->{'lookup'}->{$k}}) {
279     my $tmp2 = $f;
280     $tmp2 =~ s/\[$k\]/$nv/g;
281     push @in, $tmp2;
282 dpavlin 353 #print "## lookup in => $tmp2\n";
283 dpavlin 352 }
284     } else {
285     undef $f;
286     }
287     } elsif ($f) {
288     push @out, $f;
289 dpavlin 353 #print "## lookup out => $f\n";
290 dpavlin 352 }
291     }
292     return @out;
293     } else {
294     return $tmp;
295     }
296     }
297    
298     1;

  ViewVC Help
Powered by ViewVC 1.1.26