/[webpac]/trunk2/lib/WebPAC.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk2/lib/WebPAC.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 352 - (hide annotations)
Tue Jun 15 22:40:07 2004 UTC (19 years, 9 months ago) by dpavlin
Original Path: trunk2/lib/WebPac.pm
File size: 5991 byte(s)
Object-orineted design re-implementation: simple field substitution and
lookups are working well.
Added some documentation about new features.

1 dpavlin 352 package WebPac;
2    
3     use Carp;
4    
5     =head1 NAME
6    
7     WebPac - base class for WebPac
8    
9     =head1 DESCRIPTION
10    
11     This class does basic thing for WebPac.
12    
13     =head1 METHODS
14    
15     =head2 new
16    
17     This will create new instance of WebPac using configuration specified by C<config_file>.
18    
19     my $webpac = new WebPac(
20     config_file => 'name.conf',
21     [code_page => 'ISO-8859-2',]
22     );
23    
24     Default C<code_page> is C<ISO-8859-2>.
25    
26     =cut
27    
28     sub new {
29     my $class = shift;
30     my $self = {@_};
31     bless($self, $class);
32    
33     # fill in default values
34     # output codepage
35     $self->{'code_page'} = 'ISO-8859-2' if (! $self->{'code_page'});
36    
37     return $self;
38     }
39    
40     =head2 read_global_config
41    
42     Read global configuration (used by indexer and Web font-end)
43    
44     =cut
45    
46     sub read_global_config {
47     my $self = shift;
48    
49     $self->{global_config_file} = new Config::IniFiles( -file => 'global.conf' ) || croak "can't open 'global.conf'";
50    
51     # read global config parametars
52     foreach my $var (qw(
53     dbi_dbd
54     dbi_dsn
55     dbi_user
56     dbi_passwd
57     show_progress
58     my_unac_filter
59     )) {
60     $self->{global_config}->{$var} = $self->{global_config_file}->val('global', $var);
61     }
62    
63     return $self;
64     }
65    
66     =head2 read_indexer_config
67    
68     Read indexer configuration (specify databases, types etc.)
69    
70     =cut
71    
72     sub read_indexer_config {
73     my $self = shift;
74    
75     $self->{indexer_config_file} = new Config::IniFiles( -file => $self->{config_file} ) || croak "can't open '$self->{config_file}'";
76    
77     # read global config parametars
78     foreach my $var (qw(
79     dbi_dbd
80     dbi_dsn
81     dbi_user
82     dbi_passwd
83     show_progress
84     my_unac_filter
85     )) {
86     $self->{global_config}->{$var} = $self->{global_config_file}->val('global', $var);
87     }
88    
89     return $self;
90     }
91    
92     =head2 open_isis
93    
94     Open CDS/ISIS database using OpenIsis module and read all records to memory.
95    
96     $webpac->open_isis(
97     filename => '/data/ISIS/ISIS',
98     code_page => '852',
99     limit_mfn => '500',
100     lookup => [ ... ],
101     );
102    
103     By default, ISIS code page is assumed to be C<852>.
104    
105     If C<limit_mfn> is set, it will read just 500 records from
106     database in example above.
107    
108     Returns number of last record read into memory (size of database, really).
109    
110     C<lookup> argument is an array of lookups to create. Each lookup must have C<key> and
111     C<val>. Optional parametar C<eval> is perl code to evaluate before storing
112     value in index.
113    
114     lookup => [
115     { 'key' => 'd:v900', 'val' => 'v250^a' },
116     { 'eval' => '"v901^a" eq "Podruèje"',
117     'key' => 'pa:v561^4:v562^4:v461^1',
118     'val' => 'v900' },
119     ]
120    
121     =cut
122    
123     sub open_isis {
124     my $self = shift;
125     my $arg = {@_};
126    
127     croak "need filename" if (! $arg->{'filename'});
128     my $code_page = $arg->{'code_page'} || '852';
129    
130     #$self->{'isis_code_page'} = $code_page;
131    
132     # create Text::Iconv object
133     my $cp = Text::Iconv->new($code_page,$self->{'code_page'});
134    
135     my $isis_db = OpenIsis::open($arg->{'filename'});
136    
137     my $maxmfn = OpenIsis::maxRowid( $isis_db ) || 1;
138    
139     # read database
140     for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) {
141    
142     # read record
143     my $row = OpenIsis::read( $isis_db, $mfn );
144     foreach my $k (keys %{$row}) {
145     if ($k ne "mfn") {
146     foreach my $l (@{$row->{$k}}) {
147     $l = $cp->convert($l);
148     # has subfields?
149     my $val;
150     if ($l =~ m/\^/) {
151     foreach my $t (split(/\^/,$l)) {
152     next if (! $t);
153     $val->{substr($t,0,1)} = substr($t,1);
154     }
155     } else {
156     $val = $l;
157     }
158    
159     push @{$self->{'data'}->{$mfn}->{$k}}, $val;
160     }
161     }
162    
163     }
164    
165     # create lookup
166    
167     foreach my $i (@{$arg->{lookup}}) {
168     my $rec = $self->{'data'}->{$mfn};
169     if ($i->{'eval'}) {
170     my $eval = $self->fill_in($rec,$i->{'eval'});
171     my $key = $self->fill_in($rec,$i->{'key'});
172     my @val = $self->fill_in($rec,$i->{'val'});
173     if ($key && @val && eval $eval) {
174     push @{$self->{'lookup'}->{$key}}, @val;
175     }
176     } else {
177     my $key = $self->fill_in($rec,$i->{'key'});
178     my @val = $self->fill_in($rec,$i->{'val'});
179     if ($key && @val) {
180     push @{$self->{'lookup'}->{$key}}, @val;
181     }
182     }
183     }
184     }
185    
186     # store max mfn and return it.
187     return $self->{'max_mfn'} = $maxmfn;
188     }
189    
190     =head2 fill_in
191    
192     Workhourse of all: takes record from in-memory structure of database and
193     strings with placeholders and returns string or array of with substituted
194     values from record.
195    
196     $webpac->fill_in($rec,'v250^a');
197    
198     Optional argument is ordinal number for repeatable fields. By default,
199     it's assume to be first repeatable field.
200    
201     =cut
202    
203     sub fill_in {
204     my $self = shift;
205    
206     my $rec = shift || confess "need data record";
207     my $format = shift || confess "need format to parse";
208     # iteration (for repeatable fields)
209     my $i = shift || 0;
210    
211     # FIXME remove for speedup?
212     if ($rec !~ /HASH/) {
213     confess("need HASH as first argument!");
214     }
215    
216     my $found = 0;
217    
218     # get field with subfield
219     sub get_sf {
220     my ($found,$rec,$f,$sf,$i) = @_;
221     if ($$rec->{$f} && $$rec->{$f}->[$i]->{$sf}) {
222     $$found++;
223     return $$rec->{$f}->[$i]->{$sf};
224     } else {
225     return '';
226     }
227     }
228    
229     # get field (without subfield)
230     sub get_nosf {
231     my ($found,$rec,$f,$i) = @_;
232     if ($$rec->{$f} && $$rec->{$f}->[$i]) {
233     $$found++;
234     return $$rec->{$f}->[$i];
235     } else {
236     return '';
237     }
238     }
239    
240     # do actual replacement of placeholders
241     $format =~ s/v(\d+)\^(\w)/get_sf(\$found,\$rec,$1,$2,$i)/ges;
242     $format =~ s/v(\d+)/get_nosf(\$found,\$rec,$1,$i)/ges;
243    
244     if ($found) {
245     return $format;
246     } else {
247     return;
248     }
249     }
250    
251     =head2 lookup
252    
253     This function will perform lookups on format supplied to it.
254    
255     my $txt = $self->lookup('[v900]');
256    
257     =cut
258    
259     sub lookup {
260     my $self = shift;
261    
262     my $tmp = shift || confess "need format";
263    
264     if ($tmp =~ /\[[^\[\]]+\]/) {
265     my @in = ( $tmp );
266     print "##lookup $tmp\n";
267     my @out;
268     while (my $f = shift @in) {
269     if ($f =~ /\[([^\[\]]+)\]/) {
270     my $k = $1;
271     if ($self->{'lookup'}->{$k}) {
272     print "## lookup key = $k\n";
273     foreach my $nv (@{$self->{'lookup'}->{$k}}) {
274     my $tmp2 = $f;
275     $tmp2 =~ s/\[$k\]/$nv/g;
276     push @in, $tmp2;
277     print "## lookup in => $tmp2\n";
278     }
279     } else {
280     undef $f;
281     }
282     } elsif ($f) {
283     push @out, $f;
284     print "## lookup out => $f\n";
285     }
286     }
287     return @out;
288     } else {
289     return $tmp;
290     }
291     }
292    
293     1;

  ViewVC Help
Powered by ViewVC 1.1.26