3 |
use Carp; |
use Carp; |
4 |
use Text::Iconv; |
use Text::Iconv; |
5 |
use Config::IniFiles; |
use Config::IniFiles; |
6 |
|
use XML::Simple; |
7 |
|
|
8 |
use Data::Dumper; |
use Data::Dumper; |
9 |
|
|
35 |
|
|
36 |
=cut |
=cut |
37 |
|
|
38 |
|
# mapping between data type and tag which specify |
39 |
|
# format in XML file |
40 |
|
my %type2tag = ( |
41 |
|
'isis' => 'isis', |
42 |
|
# 'excel' => 'column', |
43 |
|
# 'marc' => 'marc', |
44 |
|
# 'feed' => 'feed' |
45 |
|
); |
46 |
|
|
47 |
sub new { |
sub new { |
48 |
my $class = shift; |
my $class = shift; |
49 |
my $self = {@_}; |
my $self = {@_}; |
77 |
|
|
78 |
$self->{indexer_config_file} = new Config::IniFiles( -file => $self->{config_file} ) || croak "can't open '$self->{config_file}'"; |
$self->{indexer_config_file} = new Config::IniFiles( -file => $self->{config_file} ) || croak "can't open '$self->{config_file}'"; |
79 |
|
|
|
# read global config parametars |
|
|
foreach my $var (qw( |
|
|
dbi_dbd |
|
|
dbi_dsn |
|
|
dbi_user |
|
|
dbi_passwd |
|
|
show_progress |
|
|
my_unac_filter |
|
|
)) { |
|
|
$self->{global_config}->{$var} = $self->{global_config_file}->val('global', $var); |
|
|
} |
|
|
|
|
80 |
return $self; |
return $self; |
81 |
} |
} |
82 |
|
|
125 |
# create Text::Iconv object |
# create Text::Iconv object |
126 |
my $cp = Text::Iconv->new($code_page,$self->{'code_page'}); |
my $cp = Text::Iconv->new($code_page,$self->{'code_page'}); |
127 |
|
|
128 |
|
print STDERR "reading ISIS database '",$arg->{'filename'},"'\n" if ($self->{'debug'}); |
129 |
|
|
130 |
my $isis_db = OpenIsis::open($arg->{'filename'}); |
my $isis_db = OpenIsis::open($arg->{'filename'}); |
131 |
|
|
132 |
my $maxmfn = OpenIsis::maxRowid( $isis_db ) || 1; |
my $maxmfn = OpenIsis::maxRowid( $isis_db ) || 1; |
133 |
|
|
134 |
$maxmfn = $self->{limit_mfn} if ($self->{limit_mfn}); |
$maxmfn = $self->{limit_mfn} if ($self->{limit_mfn}); |
135 |
|
|
136 |
|
print STDERR "processing $maxmfn records...\n" if ($self->{'debug'}); |
137 |
|
|
138 |
# read database |
# read database |
139 |
for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) { |
for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) { |
140 |
|
|
167 |
|
|
168 |
} |
} |
169 |
|
|
170 |
|
$self->{'current_mfn'} = 1; |
171 |
|
|
172 |
# store max mfn and return it. |
# store max mfn and return it. |
173 |
return $self->{'max_mfn'} = $maxmfn; |
return $self->{'max_mfn'} = $maxmfn; |
174 |
} |
} |
175 |
|
|
176 |
|
=head2 fetch_rec |
177 |
|
|
178 |
|
Fetch next record from database. It will also display progress bar (once |
179 |
|
it's implemented, that is). |
180 |
|
|
181 |
|
my $rec = $webpac->fetch_rec; |
182 |
|
|
183 |
|
=cut |
184 |
|
|
185 |
|
sub fetch_rec { |
186 |
|
my $self = shift; |
187 |
|
|
188 |
|
my $mfn = $self->{'current_mfn'}++ || confess "it seems that you didn't load database!"; |
189 |
|
|
190 |
|
if ($mfn > $self->{'max_mfn'}) { |
191 |
|
$self->{'current_mfn'} = $self->{'max_mfn'}; |
192 |
|
return; |
193 |
|
} |
194 |
|
|
195 |
|
return $self->{'data'}->{$mfn}; |
196 |
|
} |
197 |
|
|
198 |
|
=head2 open_import_xml |
199 |
|
|
200 |
|
Read file from C<import_xml/> directory and parse it. |
201 |
|
|
202 |
|
$webpac->open_import_xml(type => 'isis'); |
203 |
|
|
204 |
|
=cut |
205 |
|
|
206 |
|
sub open_import_xml { |
207 |
|
my $self = shift; |
208 |
|
|
209 |
|
my $arg = {@_}; |
210 |
|
confess "need type to load file from import_xml/" if (! $arg->{'type'}); |
211 |
|
|
212 |
|
my $type = $arg->{'type'}; |
213 |
|
|
214 |
|
my $type_base = $type; |
215 |
|
$type_base =~ s/_.*$//g; |
216 |
|
|
217 |
|
my $f = "./import_xml/$type.xml"; |
218 |
|
confess "import_xml file '$f' doesn't exist!" if (! -e "$f"); |
219 |
|
|
220 |
|
print STDERR "reading '$f'\n" if ($self->{'debug'}); |
221 |
|
|
222 |
|
$self->{'import_xml'} = XMLin($f, |
223 |
|
ForceArray => [ $type2tag{$type_base}, 'config', 'format' ], |
224 |
|
ForceContent => 1 |
225 |
|
); |
226 |
|
|
227 |
|
print Dumper($self->{'import_xml'}); |
228 |
|
|
229 |
|
} |
230 |
|
|
231 |
=head2 create_lookup |
=head2 create_lookup |
232 |
|
|
233 |
Create lookup from record using lookup definition. |
Create lookup from record using lookup definition. |
327 |
|
|
328 |
my $found = 0; |
my $found = 0; |
329 |
|
|
330 |
|
my $eval_code; |
331 |
|
# remove eval{...} from beginning |
332 |
|
$eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s); |
333 |
|
|
334 |
# do actual replacement of placeholders |
# do actual replacement of placeholders |
335 |
$format =~ s/v(\d+)(?:\^(\w))*/$self->get_data(\$rec,$1,$2,$i,\$found)/ges; |
$format =~ s/v(\d+)(?:\^(\w))*/$self->get_data(\$rec,$1,$2,$i,\$found)/ges; |
336 |
|
|
337 |
if ($found) { |
if ($found) { |
338 |
|
if ($eval_code) { |
339 |
|
my $eval = $self->fill_in($rec,$eval_code,$i); |
340 |
|
return if (! eval $eval); |
341 |
|
} |
342 |
# do we have lookups? |
# do we have lookups? |
343 |
if ($format =~ /\[[^\[\]]+\]/o) { |
if ($format =~ /\[[^\[\]]+\]/o) { |
344 |
return $self->lookup($format); |
return $self->lookup($format); |
422 |
my $prefix; |
my $prefix; |
423 |
my $all_found=0; |
my $all_found=0; |
424 |
|
|
425 |
print "## $format\n"; |
#print "## $format\n"; |
426 |
while ($format =~ s/^(.*?)v(\d+)(?:\^(\w))*//s) { |
while ($format =~ s/^(.*?)v(\d+)(?:\^(\w))*//s) { |
427 |
print "## [ $1 | $2 | $3 ] $format\n"; |
#print "## [ $1 | $2 | $3 ] $format\n"; |
428 |
|
|
429 |
my $del = $1 || ''; |
my $del = $1 || ''; |
430 |
$prefix ||= $del; |
$prefix ||= $del if ($all_found == 0); |
431 |
|
|
432 |
my $found = 0; |
my $found = 0; |
433 |
my $tmp = $self->get_data(\$rec,$2,$3,$i,\$found); |
my $tmp = $self->get_data(\$rec,$2,$3,$i,\$found); |
441 |
|
|
442 |
return if (! $all_found); |
return if (! $all_found); |
443 |
|
|
|
print Dumper($prefix, \@out); |
|
|
|
|
444 |
my $out = join('',@out) . $format; |
my $out = join('',@out) . $format; |
445 |
|
|
446 |
# add prefix if not there |
# add prefix if not there |
447 |
$out = $prefix . $out if ($out !~ m/^\Q$prefix\E/); |
$out = $prefix . $out if ($out !~ m/^\Q$prefix\E/); |
448 |
|
|
449 |
|
if ($eval_code) { |
450 |
|
my $eval = $self->fill_in($rec,$eval_code,$i); |
451 |
|
return if (! eval $eval); |
452 |
|
} |
453 |
|
|
454 |
return $out; |
return $out; |
455 |
} |
} |
456 |
|
|