29 |
|
|
30 |
=head2 new |
=head2 new |
31 |
|
|
32 |
This will create new instance of WebPAC using configuration specified by C<config_file>. |
Create new instance of WebPAC using configuration specified by C<config_file>. |
33 |
|
|
34 |
my $webpac = new WebPAC( |
my $webpac = new WebPAC( |
35 |
config_file => 'name.conf', |
config_file => 'name.conf', |
36 |
[code_page => 'ISO-8859-2',] |
[code_page => 'ISO-8859-2',] |
37 |
|
[low_mem => 1,] |
38 |
); |
); |
39 |
|
|
40 |
Default C<code_page> is C<ISO-8859-2>. |
Default C<code_page> is C<ISO-8859-2>. |
41 |
|
|
42 |
It will also read configuration files |
This method will also read configuration files |
43 |
C<global.conf> (used by indexer and Web font-end) |
C<global.conf> (used by indexer and Web font-end) |
44 |
and configuration file specified by C<config_file> |
and configuration file specified by C<config_file> |
45 |
which describes databases to be indexed. |
which describes databases to be indexed. |
46 |
|
|
47 |
|
C<low_mem> options is double-edged sword. If enabled, WebPAC |
48 |
|
will run on memory constraint machines (which doesn't have enough |
49 |
|
physical RAM to create memory structure for whole ISIS database). |
50 |
|
|
51 |
|
If your machine has 512Mb or more and database is around 10000 records, |
52 |
|
memory shouldn't be an issue. If you don't have enough physical RAM, you |
53 |
|
might consider using virtual memory (if your operating system is handling it |
54 |
|
well, like on FreeBSD or Linux) instead of dropping to L<DBD::Deep> to handle |
55 |
|
parsed structure of ISIS database. |
56 |
|
|
57 |
|
However, when WebPAC is running on desktop machines (or laptops :-), it's |
58 |
|
highly undesireable for system to start swapping. Using C<low_mem> option can |
59 |
|
reduce WecPAC memory usage to 16Mb for same database with lookup fields and |
60 |
|
sorted indexes which stay in RAM. Performance will suffer, but memory usage |
61 |
|
will really be minimal. It might be also more confortable to run WebPAC reniced |
62 |
|
on those machines. |
63 |
|
|
64 |
=cut |
=cut |
65 |
|
|
66 |
# mapping between data type and tag which specify |
# mapping between data type and tag which specify |
124 |
EVAL_PERL => 1, |
EVAL_PERL => 1, |
125 |
); |
); |
126 |
|
|
127 |
|
# running with low_mem flag? well, use DBM::Deep then. |
128 |
|
if ($self->{'low_mem'}) { |
129 |
|
$log->info("running with low_mem which impacts performance (<64 Mb memory usage)"); |
130 |
|
|
131 |
|
my $db_file = "data.db"; |
132 |
|
|
133 |
|
if (-e $db_file) { |
134 |
|
unlink $db_file or $log->logdie("can't remove '$db_file' from last run"); |
135 |
|
$log->debug("removed '$db_file' from last run"); |
136 |
|
} |
137 |
|
|
138 |
|
use DBM::Deep; |
139 |
|
|
140 |
|
my $db = new DBM::Deep $db_file; |
141 |
|
|
142 |
|
$log->logdie("DBM::Deep error: $!") unless ($db); |
143 |
|
|
144 |
|
if ($db->error()) { |
145 |
|
$log->logdie("can't open '$db_file' under low_mem: ",$db->error()); |
146 |
|
} else { |
147 |
|
$log->debug("using file $db_file for DBM::Deep"); |
148 |
|
} |
149 |
|
|
150 |
|
$self->{'db'} = $db; |
151 |
|
} |
152 |
|
|
153 |
return $self; |
return $self; |
154 |
} |
} |
155 |
|
|
193 |
$log->logcroak("need filename") if (! $arg->{'filename'}); |
$log->logcroak("need filename") if (! $arg->{'filename'}); |
194 |
my $code_page = $arg->{'code_page'} || '852'; |
my $code_page = $arg->{'code_page'} || '852'; |
195 |
|
|
196 |
|
# store data in object |
197 |
|
$self->{'isis_filename'} = $arg->{'filename'}; |
198 |
|
$self->{'isis_code_page'} = $code_page; |
199 |
|
|
200 |
use OpenIsis; |
use OpenIsis; |
201 |
|
|
202 |
#$self->{'isis_code_page'} = $code_page; |
#$self->{'isis_code_page'} = $code_page; |
205 |
my $cp = Text::Iconv->new($code_page,$self->{'code_page'}); |
my $cp = Text::Iconv->new($code_page,$self->{'code_page'}); |
206 |
|
|
207 |
$log->info("reading ISIS database '",$arg->{'filename'},"'"); |
$log->info("reading ISIS database '",$arg->{'filename'},"'"); |
208 |
|
$log->debug("isis code page: $code_page"); |
209 |
|
|
210 |
my $isis_db = OpenIsis::open($arg->{'filename'}); |
my $isis_db = OpenIsis::open($arg->{'filename'}); |
211 |
|
|
218 |
# read database |
# read database |
219 |
for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) { |
for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) { |
220 |
|
|
221 |
|
|
222 |
|
$log->debug("mfn: $mfn\n"); |
223 |
|
|
224 |
|
my $rec; |
225 |
|
|
226 |
# read record |
# read record |
227 |
my $row = OpenIsis::read( $isis_db, $mfn ); |
my $row = OpenIsis::read( $isis_db, $mfn ); |
228 |
foreach my $k (keys %{$row}) { |
foreach my $k (keys %{$row}) { |
240 |
$val = $l; |
$val = $l; |
241 |
} |
} |
242 |
|
|
243 |
push @{$self->{'data'}->{$mfn}->{$k}}, $val; |
push @{$rec->{$k}}, $val; |
244 |
} |
} |
245 |
} else { |
} else { |
246 |
push @{$self->{'data'}->{$mfn}->{'000'}}, $mfn; |
push @{$rec->{'000'}}, $mfn; |
247 |
} |
} |
248 |
|
|
249 |
} |
} |
250 |
|
|
251 |
|
$log->confess("record $mfn empty?") unless ($rec); |
252 |
|
|
253 |
|
# store |
254 |
|
if ($self->{'low_mem'}) { |
255 |
|
$self->{'db'}->put($mfn, $rec); |
256 |
|
} else { |
257 |
|
$self->{'data'}->{$mfn} = $rec; |
258 |
|
} |
259 |
|
|
260 |
# create lookup |
# create lookup |
|
my $rec = $self->{'data'}->{$mfn}; |
|
261 |
$self->create_lookup($rec, @{$arg->{'lookup'}}); |
$self->create_lookup($rec, @{$arg->{'lookup'}}); |
262 |
|
|
263 |
$self->progress_bar($mfn,$maxmfn); |
$self->progress_bar($mfn,$maxmfn); |
267 |
$self->{'current_mfn'} = 1; |
$self->{'current_mfn'} = 1; |
268 |
$self->{'last_pcnt'} = 0; |
$self->{'last_pcnt'} = 0; |
269 |
|
|
270 |
|
$log->debug("max mfn: $maxmfn"); |
271 |
|
|
272 |
# store max mfn and return it. |
# store max mfn and return it. |
273 |
return $self->{'max_mfn'} = $maxmfn; |
return $self->{'max_mfn'} = $maxmfn; |
274 |
} |
} |
297 |
|
|
298 |
$self->progress_bar($mfn,$self->{'max_mfn'}); |
$self->progress_bar($mfn,$self->{'max_mfn'}); |
299 |
|
|
300 |
return $self->{'data'}->{$mfn}; |
if ($self->{'low_mem'}) { |
301 |
|
return $self->{'db'}->get($mfn); |
302 |
|
} else { |
303 |
|
return $self->{'data'}->{$mfn}; |
304 |
|
} |
305 |
} |
} |
306 |
|
|
307 |
=head2 progress_bar |
=head2 progress_bar |
336 |
printf STDERR ("%5d / %5d [%-51s] %-2d %% \r",$curr,$max,"=" x ($p/2).">", $p ); |
printf STDERR ("%5d / %5d [%-51s] %-2d %% \r",$curr,$max,"=" x ($p/2).">", $p ); |
337 |
$self->{'last_pcnt'} = $p; |
$self->{'last_pcnt'} = $p; |
338 |
} |
} |
339 |
|
print STDERR "\n" if ($p == 100); |
340 |
} |
} |
341 |
|
|
342 |
=head2 open_import_xml |
=head2 open_import_xml |
398 |
$log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o); |
$log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o); |
399 |
|
|
400 |
foreach my $i (@_) { |
foreach my $i (@_) { |
401 |
if ($i->{'eval'}) { |
$log->logconfess("need key") unless defined($i->{'key'}); |
402 |
my $eval = $self->fill_in($rec,$i->{'eval'}); |
$log->logconfess("need val") unless defined($i->{'val'}); |
403 |
my $key = $self->fill_in($rec,$i->{'key'}); |
|
404 |
my @val = $self->fill_in($rec,$i->{'val'}); |
if (defined($i->{'eval'})) { |
405 |
if ($key && @val && eval $eval) { |
# eval first, so we can skip fill_in for key and val |
406 |
|
my $eval = $self->fill_in($rec,$i->{'eval'}) || next; |
407 |
|
if ($self->_eval($eval)) { |
408 |
|
my $key = $self->fill_in($rec,$i->{'key'}) || next; |
409 |
|
my @val = $self->fill_in($rec,$i->{'val'}) || next; |
410 |
$log->debug("stored $key = ",sub { join(" | ",@val) }); |
$log->debug("stored $key = ",sub { join(" | ",@val) }); |
411 |
push @{$self->{'lookup'}->{$key}}, @val; |
push @{$self->{'lookup'}->{$key}}, @val; |
412 |
} |
} |
413 |
} else { |
} else { |
414 |
my $key = $self->fill_in($rec,$i->{'key'}); |
my $key = $self->fill_in($rec,$i->{'key'}) || next; |
415 |
my @val = $self->fill_in($rec,$i->{'val'}); |
my @val = $self->fill_in($rec,$i->{'val'}) || next; |
416 |
if ($key && @val) { |
$log->debug("stored $key = ",sub { join(" | ",@val) }); |
417 |
$log->debug("stored $key = ",sub { join(" | ",@val) }); |
push @{$self->{'lookup'}->{$key}}, @val; |
|
push @{$self->{'lookup'}->{$key}}, @val; |
|
|
} |
|
418 |
} |
} |
419 |
} |
} |
420 |
} |
} |
727 |
my @ds = $webpac->data_structure($rec); |
my @ds = $webpac->data_structure($rec); |
728 |
|
|
729 |
This method will also set C<$webpac->{'currnet_filename'}> if there is |
This method will also set C<$webpac->{'currnet_filename'}> if there is |
730 |
<filename> tag in C<import_xml>. |
<filename> tag in C<import_xml> and C<$webpac->{'headline'}> if there is |
731 |
|
<headline> tag. |
732 |
|
|
733 |
=cut |
=cut |
734 |
|
|
741 |
$log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o); |
$log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o); |
742 |
|
|
743 |
undef $self->{'currnet_filename'}; |
undef $self->{'currnet_filename'}; |
744 |
|
undef $self->{'headline'}; |
745 |
|
|
746 |
my @sorted_tags; |
my @sorted_tags; |
747 |
if ($self->{tags_by_order}) { |
if ($self->{tags_by_order}) { |
779 |
@v = map { $self->apply_format($tag->{'format_name'},$tag->{'format_delimiter'},$_) } @v; |
@v = map { $self->apply_format($tag->{'format_name'},$tag->{'format_delimiter'},$_) } @v; |
780 |
} |
} |
781 |
|
|
782 |
|
if ($field eq 'filename') { |
783 |
|
$self->{'current_filename'} = join('',@v); |
784 |
|
$log->debug("filename: ",$self->{'current_filename'}); |
785 |
|
} elsif ($field eq 'headline') { |
786 |
|
$self->{'headline'} .= join('',@v); |
787 |
|
$log->debug("headline: ",$self->{'headline'}); |
788 |
|
next; # don't return headline in data_structure! |
789 |
|
} |
790 |
|
|
791 |
# does tag have type? |
# does tag have type? |
792 |
if ($tag->{'type'}) { |
if ($tag->{'type'}) { |
793 |
push @{$row->{$tag->{'type'}}}, @v; |
push @{$row->{$tag->{'type'}}}, @v; |
796 |
push @{$row->{'swish'}}, @v; |
push @{$row->{'swish'}}, @v; |
797 |
} |
} |
798 |
|
|
|
if ($field eq 'filename') { |
|
|
$self->{'current_filename'} = join('',@v); |
|
|
$log->debug("filename: ",$self->{'current_filename'}); |
|
|
} |
|
799 |
|
|
800 |
} |
} |
801 |
|
|
846 |
return $out; |
return $out; |
847 |
} |
} |
848 |
|
|
849 |
|
=head2 output_file |
850 |
|
|
851 |
|
Create output from in-memory data structure using Template Toolkit template |
852 |
|
to a file. |
853 |
|
|
854 |
|
$webpac->output_file( |
855 |
|
file => 'out.txt', |
856 |
|
template => 'text.tt', |
857 |
|
data => @ds |
858 |
|
); |
859 |
|
|
860 |
|
=cut |
861 |
|
|
862 |
|
sub output_file { |
863 |
|
my $self = shift; |
864 |
|
|
865 |
|
my $args = {@_}; |
866 |
|
|
867 |
|
my $log = $self->_get_logger(); |
868 |
|
|
869 |
|
my $file = $args->{'file'} || $log->logconfess("need file name"); |
870 |
|
|
871 |
|
$log->debug("creating file ",$file); |
872 |
|
|
873 |
|
open(my $fh, ">", $file) || $log->logdie("can't open output file '$file': $!"); |
874 |
|
print $fh $self->output( |
875 |
|
template => $args->{'template'}, |
876 |
|
data => $args->{'data'}, |
877 |
|
) || $log->logdie("print: $!"); |
878 |
|
close($fh) || $log->logdie("close: $!"); |
879 |
|
} |
880 |
|
|
881 |
=head2 apply_format |
=head2 apply_format |
882 |
|
|
883 |
Apply format specified in tag with C<format_name="name"> and |
Apply format specified in tag with C<format_name="name"> and |