34 |
|
|
35 |
my $webpac = new WebPAC( |
my $webpac = new WebPAC( |
36 |
config_file => 'name.conf', |
config_file => 'name.conf', |
37 |
[code_page => 'ISO-8859-2',] |
code_page => 'ISO-8859-2', |
38 |
[low_mem => 1,] |
low_mem => 1, |
39 |
); |
); |
40 |
|
|
41 |
Default C<code_page> is C<ISO-8859-2>. |
Default C<code_page> is C<ISO-8859-2>. |
123 |
$log->debug("removed '$db_file' from last run"); |
$log->debug("removed '$db_file' from last run"); |
124 |
} |
} |
125 |
|
|
126 |
use DBM::Deep; |
require DBM::Deep; |
127 |
|
|
128 |
my $db = new DBM::Deep $db_file; |
my $db = new DBM::Deep $db_file; |
129 |
|
|
148 |
$webpac->open_isis( |
$webpac->open_isis( |
149 |
filename => '/data/ISIS/ISIS', |
filename => '/data/ISIS/ISIS', |
150 |
code_page => '852', |
code_page => '852', |
151 |
limit_mfn => '500', |
limit_mfn => 500, |
152 |
|
start_mfn => 6000, |
153 |
lookup => [ ... ], |
lookup => [ ... ], |
154 |
); |
); |
155 |
|
|
156 |
By default, ISIS code page is assumed to be C<852>. |
By default, ISIS code page is assumed to be C<852>. |
157 |
|
|
158 |
|
If optional parametar C<start_mfn> is set, this will be first MFN to read |
159 |
|
from database (so you can skip beginning of your database if you need to). |
160 |
|
|
161 |
If optional parametar C<limit_mfn> is set, it will read just 500 records |
If optional parametar C<limit_mfn> is set, it will read just 500 records |
162 |
from database in example above. |
from database in example above. |
163 |
|
|
185 |
$log->logcroak("need filename") if (! $arg->{'filename'}); |
$log->logcroak("need filename") if (! $arg->{'filename'}); |
186 |
my $code_page = $arg->{'code_page'} || '852'; |
my $code_page = $arg->{'code_page'} || '852'; |
187 |
|
|
188 |
|
$log->logdie("can't find database ",$arg->{'filename'}) unless (glob($arg->{'filename'}.'.*')); |
189 |
|
|
190 |
# store data in object |
# store data in object |
191 |
$self->{'isis_filename'} = $arg->{'filename'}; |
$self->{'isis_filename'} = $arg->{'filename'}; |
192 |
$self->{'isis_code_page'} = $code_page; |
$self->{'isis_code_page'} = $code_page; |
204 |
my $isis_db = OpenIsis::open($arg->{'filename'}); |
my $isis_db = OpenIsis::open($arg->{'filename'}); |
205 |
|
|
206 |
my $maxmfn = OpenIsis::maxRowid( $isis_db ) || 1; |
my $maxmfn = OpenIsis::maxRowid( $isis_db ) || 1; |
207 |
|
my $startmfn = 1; |
208 |
|
|
209 |
|
if (my $s = $self->{'start_mfn'}) { |
210 |
|
$log->info("skipping to MFN $s"); |
211 |
|
$startmfn = $s; |
212 |
|
} else { |
213 |
|
$self->{'start_mfn'} = $startmfn; |
214 |
|
} |
215 |
|
|
216 |
$maxmfn = $self->{limit_mfn} if ($self->{limit_mfn}); |
$maxmfn = $startmfn + $self->{limit_mfn} if ($self->{limit_mfn}); |
217 |
|
|
218 |
$log->info("processing $maxmfn records..."); |
$log->info("processing ",($maxmfn-$startmfn)." records..."); |
219 |
|
|
220 |
# read database |
# read database |
221 |
for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) { |
for (my $mfn = $startmfn; $mfn <= $maxmfn; $mfn++) { |
222 |
|
|
223 |
|
|
224 |
$log->debug("mfn: $mfn\n"); |
$log->debug("mfn: $mfn\n"); |
266 |
|
|
267 |
} |
} |
268 |
|
|
269 |
$self->{'current_mfn'} = 1; |
$self->{'current_mfn'} = -1; |
270 |
$self->{'last_pcnt'} = 0; |
$self->{'last_pcnt'} = 0; |
271 |
|
|
272 |
$log->debug("max mfn: $maxmfn"); |
$log->debug("max mfn: $maxmfn"); |
289 |
|
|
290 |
my $log = $self->_get_logger(); |
my $log = $self->_get_logger(); |
291 |
|
|
292 |
my $mfn = $self->{'current_mfn'}++ || $log->logconfess("it seems that you didn't load database!"); |
$log->logconfess("it seems that you didn't load database!") unless ($self->{'current_mfn'}); |
293 |
|
|
294 |
|
if ($self->{'current_mfn'} == -1) { |
295 |
|
$self->{'current_mfn'} = $self->{'start_mfn'}; |
296 |
|
} else { |
297 |
|
$self->{'current_mfn'}++; |
298 |
|
} |
299 |
|
|
300 |
|
my $mfn = $self->{'current_mfn'}; |
301 |
|
|
302 |
if ($mfn > $self->{'max_mfn'}) { |
if ($mfn > $self->{'max_mfn'}) { |
303 |
$self->{'current_mfn'} = $self->{'max_mfn'}; |
$self->{'current_mfn'} = $self->{'max_mfn'}; |
314 |
} |
} |
315 |
} |
} |
316 |
|
|
317 |
|
=head2 mfn |
318 |
|
|
319 |
|
Returns current record number (MFN). |
320 |
|
|
321 |
|
print $webpac->mfn; |
322 |
|
|
323 |
|
=cut |
324 |
|
|
325 |
|
sub mfn { |
326 |
|
my $self = shift; |
327 |
|
return $self->{'current_mfn'}; |
328 |
|
} |
329 |
|
|
330 |
=head2 progress_bar |
=head2 progress_bar |
331 |
|
|
332 |
Draw progress bar on STDERR. |
Draw progress bar on STDERR. |
358 |
if ($p < $self->{'last_pcnt'}) { |
if ($p < $self->{'last_pcnt'}) { |
359 |
$self->{'last_pcnt'} = $p; |
$self->{'last_pcnt'} = $p; |
360 |
$self->{'last_t'} = time(); |
$self->{'last_t'} = time(); |
361 |
$self->{'last_curr'} = 1; |
$self->{'last_curr'} = undef; |
362 |
} |
} |
363 |
|
|
364 |
if ($p != $self->{'last_pcnt'}) { |
if ($p != $self->{'last_pcnt'}) { |
777 |
return @arr; |
return @arr; |
778 |
} |
} |
779 |
|
|
780 |
|
=head2 sort_arr |
781 |
|
|
782 |
|
Sort array ignoring case and html in data |
783 |
|
|
784 |
|
my @sorted = $webpac->sort_arr(@unsorted); |
785 |
|
|
786 |
|
=cut |
787 |
|
|
788 |
|
sub sort_arr { |
789 |
|
my $self = shift; |
790 |
|
|
791 |
|
my $log = $self->_get_logger(); |
792 |
|
|
793 |
|
# FIXME add Schwartzian Transformation? |
794 |
|
|
795 |
|
my @sorted = sort { |
796 |
|
$a =~ s#<[^>]+/*>##; |
797 |
|
$b =~ s#<[^>]+/*>##; |
798 |
|
lc($b) cmp lc($a) |
799 |
|
} @_; |
800 |
|
$log->debug("sorted values: ",sub { join(", ",@sorted) }); |
801 |
|
|
802 |
|
return @sorted; |
803 |
|
} |
804 |
|
|
805 |
|
|
806 |
=head2 data_structure |
=head2 data_structure |
807 |
|
|
858 |
} |
} |
859 |
next if (! @v); |
next if (! @v); |
860 |
|
|
861 |
|
if ($tag->{'sort'}) { |
862 |
|
@v = $self->sort_arr(@v); |
863 |
|
$log->warn("sort within tag is usually not what you want!"); |
864 |
|
} |
865 |
|
|
866 |
# use format? |
# use format? |
867 |
if ($tag->{'format_name'}) { |
if ($tag->{'format_name'}) { |
868 |
@v = map { $self->apply_format($tag->{'format_name'},$tag->{'format_delimiter'},$_) } @v; |
@v = map { $self->apply_format($tag->{'format_name'},$tag->{'format_delimiter'},$_) } @v; |
877 |
next; # don't return headline in data_structure! |
next; # don't return headline in data_structure! |
878 |
} |
} |
879 |
|
|
880 |
# does tag have type? |
# delimiter will join repeatable fields |
881 |
if ($tag->{'type'}) { |
if ($tag->{'delimiter'}) { |
882 |
push @{$row->{$tag->{'type'}}}, @v; |
@v = ( join($tag->{'delimiter'}, @v) ); |
883 |
} else { |
} |
884 |
push @{$row->{'display'}}, @v; |
|
885 |
push @{$row->{'swish'}}, @v; |
# default types |
886 |
|
my @types = qw(display swish); |
887 |
|
# override by type attribute |
888 |
|
@types = ( $tag->{'type'} ) if ($tag->{'type'}); |
889 |
|
|
890 |
|
foreach my $type (@types) { |
891 |
|
# append to previous line? |
892 |
|
$log->debug("type: $type ",sub { join(" ",@v) }, $row->{'append'} || 'no append'); |
893 |
|
if ($tag->{'append'}) { |
894 |
|
|
895 |
|
# I will delimit appended part with |
896 |
|
# delimiter (or ,) |
897 |
|
my $d = $tag->{'delimiter'}; |
898 |
|
# default delimiter |
899 |
|
$d ||= ", "; |
900 |
|
|
901 |
|
my $last = pop @{$row->{$type}}; |
902 |
|
$d = "" if (! $last); |
903 |
|
$last .= $d . join($d, @v); |
904 |
|
push @{$row->{$type}}, $last; |
905 |
|
|
906 |
|
} else { |
907 |
|
push @{$row->{$type}}, @v; |
908 |
|
} |
909 |
} |
} |
910 |
|
|
911 |
|
|
918 |
my $name = $self->{'import_xml'}->{'indexer'}->{$field}->{'name'}; |
my $name = $self->{'import_xml'}->{'indexer'}->{$field}->{'name'}; |
919 |
$row->{'name'} = $name ? $self->_x($name) : $field; |
$row->{'name'} = $name ? $self->_x($name) : $field; |
920 |
|
|
921 |
|
# post-sort all values in field |
922 |
|
if ($self->{'import_xml'}->{'indexer'}->{$field}->{'sort'}) { |
923 |
|
$log->warn("sort at field tag not implemented"); |
924 |
|
} |
925 |
|
|
926 |
push @ds, $row; |
push @ds, $row; |
927 |
|
|
928 |
$log->debug("row $field: ",sub { Dumper($row) }); |
$log->debug("row $field: ",sub { Dumper($row) }); |