34 |
|
|
35 |
my $webpac = new WebPAC( |
my $webpac = new WebPAC( |
36 |
config_file => 'name.conf', |
config_file => 'name.conf', |
37 |
[code_page => 'ISO-8859-2',] |
code_page => 'ISO-8859-2', |
38 |
[low_mem => 1,] |
low_mem => 1, |
39 |
); |
); |
40 |
|
|
41 |
Default C<code_page> is C<ISO-8859-2>. |
Default C<code_page> is C<ISO-8859-2>. |
209 |
if (my $s = $self->{'start_mfn'}) { |
if (my $s = $self->{'start_mfn'}) { |
210 |
$log->info("skipping to MFN $s"); |
$log->info("skipping to MFN $s"); |
211 |
$startmfn = $s; |
$startmfn = $s; |
212 |
|
} else { |
213 |
|
$self->{'start_mfn'} = $startmfn; |
214 |
} |
} |
215 |
|
|
216 |
$maxmfn = $startmfn + $self->{limit_mfn} if ($self->{limit_mfn}); |
$maxmfn = $startmfn + $self->{limit_mfn} if ($self->{limit_mfn}); |
266 |
|
|
267 |
} |
} |
268 |
|
|
269 |
$self->{'current_mfn'} = $startmfn; |
$self->{'current_mfn'} = -1; |
270 |
$self->{'last_pcnt'} = 0; |
$self->{'last_pcnt'} = 0; |
271 |
|
|
272 |
$log->debug("max mfn: $maxmfn"); |
$log->debug("max mfn: $maxmfn"); |
289 |
|
|
290 |
my $log = $self->_get_logger(); |
my $log = $self->_get_logger(); |
291 |
|
|
292 |
my $mfn = $self->{'current_mfn'}++ || $log->logconfess("it seems that you didn't load database!"); |
$log->logconfess("it seems that you didn't load database!") unless ($self->{'current_mfn'}); |
293 |
|
|
294 |
|
if ($self->{'current_mfn'} == -1) { |
295 |
|
$self->{'current_mfn'} = $self->{'start_mfn'}; |
296 |
|
} else { |
297 |
|
$self->{'current_mfn'}++; |
298 |
|
} |
299 |
|
|
300 |
|
my $mfn = $self->{'current_mfn'}; |
301 |
|
|
302 |
if ($mfn > $self->{'max_mfn'}) { |
if ($mfn > $self->{'max_mfn'}) { |
303 |
$self->{'current_mfn'} = $self->{'max_mfn'}; |
$self->{'current_mfn'} = $self->{'max_mfn'}; |
314 |
} |
} |
315 |
} |
} |
316 |
|
|
317 |
|
=head2 mfn |
318 |
|
|
319 |
|
Returns current record number (MFN). |
320 |
|
|
321 |
|
print $webpac->mfn; |
322 |
|
|
323 |
|
=cut |
324 |
|
|
325 |
|
sub mfn { |
326 |
|
my $self = shift; |
327 |
|
return $self->{'current_mfn'}; |
328 |
|
} |
329 |
|
|
330 |
=head2 progress_bar |
=head2 progress_bar |
331 |
|
|
332 |
Draw progress bar on STDERR. |
Draw progress bar on STDERR. |
352 |
|
|
353 |
$self->{'last_pcnt'} ||= 1; |
$self->{'last_pcnt'} ||= 1; |
354 |
|
|
355 |
my $p = int($curr * 100 / $max); |
my $p = int($curr * 100 / $max) || 1; |
356 |
|
|
357 |
# reset on re-run |
# reset on re-run |
358 |
if ($p < $self->{'last_pcnt'}) { |
if ($p < $self->{'last_pcnt'}) { |
359 |
$self->{'last_pcnt'} = $p; |
$self->{'last_pcnt'} = $p; |
360 |
$self->{'last_t'} = time(); |
$self->{'last_t'} = time(); |
361 |
$self->{'last_curr'} = 1; |
$self->{'last_curr'} = undef; |
362 |
} |
} |
363 |
|
|
364 |
|
$self->{'last_t'} ||= time(); |
365 |
|
|
366 |
if ($p != $self->{'last_pcnt'}) { |
if ($p != $self->{'last_pcnt'}) { |
367 |
|
|
368 |
my $last_curr = $self->{'last_curr'} || $curr; |
my $last_curr = $self->{'last_curr'} || $curr; |
779 |
return @arr; |
return @arr; |
780 |
} |
} |
781 |
|
|
782 |
|
=head2 sort_arr |
783 |
|
|
784 |
|
Sort array ignoring case and html in data |
785 |
|
|
786 |
|
my @sorted = $webpac->sort_arr(@unsorted); |
787 |
|
|
788 |
|
=cut |
789 |
|
|
790 |
|
sub sort_arr { |
791 |
|
my $self = shift; |
792 |
|
|
793 |
|
my $log = $self->_get_logger(); |
794 |
|
|
795 |
|
# FIXME add Schwartzian Transformation? |
796 |
|
|
797 |
|
my @sorted = sort { |
798 |
|
$a =~ s#<[^>]+/*>##; |
799 |
|
$b =~ s#<[^>]+/*>##; |
800 |
|
lc($b) cmp lc($a) |
801 |
|
} @_; |
802 |
|
$log->debug("sorted values: ",sub { join(", ",@sorted) }); |
803 |
|
|
804 |
|
return @sorted; |
805 |
|
} |
806 |
|
|
807 |
|
|
808 |
=head2 data_structure |
=head2 data_structure |
809 |
|
|
860 |
} |
} |
861 |
next if (! @v); |
next if (! @v); |
862 |
|
|
863 |
|
if ($tag->{'sort'}) { |
864 |
|
@v = $self->sort_arr(@v); |
865 |
|
$log->warn("sort within tag is usually not what you want!"); |
866 |
|
} |
867 |
|
|
868 |
# use format? |
# use format? |
869 |
if ($tag->{'format_name'}) { |
if ($tag->{'format_name'}) { |
870 |
@v = map { $self->apply_format($tag->{'format_name'},$tag->{'format_delimiter'},$_) } @v; |
@v = map { $self->apply_format($tag->{'format_name'},$tag->{'format_delimiter'},$_) } @v; |
879 |
next; # don't return headline in data_structure! |
next; # don't return headline in data_structure! |
880 |
} |
} |
881 |
|
|
882 |
# does tag have type? |
# delimiter will join repeatable fields |
883 |
if ($tag->{'type'}) { |
if ($tag->{'delimiter'}) { |
884 |
push @{$row->{$tag->{'type'}}}, @v; |
@v = ( join($tag->{'delimiter'}, @v) ); |
885 |
} else { |
} |
886 |
push @{$row->{'display'}}, @v; |
|
887 |
push @{$row->{'swish'}}, @v; |
# default types |
888 |
|
my @types = qw(display swish); |
889 |
|
# override by type attribute |
890 |
|
@types = ( $tag->{'type'} ) if ($tag->{'type'}); |
891 |
|
|
892 |
|
foreach my $type (@types) { |
893 |
|
# append to previous line? |
894 |
|
$log->debug("type: $type ",sub { join(" ",@v) }, $row->{'append'} || 'no append'); |
895 |
|
if ($tag->{'append'}) { |
896 |
|
|
897 |
|
# I will delimit appended part with |
898 |
|
# delimiter (or ,) |
899 |
|
my $d = $tag->{'delimiter'}; |
900 |
|
# default delimiter |
901 |
|
$d ||= ", "; |
902 |
|
|
903 |
|
my $last = pop @{$row->{$type}}; |
904 |
|
$d = "" if (! $last); |
905 |
|
$last .= $d . join($d, @v); |
906 |
|
push @{$row->{$type}}, $last; |
907 |
|
|
908 |
|
} else { |
909 |
|
push @{$row->{$type}}, @v; |
910 |
|
} |
911 |
} |
} |
912 |
|
|
913 |
|
|
920 |
my $name = $self->{'import_xml'}->{'indexer'}->{$field}->{'name'}; |
my $name = $self->{'import_xml'}->{'indexer'}->{$field}->{'name'}; |
921 |
$row->{'name'} = $name ? $self->_x($name) : $field; |
$row->{'name'} = $name ? $self->_x($name) : $field; |
922 |
|
|
923 |
|
# post-sort all values in field |
924 |
|
if ($self->{'import_xml'}->{'indexer'}->{$field}->{'sort'}) { |
925 |
|
$log->warn("sort at field tag not implemented"); |
926 |
|
} |
927 |
|
|
928 |
push @ds, $row; |
push @ds, $row; |
929 |
|
|
930 |
$log->debug("row $field: ",sub { Dumper($row) }); |
$log->debug("row $field: ",sub { Dumper($row) }); |