34 |
|
|
35 |
my $webpac = new WebPAC( |
my $webpac = new WebPAC( |
36 |
config_file => 'name.conf', |
config_file => 'name.conf', |
37 |
[code_page => 'ISO-8859-2',] |
code_page => 'ISO-8859-2', |
38 |
[low_mem => 1,] |
low_mem => 1, |
39 |
); |
); |
40 |
|
|
41 |
Default C<code_page> is C<ISO-8859-2>. |
Default C<code_page> is C<ISO-8859-2>. |
123 |
$log->debug("removed '$db_file' from last run"); |
$log->debug("removed '$db_file' from last run"); |
124 |
} |
} |
125 |
|
|
126 |
use DBM::Deep; |
require DBM::Deep; |
127 |
|
|
128 |
my $db = new DBM::Deep $db_file; |
my $db = new DBM::Deep $db_file; |
129 |
|
|
148 |
$webpac->open_isis( |
$webpac->open_isis( |
149 |
filename => '/data/ISIS/ISIS', |
filename => '/data/ISIS/ISIS', |
150 |
code_page => '852', |
code_page => '852', |
151 |
limit_mfn => '500', |
limit_mfn => 500, |
152 |
|
start_mfn => 6000, |
153 |
lookup => [ ... ], |
lookup => [ ... ], |
154 |
); |
); |
155 |
|
|
156 |
By default, ISIS code page is assumed to be C<852>. |
By default, ISIS code page is assumed to be C<852>. |
157 |
|
|
158 |
|
If optional parametar C<start_mfn> is set, this will be first MFN to read |
159 |
|
from database (so you can skip beginning of your database if you need to). |
160 |
|
|
161 |
If optional parametar C<limit_mfn> is set, it will read just 500 records |
If optional parametar C<limit_mfn> is set, it will read just 500 records |
162 |
from database in example above. |
from database in example above. |
163 |
|
|
185 |
$log->logcroak("need filename") if (! $arg->{'filename'}); |
$log->logcroak("need filename") if (! $arg->{'filename'}); |
186 |
my $code_page = $arg->{'code_page'} || '852'; |
my $code_page = $arg->{'code_page'} || '852'; |
187 |
|
|
188 |
|
$log->logdie("can't find database ",$arg->{'filename'}) unless (glob($arg->{'filename'}.'.*')); |
189 |
|
|
190 |
# store data in object |
# store data in object |
191 |
$self->{'isis_filename'} = $arg->{'filename'}; |
$self->{'isis_filename'} = $arg->{'filename'}; |
192 |
$self->{'isis_code_page'} = $code_page; |
$self->{'isis_code_page'} = $code_page; |
204 |
my $isis_db = OpenIsis::open($arg->{'filename'}); |
my $isis_db = OpenIsis::open($arg->{'filename'}); |
205 |
|
|
206 |
my $maxmfn = OpenIsis::maxRowid( $isis_db ) || 1; |
my $maxmfn = OpenIsis::maxRowid( $isis_db ) || 1; |
207 |
|
my $startmfn = 1; |
208 |
|
|
209 |
|
if (my $s = $self->{'start_mfn'}) { |
210 |
|
$log->info("skipping to MFN $s"); |
211 |
|
$startmfn = $s; |
212 |
|
} |
213 |
|
|
214 |
$maxmfn = $self->{limit_mfn} if ($self->{limit_mfn}); |
$maxmfn = $startmfn + $self->{limit_mfn} if ($self->{limit_mfn}); |
215 |
|
|
216 |
$log->info("processing $maxmfn records..."); |
$log->info("processing ",($maxmfn-$startmfn)." records..."); |
217 |
|
|
218 |
# read database |
# read database |
219 |
for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) { |
for (my $mfn = $startmfn; $mfn <= $maxmfn; $mfn++) { |
220 |
|
|
221 |
|
|
222 |
$log->debug("mfn: $mfn\n"); |
$log->debug("mfn: $mfn\n"); |
264 |
|
|
265 |
} |
} |
266 |
|
|
267 |
$self->{'current_mfn'} = 1; |
$self->{'current_mfn'} = $startmfn; |
268 |
$self->{'last_pcnt'} = 0; |
$self->{'last_pcnt'} = 0; |
269 |
|
|
270 |
$log->debug("max mfn: $maxmfn"); |
$log->debug("max mfn: $maxmfn"); |
754 |
return @arr; |
return @arr; |
755 |
} |
} |
756 |
|
|
757 |
|
=head2 sort_arr |
758 |
|
|
759 |
|
Sort array ignoring case and html in data |
760 |
|
|
761 |
|
my @sorted = $webpac->sort_arr(@unsorted); |
762 |
|
|
763 |
|
=cut |
764 |
|
|
765 |
|
sub sort_arr { |
766 |
|
my $self = shift; |
767 |
|
|
768 |
|
my $log = $self->_get_logger(); |
769 |
|
|
770 |
|
# FIXME add Schwartzian Transformation? |
771 |
|
|
772 |
|
my @sorted = sort { |
773 |
|
$a =~ s#<[^>]+/*>##; |
774 |
|
$b =~ s#<[^>]+/*>##; |
775 |
|
lc($b) cmp lc($a) |
776 |
|
} @_; |
777 |
|
$log->debug("sorted values: ",sub { join(", ",@sorted) }); |
778 |
|
|
779 |
|
return @sorted; |
780 |
|
} |
781 |
|
|
782 |
|
|
783 |
=head2 data_structure |
=head2 data_structure |
784 |
|
|
835 |
} |
} |
836 |
next if (! @v); |
next if (! @v); |
837 |
|
|
838 |
|
if ($tag->{'sort'}) { |
839 |
|
@v = $self->sort_arr(@v); |
840 |
|
$log->warn("sort within tag is usually not what you want!"); |
841 |
|
} |
842 |
|
|
843 |
# use format? |
# use format? |
844 |
if ($tag->{'format_name'}) { |
if ($tag->{'format_name'}) { |
845 |
@v = map { $self->apply_format($tag->{'format_name'},$tag->{'format_delimiter'},$_) } @v; |
@v = map { $self->apply_format($tag->{'format_name'},$tag->{'format_delimiter'},$_) } @v; |
854 |
next; # don't return headline in data_structure! |
next; # don't return headline in data_structure! |
855 |
} |
} |
856 |
|
|
857 |
# does tag have type? |
# delimiter will join repeatable fields |
858 |
if ($tag->{'type'}) { |
if ($tag->{'delimiter'}) { |
859 |
push @{$row->{$tag->{'type'}}}, @v; |
@v = ( join($tag->{'delimiter'}, @v) ); |
860 |
} else { |
} |
861 |
push @{$row->{'display'}}, @v; |
|
862 |
push @{$row->{'swish'}}, @v; |
# default types |
863 |
|
my @types = qw(display swish); |
864 |
|
# override by type attribute |
865 |
|
@types = ( $tag->{'type'} ) if ($tag->{'type'}); |
866 |
|
|
867 |
|
foreach my $type (@types) { |
868 |
|
# append to previous line? |
869 |
|
$log->debug("type: $type ",sub { join(" ",@v) }, $row->{'append'} || 'no append'); |
870 |
|
if ($tag->{'append'}) { |
871 |
|
|
872 |
|
# I will delimit appended part with |
873 |
|
# delimiter (or ,) |
874 |
|
my $d = $tag->{'delimiter'}; |
875 |
|
# default delimiter |
876 |
|
$d ||= ", "; |
877 |
|
|
878 |
|
my $last = pop @{$row->{$type}}; |
879 |
|
$d = "" if (! $last); |
880 |
|
$last .= $d . join($d, @v); |
881 |
|
push @{$row->{$type}}, $last; |
882 |
|
|
883 |
|
} else { |
884 |
|
push @{$row->{$type}}, @v; |
885 |
|
} |
886 |
} |
} |
887 |
|
|
888 |
|
|
895 |
my $name = $self->{'import_xml'}->{'indexer'}->{$field}->{'name'}; |
my $name = $self->{'import_xml'}->{'indexer'}->{$field}->{'name'}; |
896 |
$row->{'name'} = $name ? $self->_x($name) : $field; |
$row->{'name'} = $name ? $self->_x($name) : $field; |
897 |
|
|
898 |
|
# post-sort all values in field |
899 |
|
if ($self->{'import_xml'}->{'indexer'}->{$field}->{'sort'}) { |
900 |
|
$log->warn("sort at field tag not implemented"); |
901 |
|
} |
902 |
|
|
903 |
push @ds, $row; |
push @ds, $row; |
904 |
|
|
905 |
$log->debug("row $field: ",sub { Dumper($row) }); |
$log->debug("row $field: ",sub { Dumper($row) }); |