34 |
|
|
35 |
my $webpac = new WebPAC( |
my $webpac = new WebPAC( |
36 |
config_file => 'name.conf', |
config_file => 'name.conf', |
37 |
[code_page => 'ISO-8859-2',] |
code_page => 'ISO-8859-2', |
38 |
[low_mem => 1,] |
low_mem => 1, |
39 |
); |
); |
40 |
|
|
41 |
Default C<code_page> is C<ISO-8859-2>. |
Default C<code_page> is C<ISO-8859-2>. |
123 |
$log->debug("removed '$db_file' from last run"); |
$log->debug("removed '$db_file' from last run"); |
124 |
} |
} |
125 |
|
|
126 |
use DBM::Deep; |
require DBM::Deep; |
127 |
|
|
128 |
my $db = new DBM::Deep $db_file; |
my $db = new DBM::Deep $db_file; |
129 |
|
|
148 |
$webpac->open_isis( |
$webpac->open_isis( |
149 |
filename => '/data/ISIS/ISIS', |
filename => '/data/ISIS/ISIS', |
150 |
code_page => '852', |
code_page => '852', |
151 |
limit_mfn => '500', |
limit_mfn => 500, |
152 |
|
start_mfn => 6000, |
153 |
lookup => [ ... ], |
lookup => [ ... ], |
154 |
); |
); |
155 |
|
|
156 |
By default, ISIS code page is assumed to be C<852>. |
By default, ISIS code page is assumed to be C<852>. |
157 |
|
|
158 |
|
If optional parametar C<start_mfn> is set, this will be first MFN to read |
159 |
|
from database (so you can skip beginning of your database if you need to). |
160 |
|
|
161 |
If optional parametar C<limit_mfn> is set, it will read just 500 records |
If optional parametar C<limit_mfn> is set, it will read just 500 records |
162 |
from database in example above. |
from database in example above. |
163 |
|
|
185 |
$log->logcroak("need filename") if (! $arg->{'filename'}); |
$log->logcroak("need filename") if (! $arg->{'filename'}); |
186 |
my $code_page = $arg->{'code_page'} || '852'; |
my $code_page = $arg->{'code_page'} || '852'; |
187 |
|
|
188 |
|
$log->logdie("can't find database ",$arg->{'filename'}) unless (glob($arg->{'filename'}.'.*')); |
189 |
|
|
190 |
# store data in object |
# store data in object |
191 |
$self->{'isis_filename'} = $arg->{'filename'}; |
$self->{'isis_filename'} = $arg->{'filename'}; |
192 |
$self->{'isis_code_page'} = $code_page; |
$self->{'isis_code_page'} = $code_page; |
204 |
my $isis_db = OpenIsis::open($arg->{'filename'}); |
my $isis_db = OpenIsis::open($arg->{'filename'}); |
205 |
|
|
206 |
my $maxmfn = OpenIsis::maxRowid( $isis_db ) || 1; |
my $maxmfn = OpenIsis::maxRowid( $isis_db ) || 1; |
207 |
|
my $startmfn = 1; |
208 |
|
|
209 |
|
if (my $s = $self->{'start_mfn'}) { |
210 |
|
$log->info("skipping to MFN $s"); |
211 |
|
$startmfn = $s; |
212 |
|
} |
213 |
|
|
214 |
$maxmfn = $self->{limit_mfn} if ($self->{limit_mfn}); |
$maxmfn = $startmfn + $self->{limit_mfn} if ($self->{limit_mfn}); |
215 |
|
|
216 |
$log->info("processing $maxmfn records..."); |
$log->info("processing ",($maxmfn-$startmfn)." records..."); |
217 |
|
|
218 |
# read database |
# read database |
219 |
for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) { |
for (my $mfn = $startmfn; $mfn <= $maxmfn; $mfn++) { |
220 |
|
|
221 |
|
|
222 |
$log->debug("mfn: $mfn\n"); |
$log->debug("mfn: $mfn\n"); |
264 |
|
|
265 |
} |
} |
266 |
|
|
267 |
$self->{'current_mfn'} = 1; |
$self->{'current_mfn'} = $startmfn; |
268 |
$self->{'last_pcnt'} = 0; |
$self->{'last_pcnt'} = 0; |
269 |
|
|
270 |
$log->debug("max mfn: $maxmfn"); |
$log->debug("max mfn: $maxmfn"); |
810 |
} |
} |
811 |
next if (! @v); |
next if (! @v); |
812 |
|
|
813 |
|
if ($tag->{'sort'}) { |
814 |
|
# very special sort, ignoring case and |
815 |
|
# html |
816 |
|
@v = sort { |
817 |
|
$a =~ s#<[^>]+/*>##; |
818 |
|
$b =~ s#<[^>]+/*>##; |
819 |
|
lc($b) cmp lc($a) |
820 |
|
} @v; |
821 |
|
$log->warn("sort within tag is usually not what you want!"); |
822 |
|
$log->debug("sorted values: ",sub { join(", ",@v) }); |
823 |
|
} |
824 |
|
|
825 |
# use format? |
# use format? |
826 |
if ($tag->{'format_name'}) { |
if ($tag->{'format_name'}) { |
827 |
@v = map { $self->apply_format($tag->{'format_name'},$tag->{'format_delimiter'},$_) } @v; |
@v = map { $self->apply_format($tag->{'format_name'},$tag->{'format_delimiter'},$_) } @v; |
836 |
next; # don't return headline in data_structure! |
next; # don't return headline in data_structure! |
837 |
} |
} |
838 |
|
|
839 |
# does tag have type? |
# delimiter will join repeatable fields |
840 |
if ($tag->{'type'}) { |
if ($tag->{'delimiter'}) { |
841 |
push @{$row->{$tag->{'type'}}}, @v; |
@v = ( join($tag->{'delimiter'}, @v) ); |
842 |
} else { |
} |
843 |
push @{$row->{'display'}}, @v; |
|
844 |
push @{$row->{'swish'}}, @v; |
# default types |
845 |
|
my @types = qw(display swish); |
846 |
|
# override by type attribute |
847 |
|
@types = ( $tag->{'type'} ) if ($tag->{'type'}); |
848 |
|
|
849 |
|
foreach my $type (@types) { |
850 |
|
# append to previous line? |
851 |
|
$log->debug("type: $type ",sub { join(" ",@v) }, $row->{'append'} || 'no append'); |
852 |
|
if ($tag->{'append'}) { |
853 |
|
|
854 |
|
# I will delimit appended part with |
855 |
|
# delimiter (or ,) |
856 |
|
my $d = $tag->{'delimiter'}; |
857 |
|
# default delimiter |
858 |
|
$d ||= ", "; |
859 |
|
|
860 |
|
my $last = pop @{$row->{$type}}; |
861 |
|
$d = "" if (! $last); |
862 |
|
$last .= $d . join($d, @v); |
863 |
|
push @{$row->{$type}}, $last; |
864 |
|
|
865 |
|
} else { |
866 |
|
push @{$row->{$type}}, @v; |
867 |
|
} |
868 |
} |
} |
869 |
|
|
870 |
|
|
877 |
my $name = $self->{'import_xml'}->{'indexer'}->{$field}->{'name'}; |
my $name = $self->{'import_xml'}->{'indexer'}->{$field}->{'name'}; |
878 |
$row->{'name'} = $name ? $self->_x($name) : $field; |
$row->{'name'} = $name ? $self->_x($name) : $field; |
879 |
|
|
880 |
|
# post-sort all values in field |
881 |
|
if ($self->{'import_xml'}->{'indexer'}->{$field}->{'sort'}) { |
882 |
|
$log->warn("sort at field tag not implemented"); |
883 |
|
|
884 |
|
} |
885 |
|
|
886 |
push @ds, $row; |
push @ds, $row; |
887 |
|
|
888 |
$log->debug("row $field: ",sub { Dumper($row) }); |
$log->debug("row $field: ",sub { Dumper($row) }); |