--- trunk2/lib/WebPAC.pm 2004/09/11 08:36:38 422 +++ trunk2/lib/WebPAC.pm 2004/09/14 17:07:59 441 @@ -34,8 +34,8 @@ my $webpac = new WebPAC( config_file => 'name.conf', - [code_page => 'ISO-8859-2',] - [low_mem => 1,] + code_page => 'ISO-8859-2', + low_mem => 1, ); Default C is C. @@ -123,7 +123,7 @@ $log->debug("removed '$db_file' from last run"); } - use DBM::Deep; + require DBM::Deep; my $db = new DBM::Deep $db_file; @@ -148,12 +148,16 @@ $webpac->open_isis( filename => '/data/ISIS/ISIS', code_page => '852', - limit_mfn => '500', + limit_mfn => 500, + start_mfn => 6000, lookup => [ ... ], ); By default, ISIS code page is assumed to be C<852>. +If optional parametar C is set, this will be first MFN to read +from database (so you can skip beginning of your database if you need to). + If optional parametar C is set, it will read just 500 records from database in example above. @@ -181,6 +185,8 @@ $log->logcroak("need filename") if (! $arg->{'filename'}); my $code_page = $arg->{'code_page'} || '852'; + $log->logdie("can't find database ",$arg->{'filename'}) unless (glob($arg->{'filename'}.'.*')); + # store data in object $self->{'isis_filename'} = $arg->{'filename'}; $self->{'isis_code_page'} = $code_page; @@ -198,13 +204,19 @@ my $isis_db = OpenIsis::open($arg->{'filename'}); my $maxmfn = OpenIsis::maxRowid( $isis_db ) || 1; + my $startmfn = 1; + + if (my $s = $self->{'start_mfn'}) { + $log->info("skipping to MFN $s"); + $startmfn = $s; + } - $maxmfn = $self->{limit_mfn} if ($self->{limit_mfn}); + $maxmfn = $startmfn + $self->{limit_mfn} if ($self->{limit_mfn}); - $log->info("processing $maxmfn records..."); + $log->info("processing ",($maxmfn-$startmfn)." records..."); # read database - for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) { + for (my $mfn = $startmfn; $mfn <= $maxmfn; $mfn++) { $log->debug("mfn: $mfn\n"); @@ -252,7 +264,7 @@ } - $self->{'current_mfn'} = 1; + $self->{'current_mfn'} = $startmfn; $self->{'last_pcnt'} = 0; $log->debug("max mfn: $maxmfn"); @@ -742,6 +754,31 @@ return @arr; } +=head2 sort_arr + +Sort array ignoring case and html in data + + my @sorted = $webpac->sort_arr(@unsorted); + +=cut + +sub sort_arr { + my $self = shift; + + my $log = $self->_get_logger(); + + # FIXME add Schwartzian Transformation? + + my @sorted = sort { + $a =~ s#<[^>]+/*>##; + $b =~ s#<[^>]+/*>##; + lc($b) cmp lc($a) + } @_; + $log->debug("sorted values: ",sub { join(", ",@sorted) }); + + return @sorted; +} + =head2 data_structure @@ -798,6 +835,11 @@ } next if (! @v); + if ($tag->{'sort'}) { + @v = $self->sort_arr(@v); + $log->warn("sort within tag is usually not what you want!"); + } + # use format? if ($tag->{'format_name'}) { @v = map { $self->apply_format($tag->{'format_name'},$tag->{'format_delimiter'},$_) } @v; @@ -812,12 +854,35 @@ next; # don't return headline in data_structure! } - # does tag have type? - if ($tag->{'type'}) { - push @{$row->{$tag->{'type'}}}, @v; - } else { - push @{$row->{'display'}}, @v; - push @{$row->{'swish'}}, @v; + # delimiter will join repeatable fields + if ($tag->{'delimiter'}) { + @v = ( join($tag->{'delimiter'}, @v) ); + } + + # default types + my @types = qw(display swish); + # override by type attribute + @types = ( $tag->{'type'} ) if ($tag->{'type'}); + + foreach my $type (@types) { + # append to previous line? + $log->debug("type: $type ",sub { join(" ",@v) }, $row->{'append'} || 'no append'); + if ($tag->{'append'}) { + + # I will delimit appended part with + # delimiter (or ,) + my $d = $tag->{'delimiter'}; + # default delimiter + $d ||= ", "; + + my $last = pop @{$row->{$type}}; + $d = "" if (! $last); + $last .= $d . join($d, @v); + push @{$row->{$type}}, $last; + + } else { + push @{$row->{$type}}, @v; + } } @@ -830,6 +895,11 @@ my $name = $self->{'import_xml'}->{'indexer'}->{$field}->{'name'}; $row->{'name'} = $name ? $self->_x($name) : $field; + # post-sort all values in field + if ($self->{'import_xml'}->{'indexer'}->{$field}->{'sort'}) { + $log->warn("sort at field tag not implemented"); + } + push @ds, $row; $log->debug("row $field: ",sub { Dumper($row) });