--- trunk2/lib/WebPAC.pm 2004/09/12 20:31:34 431 +++ trunk2/lib/WebPAC.pm 2004/10/10 11:25:10 501 @@ -34,14 +34,20 @@ my $webpac = new WebPAC( config_file => 'name.conf', - [code_page => 'ISO-8859-2',] - [low_mem => 1,] + code_page => 'ISO-8859-2', + low_mem => 1, + filter => { + 'lower' => sub { lc($_[0]) }, + }, ); Default C is C. Default is not to use C options (see L below). +There is optinal parametar C which specify different filters which +can be applied using C notation. + This method will also read configuration files C (used by indexer and Web font-end) and configuration file specified by C @@ -123,7 +129,7 @@ $log->debug("removed '$db_file' from last run"); } - use DBM::Deep; + require DBM::Deep; my $db = new DBM::Deep $db_file; @@ -138,6 +144,8 @@ $self->{'db'} = $db; } + $log->debug("filters defined: ",Dumper($self->{'filter'})); + return $self; } @@ -185,6 +193,8 @@ $log->logcroak("need filename") if (! $arg->{'filename'}); my $code_page = $arg->{'code_page'} || '852'; + $log->logdie("can't find database ",$arg->{'filename'}) unless (glob($arg->{'filename'}.'.*')); + # store data in object $self->{'isis_filename'} = $arg->{'filename'}; $self->{'isis_code_page'} = $code_page; @@ -207,6 +217,8 @@ if (my $s = $self->{'start_mfn'}) { $log->info("skipping to MFN $s"); $startmfn = $s; + } else { + $self->{'start_mfn'} = $startmfn; } $maxmfn = $startmfn + $self->{limit_mfn} if ($self->{limit_mfn}); @@ -262,7 +274,7 @@ } - $self->{'current_mfn'} = $startmfn; + $self->{'current_mfn'} = -1; $self->{'last_pcnt'} = 0; $log->debug("max mfn: $maxmfn"); @@ -285,7 +297,15 @@ my $log = $self->_get_logger(); - my $mfn = $self->{'current_mfn'}++ || $log->logconfess("it seems that you didn't load database!"); + $log->logconfess("it seems that you didn't load database!") unless ($self->{'current_mfn'}); + + if ($self->{'current_mfn'} == -1) { + $self->{'current_mfn'} = $self->{'start_mfn'}; + } else { + $self->{'current_mfn'}++; + } + + my $mfn = $self->{'current_mfn'}; if ($mfn > $self->{'max_mfn'}) { $self->{'current_mfn'} = $self->{'max_mfn'}; @@ -302,6 +322,19 @@ } } +=head2 mfn + +Returns current record number (MFN). + + print $webpac->mfn; + +=cut + +sub mfn { + my $self = shift; + return $self->{'current_mfn'}; +} + =head2 progress_bar Draw progress bar on STDERR. @@ -327,15 +360,17 @@ $self->{'last_pcnt'} ||= 1; - my $p = int($curr * 100 / $max); + my $p = int($curr * 100 / $max) || 1; # reset on re-run if ($p < $self->{'last_pcnt'}) { $self->{'last_pcnt'} = $p; $self->{'last_t'} = time(); - $self->{'last_curr'} = 1; + $self->{'last_curr'} = undef; } + $self->{'last_t'} ||= time(); + if ($p != $self->{'last_pcnt'}) { my $last_curr = $self->{'last_curr'} || $curr; @@ -548,6 +583,10 @@ # remove eval{...} from beginning $eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s); + my $filter_name; + # remove filter{...} from beginning + $filter_name = $1 if ($format =~ s/^filter{([^}]+)}//s); + # do actual replacement of placeholders $format =~ s/v(\d+)(?:\^(\w))?/$self->get_data(\$rec,$1,$2,$i,\$found)/ges; @@ -557,6 +596,12 @@ my $eval = $self->fill_in($rec,$eval_code,$i); return if (! $self->_eval($eval)); } + if ($filter_name && $self->{'filter'}->{$filter_name}) { + $log->debug("filter '$filter_name' for $format"); + $format = $self->{'filter'}->{$filter_name}->($format); + return unless(defined($format)); + $log->debug("filter result: $format"); + } # do we have lookups? if ($format =~ /$LOOKUP_REGEX/o) { $log->debug("format '$format' has lookup"); @@ -649,6 +694,10 @@ # remove eval{...} from beginning $eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s); + my $filter_name; + # remove filter{...} from beginning + $filter_name = $1 if ($format =~ s/^filter{([^}]+)}//s); + my $prefix; my $all_found=0; @@ -682,10 +731,17 @@ } if ($eval_code) { - my $eval = $self->fill_in($rec,$eval_code,$i); - $log->debug("about to eval{",$eval,"} format: $out"); + my $eval = $self->fill_in($rec,$eval_code,$i) || return; + $log->debug("about to eval{$eval} format: $out"); return if (! $self->_eval($eval)); } + + if ($filter_name && $self->{'filter'}->{$filter_name}) { + $log->debug("about to filter{$filter_name} format: $out"); + $out = $self->{'filter'}->{$filter_name}->($out); + return unless(defined($out)); + $log->debug("filter result: $out"); + } return $out; } @@ -752,6 +808,31 @@ return @arr; } +=head2 sort_arr + +Sort array ignoring case and html in data + + my @sorted = $webpac->sort_arr(@unsorted); + +=cut + +sub sort_arr { + my $self = shift; + + my $log = $self->_get_logger(); + + # FIXME add Schwartzian Transformation? + + my @sorted = sort { + $a =~ s#<[^>]+/*>##; + $b =~ s#<[^>]+/*>##; + lc($b) cmp lc($a) + } @_; + $log->debug("sorted values: ",sub { join(", ",@sorted) }); + + return @sorted; +} + =head2 data_structure @@ -808,6 +889,11 @@ } next if (! @v); + if ($tag->{'sort'}) { + @v = $self->sort_arr(@v); + $log->warn("sort within tag is usually not what you want!"); + } + # use format? if ($tag->{'format_name'}) { @v = map { $self->apply_format($tag->{'format_name'},$tag->{'format_delimiter'},$_) } @v; @@ -822,12 +908,35 @@ next; # don't return headline in data_structure! } - # does tag have type? - if ($tag->{'type'}) { - push @{$row->{$tag->{'type'}}}, @v; - } else { - push @{$row->{'display'}}, @v; - push @{$row->{'swish'}}, @v; + # delimiter will join repeatable fields + if ($tag->{'delimiter'}) { + @v = ( join($tag->{'delimiter'}, @v) ); + } + + # default types + my @types = qw(display swish); + # override by type attribute + @types = ( $tag->{'type'} ) if ($tag->{'type'}); + + foreach my $type (@types) { + # append to previous line? + $log->debug("type: $type ",sub { join(" ",@v) }, $row->{'append'} || 'no append'); + if ($tag->{'append'}) { + + # I will delimit appended part with + # delimiter (or ,) + my $d = $tag->{'delimiter'}; + # default delimiter + $d ||= " "; + + my $last = pop @{$row->{$type}}; + $d = "" if (! $last); + $last .= $d . join($d, @v); + push @{$row->{$type}}, $last; + + } else { + push @{$row->{$type}}, @v; + } } @@ -840,6 +949,11 @@ my $name = $self->{'import_xml'}->{'indexer'}->{$field}->{'name'}; $row->{'name'} = $name ? $self->_x($name) : $field; + # post-sort all values in field + if ($self->{'import_xml'}->{'indexer'}->{$field}->{'sort'}) { + $log->warn("sort at field tag not implemented"); + } + push @ds, $row; $log->debug("row $field: ",sub { Dumper($row) }); @@ -985,7 +1099,7 @@ $log->debug("eval: ",$code," [",$ret,"]"); - return $ret || 0; + return $ret || undef; } =head2 _sort_by_order