/[webpac]/trunk2/lib/WebPAC.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk2/lib/WebPAC.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 375 by dpavlin, Sun Jun 20 17:52:41 2004 UTC revision 421 by dpavlin, Fri Sep 10 22:24:42 2004 UTC
# Line 29  This module implements methods used by W Line 29  This module implements methods used by W
29    
30  =head2 new  =head2 new
31    
32  This will create new instance of WebPAC using configuration specified by C<config_file>.  Create new instance of WebPAC using configuration specified by C<config_file>.
33    
34   my $webpac = new WebPAC(   my $webpac = new WebPAC(
35          config_file => 'name.conf',          config_file => 'name.conf',
36          [code_page => 'ISO-8859-2',]          [code_page => 'ISO-8859-2',]
37            [low_mem => 1,]
38   );   );
39    
40  Default C<code_page> is C<ISO-8859-2>.  Default C<code_page> is C<ISO-8859-2>.
41    
42  It will also read configuration files  This method will also read configuration files
43  C<global.conf> (used by indexer and Web font-end)  C<global.conf> (used by indexer and Web font-end)
44  and configuration file specified by C<config_file>  and configuration file specified by C<config_file>
45  which describes databases to be indexed.  which describes databases to be indexed.
46    
47    C<low_mem> options is double-edged sword. If enabled, WebPAC
48    will run on memory constraint machines (which doesn't have enough
49    physical RAM to create memory structure for whole ISIS database).
50    
51    If your machine has 512Mb or more and database is around 10000 records,
52    memory shouldn't be an issue. If you don't have enough physical RAM, you
53    might consider using virtual memory (if your operating system is handling it
54    well, like on FreeBSD or Linux) instead of dropping to L<DBD::Deep> to handle
55    parsed structure of ISIS database.
56    
57    However, when WebPAC is running on desktop machines (or laptops :-), it's
58    highly undesireable for system to start swapping. Using C<low_mem> option can
59    reduce WecPAC memory usage to 16Mb for same database with lookup fields and
60    sorted indexes which stay in RAM. Performance will suffer, but memory usage
61    will really be minimal. It might be also more confortable to run WebPAC reniced
62    on those machines.
63    
64  =cut  =cut
65    
66  # mapping between data type and tag which specify  # mapping between data type and tag which specify
# Line 106  sub new { Line 124  sub new {
124                  EVAL_PERL => 1,                  EVAL_PERL => 1,
125          );          );
126    
127            # running with low_mem flag? well, use DBM::Deep then.
128            if ($self->{'low_mem'}) {
129                    $log->info("running with low_mem which impacts performance (<64 Mb memory usage)");
130    
131                    my $db_file = "data.db";
132    
133                    if (-e $db_file) {
134                            unlink $db_file or $log->logdie("can't remove '$db_file' from last run");
135                            $log->debug("removed '$db_file' from last run");
136                    }
137    
138                    use DBM::Deep;
139    
140                    my $db = new DBM::Deep $db_file;
141    
142                    $log->logdie("DBM::Deep error: $!") unless ($db);
143    
144                    if ($db->error()) {
145                            $log->logdie("can't open '$db_file' under low_mem: ",$db->error());
146                    } else {
147                            $log->debug("using file $db_file for DBM::Deep");
148                    }
149    
150                    $self->{'db'} = $db;
151            }
152    
153          return $self;          return $self;
154  }  }
155    
# Line 149  sub open_isis { Line 193  sub open_isis {
193          $log->logcroak("need filename") if (! $arg->{'filename'});          $log->logcroak("need filename") if (! $arg->{'filename'});
194          my $code_page = $arg->{'code_page'} || '852';          my $code_page = $arg->{'code_page'} || '852';
195    
196            # store data in object
197            $self->{'isis_filename'} = $arg->{'filename'};
198            $self->{'isis_code_page'} = $code_page;
199    
200          use OpenIsis;          use OpenIsis;
201    
202          #$self->{'isis_code_page'} = $code_page;          #$self->{'isis_code_page'} = $code_page;
# Line 157  sub open_isis { Line 205  sub open_isis {
205          my $cp = Text::Iconv->new($code_page,$self->{'code_page'});          my $cp = Text::Iconv->new($code_page,$self->{'code_page'});
206    
207          $log->info("reading ISIS database '",$arg->{'filename'},"'");          $log->info("reading ISIS database '",$arg->{'filename'},"'");
208            $log->debug("isis code page: $code_page");
209    
210          my $isis_db = OpenIsis::open($arg->{'filename'});          my $isis_db = OpenIsis::open($arg->{'filename'});
211    
# Line 169  sub open_isis { Line 218  sub open_isis {
218          # read database          # read database
219          for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) {          for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) {
220    
221    
222                    $log->debug("mfn: $mfn\n");
223    
224                    my $rec;
225    
226                  # read record                  # read record
227                  my $row = OpenIsis::read( $isis_db, $mfn );                  my $row = OpenIsis::read( $isis_db, $mfn );
228                  foreach my $k (keys %{$row}) {                  foreach my $k (keys %{$row}) {
# Line 186  sub open_isis { Line 240  sub open_isis {
240                                                  $val = $l;                                                  $val = $l;
241                                          }                                          }
242    
243                                          push @{$self->{'data'}->{$mfn}->{$k}}, $val;                                          push @{$rec->{$k}}, $val;
244                                  }                                  }
245                          } else {                          } else {
246                                  push @{$self->{'data'}->{$mfn}->{'000'}}, $mfn;                                  push @{$rec->{'000'}}, $mfn;
247                          }                          }
248    
249                  }                  }
250    
251                    $log->confess("record $mfn empty?") unless ($rec);
252    
253                    # store
254                    if ($self->{'low_mem'}) {
255                            $self->{'db'}->put($mfn, $rec);
256                    } else {
257                            $self->{'data'}->{$mfn} = $rec;
258                    }
259    
260                  # create lookup                  # create lookup
                 my $rec = $self->{'data'}->{$mfn};  
261                  $self->create_lookup($rec, @{$arg->{'lookup'}});                  $self->create_lookup($rec, @{$arg->{'lookup'}});
262    
263                    $self->progress_bar($mfn,$maxmfn);
264    
265          }          }
266    
267          $self->{'current_mfn'} = 1;          $self->{'current_mfn'} = 1;
268            $self->{'last_pcnt'} = 0;
269    
270            $log->debug("max mfn: $maxmfn");
271    
272          # store max mfn and return it.          # store max mfn and return it.
273          return $self->{'max_mfn'} = $maxmfn;          return $self->{'max_mfn'} = $maxmfn;
# Line 228  sub fetch_rec { Line 295  sub fetch_rec {
295                  return;                  return;
296          }          }
297    
298          return $self->{'data'}->{$mfn};          $self->progress_bar($mfn,$self->{'max_mfn'});
299    
300            if ($self->{'low_mem'}) {
301                    return $self->{'db'}->get($mfn);
302            } else {
303                    return $self->{'data'}->{$mfn};
304            }
305    }
306    
307    =head2 progress_bar
308    
309    Draw progress bar on STDERR.
310    
311     $webpac->progress_bar($current, $max);
312    
313    =cut
314    
315    sub progress_bar {
316            my $self = shift;
317    
318            my ($curr,$max) = @_;
319    
320            my $log = $self->_get_logger();
321    
322            $log->logconfess("no current value!") if (! $curr);
323            $log->logconfess("no maximum value!") if (! $max);
324    
325            if ($curr > $max) {
326                    $max = $curr;
327                    $log->debug("overflow to $curr");
328            }
329    
330            $self->{'last_pcnt'} ||= 1;
331    
332            $self->{'last_pcnt'} = $curr if ($curr < $self->{'last_pcnt'});
333    
334            my $p = int($curr * 100 / $max);
335            if ($p != $self->{'last_pcnt'}) {
336                    printf STDERR ("%5d / %5d [%-51s] %-2d %% \r",$curr,$max,"=" x ($p/2).">", $p );
337                    $self->{'last_pcnt'} = $p;
338            }
339            print STDERR "\n" if ($p == 100);
340  }  }
341    
342  =head2 open_import_xml  =head2 open_import_xml
# Line 290  sub create_lookup { Line 398  sub create_lookup {
398          $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);          $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
399    
400          foreach my $i (@_) {          foreach my $i (@_) {
401                  if ($i->{'eval'}) {                  $log->logconfess("need key") unless defined($i->{'key'});
402                          my $eval = $self->fill_in($rec,$i->{'eval'});                  $log->logconfess("need val") unless defined($i->{'val'});
403                          my $key = $self->fill_in($rec,$i->{'key'});  
404                          my @val = $self->fill_in($rec,$i->{'val'});                  if (defined($i->{'eval'})) {
405                          if ($key && @val && eval $eval) {                          # eval first, so we can skip fill_in for key and val
406                            my $eval = $self->fill_in($rec,$i->{'eval'}) || next;
407                            if ($self->_eval($eval)) {
408                                    my $key = $self->fill_in($rec,$i->{'key'}) || next;
409                                    my @val = $self->fill_in($rec,$i->{'val'}) || next;
410                                  $log->debug("stored $key = ",sub { join(" | ",@val) });                                  $log->debug("stored $key = ",sub { join(" | ",@val) });
411                                  push @{$self->{'lookup'}->{$key}}, @val;                                  push @{$self->{'lookup'}->{$key}}, @val;
412                          }                          }
413                  } else {                  } else {
414                          my $key = $self->fill_in($rec,$i->{'key'});                          my $key = $self->fill_in($rec,$i->{'key'}) || next;
415                          my @val = $self->fill_in($rec,$i->{'val'});                          my @val = $self->fill_in($rec,$i->{'val'}) || next;
416                          if ($key && @val) {                          $log->debug("stored $key = ",sub { join(" | ",@val) });
417                                  $log->debug("stored $key = ",sub { join(" | ",@val) });                          push @{$self->{'lookup'}->{$key}}, @val;
                                 push @{$self->{'lookup'}->{$key}}, @val;  
                         }  
418                  }                  }
419          }          }
420  }  }
# Line 335  sub get_data { Line 445  sub get_data {
445    
446          if ($$rec->{$f}) {          if ($$rec->{$f}) {
447                  return '' if (! $$rec->{$f}->[$i]);                  return '' if (! $$rec->{$f}->[$i]);
448                    no strict 'refs';
449                  if ($sf && $$rec->{$f}->[$i]->{$sf}) {                  if ($sf && $$rec->{$f}->[$i]->{$sf}) {
450                          $$found++ if (defined($$found));                          $$found++ if (defined($$found));
451                          return $$rec->{$f}->[$i]->{$sf};                          return $$rec->{$f}->[$i]->{$sf};
# Line 375  Following example will read second value Line 486  Following example will read second value
486  This function B<does not> perform parsing of format to inteligenty skip  This function B<does not> perform parsing of format to inteligenty skip
487  delimiters before fields which aren't used.  delimiters before fields which aren't used.
488    
489    This method will automatically decode UTF-8 string to local code page
490    if needed.
491    
492  =cut  =cut
493    
494  sub fill_in {  sub fill_in {
# Line 390  sub fill_in { Line 504  sub fill_in {
504          # FIXME remove for speedup?          # FIXME remove for speedup?
505          $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);          $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
506    
507            if (utf8::is_utf8($format)) {
508                    $format = $self->_x($format);
509            }
510    
511          my $found = 0;          my $found = 0;
512    
513          my $eval_code;          my $eval_code;
# Line 609  It is used later to produce output. Line 727  It is used later to produce output.
727   my @ds = $webpac->data_structure($rec);   my @ds = $webpac->data_structure($rec);
728    
729  This method will also set C<$webpac->{'currnet_filename'}> if there is  This method will also set C<$webpac->{'currnet_filename'}> if there is
730  <filename> tag in C<import_xml>.  <filename> tag in C<import_xml> and C<$webpac->{'headline'}> if there is
731    <headline> tag.
732    
733  =cut  =cut
734    
# Line 622  sub data_structure { Line 741  sub data_structure {
741          $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);          $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
742    
743          undef $self->{'currnet_filename'};          undef $self->{'currnet_filename'};
744            undef $self->{'headline'};
745    
746          my @sorted_tags;          my @sorted_tags;
747          if ($self->{tags_by_order}) {          if ($self->{tags_by_order}) {
# Line 659  sub data_structure { Line 779  sub data_structure {
779                                  @v = map { $self->apply_format($tag->{'format_name'},$tag->{'format_delimiter'},$_) } @v;                                  @v = map { $self->apply_format($tag->{'format_name'},$tag->{'format_delimiter'},$_) } @v;
780                          }                          }
781    
782                            if ($field eq 'filename') {
783                                    $self->{'current_filename'} = join('',@v);
784                                    $log->debug("filename: ",$self->{'current_filename'});
785                            } elsif ($field eq 'headline') {
786                                    $self->{'headline'} .= join('',@v);
787                                    $log->debug("headline: ",$self->{'headline'});
788                                    next; # don't return headline in data_structure!
789                            }
790    
791                          # does tag have type?                          # does tag have type?
792                          if ($tag->{'type'}) {                          if ($tag->{'type'}) {
793                                  push @{$row->{$tag->{'type'}}}, @v;                                  push @{$row->{$tag->{'type'}}}, @v;
# Line 667  sub data_structure { Line 796  sub data_structure {
796                                  push @{$row->{'swish'}}, @v;                                  push @{$row->{'swish'}}, @v;
797                          }                          }
798    
                         if ($field eq 'filename') {  
                                 $self->{'current_filename'} = join('',@v);  
                                 $log->debug("filename: ",$self->{'current_filename'});  
                         }  
799    
800                  }                  }
801    
# Line 721  sub output { Line 846  sub output {
846          return $out;          return $out;
847  }  }
848    
849    =head2 output_file
850    
851    Create output from in-memory data structure using Template Toolkit template
852    to a file.
853    
854     $webpac->output_file(
855            file => 'out.txt',
856            template => 'text.tt',
857            data => @ds
858     );
859    
860    =cut
861    
862    sub output_file {
863            my $self = shift;
864    
865            my $args = {@_};
866    
867            my $log = $self->_get_logger();
868    
869            my $file = $args->{'file'} || $log->logconfess("need file name");
870    
871            $log->debug("creating file ",$file);
872    
873            open(my $fh, ">", $file) || $log->logdie("can't open output file '$file': $!");
874            print $fh $self->output(
875                    template => $args->{'template'},
876                    data => $args->{'data'},
877            ) || $log->logdie("print: $!");
878            close($fh) || $log->logdie("close: $!");
879    }
880    
881  =head2 apply_format  =head2 apply_format
882    
883  Apply format specified in tag with C<format_name="name"> and  Apply format specified in tag with C<format_name="name"> and

Legend:
Removed from v.375  
changed lines
  Added in v.421

  ViewVC Help
Powered by ViewVC 1.1.26