/[webpac]/trunk2/lib/WebPAC.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk2/lib/WebPAC.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 418 by dpavlin, Thu Sep 9 18:08:38 2004 UTC revision 434 by dpavlin, Mon Sep 13 14:39:16 2004 UTC
# Line 9  use Config::IniFiles; Line 9  use Config::IniFiles;
9  use XML::Simple;  use XML::Simple;
10  use Template;  use Template;
11  use Log::Log4perl qw(get_logger :levels);  use Log::Log4perl qw(get_logger :levels);
12    use Time::HiRes qw(time);
13    
14  use Data::Dumper;  use Data::Dumper;
15    
# Line 29  This module implements methods used by W Line 30  This module implements methods used by W
30    
31  =head2 new  =head2 new
32    
33  This will create new instance of WebPAC using configuration specified by C<config_file>.  Create new instance of WebPAC using configuration specified by C<config_file>.
34    
35   my $webpac = new WebPAC(   my $webpac = new WebPAC(
36          config_file => 'name.conf',          config_file => 'name.conf',
37          [code_page => 'ISO-8859-2',]          [code_page => 'ISO-8859-2',]
38            [low_mem => 1,]
39   );   );
40    
41  Default C<code_page> is C<ISO-8859-2>.  Default C<code_page> is C<ISO-8859-2>.
42    
43  It will also read configuration files  Default is not to use C<low_mem> options (see L<MEMORY USAGE> below).
44    
45    This method will also read configuration files
46  C<global.conf> (used by indexer and Web font-end)  C<global.conf> (used by indexer and Web font-end)
47  and configuration file specified by C<config_file>  and configuration file specified by C<config_file>
48  which describes databases to be indexed.  which describes databases to be indexed.
# Line 59  sub new { Line 63  sub new {
63          my $self = {@_};          my $self = {@_};
64          bless($self, $class);          bless($self, $class);
65    
66            $self->{'start_t'} = time();
67    
68          my $log_file = $self->{'log'} || "log.conf";          my $log_file = $self->{'log'} || "log.conf";
69          Log::Log4perl->init($log_file);          Log::Log4perl->init($log_file);
70    
# Line 106  sub new { Line 112  sub new {
112                  EVAL_PERL => 1,                  EVAL_PERL => 1,
113          );          );
114    
115            # running with low_mem flag? well, use DBM::Deep then.
116            if ($self->{'low_mem'}) {
117                    $log->info("running with low_mem which impacts performance (<32 Mb memory usage)");
118    
119                    my $db_file = "data.db";
120    
121                    if (-e $db_file) {
122                            unlink $db_file or $log->logdie("can't remove '$db_file' from last run");
123                            $log->debug("removed '$db_file' from last run");
124                    }
125    
126                    require DBM::Deep;
127    
128                    my $db = new DBM::Deep $db_file;
129    
130                    $log->logdie("DBM::Deep error: $!") unless ($db);
131    
132                    if ($db->error()) {
133                            $log->logdie("can't open '$db_file' under low_mem: ",$db->error());
134                    } else {
135                            $log->debug("using file '$db_file' for DBM::Deep");
136                    }
137    
138                    $self->{'db'} = $db;
139            }
140    
141          return $self;          return $self;
142  }  }
143    
# Line 116  Open CDS/ISIS database using OpenIsis mo Line 148  Open CDS/ISIS database using OpenIsis mo
148   $webpac->open_isis(   $webpac->open_isis(
149          filename => '/data/ISIS/ISIS',          filename => '/data/ISIS/ISIS',
150          code_page => '852',          code_page => '852',
151          limit_mfn => '500',          limit_mfn => 500,
152            start_mfn => 6000,
153          lookup => [ ... ],          lookup => [ ... ],
154   );   );
155    
156  By default, ISIS code page is assumed to be C<852>.  By default, ISIS code page is assumed to be C<852>.
157    
158    If optional parametar C<start_mfn> is set, this will be first MFN to read
159    from database (so you can skip beginning of your database if you need to).
160    
161  If optional parametar C<limit_mfn> is set, it will read just 500 records  If optional parametar C<limit_mfn> is set, it will read just 500 records
162  from database in example above.  from database in example above.
163    
# Line 149  sub open_isis { Line 185  sub open_isis {
185          $log->logcroak("need filename") if (! $arg->{'filename'});          $log->logcroak("need filename") if (! $arg->{'filename'});
186          my $code_page = $arg->{'code_page'} || '852';          my $code_page = $arg->{'code_page'} || '852';
187    
188            # store data in object
189            $self->{'isis_filename'} = $arg->{'filename'};
190            $self->{'isis_code_page'} = $code_page;
191    
192          use OpenIsis;          use OpenIsis;
193    
194          #$self->{'isis_code_page'} = $code_page;          #$self->{'isis_code_page'} = $code_page;
# Line 157  sub open_isis { Line 197  sub open_isis {
197          my $cp = Text::Iconv->new($code_page,$self->{'code_page'});          my $cp = Text::Iconv->new($code_page,$self->{'code_page'});
198    
199          $log->info("reading ISIS database '",$arg->{'filename'},"'");          $log->info("reading ISIS database '",$arg->{'filename'},"'");
200            $log->debug("isis code page: $code_page");
201    
202          my $isis_db = OpenIsis::open($arg->{'filename'});          my $isis_db = OpenIsis::open($arg->{'filename'});
203    
204          my $maxmfn = OpenIsis::maxRowid( $isis_db ) || 1;          my $maxmfn = OpenIsis::maxRowid( $isis_db ) || 1;
205            my $startmfn = 1;
206    
207          $maxmfn = $self->{limit_mfn} if ($self->{limit_mfn});          if (my $s = $self->{'start_mfn'}) {
208                    $log->info("skipping to MFN $s");
209                    $startmfn = $s;
210            }
211    
212            $maxmfn = $startmfn + $self->{limit_mfn} if ($self->{limit_mfn});
213    
214          $log->info("processing $maxmfn records...");          $log->info("processing ",($maxmfn-$startmfn)." records...");
215    
216          # read database          # read database
217          for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) {          for (my $mfn = $startmfn; $mfn <= $maxmfn; $mfn++) {
218    
219    
220                  $log->debug("mfn: $mfn\n");                  $log->debug("mfn: $mfn\n");
221    
222                    my $rec;
223    
224                  # read record                  # read record
225                  my $row = OpenIsis::read( $isis_db, $mfn );                  my $row = OpenIsis::read( $isis_db, $mfn );
226                  foreach my $k (keys %{$row}) {                  foreach my $k (keys %{$row}) {
# Line 189  sub open_isis { Line 238  sub open_isis {
238                                                  $val = $l;                                                  $val = $l;
239                                          }                                          }
240    
241                                          push @{$self->{'data'}->{$mfn}->{$k}}, $val;                                          push @{$rec->{$k}}, $val;
242                                  }                                  }
243                          } else {                          } else {
244                                  push @{$self->{'data'}->{$mfn}->{'000'}}, $mfn;                                  push @{$rec->{'000'}}, $mfn;
245                          }                          }
246    
247                  }                  }
248    
249                    $log->confess("record $mfn empty?") unless ($rec);
250    
251                    # store
252                    if ($self->{'low_mem'}) {
253                            $self->{'db'}->put($mfn, $rec);
254                    } else {
255                            $self->{'data'}->{$mfn} = $rec;
256                    }
257    
258                  # create lookup                  # create lookup
                 my $rec = $self->{'data'}->{$mfn} || $log->confess("record $mfn empty?");  
259                  $self->create_lookup($rec, @{$arg->{'lookup'}});                  $self->create_lookup($rec, @{$arg->{'lookup'}});
260    
261                  $self->progress_bar($mfn,$maxmfn);                  $self->progress_bar($mfn,$maxmfn);
262    
263          }          }
264    
265          $self->{'current_mfn'} = 1;          $self->{'current_mfn'} = $startmfn;
266          $self->{'last_pcnt'} = 0;          $self->{'last_pcnt'} = 0;
267    
268          $log->debug("max mfn: $maxmfn");          $log->debug("max mfn: $maxmfn");
# Line 238  sub fetch_rec { Line 295  sub fetch_rec {
295    
296          $self->progress_bar($mfn,$self->{'max_mfn'});          $self->progress_bar($mfn,$self->{'max_mfn'});
297    
298          return $self->{'data'}->{$mfn};          if ($self->{'low_mem'}) {
299                    return $self->{'db'}->get($mfn);
300            } else {
301                    return $self->{'data'}->{$mfn};
302            }
303  }  }
304    
305  =head2 progress_bar  =head2 progress_bar
# Line 266  sub progress_bar { Line 327  sub progress_bar {
327    
328          $self->{'last_pcnt'} ||= 1;          $self->{'last_pcnt'} ||= 1;
329    
         $self->{'last_pcnt'} = $curr if ($curr < $self->{'last_pcnt'});  
   
330          my $p = int($curr * 100 / $max);          my $p = int($curr * 100 / $max);
331    
332            # reset on re-run
333            if ($p < $self->{'last_pcnt'}) {
334                    $self->{'last_pcnt'} = $p;
335                    $self->{'last_t'} = time();
336                    $self->{'last_curr'} = 1;
337            }
338    
339          if ($p != $self->{'last_pcnt'}) {          if ($p != $self->{'last_pcnt'}) {
340                  printf STDERR ("%5d / %5d [%-51s] %-2d %% \r",$curr,$max,"=" x ($p/2).">", $p );  
341                    my $last_curr = $self->{'last_curr'} || $curr;
342                    my $t = time();
343                    my $rate = ($curr - $last_curr) / (($t - $self->{'last_t'} || 1));
344                    my $eta = ($max-$curr) / ($rate || 1);
345                    printf STDERR ("%5d [%-38s] %-5d %0.1f/s %s\r",$curr,"=" x ($p/3)."$p%>", $max, $rate, $self->fmt_time($eta));
346                  $self->{'last_pcnt'} = $p;                  $self->{'last_pcnt'} = $p;
347                    $self->{'last_t'} = time();
348                    $self->{'last_curr'} = $curr;
349          }          }
350          print STDERR "\n" if ($p == 100);          print STDERR "\n" if ($p == 100);
351  }  }
352    
353    =head2 fmt_time
354    
355    Format time (in seconds) for display.
356    
357     print $webpac->fmt_time(time());
358    
359    This method is called by L<progress_bar> to display remaining time.
360    
361    =cut
362    
363    sub fmt_time {
364            my $self = shift;
365    
366            my $t = shift || 0;
367            my $out = "";
368    
369            my ($ss,$mm,$hh) = gmtime($t);
370            $out .= "${hh}h" if ($hh);
371            $out .= sprintf("%02d:%02d", $mm,$ss);
372            $out .= "  " if ($hh == 0);
373            return $out;
374    }
375    
376  =head2 open_import_xml  =head2 open_import_xml
377    
378  Read file from C<import_xml/> directory and parse it.  Read file from C<import_xml/> directory and parse it.
# Line 803  sub output_file { Line 900  sub output_file {
900    
901          my $log = $self->_get_logger();          my $log = $self->_get_logger();
902    
903          $log->logconfess("need file name") if (! $args->{'file'});          my $file = $args->{'file'} || $log->logconfess("need file name");
904    
905          $log->debug("creating file ",$args->{'file'});          $log->debug("creating file ",$file);
906    
907          open(my $fh, ">", $args->{'file'}) || $log->logdie("can't open output file '$self->{'file'}': $!");          open(my $fh, ">", $file) || $log->logdie("can't open output file '$file': $!");
908          print $fh $self->output(          print $fh $self->output(
909                  template => $args->{'template'},                  template => $args->{'template'},
910                  data => $args->{'data'},                  data => $args->{'data'},
# Line 958  B<This is different from normal Log4perl Line 1055  B<This is different from normal Log4perl
1055  also use method names, and not only classes (which are just few)  also use method names, and not only classes (which are just few)
1056  to filter logging.  to filter logging.
1057    
1058    
1059    =head1 MEMORY USAGE
1060    
1061    C<low_mem> options is double-edged sword. If enabled, WebPAC
1062    will run on memory constraint machines (which doesn't have enough
1063    physical RAM to create memory structure for whole source database).
1064    
1065    If your machine has 512Mb or more of RAM and database is around 10000 records,
1066    memory shouldn't be an issue. If you don't have enough physical RAM, you
1067    might consider using virtual memory (if your operating system is handling it
1068    well, like on FreeBSD or Linux) instead of dropping to L<DBD::Deep> to handle
1069    parsed structure of ISIS database (this is what C<low_mem> option does).
1070    
1071    Hitting swap at end of reading source database is probably o.k. However,
1072    hitting swap before 90% will dramatically decrease performance and you will
1073    be better off with C<low_mem> and using rest of availble memory for
1074    operating system disk cache (Linux is particuallary good about this).
1075    However, every access to database record will require disk access, so
1076    generation phase will be slower 10-100 times.
1077    
1078    Parsed structures are essential - you just have option to trade RAM memory
1079    (which is fast) for disk space (which is slow). Be sure to have planty of
1080    disk space if you are using C<low_mem> and thus L<DBD::Deep>.
1081    
1082    However, when WebPAC is running on desktop machines (or laptops :-), it's
1083    highly undesireable for system to start swapping. Using C<low_mem> option can
1084    reduce WecPAC memory usage to around 64Mb for same database with lookup
1085    fields and sorted indexes which stay in RAM. Performance will suffer, but
1086    memory usage will really be minimal. It might be also more confortable to
1087    run WebPAC reniced on those machines.
1088    
1089  =cut  =cut
1090    
1091  1;  1;

Legend:
Removed from v.418  
changed lines
  Added in v.434

  ViewVC Help
Powered by ViewVC 1.1.26