--- trunk2/lib/WebPAC.pm 2004/09/10 22:24:42 421 +++ trunk2/lib/WebPAC.pm 2004/09/13 14:55:13 436 @@ -9,6 +9,7 @@ use XML::Simple; use Template; use Log::Log4perl qw(get_logger :levels); +use Time::HiRes qw(time); use Data::Dumper; @@ -39,28 +40,13 @@ Default C is C. +Default is not to use C options (see L below). + This method will also read configuration files C (used by indexer and Web font-end) and configuration file specified by C which describes databases to be indexed. -C options is double-edged sword. If enabled, WebPAC -will run on memory constraint machines (which doesn't have enough -physical RAM to create memory structure for whole ISIS database). - -If your machine has 512Mb or more and database is around 10000 records, -memory shouldn't be an issue. If you don't have enough physical RAM, you -might consider using virtual memory (if your operating system is handling it -well, like on FreeBSD or Linux) instead of dropping to L to handle -parsed structure of ISIS database. - -However, when WebPAC is running on desktop machines (or laptops :-), it's -highly undesireable for system to start swapping. Using C option can -reduce WecPAC memory usage to 16Mb for same database with lookup fields and -sorted indexes which stay in RAM. Performance will suffer, but memory usage -will really be minimal. It might be also more confortable to run WebPAC reniced -on those machines. - =cut # mapping between data type and tag which specify @@ -77,6 +63,8 @@ my $self = {@_}; bless($self, $class); + $self->{'start_t'} = time(); + my $log_file = $self->{'log'} || "log.conf"; Log::Log4perl->init($log_file); @@ -126,7 +114,7 @@ # running with low_mem flag? well, use DBM::Deep then. 
if ($self->{'low_mem'}) { - $log->info("running with low_mem which impacts performance (<64 Mb memory usage)"); + $log->info("running with low_mem which impacts performance (<32 Mb memory usage)"); my $db_file = "data.db"; @@ -135,7 +123,7 @@ $log->debug("removed '$db_file' from last run"); } - use DBM::Deep; + require DBM::Deep; my $db = new DBM::Deep $db_file; @@ -144,7 +132,7 @@ if ($db->error()) { $log->logdie("can't open '$db_file' under low_mem: ",$db->error()); } else { - $log->debug("using file $db_file for DBM::Deep"); + $log->debug("using file '$db_file' for DBM::Deep"); } $self->{'db'} = $db; @@ -160,12 +148,16 @@ $webpac->open_isis( filename => '/data/ISIS/ISIS', code_page => '852', - limit_mfn => '500', + limit_mfn => 500, + start_mfn => 6000, lookup => [ ... ], ); By default, ISIS code page is assumed to be C<852>. +If optional parametar C is set, this will be first MFN to read +from database (so you can skip beginning of your database if you need to). + If optional parametar C is set, it will read just 500 records from database in example above. @@ -193,6 +185,8 @@ $log->logcroak("need filename") if (! $arg->{'filename'}); my $code_page = $arg->{'code_page'} || '852'; + $log->logdie("can't find database ",$arg->{'filename'}) unless (glob($arg->{'filename'}.'.*')); + # store data in object $self->{'isis_filename'} = $arg->{'filename'}; $self->{'isis_code_page'} = $code_page; @@ -210,13 +204,19 @@ my $isis_db = OpenIsis::open($arg->{'filename'}); my $maxmfn = OpenIsis::maxRowid( $isis_db ) || 1; + my $startmfn = 1; + + if (my $s = $self->{'start_mfn'}) { + $log->info("skipping to MFN $s"); + $startmfn = $s; + } - $maxmfn = $self->{limit_mfn} if ($self->{limit_mfn}); + $maxmfn = $startmfn + $self->{limit_mfn} if ($self->{limit_mfn}); - $log->info("processing $maxmfn records..."); + $log->info("processing ",($maxmfn-$startmfn)." 
records..."); # read database - for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) { + for (my $mfn = $startmfn; $mfn <= $maxmfn; $mfn++) { $log->debug("mfn: $mfn\n"); @@ -264,7 +264,7 @@ } - $self->{'current_mfn'} = 1; + $self->{'current_mfn'} = $startmfn; $self->{'last_pcnt'} = 0; $log->debug("max mfn: $maxmfn"); @@ -329,16 +329,52 @@ $self->{'last_pcnt'} ||= 1; - $self->{'last_pcnt'} = $curr if ($curr < $self->{'last_pcnt'}); - my $p = int($curr * 100 / $max); + + # reset on re-run + if ($p < $self->{'last_pcnt'}) { + $self->{'last_pcnt'} = $p; + $self->{'last_t'} = time(); + $self->{'last_curr'} = 1; + } + if ($p != $self->{'last_pcnt'}) { - printf STDERR ("%5d / %5d [%-51s] %-2d %% \r",$curr,$max,"=" x ($p/2).">", $p ); + + my $last_curr = $self->{'last_curr'} || $curr; + my $t = time(); + my $rate = ($curr - $last_curr) / (($t - $self->{'last_t'} || 1)); + my $eta = ($max-$curr) / ($rate || 1); + printf STDERR ("%5d [%-38s] %-5d %0.1f/s %s\r",$curr,"=" x ($p/3)."$p%>", $max, $rate, $self->fmt_time($eta)); $self->{'last_pcnt'} = $p; + $self->{'last_t'} = time(); + $self->{'last_curr'} = $curr; } print STDERR "\n" if ($p == 100); } +=head2 fmt_time + +Format time (in seconds) for display. + + print $webpac->fmt_time(time()); + +This method is called by L to display remaining time. + +=cut + +sub fmt_time { + my $self = shift; + + my $t = shift || 0; + my $out = ""; + + my ($ss,$mm,$hh) = gmtime($t); + $out .= "${hh}h" if ($hh); + $out .= sprintf("%02d:%02d", $mm,$ss); + $out .= " " if ($hh == 0); + return $out; +} + =head2 open_import_xml Read file from C directory and parse it. @@ -1021,6 +1057,37 @@ also use method names, and not only classes (which are just few) to filter logging. + +=head1 MEMORY USAGE + +C options is double-edged sword. If enabled, WebPAC +will run on memory constraint machines (which doesn't have enough +physical RAM to create memory structure for whole source database). 
+ +If your machine has 512Mb or more of RAM and database is around 10000 records, +memory shouldn't be an issue. If you don't have enough physical RAM, you +might consider using virtual memory (if your operating system is handling it +well, like on FreeBSD or Linux) instead of dropping to L<DBM::Deep> to handle +parsed structure of ISIS database (this is what C<low_mem> option does). + +Hitting swap at end of reading source database is probably o.k. However, +hitting swap before 90% will dramatically decrease performance and you will +be better off with C<low_mem> and using rest of available memory for +operating system disk cache (Linux is particularly good about this). +However, every access to database record will require disk access, so +generation phase will be 10-100 times slower. + +Parsed structures are essential - you just have option to trade RAM memory +(which is fast) for disk space (which is slow). Be sure to have plenty of +disk space if you are using C<low_mem> and thus L<DBM::Deep>. + +However, when WebPAC is running on desktop machines (or laptops :-), it's +highly undesirable for system to start swapping. Using C<low_mem> option can +reduce WebPAC memory usage to around 64Mb for same database with lookup +fields and sorted indexes which stay in RAM. Performance will suffer, but +memory usage will really be minimal. It might also be more comfortable to +run WebPAC reniced on those machines. + =cut 1;