1 |
dpavlin |
1 |
package WebPAC::Input; |
2 |
|
|
|
3 |
|
|
use warnings; |
4 |
|
|
use strict; |
5 |
|
|
|
6 |
|
|
=head1 NAME |
7 |
|
|
|
8 |
dpavlin |
3 |
WebPAC::Input - core module for input file format |
9 |
dpavlin |
1 |
|
10 |
|
|
=head1 VERSION |
11 |
|
|
|
12 |
|
|
Version 0.01 |
13 |
|
|
|
14 |
|
|
=cut |
15 |
|
|
|
16 |
|
|
# Package version string; the VERSION section of the POD states the same value.
our $VERSION = '0.01'; |
17 |
|
|
|
18 |
|
|
=head1 SYNOPSIS |
19 |
|
|
|
20 |
dpavlin |
3 |
This module loads a particular loader module and executes its functions. |
21 |
dpavlin |
1 |
|
22 |
|
|
Perhaps a little code snippet. |
23 |
|
|
|
24 |
|
|
use WebPAC::Input; |
25 |
|
|
|
26 |
dpavlin |
3 |
my $db = WebPAC::Input->new( |
27 |
|
|
format => 'NULL', |
28 |
|
|
config => $config, |
29 |
dpavlin |
6 |
lookup => $lookup_obj, |
30 |
dpavlin |
10 |
low_mem => 1, |
31 |
dpavlin |
3 |
); |
32 |
dpavlin |
1 |
|
33 |
dpavlin |
3 |
$db->open('/path/to/database'); |
34 |
|
|
print "database size: ",$db->size,"\n"; |
35 |
|
|
while (my $row = $db->fetch) { |
36 |
|
|
... |
37 |
|
|
} |
38 |
dpavlin |
1 |
|
39 |
|
|
=head1 FUNCTIONS |
40 |
|
|
|
41 |
dpavlin |
3 |
=head2 new |
42 |
dpavlin |
1 |
|
43 |
dpavlin |
3 |
Create new input database object. |
44 |
|
|
|
45 |
dpavlin |
9 |
my $db = WebPAC::Input->new( |
46 |
|
|
format => 'NULL', |
47 |
|
|
code_page => 'ISO-8859-2', |
48 |
dpavlin |
10 |
low_mem => 1, |
49 |
dpavlin |
9 |
); |
50 |
dpavlin |
3 |
|
51 |
dpavlin |
9 |
Optional parameter C<code_page> specifies the application code page (which will be |
52 |
|
|
used internally). This should probably be your terminal encoding, and by |
53 |
|
|
default, it is C<ISO-8859-2>. |
54 |
|
|
|
55 |
dpavlin |
10 |
By default the C<low_mem> option is not used (see L</"MEMORY USAGE"> below). |
56 |
|
|
|
57 |
dpavlin |
1 |
=cut |
58 |
|
|
|
59 |
dpavlin |
3 |
# Constructor: build a WebPAC::Input object from a flat list of key/value
# arguments, all of which are stored in the object as-is.
#
# Recognised keys:
#   code_page - application code page; defaults to 'ISO-8859-2' when unset
#               or false
#   low_mem   - when true, (re)create the on-disk DBM::Deep store "data.db"
#               in the current directory and keep its handle in
#               $self->{'db'}
#
# Returns the blessed object.
# Dies (through the logger's logdie) if a leftover "data.db" cannot be
# removed or if DBM::Deep cannot open the file.
sub new {
    my $class = shift;
    my $self  = {@_};
    bless($self, $class);

    $self->{'code_page'} ||= 'ISO-8859-2';

    # NOTE(review): _get_logger is not defined in this file; presumably it is
    # inherited from a base class -- confirm the inheritance is set up.
    my $log = $self->_get_logger;

    # running with low_mem flag? well, use DBM::Deep then.
    if ($self->{'low_mem'}) {
        $log->info("running with low_mem which impacts performance (<32 Mb memory usage)");

        my $db_file = "data.db";

        # Always start from an empty store: drop whatever the last run left.
        if (-e $db_file) {
            unlink $db_file or $log->logdie("can't remove '$db_file' from last run");
            $log->debug("removed '$db_file' from last run");
        }

        # Loaded lazily so DBM::Deep is only required when low_mem is used.
        require DBM::Deep;

        # Direct method call instead of indirect object syntax
        # ("new DBM::Deep ..."), which is ambiguous to the parser.
        my $db = DBM::Deep->new($db_file);

        $log->logdie("DBM::Deep error: $!") unless ($db);

        # NOTE(review): error() belongs to the older DBM::Deep API; newer
        # releases may report failures by throwing instead -- confirm against
        # the installed version.
        if ($db->error()) {
            $log->logdie("can't open '$db_file' under low_mem: ",$db->error());
        } else {
            $log->debug("using file '$db_file' for DBM::Deep");
        }

        $self->{'db'} = $db;
    }

    # $self is a blessed reference and therefore always true, so the former
    # "$self ? return $self : return undef" had an unreachable false branch.
    return $self;
}
96 |
|
|
|
97 |
dpavlin |
3 |
=head1 MEMORY USAGE |
98 |
dpavlin |
1 |
|
99 |
dpavlin |
3 |
The C<low_mem> option is a double-edged sword. If enabled, WebPAC |
100 |
|
|
will run on memory-constrained machines (which don't have enough |
101 |
|
|
physical RAM to create the in-memory structure for the whole source database). |
102 |
dpavlin |
1 |
|
103 |
dpavlin |
3 |
If your machine has 512Mb or more of RAM and database is around 10000 records, |
104 |
|
|
memory shouldn't be an issue. If you don't have enough physical RAM, you |
105 |
|
|
might consider using virtual memory (if your operating system is handling it |
106 |
|
|
well, like on FreeBSD or Linux) instead of dropping to L<DBM::Deep> to handle |
107 |
|
|
parsed structure of ISIS database (this is what C<low_mem> option does). |
108 |
dpavlin |
1 |
|
109 |
dpavlin |
3 |
Hitting swap at end of reading source database is probably o.k. However, |
110 |
|
|
hitting swap before 90% will dramatically decrease performance and you will |
111 |
|
|
be better off with C<low_mem> and using the rest of available memory for |
112 |
|
|
operating system disk cache (Linux is particularly good about this). |
113 |
|
|
However, every access to database record will require disk access, so |
114 |
|
|
generation phase will be 10-100 times slower. |
115 |
|
|
|
116 |
|
|
Parsed structures are essential - you just have option to trade RAM memory |
117 |
|
|
(which is fast) for disk space (which is slow). Be sure to have plenty of |
118 |
|
|
disk space if you are using C<low_mem> and thus L<DBM::Deep>. |
119 |
|
|
|
120 |
|
|
However, when WebPAC is running on desktop machines (or laptops :-), it's |
121 |
|
|
highly undesirable for system to start swapping. Using C<low_mem> option can |
122 |
|
|
reduce WebPAC memory usage to around 64Mb for same database with lookup |
123 |
|
|
fields and sorted indexes which stay in RAM. Performance will suffer, but |
124 |
|
|
memory usage will really be minimal. It might be also more comfortable to |
125 |
|
|
run WebPAC reniced on those machines. |
126 |
|
|
|
127 |
|
|
|
128 |
|
|
=head1 AUTHOR |
129 |
|
|
|
130 |
|
|
Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >> |
131 |
|
|
|
132 |
dpavlin |
1 |
=head1 COPYRIGHT & LICENSE |
133 |
|
|
|
134 |
|
|
Copyright 2005 Dobrica Pavlinusic, All Rights Reserved. |
135 |
|
|
|
136 |
|
|
This program is free software; you can redistribute it and/or modify it |
137 |
|
|
under the same terms as Perl itself. |
138 |
|
|
|
139 |
|
|
=cut |
140 |
|
|
|
141 |
|
|
1; # End of WebPAC::Input |