Line # Revision Author
1 60 dpavlin #
2 # this file implements index functions using DBI
3 # and huge amounts of memory for cache speedup
4 #
5 94 dpavlin # this version doesn't support ident (which should be the location in
6 # the library). But that functionality is not used anyway...
7 #
8 60 dpavlin
9 package index_DBI;
10 use strict qw(vars);
11 use vars qw($Count);
12 use HTML::Entities;
13 188 dpavlin use URI::Escape;
14 192 dpavlin use locale;
15 60 dpavlin
16 use DBI;
17
# Index tables touched in this run: insert() bumps the per-table counter and
# close() walks the keys to know which tables to write.
my %Table = ();

# Reserved for caching prepared statement handles.
my %sth_cache = ();

# In-memory index cache, filled by insert() and dumped by close().
my ($c_table, $c_count);

# Time of the previous bench() call (module load time until the first call).
my $bench_time = time();
27 88 dpavlin
# Progress/timing helper: report on STDERR how many seconds have passed since
# the previous call (or since the module was loaded), restart the timer and
# then print $msg on a line of its own.
sub bench {
	my ($self, $msg) = @_;

	my $elapsed = time() - $bench_time;
	print STDERR "last operation took ", $elapsed, " seconds...\n";

	# restart the clock for the next measurement
	$bench_time = time();

	print STDERR "$msg\n";
}
36
# Constructor: index_DBI->new($dbd, $dsn, $user, $passwd)
#
# Connects to "DBI:$dbd:$dsn" with the given credentials and returns the new
# object. All four arguments are mandatory; a missing (false) one is fatal,
# and so is a failed connection (dies with $DBI::errstr).
sub new {
	my ($class, $dbd, $dsn, $user, $passwd) = @_;

	my $self = bless({}, $class);

	# the settings are checked in the order they are passed
	$dbd    || die "need dbi_dbd= in [global] section of configuration file";
	$dsn    || die "need dbi_dsn= in [global] section of configuration file";
	$user   || die "need dbi_user= in [global] section of configuration file";
	$passwd || die "need dbi_passwd= in [global] section of configuration file";

	# remembered so that close() can decide whether to vacuum
	$self->{dbd} = $dbd;

	my $dbh = DBI->connect("DBI:$dbd:$dsn", $user, $passwd) || die $DBI::errstr;
	$self->{dbh} = $dbh;
	$Count++;

	$self->bench("connected to $dbd as $user");

	# Force SQLite to support binary 0 in data (which shouldn't happen,
	# but it did to me). The eval keeps this from being fatal if the
	# handle rejects the attribute.
	eval {
		no warnings 'all';
		$dbh->{sqlite_handle_binary_nulls} = 1;
	};

	return $self;
}
63
# Recreate the index table named $field from scratch.
#
# An existing table is detected by trying to count its rows; when that works
# the table is dropped first. The table is then created with the columns
# item (primary key), display, count and ord.
#
# $field is interpolated into the SQL as-is.
# NOTE(review): presumably a trusted table name from the configuration --
# confirm against the callers.
#
# SQL failures are reported with warn(), never die(). Returns the true result
# of the "create table" statement, or warn()'s return value when it failed.
sub delete_and_create {
	my ($self, $field) = @_;
	my $dbh = $self->{dbh};

	#print "#### delete_and_create($field)\n";

	# FIX: this is not a good way to check if table exists!
	my $probe = $dbh->prepare("select count(*) from $field");
	my $exists = $probe && $probe->execute() && $probe->fetchrow_hashref;

	# Only one row was read, so the probe may still count as active.
	# Finish it before the drop: some drivers (SQLite, for one) refuse to
	# drop a table that an unfinished statement is still reading.
	$probe->finish if ($probe);

	if ($exists) {
		my $sql = "drop table $field";
		# do() reports its errors on the database handle
		$dbh->do($sql) || warn "SQL: $sql - ".$dbh->errstr();
	}

	my $sql = "create table $field (
			item	varchar(255),
			display	text,
			count	int,
			ord	int,
			primary key (item)
		)";

	return $dbh->do($sql) || warn "SQL: $sql ".$dbh->errstr();
}
88
# Add one occurrence of $index_data to the in-memory index of $field.
#
# Arguments:
#   $field      - index (table) name
#   $index_data - text to index; a false value is reported on STDERR and
#                 skipped
#   $display    - optional text to show for the entry; defaults to
#                 $index_data as passed in (before entities are stripped)
#
# Entries are grouped case-insensitively by uc($index_data): the first
# spelling seen is kept and later ones only increase the counter. Nothing is
# written to the database here; close() dumps the cache.
#
# Returns 1 when the value was counted, nothing when it was skipped.
sub insert {
	my ($self, $field, $index_data, $display) = @_;

	# Test the value itself. Assigning "shift || print ..." would store
	# print's return value (1) and index a bogus entry "1".
	if (! $index_data) {
		print STDERR "\$index->insert($field,NULL,...)";
		print STDERR "\$index->insert() -- no value to insert\n";
		return;
	}
	$display ||= $index_data;

	$Table{$field}++;

	#$sth_cache{$field."select"}->execute($index_data) || die "cache: $field select; ".$self->{dbh}->errstr();

	# reduce accented-letter entities to the bare letter (&eacute; -> e)
	$index_data =~ s#&(\w)(acute|cedil|circ|grave|ring|slash|tilde|uml);#$1#gi;

	my $uc = uc($index_data);
	if (! $c_table->{$field}->{$uc}) {
		#print stderr "in index: $index_data\n";
		$c_table->{$field}->{$uc} = $index_data;

		# NOTE(review): the slot holds a plain string, so ->{display}
		# is a symbolic reference: the value lands in the package hash
		# named after the item text and is shared by every field that
		# contains the same item. close() reads it back the same way,
		# so the storage cannot be changed in this sub alone.
		{
			no strict 'refs';
			$c_table->{$field}->{$uc}->{display} = $display;
		}

		$c_count->{$field}->{$uc} = 1;
	} else {
		$c_count->{$field}->{$uc}++;
	}

	return 1;
}
117
# Number of entries in index table $field whose item starts with $where
# (compared case-insensitively). When nothing matches, the size of the whole
# table is used instead. The result is never below 1.
#
# Dies when a statement cannot be prepared or executed.
sub count {
	my ($self, $field, $where) = @_;
	my $dbh = $self->{dbh};

	# run one query that yields a single value and hand that value back
	my $single_value = sub {
		my ($sql, @bind) = @_;

		my $sth = $dbh->prepare($sql) || die $dbh->errstr();
		$sth->execute(@bind) || die "sql: $sql; ".$dbh->errstr();

		my ($value) = $sth->fetchrow_array();
		return $value;
	};

	my $total = $single_value->(
		"select count(*) from $field where upper(item) like upper(?)||'%'",
		$where
	);

	# no results, count all
	$total ||= $single_value->("select count(*) from $field");

	return $total || 1;
}
143
144
# Fetch one page of entries from index table $field, ordered by ord.
#
# Arguments:
#   $field    - index (table) name, interpolated into the SQL as-is
#   $where    - optional search text; the page starts at the first item that
#               begins with it (case-insensitive) or, failing that, at the
#               first item that contains it at the start of a later word
#   $from_ord - offset relative to that starting point (default 0)
#   $rows     - page size (default 10)
#
# Returns a list of hash references with the keys item, display and ord. The
# item and display values are HTML-encoded; accented-letter entities are
# reduced to the bare letter in item and restored in display.
#
# Dies when a statement cannot be prepared or executed.
sub fetch {
	my ($self, $field, $where, $from_ord, $rows) = @_;
	my $dbh = $self->{dbh};

	# Both values end up inside the SQL text, so force them to integers
	# first; anything that is not a number cannot get through.
	$from_ord = int($from_ord || 0);
	$rows = int($rows || 10) || 10;

	if ($where) {
		# "order by ord" makes the first row returned the first match
		# in index order rather than whichever row comes back first.
		my @lookups = (
			"select ord from $field where upper(item) like upper(?)||'%' order by ord",
			# if no match is found when searching from beginning
			# of word in index, try substring match anywhere
			"select ord from $field where upper(item) like '% '||upper(?)||'%' order by ord",
		);

		foreach my $sql2 (@lookups) {
			my $sth = $dbh->prepare($sql2) || die "sql2: $sql2; ".$dbh->errstr();
			$sth->execute($where) || die "sql2: $sql2; ".$dbh->errstr();

			my $row = $sth->fetchrow_hashref;
			# only the first row is wanted; release the handle
			$sth->finish;

			next unless ($row);
			$from_ord += $row->{ord} - 1;
			last;
		}
	}

	# an offset below zero is not a valid position in the index
	$from_ord = 0 if ($from_ord < 0);

	my $sql = "select item,display,ord from $field order by ord limit $rows offset $from_ord";

	my $sth = $dbh->prepare($sql) || die "prepare: $sql; ".$dbh->errstr();
	$sth->execute() || die "execute: $sql; ".$dbh->errstr();

	my @arr;
	while (my $row = $sth->fetchrow_hashref) {
		$row->{item} = HTML::Entities::encode($row->{item},' <>&"');
		$row->{display} = HTML::Entities::encode($row->{display},'<>&"');
		# encode() has just turned "&eacute;" into "&amp;eacute;":
		# item keeps only the bare letter, display gets the entity back
		$row->{item} =~ s#&amp;(\w)(acute|cedil|circ|grave|ring|slash|tilde|uml);#$1#gi;
		$row->{display} =~ s#&amp;(\w)(acute|cedil|circ|grave|ring|slash|tilde|uml);#&$1$2;#gi;
		push @arr,$row;
	}
	return @arr;
}
190
# Write every cached index to the database, then disconnect.
#
# For each table that insert() touched, the table is recreated and, inside
# one transaction, filled with the cached items in sorted order: ord is the
# 1-based position, count the number of occurrences seen. PostgreSQL and
# SQLite databases are vacuumed afterwards.
#
# Calling close() again on an already closed object does nothing. Dies when
# the transaction cannot be started or committed, or when the insert
# statement cannot be prepared.
sub close {
	my $self = shift;

	# already closed: nothing to do, and nothing to un-count
	return if (! $self->{dbh});

	my $dbh = $self->{dbh};

	foreach my $table (keys %Table) {
		$self->bench("Crating table $table");
		$self->delete_and_create($table);

		$dbh->begin_work || die $dbh->errstr();

		$self->bench("Sorting ".$Table{$table}." (with duplicates) items in $table");
		my @keys = sort keys %{$c_table->{$table}};

		$self->bench("Dumping ".($#keys+1)." items into $table");
		my $sql = "insert into $table (ord,item,display,count) values (?,?,?,?)";
		my $sth = $dbh->prepare($sql) || die "sql: $sql; ".$dbh->errstr();

		my $ord = 0;
		foreach my $key (@keys) {
			my $item = $c_table->{$table}->{$key};

			# insert() keeps the display text behind a symbolic
			# reference named after the item, so read it that way
			my $display = do {
				no strict 'refs';
				$item->{display};
			};

			$sth->execute(++$ord,
				$item,
				$display,
				$c_count->{$table}->{$key}
			);
		}

		$dbh->commit || die $dbh->errstr();
	}

	if ($self->{dbd} =~ m/(Pg|SQLite)/) {
		$dbh->do(qq{vacuum}) || warn "vacumming failed. It shouldn't if you are using PostgreSQL or SQLite: ".$dbh->errstr();
	}

	$self->bench("disconnecting from database");

	$dbh->disconnect;
	undef $self->{dbh};

	# new() counted this connection; it is accounted for now, so that the
	# END block below can tell which objects were never closed
	$Count--;

	return;
}

END {
	# close() lowers $Count for every connection it shuts down, so whatever
	# is left here belongs to objects whose cached index was never written.
	print STDERR "index_DBI fatal error: \$index->close() not called... $Count references left!\n" if ($Count && $Count > 0);
	# FIX: debug output
#	print STDERR "usage\ttable\n";
#	foreach (keys %Table) {
#		print STDERR $Table{$_},"\t$_\n";
#	}
}
240
241 1;