1 |
package oreilly_de_catalog::main; |
2 |
|
3 |
=pod |
4 |
|
5 |
Testing Instructions: |
6 |
|
7 |
Search for Kochbuch: Umlaute correct? |
8 |
|
9 |
Search for 389721: Many fallback hits? |
10 |
|
11 |
Search for an existing ISBN with correct dashes, with wrong dashes and |
12 |
without dashes. If the ISBN is correct, you should get only one result. |
13 |
|
14 |
Search for books that have more than one author: ampersand correctly |
15 |
escaped? |
16 |
|
17 |
Search for C#, C++, .net |
18 |
|
19 |
Are the flags there and correct? E.g. "Java Foundation Classes" |
20 |
|
21 |
Is the -M test for the stylesheet disabled? |
22 |
|
23 |
Search for repkover which hits all "colophon"s. |
24 |
|
25 |
Search while the indexer is running. |
26 |
|
27 |
Search for <, &, >, ". Kein Server Error? Kommt schoen "sed & awk"? |
28 |
|
29 |
Search for "über". Viele deutsche Buecher? Erscheint richtig |
30 |
geschriebenes "ueber" im Textfeld? Benutze gleich das "ue" uns suche |
31 |
nach "fuer" (mit echtem Umlaut). |
32 |
|
33 |
Lego suchen wegen des Registered Zeichens. Aeh? |
34 |
|
35 |
index_ora muss einmal mit --debug gelaufen sein und die ALL Dump Datei |
36 |
muss man sich anschauen wg. Umlauten und Ticks. HTML::Entities (in |
37 |
HTML::Parser) muss mit Support for high entities compiliert werden. |
38 |
|
39 |
Search for word with "-". |
40 |
|
41 |
Search for "-" alone. Should give no match. |
42 |
|
43 |
|
44 |
|
45 |
=cut |
46 |
|
47 |
use Apache::Constants (); |
48 |
use Apache::Request; |
49 |
use Apache::Session::Counted; |
50 |
use Fcntl qw(O_RDONLY); |
51 |
use HTTP::Date (); |
52 |
use Apache::Util (); |
53 |
use Apache::File (); |
54 |
use WAIT::Database (); |
55 |
|
56 |
use oreilly_de_catalog::config; |
57 |
use oreilly_de_catalog::wait_filter; |
58 |
|
59 |
use strict; |
60 |
|
61 |
our $DEBUG_s = 0; # Searchterm wrt. UTF-8 |
62 |
our $XSLCACHE = {}; |
63 |
|
64 |
sub handler { |
65 |
my($r) = @_; |
66 |
my $req = __PACKAGE__->new(R => $r); |
67 |
$req->dispatch; |
68 |
} |
69 |
|
70 |
foreach my $fld ( qw(R CGI IF DEBUG DONE) ) { |
71 |
no strict 'refs'; |
72 |
*{"get_$fld"} = sub { |
73 |
my ($self) = shift; |
74 |
$self->{$fld}; |
75 |
}; |
76 |
*{"set_$fld"} = sub { |
77 |
my ($self) = shift; |
78 |
$self->{$fld} = shift; |
79 |
}; |
80 |
} |
81 |
|
82 |
sub session { |
83 |
my oreilly_de_catalog::main $self = shift; |
84 |
return $self->{SESSION} if defined $self->{SESSION}; |
85 |
my $cgi = $self->get_CGI; |
86 |
my $sid = $cgi->param('SESSION'); |
87 |
my %session; |
88 |
require Apache::Session::Counted; |
89 |
my $cntfile = $self->counterfile; |
90 |
my $rundatadir = oreilly_de_catalog::config::RUNDATADIR or die "RUNDATADIR not set"; |
91 |
tie %session, 'Apache::Session::Counted', |
92 |
$sid, { |
93 |
Directory => "$rundatadir/sessions", |
94 |
DirLevels => 1, |
95 |
CounterFile => $cntfile, |
96 |
}; |
97 |
$self->{SESSION} = \%session; |
98 |
} |
99 |
|
100 |
sub sessionid { |
101 |
shift->session->{_session_id}; |
102 |
} |
103 |
|
104 |
sub counterfile { |
105 |
my oreilly_de_catalog::main $self = shift; |
106 |
my $rundatadir = oreilly_de_catalog::config::RUNDATADIR or die "RUNDATADIR not set"; |
107 |
sprintf "%s/cnt/%s", $rundatadir, $self->today; |
108 |
} |
109 |
|
110 |
sub today { |
111 |
my oreilly_de_catalog::main $self = shift; |
112 |
return $self->{TODAY} if defined $self->{TODAY}; |
113 |
my @today = localtime; |
114 |
$today[5]+=1900; |
115 |
$today[4]++; |
116 |
sprintf "%04d-%02d-%02d", @today[5,4,3]; |
117 |
} |
118 |
|
119 |
sub handle_session { |
120 |
my $self = shift; |
121 |
my $oldsearchterm; |
122 |
$oldsearchterm = $self->session->{SearchTerm}; |
123 |
my $cgi = $self->get_CGI; |
124 |
my $r = $self->get_R; |
125 |
my $no = $cgi->param("NO"); |
126 |
$no = "" unless defined $no; |
127 |
my $oldsessionid = $cgi->param("SESSION"); |
128 |
if (length $no) { |
129 |
my $waha = $self->session->{WaitActiveHitsArr}; |
130 |
my $obj = $waha->[$no]; |
131 |
my $due = $cgi->param("DUE") || "index"; |
132 |
my $docid = $obj->{rec}{docid}; |
133 |
my $redir = sprintf( |
134 |
"http://%s%s", |
135 |
$r->server->server_hostname, |
136 |
$self->doclink($docid, $due), |
137 |
); |
138 |
# warn "INFO: redir[$redir]"; |
139 |
$r->header_out("Location",$redir); |
140 |
my $stat = Apache::Constants::REDIRECT(); |
141 |
$r->status($stat); |
142 |
$r->send_http_header; |
143 |
my $fh = $self->statslogfile; |
144 |
$fh->printf("%s %s %s [%s] %d %d %s %s\n", |
145 |
HTTP::Date::time2iso(), # date blank time |
146 |
$oldsessionid, # coming from |
147 |
"s", # eventtype select |
148 |
Apache::Util::escape_uri($oldsearchterm), # what we searched |
149 |
$no, # selected number |
150 |
$#$waha, # highest possible number |
151 |
$docid, # book ID |
152 |
$due, # document within book |
153 |
); |
154 |
close $fh or die "Could not close statslog: $!"; |
155 |
$self->set_DONE($stat); |
156 |
return; |
157 |
} |
158 |
my $fh = $self->statslogfile; |
159 |
$fh->printf("%s %s %s [%s] [%s]\n", |
160 |
HTTP::Date::time2iso(), # date blank time |
161 |
$oldsessionid, # coming from |
162 |
"c", # eventtype change |
163 |
Apache::Util::escape_uri($oldsearchterm), # what we searched |
164 |
Apache::Util::escape_uri($self->searchterm), # new query |
165 |
); |
166 |
close $fh or die "Could not close statslog: $!"; |
167 |
} |
168 |
|
169 |
sub dispatch { |
170 |
my($self) = @_; |
171 |
|
172 |
my $r = $self->get_R; |
173 |
my $cgi = Apache::Request->new($r); |
174 |
$self->set_CGI($cgi); |
175 |
|
176 |
my $uri = $r->uri; |
177 |
my $args = $r->args || ""; |
178 |
my @m; |
179 |
|
180 |
# If they are victim of statistics, do statistics and redirect or |
181 |
# let them pass by as appropriate |
182 |
if ($cgi->param("SESSION")) { |
183 |
$self->handle_session; |
184 |
my $done = $self->get_DONE; |
185 |
return $done if $done; |
186 |
} |
187 |
|
188 |
# Do they want an interface? |
189 |
my($if) = $self->interface; |
190 |
|
191 |
# Does somebody need debugging? |
192 |
my $debug = $cgi->param("DEBUG") || ""; |
193 |
if ($debug) { # debug |
194 |
$self->set_DEBUG($debug); |
195 |
my @param = $cgi->param; |
196 |
warn "uri[$uri]args[$args]param[@param]if[$if]"; |
197 |
} |
198 |
|
199 |
if (rand(1) < oreilly_de_catalog::config::STATSSAMPLE ) { |
200 |
$self->{SetupForStats} = 1; |
201 |
} |
202 |
|
203 |
$self->wait_query; |
204 |
|
205 |
# now $self contains everything we need to construct the page. |
206 |
if ( $self->{SetupForStats} ) { |
207 |
my $session = $self->session; |
208 |
for my $k (qw(SearchTerm WaitActiveHitsArr)) { |
209 |
$session->{$k} = $self->{$k}; |
210 |
} |
211 |
my $fh = $self->statslogfile; |
212 |
$fh->printf("%s %s %s [%s] %d %4.2f\n", |
213 |
HTTP::Date::time2iso(), # date blank time |
214 |
$self->sessionid, # starting session |
215 |
"n", # eventtype new |
216 |
Apache::Util::escape_uri($self->searchterm), # what we searched |
217 |
$self->{NumHits}, # hits we found |
218 |
oreilly_de_catalog::config::STATSSAMPLE, # current representativity |
219 |
); |
220 |
close $fh or die "Could not close statslog: $!"; |
221 |
} |
222 |
# require Devel::Peek;Devel::Peek::Dump($self->{WaitActiveHitsArr}[2]{rec}{title}); |
223 |
|
224 |
my $content; |
225 |
if ($if) { |
226 |
my $xml = $self->construct_xml; |
227 |
# require Devel::Peek;Devel::Peek::Dump($xml); |
228 |
$content = $self->convert_xml($xml); |
229 |
} else { |
230 |
$content = $self->direct_html; # can go away once we are sure XSLT always wins |
231 |
} |
232 |
|
233 |
$r->set_last_modified(int(time/86400)*86400); |
234 |
if ($r->protocol =~ /(\d)\.(\d)/ && $1 >= 1 && $2 >= 1){ |
235 |
$r->header_out('Cache-Control', "max-age=" . 24*60*60); |
236 |
} else { |
237 |
$r->header_out('Expires', |
238 |
HTTP::Date::time2str(time + 24*60*60)); |
239 |
} |
240 |
|
241 |
require Encode; |
242 |
my $enccontent = Encode::encode_utf8($content); |
243 |
$content = $enccontent if $enccontent; |
244 |
$r->set_content_length(length $content); |
245 |
|
246 |
|
247 |
$r->send_http_header; |
248 |
$r->print($content); |
249 |
Apache::Constants::OK(); |
250 |
} |
251 |
|
252 |
sub interface { |
253 |
my $self = shift; |
254 |
return $self->{IF} if defined $self->{IF}; |
255 |
my $cgi = $self->get_CGI; |
256 |
my $if = $cgi->param("IF"); |
257 |
$if ||= oreilly_de_catalog::config::DEFAULTSTYLESHEET; |
258 |
$self->set_IF($if); |
259 |
$if; |
260 |
} |
261 |
|
262 |
sub convert_xml { |
263 |
my $self = shift; |
264 |
my $xml = shift; |
265 |
|
266 |
# convert $xml into $content |
267 |
|
268 |
my $content = ""; |
269 |
|
270 |
require XML::LibXML; |
271 |
require XML::LibXSLT; |
272 |
my $if = $self->interface; |
273 |
my $cgi = $self->get_CGI; |
274 |
my $r = $self->get_R; |
275 |
my $debug = $cgi->param("DEBUG") || ""; |
276 |
|
277 |
my $parser = XML::LibXML->new(); |
278 |
my $doc = $parser->parse_string($xml); |
279 |
my $stylesheet; |
280 |
if (oreilly_de_catalog::config::TESTMTIMESTYLESHEET && |
281 |
exists $XSLCACHE->{$if} && |
282 |
$XSLCACHE->{$if}{M}) { |
283 |
my $file = $XSLCACHE->{$if}{file}; |
284 |
if (-M $file < $XSLCACHE->{$if}{M}) { |
285 |
delete $XSLCACHE->{$if}; |
286 |
} |
287 |
} |
288 |
unless ($XSLCACHE->{$if}{sheet}) { |
289 |
my $stylefile = __FILE__; |
290 |
$stylefile =~ s/main\.pm$/$if.xsl/; |
291 |
$XSLCACHE->{$if}{file} = $stylefile; |
292 |
$XSLCACHE->{$if}{M} = -M $stylefile; |
293 |
open my $fh, "<", $stylefile or die "Could not open $stylefile: $!"; |
294 |
local $/; |
295 |
my $styletxt = <$fh>; |
296 |
close $fh; |
297 |
my $xslt = XML::LibXSLT->new(); |
298 |
my $style_doc = $parser->parse_string($styletxt); |
299 |
$stylesheet = $xslt->parse_stylesheet($style_doc); |
300 |
$XSLCACHE->{$if}{sheet} = $stylesheet; |
301 |
} |
302 |
$stylesheet ||= $XSLCACHE->{$if}{sheet}; |
303 |
my $tdoc = $stylesheet->transform($doc); |
304 |
|
305 |
if ($debug) { |
306 |
if ($debug eq "DEBUG1") { |
307 |
$r->content_type("text/plain; charset=utf-8"); |
308 |
$content = "$xml\n-|-|-|-|-|-|-|-|-\n" . $stylesheet->output_string($tdoc); |
309 |
} else { |
310 |
$r->content_type("text/html; charset=utf-8"); |
311 |
my $args1 = my $args2 = $r->args; |
312 |
my $uri = $r->uri; |
313 |
$args1 =~ s/DEBUG=1/DEBUG=DEBUG1/; |
314 |
$args2 =~ s/DEBUG=1//; |
315 |
$content = qq{<html> |
316 |
<title>O'Reilly Suche -- Debugging in Frames</title> |
317 |
<frameset rows="75%,25%"> |
318 |
<frame src="$uri?$args1"> |
319 |
<frame src="$uri?$args2"> |
320 |
</frameset> |
321 |
</html> |
322 |
|
323 |
}; #'}; |
324 |
} |
325 |
} else { |
326 |
$r->content_type("text/html; charset=utf-8"); |
327 |
$content = $stylesheet->output_string($tdoc); |
328 |
$content =~ s|(\S)/>|$1 />|gs; # Netscape groks <br />, but not <br/> |
329 |
|
330 |
# Netscape 4.73 groks ü but not ü |
331 |
$content =~ s[&#x([\dA-F]+);][sprintf "&#%d;", hex $1]igse; |
332 |
|
333 |
# $content =~ s|</tr><tr>|</tr>\n<tr>|gs; |
334 |
|
335 |
|
336 |
if (0) { |
337 |
|
338 |
# The following line is WRONG, it would insert a newline even |
339 |
# into the content of the search textfield. It is here just as |
340 |
# a reminder. |
341 |
|
342 |
# $content =~ s|(.{80,}?)\s|$1\n|g; # a bit easier to debug |
343 |
} |
344 |
} |
345 |
$content; |
346 |
} |
347 |
|
348 |
sub statslogfile { |
349 |
my($self) = shift; |
350 |
my $file = oreilly_de_catalog::config::STATSLOGFILE; |
351 |
open my $fh, ">>", $file or die "Could not open $file: $!"; |
352 |
return $fh; |
353 |
} |
354 |
|
355 |
sub new { |
356 |
my($me) = shift; |
357 |
bless { @_ }, ref($me) || $me; |
358 |
} |
359 |
|
360 |
sub textfield { |
361 |
my($self, %arg) = @_; |
362 |
my $name = $arg{name} || ""; |
363 |
my $val = $self->{CGI}->param($name); |
364 |
require Encode; |
365 |
$val = Encode::decode_utf8($val); |
366 |
# require Devel::Peek;Devel::Peek::Dump($val); |
367 |
$val = $arg{default} || "" unless defined $val; |
368 |
my $ret = sprintf(qq{<input type="text" name="%s" value="%s"%s%s />}, |
369 |
$self->escapeHTML($name), |
370 |
$self->escapeHTML($val), |
371 |
exists $arg{size} ? qq{ size="$arg{size}"} : "", |
372 |
exists $arg{maxlength} ? qq{ maxlength="$arg{maxlength}"} : "" |
373 |
); |
374 |
# require Devel::Peek;Devel::Peek::Dump($ret); |
375 |
$ret; |
376 |
} |
377 |
|
378 |
sub searchterm { |
379 |
my($self) = @_; |
380 |
|
381 |
return $self->{SearchTerm} if defined $self->{SearchTerm}; |
382 |
|
383 |
my $cgi = $self->get_CGI; |
384 |
my $q = $cgi->param("tfq") || ""; # q from the text field |
385 |
my @q = $cgi->param("rbq"); # q from radiobuttons |
386 |
my $sterm = join " ", $q, @q; |
387 |
|
388 |
$sterm =~ s/^\s+//; |
389 |
|
390 |
if ($sterm =~ /[\200-\377]/) { |
391 |
# XXX if they have both 8bit and entities we cannot just simply |
392 |
# decode_entities because then we might have UTF-8 and Latin-1 |
393 |
# mixed in the string. It would be fair to issue a warning, but |
394 |
# I simply do not feel like it. |
395 |
|
396 |
# OK, let's assume we want to solve this: we split the string into |
397 |
# 7bit and 8bit chunks, apply decode_entities on the 7bit chunks |
398 |
# and something else to the 8bit chunks. Something like that.... |
399 |
} else { |
400 |
require HTML::Entities; |
401 |
HTML::Entities::decode_entities($sterm); |
402 |
} |
403 |
|
404 |
require Unicode::String; |
405 |
if ($sterm =~ /([\200-\377]+)/) { |
406 |
warn sprintf "DEBUG: we found 8bit bytes or chars: [%s]", $1 if $DEBUG_s; |
407 |
my $warn; |
408 |
{ |
409 |
# Put this in a scope of its own so we can warn() ASAP again |
410 |
local $^W=1; |
411 |
local($SIG{__WARN__}) = sub { $warn = $_[0]; }; |
412 |
my $us = Unicode::String::utf8($sterm); #side effect: warning |
413 |
} |
414 |
if ($warn and $warn =~ /utf8/i) { |
415 |
warn "DEBUG: Es war nicht UTF8, also kann's nur latin1 sein" if $DEBUG_s; |
416 |
$sterm = Unicode::String::latin1($sterm)->utf8; |
417 |
} else { |
418 |
warn "DEBUG: no warning, so we believe it's UTF-8. sterm[$sterm]" if $DEBUG_s; |
419 |
} |
420 |
require Encode; |
421 |
my $tmp = Encode::decode_utf8($sterm); |
422 |
if (defined $tmp) { |
423 |
$sterm = $tmp; |
424 |
} else { |
425 |
$sterm = Encode::decode("latin1", $sterm); |
426 |
} |
427 |
} |
428 |
$cgi->param('tfq',$sterm); |
429 |
$self->{SearchTermAsTextField} = $self->textfield( |
430 |
"name" => "tfq", |
431 |
"size" => 30, |
432 |
); |
433 |
|
434 |
# require Devel::Peek;Devel::Peek::Dump($self->{SearchTermAsTextField}); |
435 |
|
436 |
if ($self->{SetupForStats}) { |
437 |
$self->{HiddenfieldForStats} = sprintf( |
438 |
qq{<input type="hidden" |
439 |
name="SESSION" value="%s" />}, |
440 |
$self->sessionid, |
441 |
); |
442 |
} |
443 |
|
444 |
return $self->{SearchTerm} = $sterm; |
445 |
} |
446 |
|
447 |
sub wait_query { |
448 |
my($self) = @_; |
449 |
my $sterm = $self->searchterm or return; |
450 |
my $db = WAIT::Database->open(name => oreilly_de_catalog::config::WAITDBNAME, |
451 |
mode => O_RDONLY, |
452 |
directory => oreilly_de_catalog::config::WAITDIR, |
453 |
) or die; |
454 |
my $tb = $db->table(name => oreilly_de_catalog::config::WAITTBCATALOG) or die; |
455 |
my $overbook = 25; # Term "java xslt" gab nicht den Titel "Java and |
456 |
# XSLT" als ersten Hit als wir nur 4-faches |
457 |
# overbook hatten |
458 |
my %result; |
459 |
my %from; |
460 |
my(%weight) = (title => 4, |
461 |
aboutauthor => 0.4, # "parrot" reveiled must be low |
462 |
abouttranslator => 0.4, |
463 |
colophon => 0.6, |
464 |
inx => 0.05, |
465 |
isbn => 4, # 3-89721-227-7 reveiled that 1 is too small |
466 |
); |
467 |
my @attr = sort { |
468 |
($weight{$a}||1) <=> ($weight{$b}||1) |
469 |
} $tb->fields; # must be sorted so that the truncating only occurs |
470 |
# when the important fields have had their chance |
471 |
ATTR: for my $attr (@attr) { |
472 |
my $weight = $weight{$attr} || 1; |
473 |
# warn "DEBUG: attr[$attr]"; |
474 |
my %r; |
475 |
%r = $tb->search({attr => $attr, |
476 |
top => oreilly_de_catalog::config::WAITMAXHITS * $overbook, |
477 |
cont => $sterm}); |
478 |
if (0) { # debug |
479 |
my $hits = keys %r; |
480 |
warn "attr[$attr] sterm[$sterm] hits[$hits]"; |
481 |
} |
482 |
my $cnt = 0; |
483 |
RESULT: while (my($key, $val) = each %r) { |
484 |
$result{$key} ||= 0; |
485 |
$result{$key} += $val*$weight; |
486 |
$from{$key}{$attr} = undef; |
487 |
|
488 |
last RESULT if $cnt++ > oreilly_de_catalog::config::WAITMAXHITS * $overbook; |
489 |
last if $cnt > oreilly_de_catalog::config::WAITMAXHITS * $overbook/(scalar @attr) && |
490 |
scalar keys %result >= oreilly_de_catalog::config::WAITMAXHITS * $overbook; |
491 |
} |
492 |
} |
493 |
$self->{NumHits} = keys %result; |
494 |
if (%result) { |
495 |
$self->format_result(\%result, \%from, $tb); |
496 |
} else { |
497 |
$tb->close; |
498 |
my $overbook = 10; |
499 |
$tb = $db->table(name => oreilly_de_catalog::config::WAITTBFALLBACK) or die; |
500 |
my($wx,$wy) = oreilly_de_catalog::config::WAITALTWORDS =~ /(\d+)x(\d+)/; |
501 |
my $top = $wx * $wy; |
502 |
my %r = $tb->search( |
503 |
{attr => "headline", |
504 |
top => $top * $overbook, |
505 |
cont => $sterm, |
506 |
} |
507 |
); |
508 |
if (%r) { |
509 |
$self->format_aoa(\%r,$tb); |
510 |
} else { |
511 |
$self->{WaitNothingFound}++; |
512 |
warn "Info: twice no hit on [$sterm]\n"; |
513 |
} |
514 |
unless ($self->{WaitFallbackHitsAOA} || $self->{WaitActiveHitsArr}) { |
515 |
warn "Info: unexpectedly no hit on [$sterm]"; |
516 |
$self->{WaitNothingFound}++; |
517 |
} |
518 |
} |
519 |
$tb->close; |
520 |
} |
521 |
|
522 |
sub format_aoa { |
523 |
my $self = shift; |
524 |
my $result = shift; |
525 |
my %result = %$result; |
526 |
my $tb = shift; |
527 |
|
528 |
my $no = 0; # we number the hits |
529 |
my @didaoa = (); |
530 |
my($wx,$wy) = oreilly_de_catalog::config::WAITALTWORDS =~ /(\d+)x(\d+)/; |
531 |
my $top = $wx * $wy; |
532 |
for my $did (sort { $result{$b} <=> $result{$a} } keys %result) { |
533 |
my %rec = $tb->fetch($did); |
534 |
my $ydim = int($no/$wx); |
535 |
my $xdim = $no % $wx; |
536 |
my $dispvalue = $rec{docid}; |
537 |
my $checkbox = $self->checkbox("name" => "rbq", |
538 |
"value" => $dispvalue, |
539 |
); |
540 |
if ($dispvalue =~ /^\d{9}[\dX]$/i) { |
541 |
next; # we do not offer castrated ISBNs |
542 |
} |
543 |
$didaoa[$ydim][$xdim] = { |
544 |
did => $did, |
545 |
val => $result{$did}, |
546 |
checkbox => $checkbox, |
547 |
dispvalue => $dispvalue, |
548 |
}; |
549 |
last if ++$no >= $top; |
550 |
} |
551 |
$self->{WaitFallbackHitsAOA} = \@didaoa if @didaoa; |
552 |
} |
553 |
|
554 |
sub format_result { |
555 |
my $self = shift; |
556 |
my $result = shift; |
557 |
my $from = shift; |
558 |
my $tb = shift; |
559 |
my %result = %$result; |
560 |
my %from = %$from; |
561 |
my $r = $self->get_R; |
562 |
my $uri = $r->uri; |
563 |
my $no = 0; # we number the hits |
564 |
my @did = (); |
565 |
my $maxval; |
566 |
for my $did (sort { $result{$b} <=> $result{$a} } keys %result) { |
567 |
$maxval ||= $result{$did}; |
568 |
my %rec = $tb->fetch($did); |
569 |
for my $k (keys %rec) { |
570 |
$rec{$k} = $self->escapeHTML(Encode::decode_utf8($rec{$k})); |
571 |
} |
572 |
my $dueto = [ keys %{$from{$did}} ]; |
573 |
use Image::Size (); |
574 |
my $gif = sprintf(qq{%s/covers/%s.s.gif}, |
575 |
oreilly_de_catalog::config::CATALOG, |
576 |
$rec{docid}); |
577 |
my $wh = ""; |
578 |
if (-f $gif) { |
579 |
$wh = Image::Size::html_imgsize($gif); |
580 |
} |
581 |
my(@due,%due); |
582 |
@due{@$dueto} = (); |
583 |
delete $due{title}; |
584 |
delete $due{abstract}; |
585 |
my %duetree = ( |
586 |
author => { author => undef, aboutauthor => undef }, |
587 |
translator => { translator => undef, abouttranslator => undef }, |
588 |
desc => { desc => undef }, |
589 |
colophon => { colophon => undef }, |
590 |
chapter => { chapter => undef }, |
591 |
inx => { inx => undef }, |
592 |
toc => { toc => undef }, |
593 |
); |
594 |
my %duedict = ( |
595 |
author => "Über den Autor", |
596 |
translator => "Über den Übersetzer", |
597 |
desc => "Ausführliche Beschreibung", |
598 |
toc => "Inhaltsverzeichnis", |
599 |
inx => "Index", |
600 |
colophon => "Kolophon", |
601 |
chapter => "Probekapitel", |
602 |
); |
603 |
DUE: for my $due (qw(desc author translator chapter toc inx colophon)) { |
604 |
for my $dues (keys %{$duetree{$due}}) { |
605 |
if (exists $due{$dues}) { |
606 |
if ($self->{SetupForStats}) { |
607 |
push @due, sprintf( |
608 |
qq{<a href="%s?SESSION=%s;DUE=%s;NO=%s">%s</a>}, |
609 |
$uri, |
610 |
$self->sessionid, |
611 |
$due, |
612 |
$no, |
613 |
$duedict{$due}, |
614 |
); |
615 |
} else { |
616 |
push @due, sprintf( |
617 |
qq{<a href="%s">%s</a>}, |
618 |
$self->doclink( |
619 |
$rec{docid}, |
620 |
$due), |
621 |
$duedict{$due}, |
622 |
); |
623 |
} |
624 |
next DUE; |
625 |
} |
626 |
} |
627 |
} |
628 |
my $hrefindex; |
629 |
if ($self->{SetupForStats}) { |
630 |
$hrefindex = sprintf( |
631 |
"%s?SESSION=%s;DUE=index;NO=%s", |
632 |
$uri, |
633 |
$self->sessionid, |
634 |
$no, |
635 |
); |
636 |
} else { |
637 |
$hrefindex = sprintf( |
638 |
qq{%s/%s/index.html}, |
639 |
oreilly_de_catalog::config::CATALOGROOTLOC, |
640 |
$rec{docid}, |
641 |
); |
642 |
} |
643 |
my $coverlink = sprintf(qq{<a href="%s"><img class="cover" src="%s/covers/%s.s.gif" %s alt="cover" /></a>}, |
644 |
$hrefindex, |
645 |
oreilly_de_catalog::config::CATALOGROOTLOC, |
646 |
$rec{docid}, |
647 |
$wh, |
648 |
); |
649 |
my $indexlink = sprintf(qq{<a href="%s">%s</a>}, |
650 |
$hrefindex, |
651 |
$rec{title}, |
652 |
); |
653 |
my $duelinks = join(qq{<br />},@due); |
654 |
# warn "DEBUG: did[$did]"; |
655 |
my $lang = $rec{docid} =~ /ger$/ ? "de" : "en"; |
656 |
my $relval = int($result{$did} * 98.9 / ($maxval || 1))+1; |
657 |
|
658 |
$did[$no] = { |
659 |
did => $did, |
660 |
val => $result{$did}, |
661 |
dueto => $dueto, |
662 |
rec => \%rec, |
663 |
thumbnaillink => $coverlink, |
664 |
indexlink => $indexlink, |
665 |
duelinks => $duelinks, |
666 |
lang => $lang, |
667 |
relval => $relval, |
668 |
}; |
669 |
last if ++$no >= oreilly_de_catalog::config::WAITMAXHITS; |
670 |
} |
671 |
$self->{WaitActiveHitsArr} = \@did; |
672 |
} |
673 |
|
674 |
sub doclink { |
675 |
my($self,$recdocid,$due) = @_; |
676 |
$due = "chapter/index" if $due eq "chapter"; |
677 |
sprintf( |
678 |
qq{%s/%s/%s.html}, |
679 |
oreilly_de_catalog::config::CATALOGROOTLOC, |
680 |
$recdocid, |
681 |
$due, |
682 |
); |
683 |
} |
684 |
|
685 |
# sub escapeHTML { # slow but doesn't lose the UTF8-Flag |
686 |
sub escapeHTML_slow { |
687 |
my($self, $what) = @_; |
688 |
return unless defined $what; |
689 |
# require Devel::Peek;Devel::Peek::Dump($what); |
690 |
my %escapes = qw(& & " " > > < <); #"); |
691 |
$what =~ s/([&\"<>])/$escapes{$1}/g; #}/; |
692 |
# require Devel::Peek;Devel::Peek::Dump($what); |
693 |
$what; |
694 |
} |
695 |
|
696 |
|
697 |
# sub escapeHTML_unreliablewithutf8 { |
698 |
sub escapeHTML { # fast but loses the UTF-8 flag |
699 |
my($self, $what) = @_; |
700 |
return unless defined $what; |
701 |
# require Devel::Peek;Devel::Peek::Dump($what); |
702 |
require Apache::Util; |
703 |
$what = Encode::encode_utf8($what); |
704 |
my $ret = Apache::Util::escape_html($what); |
705 |
$ret = Encode::decode_utf8($ret); |
706 |
# require Devel::Peek;Devel::Peek::Dump($ret); |
707 |
$ret; |
708 |
} |
709 |
|
710 |
sub checkbox { |
711 |
my($self,%arg) = @_; |
712 |
|
713 |
my $name = $arg{name}; |
714 |
my $value; |
715 |
defined($value = $arg{value}) or ($value = "on"); |
716 |
my $checked; |
717 |
my @sel = $self->{CGI}->param($name); |
718 |
if (@sel) { |
719 |
for my $s (@sel) { |
720 |
if ($s eq $value) { |
721 |
$checked = 1; |
722 |
last; |
723 |
} |
724 |
} |
725 |
} else { |
726 |
$checked = $arg{checked}; |
727 |
} |
728 |
sprintf(qq{<input type="checkbox" name="%s" value="%s"%s />}, |
729 |
$self->escapeHTML($name), |
730 |
$self->escapeHTML($value), |
731 |
$checked ? qq{ checked="checked"} : "" |
732 |
); |
733 |
} |
734 |
|
735 |
sub construct_xml { |
736 |
my($self) = @_; |
737 |
my(@m); |
738 |
|
739 |
# if they want an interface, we need to convert our relevant data |
740 |
# to XML and feed it to some XSLT transformer. |
741 |
push @m, qq{<?xml version="1.0" encoding="UTF-8"?> |
742 |
<!DOCTYPE stylesheet [ |
743 |
<!ENTITY Auml "Ä"> |
744 |
<!ENTITY Ouml "Ö"> |
745 |
<!ENTITY Uuml "Ü"> |
746 |
<!ENTITY szlig "ß"> |
747 |
<!ENTITY auml "ä"> |
748 |
<!ENTITY ouml "ö"> |
749 |
<!ENTITY uuml "ü"> |
750 |
]> |
751 |
|
752 |
<root> |
753 |
}; |
754 |
for my $sfield (qw( |
755 |
|
756 |
SearchTermAsTextField HiddenfieldForStats WaitNothingFound |
757 |
|
758 |
)) { |
759 |
next unless exists $self->{$sfield} && defined $self->{$sfield}; |
760 |
push @m, sprintf(qq{<%s>%s</%s>\n}, |
761 |
$sfield, |
762 |
$self->{$sfield}, |
763 |
$sfield); |
764 |
} |
765 |
# WaitFallbackHitsAOA, WaitActiveHitsArr |
766 |
if (exists $self->{WaitFallbackHitsAOA} and defined $self->{WaitFallbackHitsAOA}) { |
767 |
push @m, qq{<WaitFallbackHitsAOA>\n}; |
768 |
for my $line (@{$self->{WaitFallbackHitsAOA}}) { |
769 |
push @m, qq{ <WaitFallbackHitsAOALine>}; |
770 |
for my $col (@$line) { |
771 |
push @m, qq{ <WaitFallbackHitsAOACol>}; |
772 |
for my $k (sort keys %$col) { |
773 |
# we have no escaping problem because the dictionary doesn't contain <>& |
774 |
push @m, qq{ <item key="$k">$col->{$k}</item>}; |
775 |
} |
776 |
push @m, qq{ </WaitFallbackHitsAOACol>\n}; |
777 |
} |
778 |
push @m, qq{ </WaitFallbackHitsAOALine>\n}; |
779 |
} |
780 |
push @m, qq{</WaitFallbackHitsAOA>\n}; |
781 |
} |
782 |
if (exists $self->{WaitActiveHitsArr} && defined $self->{WaitActiveHitsArr}) { |
783 |
push @m, qq{<WaitActiveHitsArr>}; |
784 |
for my $item (@{$self->{WaitActiveHitsArr}}) { |
785 |
push @m, qq{ <WaitActiveHitsArrItem>\n}; |
786 |
for my $k (sort keys %$item) { |
787 |
my $v = $item->{$k}; |
788 |
if (ref $v eq "ARRAY") { |
789 |
push @m, qq{ <item key="$k">skipped array</item>\n}; |
790 |
} elsif (ref $v eq "HASH") { |
791 |
push @m, qq{ <item key="$k">\n}; |
792 |
for my $k2 (sort keys %$v) { |
793 |
# No escape problems: all field names clean, all field |
794 |
# values (so far) clean |
795 |
push @m, qq{ <rec field="$k2">$v->{$k2}</rec>\n}; |
796 |
} |
797 |
push @m, qq{ </item>\n}; |
798 |
} else { |
799 |
# No escape problem |
800 |
push @m, qq{ <item key="$k">$item->{$k}</item>\n}; |
801 |
} |
802 |
} |
803 |
push @m, qq{ </WaitActiveHitsArrItem>\n}; |
804 |
} |
805 |
push @m, qq{</WaitActiveHitsArr>}; |
806 |
} |
807 |
push @m, qq{ |
808 |
</root> |
809 |
}; |
810 |
join "", @m; |
811 |
} |
812 |
|
813 |
sub direct_html { |
814 |
my($self) = @_; |
815 |
my(@m); |
816 |
|
817 |
# a bare-bone-page without XSLT. It was a pre-release study and I |
818 |
# leave it here in case XSLT breaks tomorrow |
819 |
|
820 |
push @m, qq{<html><head><title>O\'Reilly Verlag -- Suche</title></head><body>}; |
821 |
# startform |
822 |
push @m, qq{<form method="get"><p>}; |
823 |
# textfield |
824 |
push @m, $self->{SearchTermAsTextField}; |
825 |
# maybe radiobuttons |
826 |
if ($self->{WaitFallbackHitsAOA}) { |
827 |
|
828 |
push @m, qq{<p>Zu Ihrem Suchbegriff wurde kein Dokument |
829 |
gefunden. Befindet sich unter den folgenden Wörtern ein |
830 |
passender Suchbegriff? Wenn ja, bitte klicken Sie einen oder |
831 |
mehrere Suchbegriffe an und anschließend auf "Erneut |
832 |
Suchen".</p>}; |
833 |
|
834 |
push @m, qq{<table cellpadding="2" cellspacing="6">}; |
835 |
|
836 |
ALTTABLE: for my $yi (0..$#{$self->{WaitFallbackHitsAOA}}) { |
837 |
push @m, qq{<tr>}; |
838 |
for my $xi (0..$#{$self->{WaitFallbackHitsAOA}[$yi]}) { |
839 |
push @m, qq{<td>}; |
840 |
push @m, $self->{WaitFallbackHitsAOA}[$yi][$xi]{checkbox}; |
841 |
push @m, " "; |
842 |
push @m, $self->{WaitFallbackHitsAOA}[$yi][$xi]{dispvalue}; |
843 |
push @m, qq{</td>}; |
844 |
} |
845 |
push @m, qq{</tr>}; |
846 |
} |
847 |
push @m, qq{</table>}; |
848 |
push @m, qq{<input type="submit" value="Erneut Suchen" />}; |
849 |
} |
850 |
# endform |
851 |
push @m, qq{</p></form>}; |
852 |
if ($self->{WaitNothingFound}) { |
853 |
|
854 |
push @m, "<p>Zu Ihrem Suchbegriff konnten wir leider gar nichts |
855 |
finden. Bitte versuchen Sie einen längeren oder mehrere |
856 |
Suchbegriffe.</p>"; |
857 |
|
858 |
} |
859 |
# maybe answer |
860 |
if ($self->{WaitActiveHitsArr}) { |
861 |
push @m, qq{<table cellpadding="2" cellspacing="3">}; |
862 |
for my $i (0..$#{$self->{WaitActiveHitsArr}}) { |
863 |
push @m, sprintf(qq{<tr> |
864 |
<td align="center" valign="center">%s</td><td valign="top"><font size="5"> |
865 |
<b>%s</b></font><br />%s<br />ISBN %s<br /><br /><br />%s</td></tr>\n}, |
866 |
$self->{WaitActiveHitsArr}[$i]{thumbnaillink}, |
867 |
$self->{WaitActiveHitsArr}[$i]{indexlink}, |
868 |
$self->{WaitActiveHitsArr}[$i]{rec}{author}, |
869 |
$self->{WaitActiveHitsArr}[$i]{rec}{isbn}, |
870 |
$self->{WaitActiveHitsArr}[$i]{duelinks}, |
871 |
); |
872 |
} |
873 |
push @m, qq{</table>}; |
874 |
} |
875 |
|
876 |
push @m, qq{</body></html>}; |
877 |
my $r = $self->get_R; |
878 |
$r->content_type("text/html; charset=utf-8"); |
879 |
join "", @m; |
880 |
} |
881 |
|
882 |
1; |