#!/usr/bin/perl

#--------------------------------------------------------------------------
#
# Author: Dobrica Pavlinusic
# Creation Date: 2002-03-17


#--------------------------------------------------------------------------
#
# Description:
#
# This package implements the various filters for this database
#

#--------------------------------------------------------------------------
#
# Pragmatic modules
#

use strict;


#--------------------------------------------------------------------------
#
# Package definition
#

package default;


#--------------------------------------------------------------------------
#
# Application Constants
#

# Field Names
#
# Maps a field tag (as it appears after '%' in a raw document) to the
# label printed in front of that field by DocumentFilter().
#
# NOTE(review): several labels contain non-ASCII characters that look like
# ISO-8859-2 text displayed as Latin-1 -- confirm the file encoding.  The
# literals are deliberately left exactly as found.
%default::FieldNames = (
    '700+' => 'Autor(i)',
    '200'  => 'Naslov',
    '205'  => 'Izdanje',
    '210'  => 'Izdavanje',
    '215'  => 'Materijalni opis',
    '225'  => 'Nakladnièka cjelina',
    '300+' => 'Napomena',
    '330'  => 'Sadr¾aj',
    '423'  => 'Prikriveni radovi',
    '464'  => 'Analitièki radovi',
    '610'  => 'Kljuène rijeèi',
    '675+' => 'Klasifik. oznaka',
    '990'  => 'Signatura',
    '991'  => 'Inventarni broj',
    'ISBN' => 'ISBN',
    'MFN'  => 'MFN',
    '994a' => 'Knji¾nica',

    # Single-character tags
    'A' => 'Author(s)',
    'B' => 'Book',
    'C' => 'City',
    'D' => 'Date',
    'E' => 'Editor',
    'F' => 'Footnote',
    'G' => 'Order Info',
    'H' => 'Commentary',
    'I' => 'Publisher',
    'J' => 'Journal',
    'K' => 'Keyword',
    'L' => 'Label',
    'M' => 'Bell',
    'N' => 'Issue',
    'O' => 'Other',
    'P' => 'Page',
    'Q' => 'Corporate',
    'R' => 'Report',
    'S' => 'Series',
    'T' => 'Title',
    'U' => 'Annotation',
    'V' => 'Volume',
    'W' => 'URL',
    'X' => 'Abstract',
    'Y' => 'Table of Contents',
    'Z' => 'Reference',
    '$' => 'Price',
    '*' => 'Copyright',
    '^' => 'Parts',
);



# Field Display Order
#
# Field tags in the order DocumentFilter() prints them.  Tags that are
# absent from a document (or empty) are skipped at display time.
#
# Note: '.' is listed here but has no entry in %default::FieldNames.
@default::FieldDisplayOrder = qw(
    ISBN
    700+
    200
    205
    210
    215
    225
    300+
    330
    423
    464
    610
    675+
    990
    991
    MFN
    994a

    M
    L
    T
    S
    E
    Q
    A
    J
    B
    D
    V
    N
    P
    C
    I
    G
    $
    K
    *
    W
    O
    Y
    X
    R
    .
    F
    H
    U
    Z
    ^
);

# separators
#
# Text inserted by DocumentParser() between repeated occurrences of the
# same field tag.  Tags not listed here are joined with ";".
%default::Separator = (
    '700+' => ', ',
    '990'  => ' ',
    '991'  => ', ',
    '300+' => '.- ',
    '225'  => ', ',
    '610'  => ' * ',
);

#--------------------------------------------------------------------------
#
#   Function:   DocumentParser()
#
#   Purpose:    This function serves as a document parser.  It splits the
#               raw text into lines; a line that starts with "%TAG" opens
#               (or repeats) the field TAG, any other line continues the
#               field opened most recently.
#
#   Called by:  DocumentFilter(), SummaryFilter(), RelevanceFeedbackFilter()
#
#   Parameters: $Database       Database name      (not used here)
#               $DocumentID     Document ID        (not used here)
#               $ItemName       Item name          (not used here)
#               $MimeType       Mime type          (not used here)
#               $DocumentRaw    Raw document text
#
#   Global Variables:   %default::Separator
#
#   Returns:    A hash table of the document fields (tag => text); an
#               empty list when $DocumentRaw is undefined
#
sub DocumentParser {

    my ($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw) = @_;

    my (%Document, $FieldTag);

    # Nothing to parse
    if ( !defined($DocumentRaw) ) {
        return (%Document);
    }

    # Extract the document
    foreach my $DocumentLine ( split(/\n/, $DocumentRaw) ) {

        my ($FieldData, $Separator);

        # Check if this is a new field.  The pattern is anchored so that a
        # '%' in the middle of a continuation line is not taken for a tag.
        if ( $DocumentLine =~ /^%(\S+)\s*(.*)/ ) {
            $FieldTag = $1;
            $FieldData = $2;
            # Repeated tags are joined with the per-tag separator, or ";"
            $Separator = $default::Separator{$FieldTag} || ";";
        }
        else {
            # Text before the first tag belongs to no field, skip it
            # rather than filing it under an undefined key
            if ( !defined($FieldTag) ) {
                next;
            }
            $FieldData = $DocumentLine;
            $Separator = "\n";
        }

        if ( defined($Document{$FieldTag}) ) {
            $Document{$FieldTag} .= $Separator . $FieldData;
        }
        else {
            $Document{$FieldTag} = $FieldData;
        }
    }

    return (%Document);

}


#--------------------------------------------------------------------------
#
#   Function:   _EscapeHtml()
#
#   Purpose:    Private helper: make a piece of document text safe to
#               place in HTML text or inside a double-quoted attribute
#
#   Parameters: $Text       Text to escape
#
#   Returns:    The escaped text ("" for an undefined value)
#
sub _EscapeHtml {

    my ($Text) = @_;

    if ( !defined($Text) ) {
        return ("");
    }

    # '&' must go first so the entities added below are not re-escaped
    $Text =~ s/&/&amp;/g;
    $Text =~ s/</&lt;/g;
    $Text =~ s/>/&gt;/g;
    $Text =~ s/"/&quot;/g;

    return ($Text);

}


#--------------------------------------------------------------------------
#
#   Function:   _EscapeQueryTerm()
#
#   Purpose:    Private helper: percent-encode a value for use inside a
#               URL, with spaces written as '+'
#
#   Parameters: $Text       Text to encode
#
#   Returns:    The encoded text ("" for an undefined value)
#
sub _EscapeQueryTerm {

    my ($Text) = @_;

    if ( !defined($Text) ) {
        return ("");
    }

    # NOTE(review): assumes byte strings (code points <= 255) -- confirm
    $Text =~ s/([^A-Za-z0-9\-_.~ ])/sprintf('%%%02X', ord($1))/ge;
    $Text =~ tr/ /+/;

    return ($Text);

}


#--------------------------------------------------------------------------
#
#   Function:   DocumentFilter()
#
#   Purpose:    This function is the document filter.  It renders the
#               parsed document as an HTML table, one row per field, in
#               the order given by @default::FieldDisplayOrder.  Authors,
#               editors and keywords become search links, the URL field
#               becomes a link, and ISBNs found in the order info field
#               produce a list of bookshop links.
#
#   Called by:  external
#
#   Parameters: $Database       Database name
#               $DocumentID     Document ID
#               $ItemName       Item name
#               $MimeType       Mime type
#               $DocumentRaw    Raw document text
#
#   Global Variables:   %default::FieldNames, @default::FieldDisplayOrder,
#                       $ENV{'SCRIPT_NAME'}
#
#   Returns:    The filtered document, undef if $DocumentRaw is undefined
#
sub DocumentFilter {

    my ($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw) = @_;

    if ( !defined($DocumentRaw) ) {
        return (undef);
    }

    my %Document = DocumentParser($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw);

    my $ScriptName = defined($ENV{'SCRIPT_NAME'}) ? $ENV{'SCRIPT_NAME'} : "";

    my $DocumentFinal = "<TABLE BORDER=0 CELLPADDING=0 CELLSPACING=2 WIDTH=100%> \n";

    # Loop over each field in the display order
    foreach my $FieldTag ( @default::FieldDisplayOrder ) {

        my $FieldValue = $Document{$FieldTag};

        # Skip empty slots
        if ( !(defined($FieldValue) && ($FieldValue ne "")) ) {
            next;
        }

        # Fall back to the tag itself when no label is configured for it
        my $FieldName = defined($default::FieldNames{$FieldTag}) ? $default::FieldNames{$FieldTag} : $FieldTag;

        # Print the row start
        $DocumentFinal .= "<TR><TD VALIGN=TOP ALIGN=RIGHT BGCOLOR=\"#EEEEEE\" WIDTH=\"130\"> ${FieldName}: </TD><TD VALIGN=TOP ALIGN=LEFT> ";

        # Print the field data, marking up if needed.  The raw value is
        # split first and escaped afterwards, otherwise the ';' of an HTML
        # entity would be taken for a value separator.
        if ( $FieldTag eq "A" || $FieldTag eq "E" ) {
            my @Links;
            foreach my $Author ( split(/;/, $FieldValue) ) {
                my $Name = $Author;
                $Name =~ s/,//g;
                # Search on the name without initials ("J."); every word
                # kept is followed by '+', as the original links were built
                my $SearchTerm = "";
                foreach my $AuthorName ( split(/ /, $Name) ) {
                    if ( $AuthorName eq "" || $AuthorName =~ /^\w+\.$/ ) {
                        next;
                    }
                    $SearchTerm .= _EscapeQueryTerm($AuthorName) . "+";
                }
                push(@Links, "<A HREF=\"$ScriptName/GetSearchResults?Database=$Database&FieldName1=author&FieldContent1=%22$SearchTerm%22\"> " . _EscapeHtml($Author) . " </A>");
            }
            $DocumentFinal .= join(", ", @Links);
        }
        elsif ( $FieldTag eq "K" ) {
            my @Links;
            foreach my $Keyword ( split(/,/, $FieldValue) ) {
                my $SearchTerm = _EscapeQueryTerm($Keyword);
                push(@Links, "<A HREF=\"$ScriptName/GetSearchResults?Database=$Database&FieldName1=keyword&FieldContent1=%22$SearchTerm%22\"> " . _EscapeHtml($Keyword) . " </A>");
            }
            $DocumentFinal .= join(", ", @Links);
        }
        elsif ( $FieldTag eq "W" ) {
            my $Url = _EscapeHtml($FieldValue);
            $DocumentFinal .= "<A HREF=\"$Url\" TARGET=\"BlankWindow\"> $Url </A>";
        }
        elsif ( $FieldTag eq "Y" ) {
            $DocumentFinal .= "<PRE>" . _EscapeHtml($FieldValue) . "</PRE>";
        }
        else {
            $DocumentFinal .= _EscapeHtml($FieldValue);
        }

        $DocumentFinal .= "</TD></TR> \n";

    }


    # List any external links we may have
    if ( defined($Document{'G'}) ) {

        my @ExternalLinks;

        # Repeated fields are joined with ";" by the parser, so accept the
        # separator with or without trailing blanks
        foreach my $OrderNumber ( split(/;\s*/, $Document{'G'}) ) {

            if ( $OrderNumber =~ /ISBN\s*(\S*)/ ) {
                my $Isbn = $1;
                $Isbn =~ s/-//g;
                if ( $Isbn eq "" ) {
                    next;
                }
                $Isbn = _EscapeQueryTerm($Isbn);
                push(@ExternalLinks, "<LI><A HREF=\"http://www.amazon.com/exec/obidos/ASIN/$Isbn\" TARGET=\"BlankWindow\"> Get this book from Amazon.com </A></LI>");
                push(@ExternalLinks, "<LI><A HREF=\"http://search.borders.com/fcgi-bin/db2www/search/search.d2w/Details?mediaType=Book&searchType=ISBNUPC&code=$Isbn\" TARGET=\"BlankWindow\"> Get this book from Borders </A></LI>");
                push(@ExternalLinks, "<LI><A HREF=\"http://shop.barnesandnoble.com/bookSearch/isbnInquiry.asp?isbn=$Isbn\" TARGET=\"BlankWindow\"> Get this book from Barnes & Noble </A></LI>");
            }
        }

        # Only print the section when there is something to list
        if ( @ExternalLinks ) {
            $DocumentFinal .= "<TR><TD COLSPAN=2> <HR WIDTH=50%> </TD></TR>";
            $DocumentFinal .= "<TR><TD VALIGN=TOP ALIGN=LEFT> <B> External Links: </B> </TD><TD VALIGN=TOP ALIGN=LEFT><UL>";
            $DocumentFinal .= join("", @ExternalLinks);
            $DocumentFinal .= "</UL></TD></TR>";
        }
    }

    $DocumentFinal .= "</TABLE> \n";

    return ($DocumentFinal);

}



#--------------------------------------------------------------------------
#
#   Function:   SummaryFilter()
#
#   Purpose:    This function is the summary filter.  It picks the first
#               non-empty field among abstract (X), table of contents (Y)
#               and keywords (K), collapses every run of whitespace
#               (including line breaks) to one blank and upper-cases the
#               first character.
#
#   Called by:  external
#
#   Parameters: $Database       Database name
#               $DocumentID     Document ID
#               $ItemName       Item name
#               $MimeType       Mime type
#               $DocumentRaw    Raw document text
#
#   Global Variables:   none
#
#   Returns:    The filtered summary, undef if $DocumentRaw is undefined
#               or none of the fields has any text
#
sub SummaryFilter {

    my ($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw) = @_;

    if ( !defined($DocumentRaw) ) {
        return (undef);
    }

    # Parse the document
    my %Document = DocumentParser($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw);

    # Select the abstract, else the table of contents, else the keywords.
    # An empty field does not count, so the next candidate gets a chance.
    my $Summary;
    foreach my $FieldTag ( 'X', 'Y', 'K' ) {
        if ( defined($Document{$FieldTag}) && ($Document{$FieldTag} ne "") ) {
            $Summary = $Document{$FieldTag};
            last;
        }
    }

    # Clean the summary text
    if ( defined($Summary) ) {
        # \s covers CR, LF and CRLF alike, so every kind of line ending
        # becomes a single blank and no words are glued together
        $Summary =~ s/\s+/ /g;
        $Summary = ucfirst($Summary);
    }

    return ($Summary);

}



#--------------------------------------------------------------------------
#
#   Function:   RelevanceFeedbackFilter()
#
#   Purpose:    This function is the relevance feedback filter.  It
#               returns the text of the first non-empty field among
#               abstract (X), table of contents (Y), keywords (K),
#               title (T) and author (A), unmodified.
#
#   Called by:  external
#
#   Parameters: $Database       Database name
#               $DocumentID     Document ID
#               $ItemName       Item name
#               $MimeType       Mime type
#               $DocumentRaw    Raw document text
#
#   Global Variables:   none
#
#   Returns:    The filtered relevance feedback, undef if $DocumentRaw is
#               undefined or none of the fields has any text
#
sub RelevanceFeedbackFilter {

    my ($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw) = @_;

    if ( !defined($DocumentRaw) ) {
        return (undef);
    }

    # Parse the document
    my %Document = DocumentParser($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw);

    # Candidate fields in order of preference.  An empty field does not
    # count, so the next candidate gets a chance.
    my $RelevanceFeedback;
    foreach my $FieldTag ( 'X', 'Y', 'K', 'T', 'A' ) {
        if ( defined($Document{$FieldTag}) && ($Document{$FieldTag} ne "") ) {
            $RelevanceFeedback = $Document{$FieldTag};
            last;
        }
    }

    return ($RelevanceFeedback);

}



#--------------------------------------------------------------------------

1;