1 |
dpavlin |
1.1 |
#!/usr/bin/perl |
2 |
|
|
|
3 |
|
|
#-------------------------------------------------------------------------- |
4 |
|
|
# |
5 |
|
|
# Author: Dobrica Pavlinusic |
6 |
|
|
# Creation Date: 2002-03-17 |
7 |
|
|
|
8 |
|
|
|
9 |
|
|
#-------------------------------------------------------------------------- |
10 |
|
|
# |
11 |
|
|
# Description: |
12 |
|
|
# |
13 |
|
|
# This package implements the various filters for this database |
14 |
|
|
# |
15 |
|
|
|
16 |
|
|
#-------------------------------------------------------------------------- |
17 |
|
|
# |
18 |
|
|
# Pragmatic modules |
19 |
|
|
# |
20 |
|
|
|
21 |
|
|
use strict; |
22 |
|
|
|
23 |
|
|
|
24 |
|
|
#-------------------------------------------------------------------------- |
25 |
|
|
# |
26 |
|
|
# Package definition |
27 |
|
|
# |
28 |
|
|
|
29 |
|
|
package isis; |
30 |
|
|
|
31 |
|
|
|
32 |
|
|
#-------------------------------------------------------------------------- |
33 |
|
|
# |
34 |
|
|
# Application Constants |
35 |
|
|
# |
36 |
|
|
|
37 |
|
|
# Field Names |
38 |
|
|
# Maps each single-character field tag to the label shown for it.
%isis::FieldNames = (
    'A' => 'Author(s)',
    'B' => 'Book',
    'C' => 'City',
    'D' => 'Date',
    'E' => 'Editor',
    'F' => 'Footnote',
    'G' => 'Order Info',
    'H' => 'Commentary',
    'I' => 'Publisher',
    'J' => 'Journal',
    'K' => 'Keyword',
    'L' => 'Label',
    'M' => 'Bell',
    'N' => 'Issue',
    'O' => 'Other',
    'P' => 'Page',
    'Q' => 'Corporate',
    'R' => 'Report',
    'S' => 'Series',
    'T' => 'Title',
    'U' => 'Annotation',
    'V' => 'Volume',
    'W' => 'URL',
    'X' => 'Abstract',
    'Y' => 'Table of Contents',
    'Z' => 'Reference',
    '$' => 'Price',
    '*' => 'Copyright',
    '^' => 'Parts',
);
69 |
|
|
|
70 |
|
|
|
71 |
|
|
|
72 |
|
|
# Field Display Order |
73 |
|
|
# Field tags in the order DocumentFilter() walks them when building
# the display table.
# NOTE(review): the '.' tag listed here has no label in %isis::FieldNames.
@isis::FieldDisplayOrder = qw(
    M L T S E Q A J B D
    V N P C I G $ K * W
    O Y X R . F H U Z ^
);
105 |
|
|
|
106 |
|
|
|
107 |
|
|
#-------------------------------------------------------------------------- |
108 |
|
|
# |
109 |
|
|
# Function: DocumentParser() |
110 |
|
|
# |
111 |
|
|
# Purpose: This function serves as a document parser |
112 |
|
|
# |
113 |
|
|
# Called by: DocumentFilter(), SummaryFilter() |
114 |
|
|
# |
115 |
|
|
# Parameters: $Database Database name |
116 |
|
|
# $DocumentID Document ID |
117 |
|
|
# $ItemName Item name |
118 |
|
|
# $MimeType Mime type |
119 |
|
|
# $DocumentRaw Raw document text |
120 |
|
|
# |
121 |
|
|
# Global Variables: |
122 |
|
|
# |
123 |
|
|
# Returns: A hash table of the document fields |
124 |
|
|
# |
125 |
|
|
sub DocumentParser {

    my ($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw) = @_;

    # Splits the raw document into lines and collects them into a hash
    # keyed by the single-character field tag.
    #
    #   - A line of the form "%<tag> <data>" starts (or repeats) a field;
    #     data from a repeated tag is appended with ";".
    #   - Any other line continues the most recent field and is appended
    #     with "\n".
    #
    # Only $DocumentRaw is used; the other parameters are accepted for
    # interface compatibility with the filter functions.
    #
    # Returns the hash (as a list) of tag => field data.

    my (%Document, $FieldTag);

    # Nothing to parse
    if ( !defined($DocumentRaw) ) {
        return (%Document);
    }

    foreach my $DocumentLine ( split(/\n/, $DocumentRaw) ) {

        my ($FieldData, $Separator);

        # A new field is only recognised at the start of the line, so a
        # '%' inside continuation text (e.g. "50% of") is kept as data
        # rather than being mistaken for a field tag.
        if ( $DocumentLine =~ /^\s*%(.)\s*(.*)/ ) {
            $FieldTag = $1;
            $FieldData = $2;
            $Separator = ";";
        }
        else {
            # Text before the first tag belongs to no field; skip it
            # instead of storing it under an undefined hash key.
            next if ( !defined($FieldTag) );

            $FieldData = $DocumentLine;
            $Separator = "\n";
        }

        if ( defined($Document{$FieldTag}) ) {
            $Document{$FieldTag} .= $Separator . $FieldData;
        }
        else {
            $Document{$FieldTag} = $FieldData;
        }
    }

    return (%Document);

}
159 |
|
|
|
160 |
|
|
|
161 |
|
|
#-------------------------------------------------------------------------- |
162 |
|
|
# |
163 |
|
|
# Function: DocumentFilter() |
164 |
|
|
# |
165 |
|
|
# Purpose: This function is the document filter |
166 |
|
|
# |
167 |
|
|
# Called by: external |
168 |
|
|
# |
169 |
|
|
# Parameters: $Database Database name |
170 |
|
|
# $DocumentID Document ID |
171 |
|
|
# $ItemName Item name |
172 |
|
|
# $MimeType Mime type |
173 |
|
|
# $DocumentRaw Raw document text |
174 |
|
|
# |
175 |
|
|
# Global Variables: %isis::FieldNames, @isis::FieldDisplayOrder |
176 |
|
|
# |
177 |
|
|
# Returns: The filtered document |
178 |
|
|
# |
179 |
|
|
sub DocumentFilter {

    my ($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw) = @_;

    # Renders the raw document as an HTML table, one row per non-empty
    # field, in the order given by @isis::FieldDisplayOrder and labelled
    # from %isis::FieldNames.  Authors/editors and keywords become search
    # links, the URL field becomes a link, and the table of contents is
    # wrapped in <PRE>.  If the order info ('G') contains ISBNs, an
    # "External Links" section with bookshop links is appended.
    #
    # Returns the HTML string, or undef when $DocumentRaw is undefined.
    #
    # NOTE(review): field data is inserted into the HTML unescaped, as it
    # always has been -- confirm the database content is trusted.

    if ( !defined($DocumentRaw) ) {
        return (undef);
    }

    my %Document = DocumentParser($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw);

    my $DocumentFinal = "<TABLE BORDER=0 CELLPADDING=0 CELLSPACING=2 WIDTH=100%> \n";

    # Loop over each field in display order
    foreach my $FieldTag ( @isis::FieldDisplayOrder ) {

        my $FieldData = $Document{$FieldTag};

        # Skip empty slots
        next if ( !defined($FieldData) || $FieldData eq "" );

        # Fall back to the tag itself when it has no display name, so the
        # row label is never blank.
        my $FieldName = defined($isis::FieldNames{$FieldTag}) ? $isis::FieldNames{$FieldTag} : $FieldTag;

        # Print the row start
        $DocumentFinal .= "<TR><TD VALIGN=TOP ALIGN=RIGHT BGCOLOR=\"#EEEEEE\" WIDTH=1%> $FieldName: </TD><TD VALIGN=TOP ALIGN=LEFT> ";

        # Print the field data, marking up if needed
        if ( $FieldTag eq "A" || $FieldTag eq "E" ) {
            my @Links;
            foreach my $Author ( split(/;/, $FieldData) ) {
                # Build the search term from the name without commas and
                # without initials ("J."); every kept word is followed by
                # '+', which keeps the generated URLs unchanged.
                (my $Names = $Author) =~ s/,//g;
                my $SearchTerm = join('', map { "$_+" } grep { $_ ne "" && $_ !~ /^\w+\.$/ } split(/ /, $Names));
                push(@Links, "<A HREF=\"$ENV{'SCRIPT_NAME'}/GetSearchResults?Database=isis&FieldName1=author&FieldContent1=%22$SearchTerm%22\"> $Author </A>");
            }
            $DocumentFinal .= join(", ", @Links);
        }
        elsif ( $FieldTag eq "K" ) {
            my @Links;
            foreach my $Keyword ( split(/,/, $FieldData) ) {
                (my $SearchTerm = $Keyword) =~ s/ /+/g;
                push(@Links, "<A HREF=\"$ENV{'SCRIPT_NAME'}/GetSearchResults?Database=isis&FieldName1=keyword&FieldContent1=%22$SearchTerm%22\"> $Keyword </A>");
            }
            $DocumentFinal .= join(", ", @Links);
        }
        elsif ( $FieldTag eq "W" ) {
            $DocumentFinal .= "<A HREF=\"$FieldData\" TARGET=\"BlankWindow\"> $FieldData </A>";
        }
        elsif ( $FieldTag eq "Y" ) {
            $DocumentFinal .= "<PRE>$FieldData</PRE>";
        }
        else {
            $DocumentFinal .= "$FieldData";
        }

        $DocumentFinal .= "</TD></TR> \n";
    }

    # List any external links we may have
    if ( defined($Document{'G'}) ) {

        # DocumentParser() joins repeated fields with a bare ";", so the
        # whitespace after the separator must be optional here.
        my @Isbns;
        foreach my $OrderInfo ( split(/;\s*/, $Document{'G'}) ) {
            if ( $OrderInfo =~ /ISBN\s*(\S*)/ ) {
                (my $Isbn = $1) =~ s/-//g;
                push(@Isbns, $Isbn) if ( $Isbn ne "" );
            }
        }

        # Only emit the section when there is something to link to
        if ( @Isbns ) {

            $DocumentFinal .= "<TR><TD COLSPAN=2> <HR WIDTH=50%> </TD></TR>";
            $DocumentFinal .= "<TR><TD VALIGN=TOP ALIGN=LEFT> <B> External Links: </B> </TD><TD VALIGN=TOP ALIGN=LEFT><UL>";

            foreach my $Isbn ( @Isbns ) {
                $DocumentFinal .= "<LI><A HREF=\"http://www.amazon.com/exec/obidos/ASIN/$Isbn\" TARGET=\"BlankWindow\"> Get this book from Amazon.com </A></LI>";
                $DocumentFinal .= "<LI><A HREF=\"http://search.borders.com/fcgi-bin/db2www/search/search.d2w/Details?mediaType=Book&searchType=ISBNUPC&code=$Isbn\" TARGET=\"BlankWindow\"> Get this book from Borders </A></LI>";
                $DocumentFinal .= "<LI><A HREF=\"http://shop.barnesandnoble.com/bookSearch/isbnInquiry.asp?isbn=$Isbn\" TARGET=\"BlankWindow\"> Get this book from Barnes & Noble </A></LI>";
            }

            $DocumentFinal .= "</UL></TD></TR>";
        }
    }

    $DocumentFinal .= "</TABLE> \n";

    return ($DocumentFinal);

}
290 |
|
|
|
291 |
|
|
|
292 |
|
|
|
293 |
|
|
#-------------------------------------------------------------------------- |
294 |
|
|
# |
295 |
|
|
# Function: SummaryFilter() |
296 |
|
|
# |
297 |
|
|
# Purpose: This function is the summary filter |
298 |
|
|
# |
299 |
|
|
# Called by: external |
300 |
|
|
# |
301 |
|
|
# Parameters: $Database Database name |
302 |
|
|
# $DocumentID Document ID |
303 |
|
|
# $ItemName Item name |
304 |
|
|
# $MimeType Mime type |
305 |
|
|
# $DocumentRaw Raw document text |
306 |
|
|
# |
307 |
|
|
# Global Variables: none |
308 |
|
|
# |
309 |
|
|
# Returns: The filtered summary |
310 |
|
|
# |
311 |
|
|
sub SummaryFilter {

    my ($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw) = @_;

    # Produces a one-line summary of the document: the abstract ('X') if
    # present, otherwise the table of contents ('Y'), otherwise the
    # keywords ('K').  Returns undef when the raw text is undefined or
    # none of those fields exist.

    return (undef) unless defined($DocumentRaw);

    # Parse the document
    my %Fields = DocumentParser($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw);

    # Take the first available field, in order of preference
    my $Text;
    foreach my $Tag ( 'X', 'Y', 'K' ) {
        next unless defined($Fields{$Tag});
        $Text = $Fields{$Tag};
        last;
    }

    return ($Text) unless defined($Text);

    # Normalise line endings: drop the CR of CRLF pairs, or turn bare
    # CRs into newlines when no CRLF is present
    if ( $Text =~ /\r\n/ ) {
        $Text =~ tr/\r//d;
    }
    elsif ( $Text =~ /\r/ ) {
        $Text =~ tr/\r/\n/;
    }

    # Flatten to a single line and squeeze runs of whitespace
    $Text =~ tr/\n/ /;
    $Text =~ s/\s+/ /g;

    return (ucfirst($Text));

}
360 |
|
|
|
361 |
|
|
|
362 |
|
|
|
363 |
|
|
#-------------------------------------------------------------------------- |
364 |
|
|
# |
365 |
|
|
# Function: RelevanceFeedbackFilter() |
366 |
|
|
# |
367 |
|
|
# Purpose: This function is the relevance feedback filter |
368 |
|
|
# |
369 |
|
|
# Called by: external |
370 |
|
|
# |
371 |
|
|
# Parameters: $Database Database name |
372 |
|
|
# $DocumentID Document ID |
373 |
|
|
# $ItemName Item name |
374 |
|
|
# $MimeType Mime type |
375 |
|
|
# $DocumentRaw Raw document text |
376 |
|
|
# |
377 |
|
|
# Global Variables: none |
378 |
|
|
# |
379 |
|
|
# Returns: The filtered relevance feedback |
380 |
|
|
# |
381 |
|
|
sub RelevanceFeedbackFilter {

    my ($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw) = @_;

    # Picks the text used for relevance feedback: the first field that
    # exists among abstract ('X'), table of contents ('Y'), keywords
    # ('K'), title ('T') and author ('A'), in that order.  Returns undef
    # when the raw text is undefined or none of those fields exist.

    return (undef) unless defined($DocumentRaw);

    # Parse the document
    my %Fields = DocumentParser($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw);

    my $Feedback;
    foreach my $Tag ( 'X', 'Y', 'K', 'T', 'A' ) {
        next unless defined($Fields{$Tag});
        $Feedback = $Fields{$Tag};
        last;
    }

    return ($Feedback);

}
425 |
|
|
|
426 |
|
|
|
427 |
|
|
|
428 |
|
|
#-------------------------------------------------------------------------- |
429 |
|
|
|
430 |
|
|
1; |