#!/usr/bin/perl

#--------------------------------------------------------------------------
#
#	Author:		Dobrica Pavlinusic
#	Creation Date:	2002-03-17


#--------------------------------------------------------------------------
#
#	Description:
#
#	This package implements the various filters for this database
#

#--------------------------------------------------------------------------
#
#	Pragmatic modules
#

use strict;


#--------------------------------------------------------------------------
#
#	Package definition
#

package default;


#--------------------------------------------------------------------------
#
#	Application Constants
#

#--------------------------------------------------------------------------
#
#	Function:	DocumentParser()
#
#	Purpose:	Splits the raw document text into lines and collects
#			the field data found on them into a hash keyed by
#			field tag.
#
#			A line containing "%TAG data" supplies data for the
#			field TAG.  When a tag occurs more than once, the
#			values are joined with $default::Separator{TAG} if
#			that entry is set to a true value, and with ";"
#			otherwise.  Any other line is a continuation: it is
#			appended to the most recent field after a newline.
#			Lines that precede the first tag belong to no field
#			and are ignored.
#
#	Called by:	DocumentFilter(), SummaryFilter(),
#			RelevanceFeedbackFilter()
#
#	Parameters:	$Database	Database name (not used here)
#			$DocumentID	Document ID (not used here)
#			$ItemName	Item name (not used here)
#			$MimeType	Mime type (not used here)
#			$DocumentRaw	Raw document text
#
#	Global Variables:	%default::Separator
#
#	Returns:	A hash table of the document fields
#
sub DocumentParser {

    my ($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw) = @_;

    my %Document;
    my $FieldTag;       # tag of the field currently being filled

    foreach my $DocumentLine ( split(/\n/, $DocumentRaw) ) {

        my ($FieldData, $Separator);

        # Check if this line carries a field tag.
        # NOTE(review): the pattern is not anchored, so a "%" followed by
        # non-blank text anywhere in the line is taken as a tag - confirm
        # whether tags always start the line before tightening it.
        if ( $DocumentLine =~ /%(\S+)\s*(.*)/ ) {
            $FieldTag  = $1;
            $FieldData = $2;
            $Separator = $default::Separator{$FieldTag} || ";";
        }
        else {
            # No field has been opened yet, so there is nothing this
            # line could be a continuation of.
            next unless defined($FieldTag);

            $FieldData = $DocumentLine;
            $Separator = "\n";
        }

        if ( defined($Document{$FieldTag}) ) {
            $Document{$FieldTag} .= $Separator . $FieldData;
        }
        else {
            $Document{$FieldTag} = $FieldData;
        }
    }

    return (%Document);

}


#--------------------------------------------------------------------------
#
#	Function:	DocumentFilter()
#
#	Purpose:	This function is the document filter.  It parses the
#			raw document and renders it as an HTML table with one
#			row per non-empty field, in the order given by
#			@default::FieldDisplayOrder.  Field "856" is rendered
#			as a link; all other fields are HTML-escaped text.
#			If field "G" is present, a block of external
#			bookshop links is added for every ISBN found in it.
#
#	Called by:	external
#
#	Parameters:	$Database	Database name
#			$DocumentID	Document ID
#			$ItemName	Item name
#			$MimeType	Mime type
#			$DocumentRaw	Raw document text
#
#	Global Variables:	%default::FieldNames, @default::FieldDisplayOrder,
#				$default::max_display_url_len
#
#	Returns:	The filtered document, or undef if $DocumentRaw is
#			undefined
#
sub DocumentFilter {

    my ($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw) = @_;

    if ( !defined($DocumentRaw) ) {
        return (undef);
    }

    my %Document = DocumentParser($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw);

    my $DocumentFinal = "<TABLE BORDER=0 CELLPADDING=0 CELLSPACING=2 WIDTH=100%> \n";
#   $DocumentFinal .= "<TR><TD COLSPAN=2 VALIGN=TOP ALIGN=CENTER><H3> $Document{'T'} </H3></TD></TR> \n";

    # Loop over each field in the display order
    foreach my $FieldTag ( @default::FieldDisplayOrder ) {

        # Skip empty slots
        next unless defined($Document{$FieldTag}) && $Document{$FieldTag} ne "";

        my $FieldName = $default::FieldNames{$FieldTag};
        $FieldName = "" unless defined($FieldName);

        # Print the row start
        $DocumentFinal .= "<TR><TD VALIGN=TOP ALIGN=RIGHT BGCOLOR=\"#EEEEEE\" WIDTH=\"130\"> "
                        . $FieldName
                        . ": </TD><TD VALIGN=TOP ALIGN=LEFT> ";

        # format URI
        if ( $FieldTag eq "856" ) {

            # Shorten the visible link text only when a positive limit is
            # configured; an unset limit would otherwise reduce every
            # label to "...".
            my $MaxLength = $default::max_display_url_len;
            my $alt = $Document{$FieldTag};
            if ( defined($MaxLength) && $MaxLength > 0 && length($alt) > $MaxLength ) {
                $alt = substr($alt, 0, $MaxLength) . "...";
            }

            my $url = $Document{$FieldTag};
            $url =~ s/"/%22/g;      # keep " from ending the HREF attribute

            # Escape the label after truncation so that no entity is cut
            # in half.
            $DocumentFinal .= "<A HREF=\"$url\" TARGET=\"BlankWindow\">" . html_enc($alt) . "</A>";
        }
        else {
            $DocumentFinal .= html_enc($Document{$FieldTag});
        }

        $DocumentFinal .= "</TD></TR> \n";
    }

    # List any external links we may have
    if ( defined($Document{'G'}) ) {

        $DocumentFinal .= "<TR><TD COLSPAN=2> <HR WIDTH=50%> </TD></TR>";
        $DocumentFinal .= "<TR><TD VALIGN=TOP ALIGN=LEFT> <B> External Links: </B> </TD><TD VALIGN=TOP ALIGN=LEFT><UL>";

        # DocumentParser() joins repeated fields with ";" unless another
        # separator is configured, so accept ";" with or without blanks.
        foreach my $OrderNumber ( split(/;\s*/, $Document{'G'}) ) {

            next unless $OrderNumber =~ /ISBN\s*(\S*)/;

            # Keep only letters and digits: this drops the hyphens and
            # anything that could break out of the HREF attribute.
            my $Isbn = $1;
            $Isbn =~ tr/0-9A-Za-z//cd;
            next if $Isbn eq "";

            $DocumentFinal .= "<LI><A HREF=\"http://www.amazon.com/exec/obidos/ASIN/$Isbn\" TARGET=\"BlankWindow\"> Get this book from Amazon.com </A></LI>";
            $DocumentFinal .= "<LI><A HREF=\"http://search.borders.com/fcgi-bin/db2www/search/search.d2w/Details?mediaType=Book&amp;searchType=ISBNUPC&amp;code=$Isbn\" TARGET=\"BlankWindow\"> Get this book from Borders </A></LI>";
            $DocumentFinal .= "<LI><A HREF=\"http://shop.barnesandnoble.com/bookSearch/isbnInquiry.asp?isbn=$Isbn\" TARGET=\"BlankWindow\"> Get this book from Barnes &amp; Noble </A></LI>";
        }

        $DocumentFinal .= "</UL></TD></TR>";
    }

    $DocumentFinal .= "</TABLE> \n";

    return ($DocumentFinal);

}


#--------------------------------------------------------------------------
#
#	Function:	html_enc()
#
#	Purpose:	Escapes &, < and > in a string so that it can be
#			placed in HTML text, then turns escaped <br> tags
#			(in any letter case) back into real line breaks.
#
#	Called by:	DocumentFilter()
#
#	Parameters:	$_[0]		Text to escape
#
#	Returns:	The escaped copy; the argument is not modified
#
sub html_enc {

    my $tmp = $_[0];

    # "&" must be handled first, otherwise the entities produced below
    # would be escaped a second time.
    $tmp =~ s/&/&amp;/g;
    $tmp =~ s/</&lt;/g;
    $tmp =~ s/>/&gt;/g;

    # fix some tags
    $tmp =~ s/&lt;br&gt;/<br>/gi;

    return $tmp;

}



#--------------------------------------------------------------------------
#
#	Function:	SummaryFilter()
#
#	Purpose:	This function is the summary filter.  It parses the
#			raw document and returns the first field that is
#			present out of the abstract ("X"), the table of
#			contents ("Y") and the keywords ("K"), flattened to
#			a single line with its first character upper-cased.
#
#	Called by:	external
#
#	Parameters:	$Database	Database name
#			$DocumentID	Document ID
#			$ItemName	Item name
#			$MimeType	Mime type
#			$DocumentRaw	Raw document text
#
#	Global Variables:	none
#
#	Returns:	The filtered summary, or undef if $DocumentRaw is
#			undefined or none of the three fields is present
#
sub SummaryFilter {

    my ($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw) = @_;

    if ( !defined($DocumentRaw) ) {
        return (undef);
    }

    # Parse the document
    my %Document = DocumentParser($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw);

    # Select the abstract, else the table of contents, else the keywords
    my $Summary;
    foreach my $FieldTag ( 'X', 'Y', 'K' ) {
        if ( defined($Document{$FieldTag}) ) {
            $Summary = $Document{$FieldTag};
            last;
        }
    }

    # Clean the summary text
    if ( defined($Summary) ) {

        # Text with CR/LF line ends loses every carriage return; in any
        # other text a carriage return counts as ordinary white space.
        if ( index($Summary, "\r\n") >= 0 ) {
            $Summary =~ s/\r//g;
        }

        # Collapse each run of white space, line breaks included, into
        # one blank.
        $Summary =~ s/\s+/ /g;

        # Drop the blank left at either end so that ucfirst() acts on the
        # first visible character (the field may start on the line after
        # its tag).
        $Summary =~ s/^ //;
        $Summary =~ s/ $//;

        $Summary = ucfirst($Summary);
    }

    return ($Summary);

}



#--------------------------------------------------------------------------
#
#	Function:	RelevanceFeedbackFilter()
#
#	Purpose:	This function is the relevance feedback filter.  It
#			parses the raw document and returns, unchanged, the
#			first field that is present out of the abstract
#			("X"), the table of contents ("Y"), the keywords
#			("K"), the title ("T") and the author ("A").
#
#	Called by:	external
#
#	Parameters:	$Database	Database name
#			$DocumentID	Document ID
#			$ItemName	Item name
#			$MimeType	Mime type
#			$DocumentRaw	Raw document text
#
#	Global Variables:	none
#
#	Returns:	The filtered relevance feedback, or undef if
#			$DocumentRaw is undefined or none of the five fields
#			is present
#
sub RelevanceFeedbackFilter {

    my ($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw) = @_;

    if ( !defined($DocumentRaw) ) {
        return (undef);
    }

    # Parse the document
    my %Document = DocumentParser($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw);

    # Field tags in order of preference: abstract, table of contents,
    # keywords, title, author.  The first one present wins.
    my $RelevanceFeedback;
    foreach my $FieldTag ( 'X', 'Y', 'K', 'T', 'A' ) {
        if ( defined($Document{$FieldTag}) ) {
            $RelevanceFeedback = $Document{$FieldTag};
            last;
        }
    }

    return ($RelevanceFeedback);

}



#--------------------------------------------------------------------------

# A file loaded with require/use must end with a true value.
1;