/[local]/search/filters/default.pl
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /search/filters/default.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.4 - (show annotations)
Sun Jun 16 19:39:45 2002 UTC (21 years, 10 months ago) by dpavlin
Branch: MAIN
Changes since 1.3: +2 -2 lines
File MIME type: text/plain
fix here, fix there. removed unused code, more configuration
checks

1 #!/usr/bin/perl
2
3 #--------------------------------------------------------------------------
4 #
5 # Author: Dobrica Pavlinusic
6 # Creation Date: 2002-03-17
7
8
9 #--------------------------------------------------------------------------
10 #
11 # Description:
12 #
13 # This package implements the various filters for this database
14 #
15
16 #--------------------------------------------------------------------------
17 #
18 # Pragmatic modules
19 #
20
21 use strict;
22
23
24 #--------------------------------------------------------------------------
25 #
26 # Package definition
27 #
28
29 package default;
30
31
32 #--------------------------------------------------------------------------
33 #
34 # Application Constants
35 #
36
37 #--------------------------------------------------------------------------
38 #
39 # Function: DocumentParser()
40 #
41 # Purpose: This function serves as a document parser
42 #
43 # Called by: DocumentFilter(), SummaryFilter(), RelevanceFeedbackFilter()
44 #
45 # Parameters: $Database Database name
46 # $DocumentID Document ID
47 # $ItemName Item name
48 # $MimeType Mime type
49 # $DocumentRaw Raw document text
50 #
51 # Global Variables: %default::Separator
52 #
53 # Returns: A hash table of the document fields
54 #
sub DocumentParser {

    # Parse a raw document made of "%TAG data" lines into a hash keyed by tag.
    #
    # Parameters: $Database, $DocumentID, $ItemName, $MimeType are accepted
    #             for interface compatibility but are not used here;
    #             $DocumentRaw is the raw document text.
    #
    # Globals:    %default::Separator - optional per-tag string used to join
    #             repeated occurrences of the same tag (default ";").
    #
    # Returns:    the (tag => data) hash as a flat list; an empty list when
    #             $DocumentRaw is undefined.
    my ($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw) = @_;

    my %Document;
    my $FieldTag;

    # Nothing to parse.
    return (%Document) unless defined($DocumentRaw);

    foreach my $DocumentLine ( split(/\n/, $DocumentRaw) ) {

        my ($FieldData, $Separator);

        # A field starts with '%' at the beginning of the line (leading
        # whitespace tolerated).  The pattern is anchored so that a '%'
        # inside running text (e.g. "20%, whereas") is not mistaken for a
        # new tag, which would drop the text in front of it.
        if ( $DocumentLine =~ /^\s*%(\S+)\s*(.*)/ ) {
            $FieldTag  = $1;
            $FieldData = $2;
            $Separator = $default::Separator{$FieldTag} || ";";
        }
        else {
            # Continuation line: it belongs to the most recent tag.  Lines
            # that appear before any tag have no field to attach to, so
            # they are skipped instead of being filed under an undefined key.
            next unless defined($FieldTag);
            $FieldData = $DocumentLine;
            $Separator = "\n";
        }

        if ( defined($Document{$FieldTag}) ) {
            $Document{$FieldTag} .= $Separator . $FieldData;
        }
        else {
            $Document{$FieldTag} = $FieldData;
        }
    }

    return (%Document);

}
92
93
94 #--------------------------------------------------------------------------
95 #
96 # Function: DocumentFilter()
97 #
98 # Purpose: This function is the document filter
99 #
100 # Called by: external
101 #
102 # Parameters: $Database Database name
103 # $DocumentID Document ID
104 # $ItemName Item name
105 # $MimeType Mime type
106 # $DocumentRaw Raw document text
107 #
108 # Global Variables: %default::FieldNames, @default::FieldDisplayOrder
109 #
110 # Returns: The filtered document
111 #
sub DocumentFilter {

    # Render a raw document as an HTML table, one row per populated field.
    #
    # Parameters: $Database     database name (used in generated search links)
    #             $DocumentID, $ItemName, $MimeType   passed to DocumentParser()
    #             $DocumentRaw  raw document text
    #
    # Globals:    @default::FieldDisplayOrder  tags to show, in display order
    #             %default::FieldNames         tag => row label
    #             $ENV{'SCRIPT_NAME'}          prefix of generated search links
    #
    # Returns:    the HTML string, or undef when $DocumentRaw is undefined.
    my ($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw) = @_;

    if ( !defined($DocumentRaw) ) {
        return (undef);
    }

    my %Document = DocumentParser($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw);

    # Common prefix of every search link.
    my $ScriptName = defined($ENV{'SCRIPT_NAME'}) ? $ENV{'SCRIPT_NAME'} : "";
    my $SearchURL  = "$ScriptName/GetSearchResults?Database=" . _url_enc($Database);

    my $DocumentFinal = "<TABLE BORDER=0 CELLPADDING=0 CELLSPACING=2 WIDTH=100%> \n";

    foreach my $FieldTag ( @default::FieldDisplayOrder ) {

        # Work on the raw value: it is split first and each piece is
        # escaped afterwards.  Escaping first would introduce ';' (from
        # entities such as "&amp;") and break the split on ';'.
        my $Raw = $Document{$FieldTag};

        # Skip empty slots
        next unless defined($Raw) && $Raw ne "";

        # Row start
        $DocumentFinal .= "<TR><TD VALIGN=TOP ALIGN=RIGHT BGCOLOR=\"#EEEEEE\" WIDTH=\"130\"> $default::FieldNames{$FieldTag}: </TD><TD VALIGN=TOP ALIGN=LEFT> ";

        if ( $FieldTag eq "A" || $FieldTag eq "E" ) {
            # Authors / editors: one search link per ';'-separated name.
            my @Links;
            foreach my $Author ( split(/;/, $Raw) ) {
                # Search term: name without commas and without initials
                # ("J."), words joined by single spaces.
                (my $Name = $Author) =~ s/,//g;
                my $SearchTerm = join(" ", grep { $_ ne "" && $_ !~ /^\w+\.$/ } split(/ /, $Name));
                push @Links, "<A HREF=\"$SearchURL&FieldName1=author&FieldContent1=%22" . _url_enc($SearchTerm) . "%22\"> " . html_enc($Author) . " </A>";
            }
            $DocumentFinal .= join(", ", @Links);
        }
        elsif ( $FieldTag eq "K" ) {
            # Keywords: one search link per ','-separated keyword.
            my @Links;
            foreach my $Keyword ( split(/,/, $Raw) ) {
                push @Links, "<A HREF=\"$SearchURL&FieldName1=keyword&FieldContent1=%22" . _url_enc($Keyword) . "%22\"> " . html_enc($Keyword) . " </A>";
            }
            $DocumentFinal .= join(", ", @Links);
        }
        elsif ( $FieldTag eq "W" ) {
            # Web address: shown as a link to itself.
            # NOTE(review): the scheme is not checked, so a "javascript:"
            # value would still produce a live link - confirm data is trusted.
            my $Link = html_enc($Raw);
            $DocumentFinal .= "<A HREF=\"$Link\" TARGET=\"BlankWindow\"> $Link </A>";
        }
        elsif ( $FieldTag eq "Y" ) {
            $DocumentFinal .= "<PRE>" . html_enc($Raw) . "</PRE>";
        }
        else {
            $DocumentFinal .= html_enc($Raw);
        }

        $DocumentFinal .= "</TD></TR> \n";
    }

    # List any external links we may have
    if ( defined($Document{'G'}) ) {

        $DocumentFinal .= "<TR><TD COLSPAN=2> <HR WIDTH=50%> </TD></TR>";
        $DocumentFinal .= "<TR><TD VALIGN=TOP ALIGN=LEFT> <B> External Links: </B> </TD><TD VALIGN=TOP ALIGN=LEFT><UL>";

        # Accept both ";" (the parser's default separator) and "; ".
        foreach my $OrderNumber ( split(/;\s*/, $Document{'G'}) ) {

            next unless $OrderNumber =~ /ISBN\s*(\S*)/;

            # Keep only characters that can occur in an ISBN (digits and
            # the X check character); this drops the dashes and anything
            # that could break out of the HREF attribute.
            my $Isbn = $1;
            $Isbn =~ s/[^0-9Xx]//g;
            next if $Isbn eq "";

            $DocumentFinal .= "<LI><A HREF=\"http://www.amazon.com/exec/obidos/ASIN/$Isbn\" TARGET=\"BlankWindow\"> Get this book from Amazon.com </A></LI>";
            $DocumentFinal .= "<LI><A HREF=\"http://search.borders.com/fcgi-bin/db2www/search/search.d2w/Details?mediaType=Book&searchType=ISBNUPC&code=$Isbn\" TARGET=\"BlankWindow\"> Get this book from Borders </A></LI>";
            $DocumentFinal .= "<LI><A HREF=\"http://shop.barnesandnoble.com/bookSearch/isbnInquiry.asp?isbn=$Isbn\" TARGET=\"BlankWindow\"> Get this book from Barnes & Noble </A></LI>";
        }

        $DocumentFinal .= "</UL></TD></TR>";
    }

    $DocumentFinal .= "</TABLE> \n";

    return ($DocumentFinal);

}

# html_enc($text)
#
# Escape $text for use in HTML text or inside a double-quoted attribute:
# & < > " become &amp; &lt; &gt; &quot;.  A literal <br> tag in the input
# (any letter case) is kept as a line break and emitted as "<br>".
sub html_enc {
    my $tmp = $_[0];
    # '&' must be handled first so the entities produced below are not
    # escaped a second time.
    $tmp =~ s/&/&amp;/g;
    $tmp =~ s/</&lt;/g;
    $tmp =~ s/>/&gt;/g;
    $tmp =~ s/"/&quot;/g;
    # fix some tags
    $tmp =~ s/&lt;br&gt;/<br>/gi;
    return $tmp;
}

# _url_enc($text)
#
# Encode $text as a query-string value: spaces become '+', every character
# other than ASCII letters, digits and "_ . ~ -" becomes %XX.  An undefined
# argument yields the empty string.
# NOTE(review): assumes byte strings; characters above 0xFF would need to
# be UTF-8 encoded first - confirm against the data actually indexed.
sub _url_enc {
    my $text = defined($_[0]) ? $_[0] : "";
    $text =~ s/([^A-Za-z0-9_.~ -])/sprintf("%%%02X", ord($1))/ge;
    $text =~ tr/ /+/;
    return $text;
}
235
236
237
238 #--------------------------------------------------------------------------
239 #
240 # Function: SummaryFilter()
241 #
242 # Purpose: This function is the summary filter
243 #
244 # Called by: external
245 #
246 # Parameters: $Database Database name
247 # $DocumentID Document ID
248 # $ItemName Item name
249 # $MimeType Mime type
250 # $DocumentRaw Raw document text
251 #
252 # Global Variables: none
253 #
254 # Returns: The filtered summary
255 #
sub SummaryFilter {

    # Build a one-line summary of a document.
    #
    # Takes the same five arguments as the other filters; only $DocumentRaw
    # is inspected here, the rest are handed on to DocumentParser().
    # The summary is the first defined field among X, Y and K, in that
    # order, with line breaks and whitespace runs collapsed to single
    # spaces and the first character upper-cased.
    # Returns undef when $DocumentRaw is undefined or none of the three
    # fields is present.
    my ($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw) = @_;

    return (undef) unless defined($DocumentRaw);

    my %Fields = DocumentParser($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw);

    # Pick the source field by priority.
    my $Text;
    foreach my $Tag ( 'X', 'Y', 'K' ) {
        if ( defined($Fields{$Tag}) ) {
            $Text = $Fields{$Tag};
            last;
        }
    }

    # Nothing to summarise.
    return ($Text) unless defined($Text);

    # Normalise line endings: with CRLF present every CR is dropped,
    # otherwise a bare CR is treated as a line break.
    if ( $Text =~ /\r\n/ ) {
        $Text =~ tr/\r//d;
    }
    elsif ( $Text =~ /\r/ ) {
        $Text =~ tr/\r/\n/;
    }

    # Flatten to a single line and squeeze whitespace.
    $Text =~ tr/\n/ /;
    $Text =~ s/\s+/ /g;

    return (ucfirst($Text));

}
305
306
307
308 #--------------------------------------------------------------------------
309 #
310 # Function: RelevanceFeedbackFilter()
311 #
312 # Purpose: This function is the relevance feedback filter
313 #
314 # Called by: external
315 #
316 # Parameters: $Database Database name
317 # $DocumentID Document ID
318 # $ItemName Item name
319 # $MimeType Mime type
320 # $DocumentRaw Raw document text
321 #
322 # Global Variables: none
323 #
324 # Returns: The filtered relevance feedback
325 #
sub RelevanceFeedbackFilter {

    # Choose the text used for relevance feedback.
    #
    # Takes the same five arguments as the other filters; only $DocumentRaw
    # is inspected here, the rest are handed on to DocumentParser().
    # Returns the first defined field among X, Y, K, T and A (in that
    # order), unmodified, or undef when $DocumentRaw is undefined or none
    # of those fields is present.
    my ($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw) = @_;

    return (undef) unless defined($DocumentRaw);

    my %Fields = DocumentParser($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw);

    # Fields in decreasing order of preference.
    foreach my $Tag ( 'X', 'Y', 'K', 'T', 'A' ) {
        return ($Fields{$Tag}) if defined($Fields{$Tag});
    }

    return (undef);

}
370
371
372
373 #--------------------------------------------------------------------------
374
375 1;

  ViewVC Help
Powered by ViewVC 1.1.26