/[local]/search/filters/isis.pl
This is a repository of my old source code, which is no longer updated. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /search/filters/isis.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1.1.1 - (show annotations) (vendor branch)
Mon Mar 18 20:04:37 2002 UTC (17 years, 6 months ago) by dpavlin
Branch: DbP, MAIN
CVS Tags: r0, HEAD
Changes since 1.1: +0 -0 lines
File MIME type: text/plain
initial import

#!/usr/bin/perl

#--------------------------------------------------------------------------
#
# Author: Dobrica Pavlinusic
# Creation Date: 2002-03-17


#--------------------------------------------------------------------------
#
# Description:
#
# This package implements the various filters for this database
#

#--------------------------------------------------------------------------
#
# Pragmatic modules
#

use strict;


#--------------------------------------------------------------------------
#
# Package definition
#

package isis;


#--------------------------------------------------------------------------
#
# Application Constants
#

# Field Names
#
# Maps the one-character field tag (the character that follows '%' in a
# raw record line) to the label DocumentFilter() prints in front of the
# field data.
%isis::FieldNames = (
    'A' => 'Author(s)',
    'B' => 'Book',
    'C' => 'City',
    'D' => 'Date',
    'E' => 'Editor',
    'F' => 'Footnote',
    'G' => 'Order Info',
    'H' => 'Commentary',
    'I' => 'Publisher',
    'J' => 'Journal',
    'K' => 'Keyword',
    'L' => 'Label',
    'M' => 'Bell',
    'N' => 'Issue',
    'O' => 'Other',
    'P' => 'Page',
    'Q' => 'Corporate',
    'R' => 'Report',
    'S' => 'Series',
    'T' => 'Title',
    'U' => 'Annotation',
    'V' => 'Volume',
    'W' => 'URL',
    'X' => 'Abstract',
    'Y' => 'Table of Contents',
    'Z' => 'Reference',
    '$' => 'Price',
    '*' => 'Copyright',
    '^' => 'Parts',
);



# Field Display Order
#
# Order in which DocumentFilter() emits the fields of a record. Tags that
# are absent from a record are skipped there.
#
# NOTE(review): '.' is listed here but has no entry in %isis::FieldNames,
# so a record carrying that tag is shown with an empty label - confirm
# whether a label is missing or the tag should be dropped from this list.
@isis::FieldDisplayOrder = (
    'M',
    'L',
    'T',
    'S',
    'E',
    'Q',
    'A',
    'J',
    'B',
    'D',
    'V',
    'N',
    'P',
    'C',
    'I',
    'G',
    '$',
    'K',
    '*',
    'W',
    'O',
    'Y',
    'X',
    'R',
    '.',
    'F',
    'H',
    'U',
    'Z',
    '^'
);
#--------------------------------------------------------------------------
#
# Function: DocumentParser()
#
# Purpose: This function serves as a document parser. It splits the
#          raw record into lines and collects them into fields:
#
#          - a line starting with '%' followed by a tag character
#            opens (or re-opens) the field with that tag; the rest of
#            the line, minus leading white space, is its data
#          - any other line is a continuation of the field opened
#            last and is appended to it after a newline
#          - a tag that occurs more than once has its values joined
#            with ';'
#
#          The tag pattern is anchored to the start of the line so
#          that a '%' inside running text (for example "20% per year"
#          in an abstract) is not mistaken for a new field. Lines that
#          appear before the first tag belong to no field and are
#          dropped.
#
# Called by: DocumentFilter(), SummaryFilter(), RelevanceFeedbackFilter()
#
# Parameters: $Database Database name (unused here)
#             $DocumentID Document ID (unused here)
#             $ItemName Item name (unused here)
#             $MimeType Mime type (unused here)
#             $DocumentRaw Raw document text
#
# Global Variables: none
#
# Returns: A hash table of the document fields, keyed by tag
#          character; empty if $DocumentRaw is undefined
#
sub DocumentParser {

    my ($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw) = @_;

    my (%Document, $FieldTag);

    # Nothing to parse
    if ( !defined($DocumentRaw) ) {
        return (%Document);
    }

    # Extract the document
    foreach my $DocumentLine ( split(/\n/, $DocumentRaw) ) {

        my ($FieldData, $Separator);

        # check if this is a new field?
        if ( $DocumentLine =~ /^%(.)\s*(.*)/ ) {
            $FieldTag = $1;
            $FieldData = $2;
            $Separator = ";";
        }
        else {
            # A continuation line needs a field to continue
            next if ( !defined($FieldTag) );

            $FieldData = $DocumentLine;
            $Separator = "\n";
        }

        if ( defined($Document{$FieldTag}) ) {
            $Document{$FieldTag} .= $Separator . $FieldData;
        }
        else {
            $Document{$FieldTag} = $FieldData;
        }
    }

    return (%Document);

}
#--------------------------------------------------------------------------
#
# Function: _EscapeHtml()
#
# Purpose: Make a piece of record text safe to place in HTML text or
#          inside a double-quoted attribute value
#
# Called by: DocumentFilter(), _SearchLink()
#
# Parameters: $Text Text to escape
#
# Returns: The text with & < > " replaced by entities; the empty
#          string if $Text is undefined
#
sub _EscapeHtml {

    my ($Text) = @_;

    if ( !defined($Text) ) {
        return ("");
    }

    # The ampersand has to go first or it would re-escape the others
    $Text =~ s/&/&amp;/g;
    $Text =~ s/</&lt;/g;
    $Text =~ s/>/&gt;/g;
    $Text =~ s/"/&quot;/g;

    return ($Text);

}


#--------------------------------------------------------------------------
#
# Function: _EscapeQuery()
#
# Purpose: Encode a search term for use as a query string value
#
# Called by: _SearchLink()
#
# Parameters: $Text Text to encode
#
# Returns: The text with blanks turned into '+' and every character
#          outside A-Z a-z 0-9 _ . ~ - percent-encoded; the empty
#          string if $Text is undefined
#
sub _EscapeQuery {

    my ($Text) = @_;

    if ( !defined($Text) ) {
        return ("");
    }

    # Characters above 0xFF have no single-byte form, so encode such
    # strings as UTF-8 before percent-encoding the bytes.
    # NOTE(review): byte strings are passed through in whatever 8-bit
    # charset the records use - confirm that this is what the search
    # script expects.
    if ( $Text =~ /[^\x00-\xFF]/ ) {
        utf8::encode($Text);
    }

    $Text =~ s/([^A-Za-z0-9_.~ -])/sprintf("%%%02X", ord($1))/ge;
    $Text =~ s/ /+/g;

    return ($Text);

}


#--------------------------------------------------------------------------
#
# Function: _SearchLink()
#
# Purpose: Build the anchor that runs a quoted search for one author
#          or keyword
#
# Called by: DocumentFilter()
#
# Parameters: $FieldName Search field name ("author" or "keyword")
#             $SearchTerm Term to search for (unencoded)
#             $Label Text shown for the link (unescaped)
#
# Global Variables: $ENV{'SCRIPT_NAME'}
#
# Returns: The HTML for the link
#
sub _SearchLink {

    my ($FieldName, $SearchTerm, $Label) = @_;

    my $ScriptName = defined($ENV{'SCRIPT_NAME'}) ? $ENV{'SCRIPT_NAME'} : "";

    return ("<A HREF=\"$ScriptName/GetSearchResults?Database=isis&FieldName1=$FieldName&FieldContent1=%22"
        . _EscapeQuery($SearchTerm) . "%22\"> " . _EscapeHtml($Label) . " </A>");

}


#--------------------------------------------------------------------------
#
# Function: DocumentFilter()
#
# Purpose: This function is the document filter. It renders a record
#          as an HTML table with one row per non-empty field, in the
#          order given by @isis::FieldDisplayOrder:
#
#          - authors and editors (A, E) become author search links;
#            commas and words ending in a period (initials) are left
#            out of the search term
#          - keywords (K) become keyword search links
#          - the URL (W) becomes a link opened in another window
#          - the table of contents (Y) is shown preformatted
#          - everything else is shown as plain text
#
#          If the order info (G) holds at least one ISBN, a row of
#          external bookshop links is appended for each of them.
#
#          All record text is HTML-escaped and all search terms are
#          percent-encoded before they are written out. Multi-valued
#          fields are split on the ';' that DocumentParser() puts
#          between repeated tags (keywords also on ',').
#
# Called by: external
#
# Parameters: $Database Database name
#             $DocumentID Document ID
#             $ItemName Item name
#             $MimeType Mime type
#             $DocumentRaw Raw document text
#
# Global Variables: %isis::FieldNames, @isis::FieldDisplayOrder,
#                   $ENV{'SCRIPT_NAME'}
#
# Returns: The filtered document, or undef if $DocumentRaw is
#          undefined
#
sub DocumentFilter {

    my ($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw) = @_;

    if ( !defined($DocumentRaw) ) {
        return (undef);
    }


    my %Document = DocumentParser($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw);

    my $DocumentFinal = "<TABLE BORDER=0 CELLPADDING=0 CELLSPACING=2 WIDTH=100%> \n";


    # Loop over each field in display order
    foreach my $FieldTag ( @isis::FieldDisplayOrder ) {

        my $FieldData = $Document{$FieldTag};

        # Skip empty slots
        if ( !defined($FieldData) || $FieldData eq "" ) {
            next;
        }

        # A tag without a registered name gets an empty label
        my $FieldName = defined($isis::FieldNames{$FieldTag}) ? $isis::FieldNames{$FieldTag} : "";


        # Print the row start
        $DocumentFinal .= "<TR><TD VALIGN=TOP ALIGN=RIGHT BGCOLOR=\"#EEEEEE\" WIDTH=1%> "
            . _EscapeHtml($FieldName) . ": </TD><TD VALIGN=TOP ALIGN=LEFT> ";


        # Print the field data, marking up if needed
        if ( $FieldTag eq "A" || $FieldTag eq "E" ) {
            my @Links;
            foreach my $Author ( split(/;/, $FieldData) ) {
                my $SearchTerm = $Author;
                $SearchTerm =~ s/,//g;
                # Search on the name parts only, without initials
                my @AuthorNames = grep { !/^\w+\.$/ } split(' ', $SearchTerm);
                push(@Links, _SearchLink("author", join(" ", @AuthorNames), $Author));
            }
            $DocumentFinal .= join(", ", @Links);
        }
        elsif ( $FieldTag eq "K" ) {
            my @Links;
            foreach my $RawKeyword ( split(/[;,]/, $FieldData) ) {
                my $Keyword = $RawKeyword;
                $Keyword =~ s/^\s+//;
                $Keyword =~ s/\s+$//;
                next if ( $Keyword eq "" );
                push(@Links, _SearchLink("keyword", $Keyword, $Keyword));
            }
            $DocumentFinal .= join(", ", @Links);
        }
        elsif ( $FieldTag eq "W" ) {
            my $Url = _EscapeHtml($FieldData);
            $DocumentFinal .= "<A HREF=\"$Url\" TARGET=\"BlankWindow\"> $Url </A>";
        }
        elsif ( $FieldTag eq "Y" ) {
            $DocumentFinal .= "<PRE>" . _EscapeHtml($FieldData) . "</PRE>";
        }
        else {
            $DocumentFinal .= _EscapeHtml($FieldData);
        }

        $DocumentFinal .= "</TD></TR> \n";

    }


    # Collect the ISBNs found in the order info. Only digits, 'X' and
    # hyphens are accepted, so the value is safe to put into a URL.
    my @Isbns;
    if ( defined($Document{'G'}) ) {
        foreach my $OrderNumber ( split(/;\s*/, $Document{'G'}) ) {
            if ( $OrderNumber =~ /ISBN[:\s]*([0-9Xx][0-9Xx-]*)/ ) {
                my $Isbn = $1;
                $Isbn =~ s/-//g;
                push(@Isbns, $Isbn);
            }
        }
    }


    # List any external links we may have
    if ( @Isbns ) {

        $DocumentFinal .= "<TR><TD COLSPAN=2> <HR WIDTH=50%> </TD></TR>";
        $DocumentFinal .= "<TR><TD VALIGN=TOP ALIGN=LEFT> <B> External Links: </B> </TD><TD VALIGN=TOP ALIGN=LEFT><UL>";

        foreach my $Isbn ( @Isbns ) {
            $DocumentFinal .= "<LI><A HREF=\"http://www.amazon.com/exec/obidos/ASIN/$Isbn\" TARGET=\"BlankWindow\"> Get this book from Amazon.com </A></LI>";
            $DocumentFinal .= "<LI><A HREF=\"http://search.borders.com/fcgi-bin/db2www/search/search.d2w/Details?mediaType=Book&searchType=ISBNUPC&code=$Isbn\" TARGET=\"BlankWindow\"> Get this book from Borders </A></LI>";
            $DocumentFinal .= "<LI><A HREF=\"http://shop.barnesandnoble.com/bookSearch/isbnInquiry.asp?isbn=$Isbn\" TARGET=\"BlankWindow\"> Get this book from Barnes & Noble </A></LI>";
        }

        $DocumentFinal .= "</UL></TD></TR>";
    }

    $DocumentFinal .= "</TABLE> \n";

    return ($DocumentFinal);

}
#--------------------------------------------------------------------------
#
# Function: SummaryFilter()
#
# Purpose: This function is the summary filter. The summary is the
#          first of abstract (X), table of contents (Y) and keywords
#          (K) that holds any text. A field that is present but blank
#          is treated as missing, in line with DocumentFilter(). The
#          chosen text is put on one line: every run of white space
#          (including CR and LF) becomes a single blank, leading and
#          trailing blanks are removed and the first letter is
#          capitalised.
#
# Called by: external
#
# Parameters: $Database Database name
#             $DocumentID Document ID
#             $ItemName Item name
#             $MimeType Mime type
#             $DocumentRaw Raw document text
#
# Global Variables: none
#
# Returns: The filtered summary, or undef if $DocumentRaw is
#          undefined or none of the fields holds text
#
sub SummaryFilter {

    my ($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw) = @_;

    if ( !defined($DocumentRaw) ) {
        return (undef);
    }


    # Parse the document
    my %Document = DocumentParser($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw);


    # Select the abstract, else the table of contents, else the keywords
    my $Summary;
    foreach my $FieldTag ( 'X', 'Y', 'K' ) {
        if ( defined($Document{$FieldTag}) && $Document{$FieldTag} =~ /\S/ ) {
            $Summary = $Document{$FieldTag};
            last;
        }
    }


    # Clean the summary text
    if ( defined($Summary) ) {
        $Summary =~ s/\s+/ /g;
        # Trim, so that ucfirst() sees a letter and not a blank when the
        # text started on the line after its tag
        $Summary =~ s/^ //;
        $Summary =~ s/ $//;
        $Summary = ucfirst($Summary);
    }

    return ($Summary);

}
#--------------------------------------------------------------------------
#
# Function: RelevanceFeedbackFilter()
#
# Purpose: This function is the relevance feedback filter. The
#          feedback text is the first of abstract (X), table of
#          contents (Y), keywords (K), title (T) and author (A) that
#          holds any text. A field that is present but blank is
#          treated as missing, in line with DocumentFilter(). The
#          text is returned as parsed, without further clean-up.
#
# Called by: external
#
# Parameters: $Database Database name
#             $DocumentID Document ID
#             $ItemName Item name
#             $MimeType Mime type
#             $DocumentRaw Raw document text
#
# Global Variables: none
#
# Returns: The filtered relevance feedback, or undef if $DocumentRaw
#          is undefined or none of the fields holds text
#
sub RelevanceFeedbackFilter {

    my ($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw) = @_;

    if ( !defined($DocumentRaw) ) {
        return (undef);
    }


    # Parse the document
    my %Document = DocumentParser($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw);


    # Select the first field, in order of preference, that has content
    my $RelevanceFeedback;
    foreach my $FieldTag ( 'X', 'Y', 'K', 'T', 'A' ) {
        if ( defined($Document{$FieldTag}) && $Document{$FieldTag} =~ /\S/ ) {
            $RelevanceFeedback = $Document{$FieldTag};
            last;
        }
    }

    return ($RelevanceFeedback);

}



#--------------------------------------------------------------------------

1;

  ViewVC Help
Powered by ViewVC 1.1.26