/[local]/search/filters/isis.pl
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /search/filters/isis.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1.1.1 - (hide annotations) (vendor branch)
Mon Mar 18 20:04:37 2002 UTC (17 years, 4 months ago) by dpavlin
Branch: DbP, MAIN
CVS Tags: r0, HEAD
Changes since 1.1: +0 -0 lines
File MIME type: text/plain
initial import

1 dpavlin 1.1 #!/usr/bin/perl
2    
3     #--------------------------------------------------------------------------
4     #
5     # Author: Dobrica Pavlinusic
6     # Creation Date: 2002-03-17
7    
8    
9     #--------------------------------------------------------------------------
10     #
11     # Description:
12     #
13     # This package implements the various filters for this database
14     #
15    
16     #--------------------------------------------------------------------------
17     #
18     # Pragmatic modules
19     #
20    
21     use strict;
22    
23    
24     #--------------------------------------------------------------------------
25     #
26     # Package definition
27     #
28    
29     package isis;
30    
31    
32     #--------------------------------------------------------------------------
33     #
34     # Application Constants
35     #
36    
37     # Field Names
38     %isis::FieldNames = (
39     'A', 'Author(s)',
40     'B', 'Book',
41     'C', 'City',
42     'D', 'Date',
43     'E', 'Editor',
44     'F', 'Footnote',
45     'G', 'Order Info',
46     'H', 'Commentary',
47     'I', 'Publisher',
48     'J', 'Journal',
49     'K', 'Keyword',
50     'L', 'Label',
51     'M', 'Bell',
52     'N', 'Issue',
53     'O', 'Other',
54     'P', 'Page',
55     'Q', 'Corporate',
56     'R', 'Report',
57     'S', 'Series',
58     'T', 'Title',
59     'U', 'Annotation',
60     'V', 'Volume',
61     'W', 'URL',
62     'X', 'Abstract',
63     'Y', 'Table of Contents',
64     'Z', 'Reference',
65     '$', 'Price',
66     '*', 'Copyright',
67     '^', 'Parts',
68     );
69    
70    
71    
72     # Field Display Order
73     @isis::FieldDisplayOrder = (
74     'M',
75     'L',
76     'T',
77     'S',
78     'E',
79     'Q',
80     'A',
81     'J',
82     'B',
83     'D',
84     'V',
85     'N',
86     'P',
87     'C',
88     'I',
89     'G',
90     '$',
91     'K',
92     '*',
93     'W',
94     'O',
95     'Y',
96     'X',
97     'R',
98     '.',
99     'F',
100     'H',
101     'U',
102     'Z',
103     '^'
104     );
105    
106    
107     #--------------------------------------------------------------------------
108     #
109     # Function: DocumentParser()
110     #
111     # Purpose: This function serves as a document parser
112     #
113     # Called by: DocumentFilter(), SummaryFilter()
114     #
115     # Parameters: $Database Database name
116     # $DocumentID Document ID
117     # $ItemName Item name
118     # $MimeType Mime type
119     # $DocumentRaw Raw document text
120     #
121     # Global Variables:
122     #
123     # Returns: A hash table of the document fields
124     #
125     sub DocumentParser {
126    
127     my ($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw) = @_;
128    
129     my (%Document, @DocumentLines, $DocumentLine);
130     my ($FieldTag, $FieldData, $Separator);
131    
132     @DocumentLines = split(/\n/, $DocumentRaw);
133    
134     # Extract the document
135     foreach $DocumentLine ( @DocumentLines ) {
136    
137     # check if this is a new field?
138     if ( $DocumentLine =~ /%(.)\s*(.*)/ ) {
139     $FieldTag = $1;
140     $FieldData = $2;
141     $Separator = ";";
142     }
143     else {
144     $FieldData = $DocumentLine;
145     $Separator = "\n";
146     }
147    
148     if ( defined($Document{$FieldTag}) ) {
149     $Document{$FieldTag} .= $Separator . $FieldData;
150     }
151     else {
152     $Document{$FieldTag} = $FieldData;
153     }
154     }
155    
156     return (%Document);
157    
158     }
159    
160    
161     #--------------------------------------------------------------------------
162     #
163     # Function: DocumentFilter()
164     #
165     # Purpose: This function is the document filter
166     #
167     # Called by: external
168     #
169     # Parameters: $Database Database name
170     # $DocumentID Document ID
171     # $ItemName Item name
172     # $MimeType Mime type
173     # $DocumentRaw Raw document text
174     #
175     # Global Variables: %isis::FieldNames, @isis::FieldDisplayOrder
176     #
177     # Returns: The filtered document
178     #
179     sub DocumentFilter {
180    
181     my ($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw) = @_;
182    
183     my (%Document, $DocumentFinal, $FieldTag, $CommaFlag);
184     my ($SearchTerm, @Authors, $Author, @AuthorNames, $AuthorName, @Keywords, $Keyword, @OrderNumbers, $OrderNumber);
185    
186     if ( !defined($DocumentRaw) ) {
187     return (undef);
188     }
189    
190    
191     %Document = &DocumentParser($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw);
192    
193     $DocumentFinal = "<TABLE BORDER=0 CELLPADDING=0 CELLSPACING=2 WIDTH=100%> \n";
194     # $DocumentFinal .= "<TR><TD COLSPAN=2 VALIGN=TOP ALIGN=CENTER><H3> $Document{'T'} </H3></TD></TR> \n";
195    
196    
197     # Loop over each line in the document list
198     foreach $FieldTag ( @isis::FieldDisplayOrder ) {
199    
200     # Skip empty slots
201     if ( !(defined($Document{$FieldTag}) && ($Document{$FieldTag} ne "")) ) {
202     next;
203     }
204    
205    
206     # Print the row start
207     $DocumentFinal .= "<TR><TD VALIGN=TOP ALIGN=RIGHT BGCOLOR=\"#EEEEEE\" WIDTH=1%> $isis::FieldNames{$FieldTag}: </TD><TD VALIGN=TOP ALIGN=LEFT> ";
208    
209    
210     # Print the field data, marking up if needed
211     if ( $FieldTag eq "A" || $FieldTag eq "E" ) {
212     $CommaFlag = 0;
213     @Authors = split(/;/, $Document{$FieldTag});
214     foreach $Author ( @Authors ) {
215     if ( $CommaFlag ) {
216     $DocumentFinal .= ", ";
217     }
218     $CommaFlag = 1;
219     $SearchTerm = $Author;
220     $SearchTerm =~ s/,//g;
221     @AuthorNames = split(/ /, $SearchTerm);
222     $SearchTerm = "";
223     foreach $AuthorName ( @AuthorNames ) {
224     if ( ! ($AuthorName =~ /^\w+\.$/) ) {
225     if ( $AuthorName ne "" ) {
226     $AuthorName .= " ";
227     }
228     $SearchTerm .= $AuthorName;
229     }
230     }
231     $SearchTerm =~ s/ /+/g;
232     $DocumentFinal .= "<A HREF=\"$ENV{'SCRIPT_NAME'}/GetSearchResults?Database=isis&FieldName1=author&FieldContent1=%22$SearchTerm%22\"> $Author </A>";
233     }
234     }
235     elsif ( $FieldTag eq "K" ) {
236     $CommaFlag = 0;
237     @Keywords = split(/,/, $Document{$FieldTag});
238     foreach $Keyword ( @Keywords ) {
239     if ( $CommaFlag ) {
240     $DocumentFinal .= ", ";
241     }
242     $CommaFlag = 1;
243     $SearchTerm = $Keyword;
244     $SearchTerm =~ s/ /+/g;
245     $DocumentFinal .= "<A HREF=\"$ENV{'SCRIPT_NAME'}/GetSearchResults?Database=isis&FieldName1=keyword&FieldContent1=%22$SearchTerm%22\"> $Keyword </A>";
246     }
247     }
248     elsif ( $FieldTag eq "W" ) {
249     $DocumentFinal .= "<A HREF=\"$Document{$FieldTag}\" TARGET=\"BlankWindow\"> $Document{$FieldTag} </A>";
250     }
251     elsif ( $FieldTag eq "Y" ) {
252     $DocumentFinal .= "<PRE>$Document{$FieldTag}</PRE>";
253     }
254     else {
255     $DocumentFinal .= "$Document{$FieldTag}";
256     }
257    
258     $DocumentFinal .= "</TD></TR> \n";
259    
260     }
261    
262    
263     # List any external links we may have
264     if ( defined($Document{'G'}) ) {
265    
266     @OrderNumbers = split(/; /, $Document{'G'});
267    
268     $DocumentFinal .= "<TR><TD COLSPAN=2> <HR WIDTH=50%> </TD></TR>";
269     $DocumentFinal .= "<TR><TD VALIGN=TOP ALIGN=LEFT> <B> External Links: </B> </TD><TD VALIGN=TOP ALIGN=LEFT><UL>";
270    
271     foreach $OrderNumber ( @OrderNumbers ) {
272    
273     if ( $OrderNumber =~ /ISBN\s*(\S*)/ ) {
274     $OrderNumber = $1;
275     $OrderNumber =~ s/-//g;
276     $DocumentFinal .= "<LI><A HREF=\"http://www.amazon.com/exec/obidos/ASIN/$OrderNumber\" TARGET=\"BlankWindow\"> Get this book from Amazon.com </A></LI>";
277     $DocumentFinal .= "<LI><A HREF=\"http://search.borders.com/fcgi-bin/db2www/search/search.d2w/Details?mediaType=Book&searchType=ISBNUPC&code=$OrderNumber\" TARGET=\"BlankWindow\"> Get this book from Borders </A></LI>";
278     $DocumentFinal .= "<LI><A HREF=\"http://shop.barnesandnoble.com/bookSearch/isbnInquiry.asp?isbn=$OrderNumber\" TARGET=\"BlankWindow\"> Get this book from Barnes & Noble </A></LI>";
279     }
280     }
281    
282     $DocumentFinal .= "</UL></TD></TR>";
283     }
284    
285     $DocumentFinal .= "</TABLE> \n";
286    
287     return ($DocumentFinal);
288    
289     }
290    
291    
292    
293     #--------------------------------------------------------------------------
294     #
295     # Function: SummaryFilter()
296     #
297     # Purpose: This function is the summary filter
298     #
299     # Called by: external
300     #
301     # Parameters: $Database Database name
302     # $DocumentID Document ID
303     # $ItemName Item name
304     # $MimeType Mime type
305     # $DocumentRaw Raw document text
306     #
307     # Global Variables: none
308     #
309     # Returns: The filtered summary
310     #
311     sub SummaryFilter {
312    
313     my ($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw) = @_;
314    
315     my (%Document, $Summary);
316    
317    
318     if ( !defined($DocumentRaw) ) {
319     return (undef);
320     }
321    
322    
323     # Parse the document
324     %Document = &DocumentParser($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw);
325    
326    
327     # Select the abstract if available
328     if ( defined($Document{'X'}) ) {
329     $Summary = $Document{'X'};
330     }
331    
332     # Select the table of contents if available
333     elsif ( defined($Document{'Y'}) ) {
334     $Summary = $Document{'Y'};
335     }
336    
337     # Select the keywords if available
338     elsif ( defined($Document{'K'}) ) {
339     $Summary = $Document{'K'};
340     }
341    
342    
343     # Clean the summary text
344     if ( defined($Summary) ) {
345     # First clean up the text
346     if ( index($Summary, "\r\n") >= 0 ) {
347     $Summary =~ s/\r//gs;
348     }
349     elsif ( index($Summary, "\r") >= 0 ) {
350     $Summary =~ s/\r/\n/gs;
351     }
352     $Summary =~ s/\n/ /gs;
353     $Summary =~ s/\s+/ /gs;
354     $Summary = ucfirst($Summary);
355     }
356    
357     return ($Summary);
358    
359     }
360    
361    
362    
363     #--------------------------------------------------------------------------
364     #
365     # Function: RelevanceFeedbackFilter()
366     #
367     # Purpose: This function is the relevance feedback filter
368     #
369     # Called by: external
370     #
371     # Parameters: $Database Database name
372     # $DocumentID Document ID
373     # $ItemName Item name
374     # $MimeType Mime type
375     # $DocumentRaw Raw document text
376     #
377     # Global Variables: none
378     #
379     # Returns: The filtered relevance feedback
380     #
381     sub RelevanceFeedbackFilter {
382    
383     my ($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw) = @_;
384    
385     my (%Document, $RelevanceFeedback);
386    
387    
388     if ( !defined($DocumentRaw) ) {
389     return (undef);
390     }
391    
392    
393     # Parse the document
394     %Document = &DocumentParser($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw);
395    
396    
397     # Select the abstract if available
398     if ( defined($Document{'X'}) ) {
399     $RelevanceFeedback = $Document{'X'};
400     }
401    
402     # Select the table of contents if available
403     elsif ( defined($Document{'Y'}) ) {
404     $RelevanceFeedback = $Document{'Y'};
405     }
406    
407     # Select the keywords if available
408     elsif ( defined($Document{'K'}) ) {
409     $RelevanceFeedback = $Document{'K'};
410     }
411    
412     # Select the title if available
413     elsif ( defined($Document{'T'}) ) {
414     $RelevanceFeedback = $Document{'T'};
415     }
416    
417     # Select the author if available
418     elsif ( defined($Document{'A'}) ) {
419     $RelevanceFeedback = $Document{'A'};
420     }
421    
422     return ($RelevanceFeedback);
423    
424     }
425    
426    
427    
428     #--------------------------------------------------------------------------
429    
430     1;

  ViewVC Help
Powered by ViewVC 1.1.26