/[local]/search/filters/default.pl
This is a repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /search/filters/default.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.4 - (hide annotations)
Sun Jun 16 19:39:45 2002 UTC (17 years, 3 months ago) by dpavlin
Branch: MAIN
Changes since 1.3: +2 -2 lines
File MIME type: text/plain
fix here, fix there. removed unused code, more configuration
checks

1 dpavlin 1.1 #!/usr/bin/perl
2    
3     #--------------------------------------------------------------------------
4     #
5     # Author: Dobrica Pavlinusic
6     # Creation Date: 2002-03-17
7    
8    
9     #--------------------------------------------------------------------------
10     #
11     # Description:
12     #
13     # This package implements the various filters for this database
14     #
15    
16     #--------------------------------------------------------------------------
17     #
18     # Pragmatic modules
19     #
20    
21     use strict;
22    
23    
24     #--------------------------------------------------------------------------
25     #
26     # Package definition
27     #
28    
29     package default;
30    
31    
32     #--------------------------------------------------------------------------
33     #
34     # Application Constants
35     #
36    
#--------------------------------------------------------------------------
#
# Function:     DocumentParser()
#
# Purpose:      Split a raw record into its tagged fields.
#
#               A line that starts with "%TAG" opens field TAG; the rest
#               of the line (after optional whitespace) is its data.
#               Any other line continues the field opened most recently
#               and is appended to it after a newline.  When the same
#               tag occurs more than once the values are joined with
#               $default::Separator{TAG}, or ";" if none is configured.
#
# Called by:    DocumentFilter(), SummaryFilter(),
#               RelevanceFeedbackFilter()
#
# Parameters:   $Database       Database name (unused here)
#               $DocumentID     Document ID (unused here)
#               $ItemName       Item name (unused here)
#               $MimeType       Mime type (unused here)
#               $DocumentRaw    Raw document text
#
# Global Variables: %default::Separator
#
# Returns:      A hash table of the document fields, keyed by tag
#
sub DocumentParser {

    my ($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw) = @_;

    my %Document;
    my $FieldTag;

    # Nothing to parse
    if ( !defined($DocumentRaw) ) {
        return (%Document);
    }

    # Extract the document
    foreach my $DocumentLine ( split(/\n/, $DocumentRaw) ) {

        my ($FieldData, $Separator);

        # A new field starts only when the tag is the first thing on the
        # line; a '%' further along (e.g. "100%sure") is ordinary text
        if ( $DocumentLine =~ /^%(\S+)\s*(.*)/ ) {
            $FieldTag  = $1;
            $FieldData = $2;
            $Separator = $default::Separator{$FieldTag} || ";";
        }
        # Continuation of the field that is currently open
        elsif ( defined($FieldTag) ) {
            $FieldData = $DocumentLine;
            $Separator = "\n";
        }
        # Text ahead of the first tag belongs to no field
        else {
            next;
        }

        if ( defined($Document{$FieldTag}) ) {
            $Document{$FieldTag} .= $Separator . $FieldData;
        }
        else {
            $Document{$FieldTag} = $FieldData;
        }
    }

    return (%Document);

}
92    
93    
#--------------------------------------------------------------------------
#
# Function:     html_enc()
#
# Purpose:      Escape a string so it can be placed in HTML text or in a
#               double-quoted attribute value
#
# Called by:    DocumentFilter()
#
# Parameters:   $_[0]   Text to escape
#
# Global Variables: none
#
# Returns:      An escaped copy of the text.  A literal <br> (in any
#               letter case) is deliberately let through as markup.
#
sub html_enc {
    my $tmp = $_[0];
    # Ampersand first, otherwise the entities produced below would be
    # escaped a second time
    $tmp =~ s/&/&amp;/g;
    $tmp =~ s/</&lt;/g;
    $tmp =~ s/>/&gt;/g;
    # Field values are also emitted inside HREF="...", so the double
    # quote must not terminate the attribute
    $tmp =~ s/"/&quot;/g;
    # fix some tags
    $tmp =~ s/&lt;br&gt;/<br>/gi;
    return $tmp;
}


#--------------------------------------------------------------------------
#
# Function:     _url_enc()
#
# Purpose:      Encode a string for use as a value in a URL query string
#
# Called by:    DocumentFilter()
#
# Parameters:   $_[0]   Text to encode
#
# Global Variables: none
#
# Returns:      A copy with spaces turned into "+" and every character
#               outside A-Z a-z 0-9 _ . ~ - written as %XX
#
sub _url_enc {
    my $tmp = $_[0];
    # Character strings are encoded per UTF-8 byte; byte strings are
    # taken as they are
    utf8::encode($tmp) if utf8::is_utf8($tmp);
    $tmp =~ s/([^A-Za-z0-9_.~ -])/sprintf("%%%02X", ord($1))/ge;
    $tmp =~ s/ /+/g;
    return $tmp;
}


#--------------------------------------------------------------------------
#
# Function:     DocumentFilter()
#
# Purpose:      This function is the document filter: it renders a raw
#               record as an HTML table with one row per non-empty field
#               listed in @default::FieldDisplayOrder.
#
#               Fields A and E are split on ";" and fields K on ",";
#               every piece becomes a link to a GetSearchResults query.
#               Field W is rendered as a link to itself, field Y inside
#               <PRE>, and all other fields as escaped text.  If field G
#               is present, book-shop links are appended for every ISBN
#               found in it.
#
# Called by:    external
#
# Parameters:   $Database       Database name
#               $DocumentID     Document ID
#               $ItemName       Item name
#               $MimeType       Mime type
#               $DocumentRaw    Raw document text
#
# Global Variables: %default::FieldNames, @default::FieldDisplayOrder,
#                   $ENV{'SCRIPT_NAME'}
#
# Returns:      The filtered document, or undef if $DocumentRaw is undef
#
sub DocumentFilter {

    my ($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw) = @_;

    if ( !defined($DocumentRaw) ) {
        return (undef);
    }

    my %Document = DocumentParser($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw);

    # Common prefix of every search link
    my $SearchURL = "$ENV{'SCRIPT_NAME'}/GetSearchResults?Database=" . _url_enc($Database);

    my $DocumentFinal = "<TABLE BORDER=0 CELLPADDING=0 CELLSPACING=2 WIDTH=100%> \n";

    # Loop over each line in the document list
    foreach my $FieldTag ( @default::FieldDisplayOrder ) {

        my $FieldRaw = $Document{$FieldTag};

        # Skip empty slots
        if ( !defined($FieldRaw) || $FieldRaw eq "" ) {
            next;
        }

        # Print the row start
        $DocumentFinal .= "<TR><TD VALIGN=TOP ALIGN=RIGHT BGCOLOR=\"#EEEEEE\" WIDTH=\"130\"> $default::FieldNames{$FieldTag}: </TD><TD VALIGN=TOP ALIGN=LEFT> ";

        # Print the field data, marking up if needed.  Multi-valued
        # fields are split while still raw and each piece is escaped
        # afterwards: escaping first would introduce ';' characters
        # (from the entities) that the author split would trip over.
        if ( $FieldTag eq "A" || $FieldTag eq "E" ) {
            my @Links;
            foreach my $Author ( split(/;/, $FieldRaw) ) {
                my $SearchTerm = $Author;
                $SearchTerm =~ s/,//g;
                # Search on the name parts only: initials such as "J."
                # are dropped, every remaining part is followed by a
                # space
                $SearchTerm = join("",
                    map  { "$_ " }
                    grep { $_ ne "" && $_ !~ /^\w+\.$/ }
                    split(/ /, $SearchTerm));
                push @Links, "<A HREF=\"$SearchURL&FieldName1=author&FieldContent1=%22"
                    . _url_enc($SearchTerm) . "%22\"> " . html_enc($Author) . " </A>";
            }
            $DocumentFinal .= join(", ", @Links);
        }
        elsif ( $FieldTag eq "K" ) {
            my @Links;
            foreach my $Keyword ( split(/,/, $FieldRaw) ) {
                push @Links, "<A HREF=\"$SearchURL&FieldName1=keyword&FieldContent1=%22"
                    . _url_enc($Keyword) . "%22\"> " . html_enc($Keyword) . " </A>";
            }
            $DocumentFinal .= join(", ", @Links);
        }
        elsif ( $FieldTag eq "W" ) {
            my $Link = html_enc($FieldRaw);
            $DocumentFinal .= "<A HREF=\"$Link\" TARGET=\"BlankWindow\"> $Link </A>";
        }
        elsif ( $FieldTag eq "Y" ) {
            $DocumentFinal .= "<PRE>" . html_enc($FieldRaw) . "</PRE>";
        }
        else {
            $DocumentFinal .= html_enc($FieldRaw);
        }

        $DocumentFinal .= "</TD></TR> \n";

    }


    # List any external links we may have
    if ( defined($Document{'G'}) ) {

        $DocumentFinal .= "<TR><TD COLSPAN=2> <HR WIDTH=50%> </TD></TR>";
        $DocumentFinal .= "<TR><TD VALIGN=TOP ALIGN=LEFT> <B> External Links: </B> </TD><TD VALIGN=TOP ALIGN=LEFT><UL>";

        # Accept "; " as well as the bare ";" that DocumentParser uses
        # by default when it joins repeated fields
        foreach my $OrderNumber ( split(/;\s*/, $Document{'G'}) ) {

            if ( $OrderNumber =~ /ISBN\s*(\S*)/ ) {
                my $ISBN = $1;
                $ISBN =~ s/-//g;
                # The value comes straight from the record, so make it
                # safe for use inside the URLs
                $ISBN = _url_enc($ISBN);
                $DocumentFinal .= "<LI><A HREF=\"http://www.amazon.com/exec/obidos/ASIN/$ISBN\" TARGET=\"BlankWindow\"> Get this book from Amazon.com </A></LI>";
                $DocumentFinal .= "<LI><A HREF=\"http://search.borders.com/fcgi-bin/db2www/search/search.d2w/Details?mediaType=Book&searchType=ISBNUPC&code=$ISBN\" TARGET=\"BlankWindow\"> Get this book from Borders </A></LI>";
                $DocumentFinal .= "<LI><A HREF=\"http://shop.barnesandnoble.com/bookSearch/isbnInquiry.asp?isbn=$ISBN\" TARGET=\"BlankWindow\"> Get this book from Barnes & Noble </A></LI>";
            }
        }

        $DocumentFinal .= "</UL></TD></TR>";
    }

    $DocumentFinal .= "</TABLE> \n";

    return ($DocumentFinal);

}
235    
236    
237    
#--------------------------------------------------------------------------
#
# Function:     SummaryFilter()
#
# Purpose:      This function is the summary filter: it picks one field
#               of the record as summary text and flattens it to a
#               single line.
#
# Called by:    external
#
# Parameters:   $Database       Database name
#               $DocumentID     Document ID
#               $ItemName       Item name
#               $MimeType       Mime type
#               $DocumentRaw    Raw document text
#
# Global Variables: none
#
# Returns:      The filtered summary; undef if $DocumentRaw is undef or
#               the record has none of the fields X, Y, K
#
sub SummaryFilter {

    my ($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw) = @_;

    return (undef) unless defined($DocumentRaw);

    # Parse the document
    my %Fields = DocumentParser($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw);

    # Take the first field that is present, in order of preference:
    # abstract (X), table of contents (Y), keywords (K)
    my $Text;
    foreach my $Tag ( 'X', 'Y', 'K' ) {
        next unless defined($Fields{$Tag});
        $Text = $Fields{$Tag};
        last;
    }

    # No candidate field, nothing to clean
    return ($Text) unless defined($Text);

    # Normalise line ends: with CRLF present every CR is removed,
    # otherwise a lone CR counts as a line break
    if ( index($Text, "\r\n") >= 0 ) {
        $Text =~ s/\r//gs;
    }
    elsif ( index($Text, "\r") >= 0 ) {
        $Text =~ s/\r/\n/gs;
    }

    # Fold everything onto one line and squeeze runs of whitespace
    $Text =~ s/\n/ /gs;
    $Text =~ s/\s+/ /gs;

    return (ucfirst($Text));

}
305    
306    
307    
#--------------------------------------------------------------------------
#
# Function:     RelevanceFeedbackFilter()
#
# Purpose:      This function is the relevance feedback filter: it
#               returns the text of one field of the record, unchanged.
#
# Called by:    external
#
# Parameters:   $Database       Database name
#               $DocumentID     Document ID
#               $ItemName       Item name
#               $MimeType       Mime type
#               $DocumentRaw    Raw document text
#
# Global Variables: none
#
# Returns:      The filtered relevance feedback; undef if $DocumentRaw
#               is undef or the record has none of the fields
#               X, Y, K, T, A
#
sub RelevanceFeedbackFilter {

    my ($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw) = @_;

    return (undef) unless defined($DocumentRaw);

    # Parse the document
    my %Fields = DocumentParser($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw);

    # First field present wins, in order of preference: abstract (X),
    # table of contents (Y), keywords (K), title (T), author (A)
    foreach my $Tag ( 'X', 'Y', 'K', 'T', 'A' ) {
        return ($Fields{$Tag}) if defined($Fields{$Tag});
    }

    # None of the candidate fields is present
    return (undef);

}
370    
371    
372    
373     #--------------------------------------------------------------------------
374    
375     1;

  ViewVC Help
Powered by ViewVC 1.1.26