/[local]/search/filters/default.pl
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /search/filters/default.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (hide annotations)
Fri Jun 14 18:10:27 2002 UTC (17 years, 1 month ago) by dpavlin
Branch: MAIN
Changes since 1.1: +4 -0 lines
File MIME type: text/plain
escape some html entities

1 dpavlin 1.1 #!/usr/bin/perl
2    
3     #--------------------------------------------------------------------------
4     #
5     # Author: Dobrica Pavlinusic
6     # Creation Date: 2002-03-17
7    
8    
9     #--------------------------------------------------------------------------
10     #
11     # Description:
12     #
13     # This package implements the various filters for this database
14     #
15    
16     #--------------------------------------------------------------------------
17     #
18     # Pragmatic modules
19     #
20    
21     use strict;
22    
23    
24     #--------------------------------------------------------------------------
25     #
26     # Package definition
27     #
28    
29     package default;
30    
31    
32     #--------------------------------------------------------------------------
33     #
34     # Application Constants
35     #
36    
37     # Field Names
38     %default::FieldNames = (
39     '700+', 'Autor(i)',
40     '200', 'Naslov',
41     '205', 'Izdanje',
42     '210', 'Izdavanje',
43     '215', 'Materijalni opis',
44     '225', 'Nakladnièka cjelina',
45     '300+', 'Napomena',
46     '330', 'Sadr¾aj',
47     '423', 'Prikriveni radovi',
48     '464', 'Analitièki radovi',
49     '610', 'Kljuène rijeèi',
50     '675+', 'Klasifik. oznaka',
51     '990', 'Signatura',
52     '991', 'Inventarni broj',
53     'ISBN', 'ISBN',
54     'MFN', 'MFN',
55     '994a', 'Knji¾nica',
56    
57    
58     'A', 'Author(s)',
59     'B', 'Book',
60     'C', 'City',
61     'D', 'Date',
62     'E', 'Editor',
63     'F', 'Footnote',
64     'G', 'Order Info',
65     'H', 'Commentary',
66     'I', 'Publisher',
67     'J', 'Journal',
68     'K', 'Keyword',
69     'L', 'Label',
70     'M', 'Bell',
71     'N', 'Issue',
72     'O', 'Other',
73     'P', 'Page',
74     'Q', 'Corporate',
75     'R', 'Report',
76     'S', 'Series',
77     'T', 'Title',
78     'U', 'Annotation',
79     'V', 'Volume',
80     'W', 'URL',
81     'X', 'Abstract',
82     'Y', 'Table of Contents',
83     'Z', 'Reference',
84     '$', 'Price',
85     '*', 'Copyright',
86     '^', 'Parts',
87     );
88    
89    
90    
91     # Field Display Order
92     @default::FieldDisplayOrder = (
93     'ISBN',
94     '700+',
95     '200',
96     '205',
97     '210',
98     '215',
99     '225',
100     '300+',
101     '330',
102     '423',
103     '464',
104     '610',
105     '675+',
106     '990',
107     '991',
108     'MFN',
109     '994a',
110    
111     'M',
112     'L',
113     'T',
114     'S',
115     'E',
116     'Q',
117     'A',
118     'J',
119     'B',
120     'D',
121     'V',
122     'N',
123     'P',
124     'C',
125     'I',
126     'G',
127     '$',
128     'K',
129     '*',
130     'W',
131     'O',
132     'Y',
133     'X',
134     'R',
135     '.',
136     'F',
137     'H',
138     'U',
139     'Z',
140     '^'
141     );
142    
143     # separators
144     %default::Separator = (
145     '700+', ', ',
146     '990', ' ',
147     '991', ', ',
148     '300+', '.- ',
149     '225', ', ',
150     '610', ' * ',
151    
152    
153     );
154    
155     #--------------------------------------------------------------------------
156     #
157     # Function: DocumentParser()
158     #
159     # Purpose: This function serves as a document parser
160     #
161     # Called by: DocumentFilter(), SummaryFilter()
162     #
163     # Parameters: $Database Database name
164     # $DocumentID Document ID
165     # $ItemName Item name
166     # $MimeType Mime type
167     # $DocumentRaw Raw document text
168     #
169     # Global Variables:
170     #
171     # Returns: A hash table of the document fields
172     #
173     sub DocumentParser {
174    
175     my ($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw) = @_;
176    
177     my (%Document, @DocumentLines, $DocumentLine);
178     my ($FieldTag, $FieldData, $Separator);
179    
180     @DocumentLines = split(/\n/, $DocumentRaw);
181    
182     # Extract the document
183     foreach $DocumentLine ( @DocumentLines ) {
184     # check if this is a new field?
185     if ( $DocumentLine =~ /%(\S+)\s*(.*)/ ) {
186     $FieldTag = $1;
187     $FieldData = $2;
188     if ($default::Separator{$FieldTag}) {
189     $Separator = $default::Separator{$FieldTag};
190     } else {
191     $Separator = ";";
192     }
193     }
194     else {
195     $FieldData = $DocumentLine;
196     $Separator = "\n";
197     }
198    
199     if ( defined($Document{$FieldTag}) ) {
200     $Document{$FieldTag} .= $Separator . $FieldData;
201     }
202     else {
203     $Document{$FieldTag} = $FieldData;
204     }
205     }
206    
207     return (%Document);
208    
209     }
210    
211    
212     #--------------------------------------------------------------------------
213     #
214     # Function: DocumentFilter()
215     #
216     # Purpose: This function is the document filter
217     #
218     # Called by: external
219     #
220     # Parameters: $Database Database name
221     # $DocumentID Document ID
222     # $ItemName Item name
223     # $MimeType Mime type
224     # $DocumentRaw Raw document text
225     #
226     # Global Variables: %default::FieldNames, @default::FieldDisplayOrder
227     #
228     # Returns: The filtered document
229     #
230     sub DocumentFilter {
231    
232     my ($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw) = @_;
233    
234     my (%Document, $DocumentFinal, $FieldTag, $CommaFlag);
235     my ($SearchTerm, @Authors, $Author, @AuthorNames, $AuthorName, @Keywords, $Keyword, @OrderNumbers, $OrderNumber);
236    
237     if ( !defined($DocumentRaw) ) {
238     return (undef);
239     }
240    
241    
242     %Document = &DocumentParser($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw);
243    
244     $DocumentFinal = "<TABLE BORDER=0 CELLPADDING=0 CELLSPACING=2 WIDTH=100%> \n";
245     # $DocumentFinal .= "<TR><TD COLSPAN=2 VALIGN=TOP ALIGN=CENTER><H3> $Document{'T'} </H3></TD></TR> \n";
246    
247     # Loop over each line in the document list
248     foreach $FieldTag ( @default::FieldDisplayOrder ) {
249    
250     # Skip empty slots
251     if ( !(defined($Document{$FieldTag}) && ($Document{$FieldTag} ne "")) ) {
252     next;
253     }
254    
255    
256     # Print the row start
257     $DocumentFinal .= "<TR><TD VALIGN=TOP ALIGN=RIGHT BGCOLOR=\"#EEEEEE\" WIDTH=\"130\"> $default::FieldNames{$FieldTag}: </TD><TD VALIGN=TOP ALIGN=LEFT> ";
258    
259 dpavlin 1.2 $Document{$FieldTag} =~ s/&/&and;/g;
260     $Document{$FieldTag} =~ s/</&lt;/g;
261     $Document{$FieldTag} =~ s/>/&gt;/g;
262    
263 dpavlin 1.1 # Print the field data, marking up if needed
264     if ( $FieldTag eq "A" || $FieldTag eq "E" ) {
265     $CommaFlag = 0;
266     @Authors = split(/;/, $Document{$FieldTag});
267     foreach $Author ( @Authors ) {
268     if ( $CommaFlag ) {
269     $DocumentFinal .= ", ";
270     }
271     $CommaFlag = 1;
272     $SearchTerm = $Author;
273     $SearchTerm =~ s/,//g;
274     @AuthorNames = split(/ /, $SearchTerm);
275     $SearchTerm = "";
276     foreach $AuthorName ( @AuthorNames ) {
277     if ( ! ($AuthorName =~ /^\w+\.$/) ) {
278     if ( $AuthorName ne "" ) {
279     $AuthorName .= " ";
280     }
281     $SearchTerm .= $AuthorName;
282     }
283     }
284     $SearchTerm =~ s/ /+/g;
285     $DocumentFinal .= "<A HREF=\"$ENV{'SCRIPT_NAME'}/GetSearchResults?Database=$Database&FieldName1=author&FieldContent1=%22$SearchTerm%22\"> $Author </A>";
286     }
287     }
288     elsif ( $FieldTag eq "K" ) {
289     $CommaFlag = 0;
290     @Keywords = split(/,/, $Document{$FieldTag});
291     foreach $Keyword ( @Keywords ) {
292     if ( $CommaFlag ) {
293     $DocumentFinal .= ", ";
294     }
295     $CommaFlag = 1;
296     $SearchTerm = $Keyword;
297     $SearchTerm =~ s/ /+/g;
298     $DocumentFinal .= "<A HREF=\"$ENV{'SCRIPT_NAME'}/GetSearchResults?Database=$Database&FieldName1=keyword&FieldContent1=%22$SearchTerm%22\"> $Keyword </A>";
299     }
300     }
301     elsif ( $FieldTag eq "W" ) {
302     $DocumentFinal .= "<A HREF=\"$Document{$FieldTag}\" TARGET=\"BlankWindow\"> $Document{$FieldTag} </A>";
303     }
304     elsif ( $FieldTag eq "Y" ) {
305     $DocumentFinal .= "<PRE>$Document{$FieldTag}</PRE>";
306     }
307     else {
308     $DocumentFinal .= "$Document{$FieldTag}";
309     }
310    
311     $DocumentFinal .= "</TD></TR> \n";
312    
313     }
314    
315    
316     # List any external links we may have
317     if ( defined($Document{'G'}) ) {
318    
319     @OrderNumbers = split(/; /, $Document{'G'});
320    
321     $DocumentFinal .= "<TR><TD COLSPAN=2> <HR WIDTH=50%> </TD></TR>";
322     $DocumentFinal .= "<TR><TD VALIGN=TOP ALIGN=LEFT> <B> External Links: </B> </TD><TD VALIGN=TOP ALIGN=LEFT><UL>";
323    
324     foreach $OrderNumber ( @OrderNumbers ) {
325    
326     if ( $OrderNumber =~ /ISBN\s*(\S*)/ ) {
327     $OrderNumber = $1;
328     $OrderNumber =~ s/-//g;
329     $DocumentFinal .= "<LI><A HREF=\"http://www.amazon.com/exec/obidos/ASIN/$OrderNumber\" TARGET=\"BlankWindow\"> Get this book from Amazon.com </A></LI>";
330     $DocumentFinal .= "<LI><A HREF=\"http://search.borders.com/fcgi-bin/db2www/search/search.d2w/Details?mediaType=Book&searchType=ISBNUPC&code=$OrderNumber\" TARGET=\"BlankWindow\"> Get this book from Borders </A></LI>";
331     $DocumentFinal .= "<LI><A HREF=\"http://shop.barnesandnoble.com/bookSearch/isbnInquiry.asp?isbn=$OrderNumber\" TARGET=\"BlankWindow\"> Get this book from Barnes & Noble </A></LI>";
332     }
333     }
334    
335     $DocumentFinal .= "</UL></TD></TR>";
336     }
337    
338     $DocumentFinal .= "</TABLE> \n";
339    
340     return ($DocumentFinal);
341    
342     }
343    
344    
345    
346     #--------------------------------------------------------------------------
347     #
348     # Function: SummaryFilter()
349     #
350     # Purpose: This function is the summary filter
351     #
352     # Called by: external
353     #
354     # Parameters: $Database Database name
355     # $DocumentID Document ID
356     # $ItemName Item name
357     # $MimeType Mime type
358     # $DocumentRaw Raw document text
359     #
360     # Global Variables: none
361     #
362     # Returns: The filtered summary
363     #
364     sub SummaryFilter {
365    
366     my ($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw) = @_;
367    
368     my (%Document, $Summary);
369    
370    
371     if ( !defined($DocumentRaw) ) {
372     return (undef);
373     }
374    
375    
376     # Parse the document
377     %Document = &DocumentParser($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw);
378    
379    
380     # Select the abstract if available
381     if ( defined($Document{'X'}) ) {
382     $Summary = $Document{'X'};
383     }
384    
385     # Select the table of contents if available
386     elsif ( defined($Document{'Y'}) ) {
387     $Summary = $Document{'Y'};
388     }
389    
390     # Select the keywords if available
391     elsif ( defined($Document{'K'}) ) {
392     $Summary = $Document{'K'};
393     }
394    
395    
396     # Clean the summary text
397     if ( defined($Summary) ) {
398     # First clean up the text
399     if ( index($Summary, "\r\n") >= 0 ) {
400     $Summary =~ s/\r//gs;
401     }
402     elsif ( index($Summary, "\r") >= 0 ) {
403     $Summary =~ s/\r/\n/gs;
404     }
405     $Summary =~ s/\n/ /gs;
406     $Summary =~ s/\s+/ /gs;
407     $Summary = ucfirst($Summary);
408     }
409    
410     return ($Summary);
411    
412     }
413    
414    
415    
416     #--------------------------------------------------------------------------
417     #
418     # Function: RelevanceFeedbackFilter()
419     #
420     # Purpose: This function is the relevance feedback filter
421     #
422     # Called by: external
423     #
424     # Parameters: $Database Database name
425     # $DocumentID Document ID
426     # $ItemName Item name
427     # $MimeType Mime type
428     # $DocumentRaw Raw document text
429     #
430     # Global Variables: none
431     #
432     # Returns: The filtered relevance feedback
433     #
434     sub RelevanceFeedbackFilter {
435    
436     my ($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw) = @_;
437    
438     my (%Document, $RelevanceFeedback);
439    
440    
441     if ( !defined($DocumentRaw) ) {
442     return (undef);
443     }
444    
445    
446     # Parse the document
447     %Document = &DocumentParser($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw);
448    
449    
450     # Select the abstract if available
451     if ( defined($Document{'X'}) ) {
452     $RelevanceFeedback = $Document{'X'};
453     }
454    
455     # Select the table of contents if available
456     elsif ( defined($Document{'Y'}) ) {
457     $RelevanceFeedback = $Document{'Y'};
458     }
459    
460     # Select the keywords if available
461     elsif ( defined($Document{'K'}) ) {
462     $RelevanceFeedback = $Document{'K'};
463     }
464    
465     # Select the title if available
466     elsif ( defined($Document{'T'}) ) {
467     $RelevanceFeedback = $Document{'T'};
468     }
469    
470     # Select the author if available
471     elsif ( defined($Document{'A'}) ) {
472     $RelevanceFeedback = $Document{'A'};
473     }
474    
475     return ($RelevanceFeedback);
476    
477     }
478    
479    
480    
481     #--------------------------------------------------------------------------
482    
483     1;

  ViewVC Help
Powered by ViewVC 1.1.26