/[local]/search/filters/default.pl
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /search/filters/default.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1.1.1 - (hide annotations) (vendor branch)
Thu Jun 13 17:25:53 2002 UTC (17 years, 1 month ago) by dpavlin
Branch: DbP
CVS Tags: r0
Changes since 1.1: +0 -0 lines
File MIME type: text/plain
initial import

1 dpavlin 1.1 #!/usr/bin/perl
2    
3     #--------------------------------------------------------------------------
4     #
5     # Author: Dobrica Pavlinusic
6     # Creation Date: 2002-03-17
7    
8    
9     #--------------------------------------------------------------------------
10     #
11     # Description:
12     #
13     # This package implements the various filters for this database
14     #
15    
16     #--------------------------------------------------------------------------
17     #
18     # Pragmatic modules
19     #
20    
21     use strict;
22    
23    
24     #--------------------------------------------------------------------------
25     #
26     # Package definition
27     #
28    
29     package default;
30    
31    
32     #--------------------------------------------------------------------------
33     #
34     # Application Constants
35     #
36    
# Field Names
#
# Maps a record field tag to the label that DocumentFilter() prints in the
# left-hand column of the rendered document table.
#
# NOTE(review): the accented letters in some labels below look like
# ISO-8859-2 bytes displayed as Latin-1 -- confirm the file encoding before
# touching those strings.
%default::FieldNames = (

    # Numeric / library-record tags
    '700+' => 'Autor(i)',
    '200'  => 'Naslov',
    '205'  => 'Izdanje',
    '210'  => 'Izdavanje',
    '215'  => 'Materijalni opis',
    '225'  => 'Nakladnièka cjelina',
    '300+' => 'Napomena',
    '330'  => 'Sadr¾aj',
    '423'  => 'Prikriveni radovi',
    '464'  => 'Analitièki radovi',
    '610'  => 'Kljuène rijeèi',
    '675+' => 'Klasifik. oznaka',
    '990'  => 'Signatura',
    '991'  => 'Inventarni broj',
    'ISBN' => 'ISBN',
    'MFN'  => 'MFN',
    '994a' => 'Knji¾nica',

    # Single-character tags
    'A' => 'Author(s)',
    'B' => 'Book',
    'C' => 'City',
    'D' => 'Date',
    'E' => 'Editor',
    'F' => 'Footnote',
    'G' => 'Order Info',
    'H' => 'Commentary',
    'I' => 'Publisher',
    'J' => 'Journal',
    'K' => 'Keyword',
    'L' => 'Label',
    'M' => 'Bell',
    'N' => 'Issue',
    'O' => 'Other',
    'P' => 'Page',
    'Q' => 'Corporate',
    'R' => 'Report',
    'S' => 'Series',
    'T' => 'Title',
    'U' => 'Annotation',
    'V' => 'Volume',
    'W' => 'URL',
    'X' => 'Abstract',
    'Y' => 'Table of Contents',
    'Z' => 'Reference',
    '$' => 'Price',
    '*' => 'Copyright',
    '^' => 'Parts',
);
88    
89    
90    
# Field Display Order
#
# Tags in the order DocumentFilter() walks them when it builds the document
# table; tags missing from a record are skipped there.
#
# NOTE(review): '.' appears in this list but has no entry in
# %default::FieldNames.
@default::FieldDisplayOrder = (

    # Numeric / library-record tags
    qw( ISBN 700+ 200 205 210 215 225 300+ 330 423 464 610 675+ 990 991 MFN 994a ),

    # Single-character tags
    qw( M L T S E Q A J B D V N P C I G $ K * W O Y X R . F H U Z ^ ),
);
142    
# Separators
#
# Per-tag string that DocumentParser() puts between the values of a field
# that occurs more than once in a record.  Tags not listed here are joined
# with ";" by the parser.
%default::Separator = (
    '700+' => ', ',
    '990'  => ' ',
    '991'  => ', ',
    '300+' => '.- ',
    '225'  => ', ',
    '610'  => ' * ',
);
154    
#--------------------------------------------------------------------------
#
#   Function:   DocumentParser()
#
#   Purpose:    Splits the raw text of one record into its fields.
#
#               A line of the form "%TAG data" starts (or repeats) the
#               field TAG.  Any other line is a continuation of the most
#               recent field and is appended to it after a newline.
#               Repeated occurrences of a tag are joined with the tag's
#               entry in %default::Separator, or with ";" when the tag
#               has no (or an empty) entry there.
#
#   Called by:  DocumentFilter(), SummaryFilter(),
#               RelevanceFeedbackFilter()
#
#   Parameters: $Database       Database name   (not used here)
#               $DocumentID     Document ID     (not used here)
#               $ItemName       Item name       (not used here)
#               $MimeType       Mime type       (not used here)
#               $DocumentRaw    Raw document text
#
#   Global Variables:   %default::Separator
#
#   Returns:    A hash table of the document fields (tag => text); empty
#               when $DocumentRaw is undefined or holds no tagged line
#
sub DocumentParser {

    my ($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw) = @_;

    my %Document;
    my $FieldTag;

    # Nothing to parse
    if ( !defined($DocumentRaw) ) {
        return (%Document);
    }

    foreach my $DocumentLine ( split(/\n/, $DocumentRaw) ) {

        my ($FieldData, $Separator);

        # A new field starts with "%TAG" at the beginning of the line
        # (leading blanks tolerated).  The match is anchored so that a '%'
        # inside running text -- "50%off", or a URL escape such as "%20"
        # on a continuation line -- is not mistaken for a field tag, which
        # would silently drop the text in front of it.
        if ( $DocumentLine =~ /^\s*%(\S+)\s*(.*)/ ) {
            $FieldTag  = $1;
            $FieldData = $2;
            $Separator = $default::Separator{$FieldTag} || ";";
        }
        else {
            # Text in front of the first tag belongs to no field
            next if !defined($FieldTag);

            $FieldData = $DocumentLine;
            $Separator = "\n";
        }

        if ( defined($Document{$FieldTag}) ) {
            $Document{$FieldTag} .= $Separator . $FieldData;
        }
        else {
            $Document{$FieldTag} = $FieldData;
        }
    }

    return (%Document);

}
210    
211    
#--------------------------------------------------------------------------
#
#   Function:   DocumentFilter()
#
#   Purpose:    Renders one record as an HTML table, one row per field, in
#               the order given by @default::FieldDisplayOrder.
#
#               Authors/editors ('A', 'E') and keywords ('K') become links
#               to the GetSearchResults action below $ENV{'SCRIPT_NAME'},
#               'W' becomes a hyperlink and 'Y' is wrapped in <PRE>.  Every
#               ISBN found in the 'G' field adds a set of bookshop links.
#
#               Record text is HTML-escaped and search terms are
#               URL-encoded before they are placed in the page.
#
#   Called by:  external
#
#   Parameters: $Database       Database name
#               $DocumentID     Document ID
#               $ItemName       Item name
#               $MimeType       Mime type
#               $DocumentRaw    Raw document text
#
#   Global Variables:   %default::FieldNames, @default::FieldDisplayOrder
#
#   Returns:    The filtered document (HTML), undef if $DocumentRaw is
#               undefined
#
sub DocumentFilter {

    my ($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw) = @_;

    if ( !defined($DocumentRaw) ) {
        return (undef);
    }

    my %Document = DocumentParser($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw);

    # Common prefix of every search link; SCRIPT_NAME is not set outside CGI
    my $ScriptName = defined($ENV{'SCRIPT_NAME'}) ? $ENV{'SCRIPT_NAME'} : "";
    my $SearchURL  = _EscapeHTML($ScriptName) . "/GetSearchResults?Database=" . _EscapeURL($Database);

    my $DocumentFinal = "<TABLE BORDER=0 CELLPADDING=0 CELLSPACING=2 WIDTH=100%> \n";

    # Loop over each tag in the display list
    foreach my $FieldTag ( @default::FieldDisplayOrder ) {

        my $FieldValue = $Document{$FieldTag};

        # Skip empty slots
        next if !defined($FieldValue) || $FieldValue eq "";

        # Tags without a configured label (e.g. '.') are shown by their tag
        my $FieldName = defined($default::FieldNames{$FieldTag}) ? $default::FieldNames{$FieldTag} : $FieldTag;

        # Print the row start
        $DocumentFinal .= "<TR><TD VALIGN=TOP ALIGN=RIGHT BGCOLOR=\"#EEEEEE\" WIDTH=\"130\"> $FieldName: </TD><TD VALIGN=TOP ALIGN=LEFT> ";

        # Print the field data, marking up if needed
        if ( $FieldTag eq "A" || $FieldTag eq "E" ) {
            my @Links;
            foreach my $Author ( split(/;/, $FieldValue) ) {
                # Search on the name without commas and without initials
                # such as "J."
                my $SearchTerm = $Author;
                $SearchTerm =~ s/,//g;
                $SearchTerm = join(" ", grep { $_ ne "" && $_ !~ /^\w+\.$/ } split(/ /, $SearchTerm));
                push(@Links, "<A HREF=\"$SearchURL&FieldName1=author&FieldContent1=%22" . _EscapeURL($SearchTerm) . "%22\"> " . _EscapeHTML($Author) . " </A>");
            }
            $DocumentFinal .= join(", ", @Links);
        }
        elsif ( $FieldTag eq "K" ) {
            my @Links;
            foreach my $Keyword ( split(/,/, $FieldValue) ) {
                # Blanks around the keyword are not part of the search term
                my $SearchTerm = $Keyword;
                $SearchTerm =~ s/^\s+//;
                $SearchTerm =~ s/\s+$//;
                push(@Links, "<A HREF=\"$SearchURL&FieldName1=keyword&FieldContent1=%22" . _EscapeURL($SearchTerm) . "%22\"> " . _EscapeHTML($Keyword) . " </A>");
            }
            $DocumentFinal .= join(", ", @Links);
        }
        elsif ( $FieldTag eq "W" ) {
            my $URL = _EscapeHTML($FieldValue);
            $DocumentFinal .= "<A HREF=\"$URL\" TARGET=\"BlankWindow\"> $URL </A>";
        }
        elsif ( $FieldTag eq "Y" ) {
            $DocumentFinal .= "<PRE>" . _EscapeHTML($FieldValue) . "</PRE>";
        }
        else {
            $DocumentFinal .= _EscapeHTML($FieldValue);
        }

        $DocumentFinal .= "</TD></TR> \n";

    }

    # List any external links we may have
    if ( defined($Document{'G'}) ) {

        my @Links;

        # DocumentParser() joins repeated fields with ";" (no blank), a
        # single field may list several numbers as "...; ...": accept both
        foreach my $OrderNumber ( split(/;\s*/, $Document{'G'}) ) {

            next if $OrderNumber !~ /ISBN\s*(\S+)/;

            my $ISBN = $1;
            $ISBN =~ s/-//g;
            next if $ISBN eq "";
            $ISBN = _EscapeURL($ISBN);

            push(@Links, "<LI><A HREF=\"http://www.amazon.com/exec/obidos/ASIN/$ISBN\" TARGET=\"BlankWindow\"> Get this book from Amazon.com </A></LI>");
            push(@Links, "<LI><A HREF=\"http://search.borders.com/fcgi-bin/db2www/search/search.d2w/Details?mediaType=Book&searchType=ISBNUPC&code=$ISBN\" TARGET=\"BlankWindow\"> Get this book from Borders </A></LI>");
            push(@Links, "<LI><A HREF=\"http://shop.barnesandnoble.com/bookSearch/isbnInquiry.asp?isbn=$ISBN\" TARGET=\"BlankWindow\"> Get this book from Barnes & Noble </A></LI>");
        }

        # Only show the section when there is something to put in it
        if ( @Links ) {
            $DocumentFinal .= "<TR><TD COLSPAN=2> <HR WIDTH=50%> </TD></TR>";
            $DocumentFinal .= "<TR><TD VALIGN=TOP ALIGN=LEFT> <B> External Links: </B> </TD><TD VALIGN=TOP ALIGN=LEFT><UL>";
            $DocumentFinal .= join("", @Links);
            $DocumentFinal .= "</UL></TD></TR>";
        }
    }

    $DocumentFinal .= "</TABLE> \n";

    return ($DocumentFinal);

}


# Private helper: escapes the characters that are special in HTML text and
# in double-quoted attribute values.  Undefined input gives "".
sub _EscapeHTML {

    my ($Text) = @_;

    return ("") if !defined($Text);

    $Text =~ s/&/&amp;/g;
    $Text =~ s/</&lt;/g;
    $Text =~ s/>/&gt;/g;
    $Text =~ s/"/&quot;/g;

    return ($Text);

}


# Private helper: encodes a value for a URL query string; blanks become
# '+', everything outside [A-Za-z0-9_.~-] becomes %XX.  Undefined input
# gives "".
# NOTE(review): assumes byte strings (no characters above 0xFF) -- confirm
# if the records are ever decoded to Perl character strings.
sub _EscapeURL {

    my ($Text) = @_;

    return ("") if !defined($Text);

    $Text =~ s/([^A-Za-z0-9_.~ -])/sprintf("%%%02X", ord($1))/ge;
    $Text =~ s/ /+/g;

    return ($Text);

}
339    
340    
341    
#--------------------------------------------------------------------------
#
#   Function:   SummaryFilter()
#
#   Purpose:    Produces a one-line summary of a record.
#
#               The text is taken from the first field that holds any
#               non-blank text, tried in this order: abstract ('X'), table
#               of contents ('Y'), keywords ('K').  Line breaks and runs of
#               white space are collapsed to single blanks, surrounding
#               blanks are removed and the first letter is upper-cased.
#
#   Called by:  external
#
#   Parameters: $Database       Database name
#               $DocumentID     Document ID
#               $ItemName       Item name
#               $MimeType       Mime type
#               $DocumentRaw    Raw document text
#
#   Global Variables:   none
#
#   Returns:    The filtered summary, undef if $DocumentRaw is undefined
#               or none of the fields has text
#
sub SummaryFilter {

    my ($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw) = @_;

    if ( !defined($DocumentRaw) ) {
        return (undef);
    }

    # Parse the document
    my %Document = DocumentParser($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw);

    # Select the first field that has text.  A field that is present but
    # blank (a bare "%X" line) must not hide the fields after it.
    my $Summary;
    foreach my $FieldTag ( 'X', 'Y', 'K' ) {
        if ( defined($Document{$FieldTag}) && $Document{$FieldTag} =~ /\S/ ) {
            $Summary = $Document{$FieldTag};
            last;
        }
    }

    # Clean the summary text
    if ( defined($Summary) ) {

        # Normalise line ends: drop the CR of CRLF pairs, otherwise treat
        # a lone CR as a line break
        if ( index($Summary, "\r\n") >= 0 ) {
            $Summary =~ s/\r//g;
        }
        elsif ( index($Summary, "\r") >= 0 ) {
            $Summary =~ s/\r/\n/g;
        }

        # Collapse line breaks and runs of white space to one blank
        $Summary =~ s/\s+/ /g;

        # A field whose text starts on a continuation line begins with a
        # line break; trim so that ucfirst() sees the first letter
        $Summary =~ s/^ //;
        $Summary =~ s/ $//;

        $Summary = ucfirst($Summary);
    }

    return ($Summary);

}
409    
410    
411    
#--------------------------------------------------------------------------
#
#   Function:   RelevanceFeedbackFilter()
#
#   Purpose:    Selects the text of a record to be used for relevance
#               feedback: the first field that holds any non-blank text,
#               tried in this order: abstract ('X'), table of contents
#               ('Y'), keywords ('K'), title ('T'), author ('A').  The
#               text is returned unchanged.
#
#   Called by:  external
#
#   Parameters: $Database       Database name
#               $DocumentID     Document ID
#               $ItemName       Item name
#               $MimeType       Mime type
#               $DocumentRaw    Raw document text
#
#   Global Variables:   none
#
#   Returns:    The filtered relevance feedback, undef if $DocumentRaw is
#               undefined or none of the fields has text
#
sub RelevanceFeedbackFilter {

    my ($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw) = @_;

    if ( !defined($DocumentRaw) ) {
        return (undef);
    }

    # Parse the document
    my %Document = DocumentParser($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw);

    # Return the first field that has text.  A field that is present but
    # blank (a bare "%X" line) must not hide the fields after it.
    foreach my $FieldTag ( 'X', 'Y', 'K', 'T', 'A' ) {
        my $FieldValue = $Document{$FieldTag};
        if ( defined($FieldValue) && $FieldValue =~ /\S/ ) {
            return ($FieldValue);
        }
    }

    return (undef);

}
474    
475    
476    
477     #--------------------------------------------------------------------------
478    
479     1;

  ViewVC Help
Powered by ViewVC 1.1.26