/[local]/search/filters/default.pl
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /search/filters/default.pl

Parent Directory | Revision Log


Revision 1.2 - (show annotations)
Fri Jun 14 18:10:27 2002 UTC (17 years, 1 month ago) by dpavlin
Branch: MAIN
Changes since 1.1: +4 -0 lines
File MIME type: text/plain
escape some html entities

1 #!/usr/bin/perl
2
3 #--------------------------------------------------------------------------
4 #
5 # Author: Dobrica Pavlinusic
6 # Creation Date: 2002-03-17
7
8
9 #--------------------------------------------------------------------------
10 #
11 # Description:
12 #
13 # This package implements the various filters for this database
14 #
15
16 #--------------------------------------------------------------------------
17 #
18 # Pragmatic modules
19 #
20
21 use strict;
22
23
24 #--------------------------------------------------------------------------
25 #
26 # Package definition
27 #
28
29 package default;
30
31
32 #--------------------------------------------------------------------------
33 #
34 # Application Constants
35 #
36
# Field Names
#
# Maps a field tag, as found after '%' in a raw document, to the label
# shown in front of that field's value by DocumentFilter().
#
# NOTE(review): the labels containing 'è' and '¾' look like ISO-8859-2
# text displayed as Latin-1 (probably 'č' and 'ž') -- confirm the real
# encoding of this file before touching those strings.
%default::FieldNames = (

    # numeric (library catalogue) tags
    '700+' => 'Autor(i)',
    '200'  => 'Naslov',
    '205'  => 'Izdanje',
    '210'  => 'Izdavanje',
    '215'  => 'Materijalni opis',
    '225'  => 'Nakladnièka cjelina',
    '300+' => 'Napomena',
    '330'  => 'Sadr¾aj',
    '423'  => 'Prikriveni radovi',
    '464'  => 'Analitièki radovi',
    '610'  => 'Kljuène rijeèi',
    '675+' => 'Klasifik. oznaka',
    '990'  => 'Signatura',
    '991'  => 'Inventarni broj',
    'ISBN' => 'ISBN',
    'MFN'  => 'MFN',
    '994a' => 'Knji¾nica',

    # single-character tags
    'A' => 'Author(s)',
    'B' => 'Book',
    'C' => 'City',
    'D' => 'Date',
    'E' => 'Editor',
    'F' => 'Footnote',
    'G' => 'Order Info',
    'H' => 'Commentary',
    'I' => 'Publisher',
    'J' => 'Journal',
    'K' => 'Keyword',
    'L' => 'Label',
    'M' => 'Bell',
    'N' => 'Issue',
    'O' => 'Other',
    'P' => 'Page',
    'Q' => 'Corporate',
    'R' => 'Report',
    'S' => 'Series',
    'T' => 'Title',
    'U' => 'Annotation',
    'V' => 'Volume',
    'W' => 'URL',
    'X' => 'Abstract',
    'Y' => 'Table of Contents',
    'Z' => 'Reference',
    '$' => 'Price',
    '*' => 'Copyright',
    '^' => 'Parts',
);
88
89
90
# Field Display Order
#
# The order in which DocumentFilter() walks the field tags when it builds
# the result table; a tag that is not listed here is never displayed.
# Note that '.' is listed although %default::FieldNames has no label for it.
@default::FieldDisplayOrder = (

    # numeric (library catalogue) tags
    qw(
        ISBN 700+ 200 205 210 215 225 300+ 330 423
        464 610 675+ 990 991 MFN 994a
    ),

    # single-character tags
    qw(
        M L T S E Q A J B D
        V N P C I G $ K * W
        O Y X R . F H U Z ^
    ),
);
142
# separators
#
# Text that DocumentParser() puts between the values of a field tag that
# occurs more than once in a document.  Tags without an entry here are
# joined with ";".
%default::Separator = (
    '700+' => ', ',
    '990'  => ' ',
    '991'  => ', ',
    '300+' => '.- ',
    '225'  => ', ',
    '610'  => ' * ',
);
154
#--------------------------------------------------------------------------
#
# Function: DocumentParser()
#
# Purpose: Splits the raw text of a document into its fields.
#
# A line containing "%TAG text" opens (or re-opens) the field
# TAG. A tag seen again has the new text appended, separated
# by the %default::Separator entry for that tag, or by ";" when
# there is no (true) entry. Every other line is a continuation
# of the most recent tag and is appended after a newline.
#
# Called by: DocumentFilter(), SummaryFilter(), RelevanceFeedbackFilter()
#
# Parameters: $Database Database name (not used here)
# $DocumentID Document ID (not used here)
# $ItemName Item name (not used here)
# $MimeType Mime type (not used here)
# $DocumentRaw Raw document text
#
# Global Variables: %default::Separator
#
# Returns: A hash table of the document fields (tag => text)
#
sub DocumentParser {

    my ($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw) = @_;

    my %Fields;

    # Tag of the field currently being filled. It deliberately lives
    # outside the loop so that continuation lines know where to go; it is
    # still undefined for lines that come before the first tag.
    my $CurrentTag;

    foreach my $Line ( split(/\n/, $DocumentRaw) ) {

        my ($Glue, $Text);

        # NOTE(review): the pattern is not anchored, so a '%' followed by
        # a non-blank character anywhere in the line opens a field --
        # confirm that this is intended.
        if ( my ($Tag, $Rest) = $Line =~ /%(\S+)\s*(.*)/ ) {
            $CurrentTag = $Tag;
            $Text = $Rest;
            $Glue = $default::Separator{$Tag} || ";";
        }
        else {
            # plain line: continuation of the current field
            $Text = $Line;
            $Glue = "\n";
        }

        $Fields{$CurrentTag} = defined($Fields{$CurrentTag})
            ? $Fields{$CurrentTag} . $Glue . $Text
            : $Text;
    }

    return (%Fields);

}
210
211
#--------------------------------------------------------------------------
#
# Function: DocumentFilter()
#
# Purpose: Renders a raw document as an HTML table, one row per
# non-empty field, in the order of @default::FieldDisplayOrder.
#
# Authors/editors (A, E) and keywords (K) become search links,
# the URL field (W) becomes a link, the table of contents (Y)
# is wrapped in <PRE>, and every other field is shown as text.
# If the document has an order info field (G), a list of
# external bookshop links is added for every ISBN found in it.
#
# All document text is HTML-escaped (& < > ") at the point
# where it is written, i.e. after the field has been split
# into its parts.
#
# Called by: external
#
# Parameters: $Database Database name (used in search links)
# $DocumentID Document ID
# $ItemName Item name
# $MimeType Mime type
# $DocumentRaw Raw document text
#
# Global Variables: %default::FieldNames, @default::FieldDisplayOrder,
# $ENV{'SCRIPT_NAME'}
#
# Returns: The filtered document, or undef if there is no raw text
#
sub DocumentFilter {

    my ($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw) = @_;

    if ( !defined($DocumentRaw) ) {
        return (undef);
    }

    # Returns an HTML-escaped copy of its argument. '&' has to be replaced
    # first, otherwise the '&' of the other entities would be escaped again.
    # '"' is included because values are also written inside HREF="...".
    my $EscapeHtml = sub {
        my ($Text) = @_;
        $Text =~ s/&/&amp;/g;
        $Text =~ s/</&lt;/g;
        $Text =~ s/>/&gt;/g;
        $Text =~ s/"/&quot;/g;
        return $Text;
    };

    my %Document = DocumentParser($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw);

    my $DocumentFinal = "<TABLE BORDER=0 CELLPADDING=0 CELLSPACING=2 WIDTH=100%> \n";
    # $DocumentFinal .= "<TR><TD COLSPAN=2 VALIGN=TOP ALIGN=CENTER><H3> $Document{'T'} </H3></TD></TR> \n";

    # Loop over each field in display order
    foreach my $FieldTag ( @default::FieldDisplayOrder ) {

        my $Raw = $Document{$FieldTag};

        # Skip empty slots
        next if !defined($Raw) || $Raw eq "";

        # Print the row start
        $DocumentFinal .= "<TR><TD VALIGN=TOP ALIGN=RIGHT BGCOLOR=\"#EEEEEE\" WIDTH=\"130\"> $default::FieldNames{$FieldTag}: </TD><TD VALIGN=TOP ALIGN=LEFT> ";

        # Print the field data, marking up if needed. The raw value is
        # split first and escaped afterwards, so that the ';' of an entity
        # such as "&amp;" is never taken for a list separator.
        if ( $FieldTag eq "A" || $FieldTag eq "E" ) {
            my @Links;
            foreach my $Author ( split(/;/, $Raw) ) {
                # Search term: the name without commas and without
                # abbreviated parts ("J."), every remaining word followed
                # by '+'.
                (my $Names = $Author) =~ s/,//g;
                my @Words = grep { $_ ne "" && $_ !~ /^\w+\.$/ } split(/ /, $Names);
                my $SearchTerm = @Words ? join("+", @Words) . "+" : "";

                push(@Links,
                    "<A HREF=\"$ENV{'SCRIPT_NAME'}/GetSearchResults?Database=$Database&FieldName1=author&FieldContent1=%22"
                    . $EscapeHtml->($SearchTerm) . "%22\"> " . $EscapeHtml->($Author) . " </A>");
            }
            $DocumentFinal .= join(", ", @Links);
        }
        elsif ( $FieldTag eq "K" ) {
            my @Links;
            foreach my $Keyword ( split(/,/, $Raw) ) {
                (my $SearchTerm = $Keyword) =~ s/ /+/g;

                push(@Links,
                    "<A HREF=\"$ENV{'SCRIPT_NAME'}/GetSearchResults?Database=$Database&FieldName1=keyword&FieldContent1=%22"
                    . $EscapeHtml->($SearchTerm) . "%22\"> " . $EscapeHtml->($Keyword) . " </A>");
            }
            $DocumentFinal .= join(", ", @Links);
        }
        elsif ( $FieldTag eq "W" ) {
            my $Url = $EscapeHtml->($Raw);
            $DocumentFinal .= "<A HREF=\"$Url\" TARGET=\"BlankWindow\"> $Url </A>";
        }
        elsif ( $FieldTag eq "Y" ) {
            $DocumentFinal .= "<PRE>" . $EscapeHtml->($Raw) . "</PRE>";
        }
        else {
            $DocumentFinal .= $EscapeHtml->($Raw);
        }

        $DocumentFinal .= "</TD></TR> \n";

    }


    # List any external links we may have
    if ( defined($Document{'G'}) ) {

        $DocumentFinal .= "<TR><TD COLSPAN=2> <HR WIDTH=50%> </TD></TR>";
        $DocumentFinal .= "<TR><TD VALIGN=TOP ALIGN=LEFT> <B> External Links: </B> </TD><TD VALIGN=TOP ALIGN=LEFT><UL>";

        # DocumentParser() joins repeated fields with a bare ";" unless a
        # separator is configured, so the blank after ';' must be optional.
        foreach my $OrderInfo ( split(/;\s*/, $Document{'G'}) ) {

            next unless $OrderInfo =~ /ISBN\s*(\S*)/;

            my $OrderNumber = $1;
            $OrderNumber =~ s/-//g;
            $OrderNumber = $EscapeHtml->($OrderNumber);

            $DocumentFinal .= "<LI><A HREF=\"http://www.amazon.com/exec/obidos/ASIN/$OrderNumber\" TARGET=\"BlankWindow\"> Get this book from Amazon.com </A></LI>";
            $DocumentFinal .= "<LI><A HREF=\"http://search.borders.com/fcgi-bin/db2www/search/search.d2w/Details?mediaType=Book&searchType=ISBNUPC&code=$OrderNumber\" TARGET=\"BlankWindow\"> Get this book from Borders </A></LI>";
            $DocumentFinal .= "<LI><A HREF=\"http://shop.barnesandnoble.com/bookSearch/isbnInquiry.asp?isbn=$OrderNumber\" TARGET=\"BlankWindow\"> Get this book from Barnes &amp; Noble </A></LI>";
        }

        $DocumentFinal .= "</UL></TD></TR>";
    }

    $DocumentFinal .= "</TABLE> \n";

    return ($DocumentFinal);

}
343
344
345
#--------------------------------------------------------------------------
#
# Function: SummaryFilter()
#
# Purpose: Produces a one-line summary of a document. The text is
# taken from the first of these fields that is defined:
# abstract (X), table of contents (Y), keywords (K). Line
# breaks and runs of whitespace are collapsed to single blanks
# and the first character is upper-cased.
#
# Called by: external
#
# Parameters: $Database Database name
# $DocumentID Document ID
# $ItemName Item name
# $MimeType Mime type
# $DocumentRaw Raw document text
#
# Global Variables: none
#
# Returns: The filtered summary, or undef if there is no raw text or
# none of the three fields is present
#
sub SummaryFilter {

    my ($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw) = @_;

    return (undef) unless defined($DocumentRaw);

    # Parse the document
    my %Fields = &DocumentParser($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw);

    # Take the first candidate field that is present
    my $Summary;
    foreach my $Tag ( 'X', 'Y', 'K' ) {
        next unless defined($Fields{$Tag});
        $Summary = $Fields{$Tag};
        last;
    }

    if ( defined($Summary) ) {

        # Normalise line ends: text with CR/LF pairs loses every CR,
        # any other text has its CRs turned into LFs.
        if ( $Summary =~ /\r\n/ ) {
            $Summary =~ tr/\r//d;
        }
        else {
            $Summary =~ tr/\r/\n/;
        }

        # Flatten to a single line and squeeze whitespace
        $Summary =~ tr/\n/ /;
        $Summary =~ s/\s+/ /g;

        $Summary = ucfirst($Summary);
    }

    return ($Summary);

}
413
414
415
#--------------------------------------------------------------------------
#
# Function: RelevanceFeedbackFilter()
#
# Purpose: Selects the text of a document that is used for relevance
# feedback: the first of these fields that is defined, in
# this order: abstract (X), table of contents (Y),
# keywords (K), title (T), author (A). The text is returned
# exactly as parsed.
#
# Called by: external
#
# Parameters: $Database Database name
# $DocumentID Document ID
# $ItemName Item name
# $MimeType Mime type
# $DocumentRaw Raw document text
#
# Global Variables: none
#
# Returns: The filtered relevance feedback, or undef if there is no
# raw text or none of the five fields is present
#
sub RelevanceFeedbackFilter {

    my ($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw) = @_;

    return (undef) unless defined($DocumentRaw);

    # Parse the document
    my %Fields = &DocumentParser($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw);

    # Candidate fields, most useful first
    foreach my $Tag ( 'X', 'Y', 'K', 'T', 'A' ) {
        return ($Fields{$Tag}) if defined($Fields{$Tag});
    }

    # None of the candidate fields is present
    return (undef);

}
478
479
480
481 #--------------------------------------------------------------------------
482
483 1;

  ViewVC Help
Powered by ViewVC 1.1.26