/[local]/search/filters/default.pl
This is a repository of my old source code, which is no longer updated. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /search/filters/default.pl

Parent Directory | Revision Log


Revision 1.1 - (show annotations)
Thu Jun 13 17:25:53 2002 UTC (17 years, 1 month ago) by dpavlin
Branch: MAIN
Branch point for: DbP
File MIME type: text/plain
Initial revision

1 #!/usr/bin/perl
2
3 #--------------------------------------------------------------------------
4 #
5 # Author: Dobrica Pavlinusic
6 # Creation Date: 2002-03-17
7
8
9 #--------------------------------------------------------------------------
10 #
11 # Description:
12 #
13 # This package implements the various filters for this database
14 #
15
16 #--------------------------------------------------------------------------
17 #
18 # Pragmatic modules
19 #
20
21 use strict;
22
23
24 #--------------------------------------------------------------------------
25 #
26 # Package definition
27 #
28
29 package default;
30
31
32 #--------------------------------------------------------------------------
33 #
34 # Application Constants
35 #
36
# Field Names
#
# Maps a field tag to the label that DocumentFilter() prints in front of
# that field's data.
#
# NOTE(review): the non-ASCII bytes in some labels look like ISO-8859-2
# text viewed as Latin-1 - confirm the file encoding before touching them.
%default::FieldNames = (

    # Tags made of digits (optionally suffixed), plus ISBN and MFN
    '700+' => 'Autor(i)',
    '200'  => 'Naslov',
    '205'  => 'Izdanje',
    '210'  => 'Izdavanje',
    '215'  => 'Materijalni opis',
    '225'  => 'Nakladnièka cjelina',
    '300+' => 'Napomena',
    '330'  => 'Sadr¾aj',
    '423'  => 'Prikriveni radovi',
    '464'  => 'Analitièki radovi',
    '610'  => 'Kljuène rijeèi',
    '675+' => 'Klasifik. oznaka',
    '990'  => 'Signatura',
    '991'  => 'Inventarni broj',
    'ISBN' => 'ISBN',
    'MFN'  => 'MFN',
    '994a' => 'Knji¾nica',

    # Single-character tags
    'A' => 'Author(s)',
    'B' => 'Book',
    'C' => 'City',
    'D' => 'Date',
    'E' => 'Editor',
    'F' => 'Footnote',
    'G' => 'Order Info',
    'H' => 'Commentary',
    'I' => 'Publisher',
    'J' => 'Journal',
    'K' => 'Keyword',
    'L' => 'Label',
    'M' => 'Bell',
    'N' => 'Issue',
    'O' => 'Other',
    'P' => 'Page',
    'Q' => 'Corporate',
    'R' => 'Report',
    'S' => 'Series',
    'T' => 'Title',
    'U' => 'Annotation',
    'V' => 'Volume',
    'W' => 'URL',
    'X' => 'Abstract',
    'Y' => 'Table of Contents',
    'Z' => 'Reference',
    '$' => 'Price',
    '*' => 'Copyright',
    '^' => 'Parts',
);
88
89
90
# Field Display Order
#
# DocumentFilter() walks this list from first to last and prints one table
# row per tag that has data, so the order here is the order on the page.
#
# NOTE(review): '.' is listed here but has no entry in %default::FieldNames.
@default::FieldDisplayOrder = (

    # Tags made of digits (optionally suffixed), plus ISBN and MFN
    qw(
        ISBN 700+ 200 205 210 215 225 300+ 330 423 464 610 675+ 990 991
        MFN 994a
    ),

    # Single-character tags
    qw(
        M L T S E Q A J B D V N P C I G $ K * W O Y X R . F H U Z ^
    ),
);
142
# Separators
#
# When a tag occurs more than once in a document, DocumentParser() joins
# the occurrences with the string given here; tags that are not listed
# are joined with ";".
%default::Separator = (
    '225'  => ', ',
    '300+' => '.- ',
    '610'  => ' * ',
    '700+' => ', ',
    '990'  => ' ',
    '991'  => ', ',
);
154
#--------------------------------------------------------------------------
#
#	Function:	DocumentParser()
#
#	Purpose:	Split a raw document into its tagged fields.
#
#			A line of the form "%TAG data" starts (or repeats) the
#			field TAG.  Any other line continues the field that was
#			started most recently and is appended to it after a
#			newline.  Repeated occurrences of a tag are joined with
#			$default::Separator{TAG}, or with ";" when the tag has
#			no (true) entry there.
#
#	Called by:	DocumentFilter(), SummaryFilter(),
#			RelevanceFeedbackFilter()
#
#	Parameters:	$Database		Database name (unused here)
#			$DocumentID		Document ID (unused here)
#			$ItemName		Item name (unused here)
#			$MimeType		Mime type (unused here)
#			$DocumentRaw	Raw document text
#
#	Global Variables:	%default::Separator
#
#	Returns:	A hash table of the document fields, keyed by tag
#			(empty when $DocumentRaw is undefined or has no tags)
#
sub DocumentParser {

	my ($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw) = @_;

	my %Document;
	my $FieldTag;

	if ( !defined($DocumentRaw) ) {
		return (%Document);
	}

	foreach my $DocumentLine ( split(/\n/, $DocumentRaw) ) {

		my ($FieldData, $Separator);

		# A tag is only recognised at the start of the line (leading
		# whitespace allowed).  Without the anchor, a "%" in the middle
		# of a continuation line (e.g. "100%sure") would be taken for
		# a new field.
		if ( $DocumentLine =~ /^\s*%(\S+)\s*(.*)/ ) {
			$FieldTag = $1;
			$FieldData = $2;
			$Separator = $default::Separator{$FieldTag} || ";";
		}
		else {
			# Text before the first tag belongs to no field; drop it
			# rather than filing it under an undefined key.
			next if ( !defined($FieldTag) );

			$FieldData = $DocumentLine;
			$Separator = "\n";
		}

		if ( defined($Document{$FieldTag}) ) {
			$Document{$FieldTag} .= $Separator . $FieldData;
		}
		else {
			$Document{$FieldTag} = $FieldData;
		}
	}

	return (%Document);

}
210
211
#--------------------------------------------------------------------------
#
#	Function:	DocumentFilter()
#
#	Purpose:	Render a raw document as an HTML table, one row per
#			field, in the order given by @default::FieldDisplayOrder.
#			Authors/editors (A, E) and keywords (K) become search
#			links, the URL field (W) becomes a hyperlink and the
#			table of contents (Y) is wrapped in <PRE>.  ISBNs found
#			in the order info (G) add a list of bookshop links.
#
#	Called by:	external
#
#	Parameters:	$Database		Database name
#			$DocumentID		Document ID
#			$ItemName		Item name
#			$MimeType		Mime type
#			$DocumentRaw	Raw document text
#
#	Global Variables:	%default::FieldNames, @default::FieldDisplayOrder,
#				$ENV{'SCRIPT_NAME'}
#
#	Returns:	The filtered document, or undef if $DocumentRaw is
#			undefined
#
#	NOTE(review): field data, $Database and the search terms are written
#	into the HTML and the URLs without escaping, as before.  If documents
#	can contain untrusted text this needs HTML/URL encoding - confirm
#	with the callers before changing the output format.
#
sub DocumentFilter {

	my ($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw) = @_;

	if ( !defined($DocumentRaw) ) {
		return (undef);
	}

	my %Document = DocumentParser($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw);

	# Common prefix of the author and keyword search links
	my $ScriptName = defined($ENV{'SCRIPT_NAME'}) ? $ENV{'SCRIPT_NAME'} : "";
	my $SearchURL = "$ScriptName/GetSearchResults?Database=$Database";

	my $DocumentFinal = "<TABLE BORDER=0 CELLPADDING=0 CELLSPACING=2 WIDTH=100%> \n";

	# One table row per field that has data
	foreach my $FieldTag ( @default::FieldDisplayOrder ) {

		my $FieldData = $Document{$FieldTag};

		# Skip empty slots
		if ( !defined($FieldData) || $FieldData eq "" ) {
			next;
		}

		# Fall back to the tag itself when no label is configured
		# ('.' is in the display order but has no name)
		my $FieldName = defined($default::FieldNames{$FieldTag}) ? $default::FieldNames{$FieldTag} : $FieldTag;

		# Print the row start
		$DocumentFinal .= "<TR><TD VALIGN=TOP ALIGN=RIGHT BGCOLOR=\"#EEEEEE\" WIDTH=\"130\"> ${FieldName}: </TD><TD VALIGN=TOP ALIGN=LEFT> ";

		# Print the field data, marking up if needed
		if ( $FieldTag eq "A" || $FieldTag eq "E" ) {
			my @Links;
			foreach my $Author ( _SplitTrimmed(qr/;/, $FieldData) ) {
				# Search on the name without commas and without
				# initials ("J."); joining the remaining words
				# avoids the trailing "+" the old loop produced.
				my $Names = $Author;
				$Names =~ s/,//g;
				my $SearchTerm = join("+", grep { $_ ne "" && $_ !~ /^\w+\.$/ } split(/ /, $Names));
				push(@Links, "<A HREF=\"$SearchURL&FieldName1=author&FieldContent1=%22$SearchTerm%22\"> $Author </A>");
			}
			$DocumentFinal .= join(", ", @Links);
		}
		elsif ( $FieldTag eq "K" ) {
			my @Links;
			# Keywords may be comma separated on one line, and
			# DocumentParser() joins repeated K fields with ";"
			foreach my $Keyword ( _SplitTrimmed(qr/[;,]/, $FieldData) ) {
				my $SearchTerm = $Keyword;
				$SearchTerm =~ s/ /+/g;
				push(@Links, "<A HREF=\"$SearchURL&FieldName1=keyword&FieldContent1=%22$SearchTerm%22\"> $Keyword </A>");
			}
			$DocumentFinal .= join(", ", @Links);
		}
		elsif ( $FieldTag eq "W" ) {
			$DocumentFinal .= "<A HREF=\"$FieldData\" TARGET=\"BlankWindow\"> $FieldData </A>";
		}
		elsif ( $FieldTag eq "Y" ) {
			$DocumentFinal .= "<PRE>$FieldData</PRE>";
		}
		else {
			$DocumentFinal .= "$FieldData";
		}

		$DocumentFinal .= "</TD></TR> \n";

	}


	# List any external links we may have
	if ( defined($Document{'G'}) ) {

		my $Links = "";

		# Split on ";" with optional blanks: DocumentParser() joins
		# repeated G fields with a bare ";", which the old "; " pattern
		# did not split, gluing two ISBN entries into one bad link.
		foreach my $OrderNumber ( _SplitTrimmed(qr/;/, $Document{'G'}) ) {

			next unless ( $OrderNumber =~ /ISBN\s*(\S+)/ );

			my $ISBN = $1;
			$ISBN =~ s/-//g;
			next if ( $ISBN eq "" );

			$Links .= "<LI><A HREF=\"http://www.amazon.com/exec/obidos/ASIN/$ISBN\" TARGET=\"BlankWindow\"> Get this book from Amazon.com </A></LI>";
			$Links .= "<LI><A HREF=\"http://search.borders.com/fcgi-bin/db2www/search/search.d2w/Details?mediaType=Book&searchType=ISBNUPC&code=$ISBN\" TARGET=\"BlankWindow\"> Get this book from Borders </A></LI>";
			$Links .= "<LI><A HREF=\"http://shop.barnesandnoble.com/bookSearch/isbnInquiry.asp?isbn=$ISBN\" TARGET=\"BlankWindow\"> Get this book from Barnes & Noble </A></LI>";
		}

		# Only print the section when there is at least one link
		if ( $Links ne "" ) {
			$DocumentFinal .= "<TR><TD COLSPAN=2> <HR WIDTH=50%> </TD></TR>";
			$DocumentFinal .= "<TR><TD VALIGN=TOP ALIGN=LEFT> <B> External Links: </B> </TD><TD VALIGN=TOP ALIGN=LEFT><UL>";
			$DocumentFinal .= $Links;
			$DocumentFinal .= "</UL></TD></TR>";
		}
	}

	$DocumentFinal .= "</TABLE> \n";

	return ($DocumentFinal);

}


#--------------------------------------------------------------------------
#
#	Function:	_SplitTrimmed()
#
#	Purpose:	Split $Text on $Pattern, strip leading and trailing
#			whitespace from every piece and drop empty pieces.
#
#	Called by:	DocumentFilter()
#
#	Parameters:	$Pattern	Compiled pattern (qr//) to split on
#			$Text		Text to split
#
#	Returns:	The list of non-empty, trimmed pieces
#
sub _SplitTrimmed {

	my ($Pattern, $Text) = @_;

	my @Pieces;

	foreach my $Piece ( split($Pattern, $Text) ) {
		my $Trimmed = $Piece;
		$Trimmed =~ s/^\s+//;
		$Trimmed =~ s/\s+$//;
		if ( $Trimmed ne "" ) {
			push(@Pieces, $Trimmed);
		}
	}

	return (@Pieces);

}
339
340
341
#--------------------------------------------------------------------------
#
#	Function:	SummaryFilter()
#
#	Purpose:	Produce a one-line summary of a document.  The summary
#			is taken from the first of the abstract (X), the table
#			of contents (Y) and the keywords (K) that contains any
#			text; whitespace is then normalised and the first
#			character is upper-cased.
#
#	Called by:	external
#
#	Parameters:	$Database		Database name
#			$DocumentID		Document ID
#			$ItemName		Item name
#			$MimeType		Mime type
#			$DocumentRaw	Raw document text
#
#	Global Variables:	none
#
#	Returns:	The filtered summary, or undef if $DocumentRaw is
#			undefined or none of the fields holds any text
#
sub SummaryFilter {

	my ($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw) = @_;

	if ( !defined($DocumentRaw) ) {
		return (undef);
	}

	# Parse the document
	my %Document = DocumentParser($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw);

	# Take the first candidate field that actually has text.  Testing
	# only for definedness let an empty abstract hide a usable table
	# of contents or keyword list.
	my $Summary;
	foreach my $FieldTag ( 'X', 'Y', 'K' ) {
		if ( defined($Document{$FieldTag}) && $Document{$FieldTag} =~ /\S/ ) {
			$Summary = $Document{$FieldTag};
			last;
		}
	}

	# Clean the summary text
	if ( defined($Summary) ) {
		# \s covers \r and \n, so every kind of line break (and any
		# run of blanks) collapses to a single space
		$Summary =~ s/\s+/ /g;

		# Trim, so that ucfirst() sees the first letter rather than
		# a leading blank
		$Summary =~ s/^ //;
		$Summary =~ s/ $//;

		$Summary = ucfirst($Summary);
	}

	return ($Summary);

}
409
410
411
#--------------------------------------------------------------------------
#
#	Function:	RelevanceFeedbackFilter()
#
#	Purpose:	Pick the text of a document to be used for relevance
#			feedback: the first of the abstract (X), the table of
#			contents (Y), the keywords (K), the title (T) and the
#			author (A) that contains any text.  The text is
#			returned as parsed, without further clean-up.
#
#	Called by:	external
#
#	Parameters:	$Database		Database name
#			$DocumentID		Document ID
#			$ItemName		Item name
#			$MimeType		Mime type
#			$DocumentRaw	Raw document text
#
#	Global Variables:	none
#
#	Returns:	The filtered relevance feedback, or undef if
#			$DocumentRaw is undefined or none of the fields holds
#			any text
#
sub RelevanceFeedbackFilter {

	my ($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw) = @_;

	if ( !defined($DocumentRaw) ) {
		return (undef);
	}

	# Parse the document
	my %Document = DocumentParser($Database, $DocumentID, $ItemName, $MimeType, $DocumentRaw);

	# Candidate fields, most informative first.  A field that is present
	# but blank is passed over, so that an empty abstract does not hide
	# the fields further down the list.
	my $RelevanceFeedback;
	foreach my $FieldTag ( 'X', 'Y', 'K', 'T', 'A' ) {
		if ( defined($Document{$FieldTag}) && $Document{$FieldTag} =~ /\S/ ) {
			$RelevanceFeedback = $Document{$FieldTag};
			last;
		}
	}

	return ($RelevanceFeedback);

}
474
475
476
477 #--------------------------------------------------------------------------
478
479 1;

  ViewVC Help
Powered by ViewVC 1.1.26