/[swish]/trunk/common-progspider.config
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/common-progspider.config

Parent Directory Parent Directory | Revision Log Revision Log


Revision 63 - (hide annotations)
Fri Feb 6 13:29:39 2004 UTC (20 years, 1 month ago) by dpavlin
File size: 3345 byte(s)
convert pdf files when indexing with progspider

1 dpavlin 63 ###################################################
2    
3     # don't index files with this suffix (filesys only?)
4     NoContents .gif .xbm .au .mov .mpg .ps .bin .exe .wmv
5    
6     # MetaNames automatic
7     #MetaNames first author
8    
9     # 0 to 3 - 0 is totally silent, 3 is the most verbose.
10     #IndexReport 2
11    
12     #FollowSymLinks yes
13    
14     #UseStemming no
15    
16     IgnoreTotalWordCountWhenRanking yes
17     # Put yes to ignore the total number of words in the file when calculating
18     # ranking. Often better with merges and small files. Default is no.
19    
20     WordCharacters abcdefghijklmnopqrstuvwxyz\&#;0123456789.@|,-'"[](~!@$%^{}_+?¹©ðÐèÈæƾ®
21    
22     # We allow a period and a dash within words, but strip them
23     # from the beginning or end of a word. This is done after
24     # WordCharacters above is used to split words.
25    
26     IgnoreFirstChar .-"'`([{<
27     IgnoreLastChar .-,!?;"'`)]}>
28    
29     # Finally, resulting words must begin/end with one
30     # of the characters listed here
31    
32     #BeginCharacters abcdefghijklmnopqrstuvwxyz0123456789é
33     #EndCharacters abcdefghijklmnopqrstuvwxyz0123456789é
34    
35    
36     # This automatically omits words that appear too often in the files
37     # (these words are called stopwords). Specify a whole percentage
38     # and a number, such as "80 256". This omits words that occur in
39     # over 80% of the files and appear in over 256 files. Comment out
40     # to turn off auto-stopwording.
41     IgnoreLimit 90 500
42    
43     # This option lets the user decide whether to index the comments in the files.
44     # The default is 1. Set to 0 if comment indexing is not required.
45     IndexComments 0
46    
47     #TranslateCharacters string1 string2
48     # This option causes the characters in string1 to be indexed
49     # as the corresponding characters in string2.
50     # This is done after HTML entities are converted.
51     # This option is useful in languages like Spanish, French, ...
52     # eg:
53     # TranslateCharacters _á -a
54     # This will index a_b as a-b and ámo as amo
55     TranslateCharacters ¹©ðÐèÈæƾ® ssddcccczz
56    
57     ################################
58     # DIRECTIVES for HTTP METHOD ONLY
59     # Comment out if using FILESYSTEM
60     ##################################
61    
62     #MaxDepth 0
63     #(default 5) This defines how many links the spider should
64     #follow before stopping. A value of 0 configures the spider to
65     #traverse all links
66    
67     #Delay 0
68     #(default 60) The number of seconds to wait between issuing
69     #requests to a server.
70    
71     #SpiderDirectory /rest/references/swish/spider
72    
73     #EquivalentServer http://portal.pliva.hr http://www.pliva.hr
74     #(default nothing) This allows you to deal with
75     #servers that respond to multiple DNS names. Each line should have
76     #a list of all the method/names that should be considered equivalent.
77     #If you have multiple directives, each one defines its own set of equivalent
78     #servers.
79    
80     # documents which are html files
81     #IndexContents HTML* .htm .html .php .pl
82    
83     # text documents
84     #IndexContents TXT .txt .log .text
85    
86     # default
87     DefaultContents HTML*
88    
89     # store up to 3000 chars as the description (taken from <body> for HTML)
90     StoreDescription HTML* <body> 3000
91     StoreDescription TXT 3000
92    
93    
94     # index pdf
95     #IndexContents HTML .pdf
96     #FileFilter .pdf pdftotext "-htmlmeta '%p' -"
97    
98     # various other
99     #FileFilter .tar.gz tar "tvfz '%p'"
100     #FileFilter .gz gzip "-cd '%p'"
101     #FileFilter .bz2 bzip2 "-cd '%p'"
102    
103     # regex version of above
104     #FileFilterMatch tar "tvfz '%p'" /\.tar\.gz/i
105     #FileFilterMatch gzip "-cd '%p'" /\.gz/i
106     #FileFilterMatch bzip2 "-cd '%p'" /\.bz2/i
107    
108     # store path of document, title
109     MetaNames swishdocpath swishtitle
110    
111     # debug
112     ParserWarnLevel 3
113     IndexReport 1
114    

Properties

Name Value
cvs2svn:cvs-rev 1.1

  ViewVC Help
Powered by ViewVC 1.1.26