1 |
dpavlin |
11 |
################################################### |
2 |
|
|
|
3 |
|
|
# don't index files with this suffix (filesys only?) |
4 |
|
|
NoContents .gif .xbm .au .mov .mpg ..ps .bin .exe .wmv |
5 |
|
|
|
6 |
|
|
# MetaNames automatic |
7 |
dpavlin |
26 |
#MetaNames first author |
8 |
dpavlin |
11 |
|
9 |
|
|
# 0 to 3 - 0 is totally silent, 3 is the most verbose. |
10 |
|
|
#IndexReport 2 |
11 |
|
|
|
12 |
|
|
#FollowSymLinks yes |
13 |
|
|
|
14 |
|
|
#UseStemming no |
15 |
|
|
|
16 |
|
|
IgnoreTotalWordCountWhenRanking yes |
17 |
|
|
# Put yes to ignore the total number of words in the file when calculating |
18 |
|
|
# ranking. Often better with merges and small files. Default is no. |
19 |
|
|
|
20 |
|
|
WordCharacters abcdefghijklmnopqrstuvwxyz\&#;0123456789.@|,-'"[](~!@$%^{}_+?¹©ðÐèÈæƾ® |
21 |
|
|
|
22 |
|
|
# We allow a period and a dash within words, but strip them |
23 |
|
|
# from the beginning or end of a word. This is done after |
24 |
|
|
# WordCharacters above is used to split words. |
25 |
|
|
|
26 |
|
|
IgnoreFirstChar .-"'`([{< |
27 |
|
|
IgnoreLastChar .-,!?;"'`)]}> |
28 |
|
|
|
29 |
|
|
# Finally, resulting words must begin/end with one |
30 |
|
|
# of the characters listed here |
31 |
|
|
|
32 |
|
|
#BeginCharacters abcdefghijklmnopqrstuvwxyz0123456789é |
33 |
|
|
#EndCharacters abcdefghijklmnopqrstuvwxyz0123456789é |
34 |
|
|
|
35 |
|
|
|
36 |
|
|
# This automatically omits words that appear too often in the files |
37 |
|
|
# (these words are called stopwords). Specify a whole percentage |
38 |
|
|
# and a number, such as "80 256". This omits words that occur in |
39 |
|
|
# over 80% of the files and appear in over 256 files. Comment out |
40 |
|
|
# to turn off auto-stopwording. |
41 |
|
|
IgnoreLimit 90 500 |
42 |
|
|
|
43 |
|
|
# This option lets the user decide whether to index the comments in the files |
44 |
|
|
# default is 1. Set to 0 if comment indexing is not required. |
45 |
|
|
IndexComments 0 |
46 |
|
|
|
47 |
|
|
#TranslateCharacters string1 string2 |
48 |
|
|
# This option causes the characters in string1 to be indexed |
49 |
|
|
# as the characters in string2. |
50 |
|
|
# This is done after HTML entities are converted |
51 |
|
|
# This option is useful in languages like spanish, french, ... |
52 |
|
|
# eg: |
53 |
|
|
# TranslateCharacters _á -a |
54 |
|
|
# This will index a_b as a-b and ámo as amo |
55 |
|
|
TranslateCharacters ¹©ðÐèÈæƾ® ssddcccczz |
56 |
|
|
|
57 |
|
|
################################ |
58 |
|
|
# DIRECTIVES for HTTP METHOD ONLY |
59 |
|
|
# Comment out if using FILESYSTEM |
60 |
|
|
################################## |
61 |
|
|
|
62 |
|
|
MaxDepth 0 |
63 |
|
|
#(default 5) This defines how many links the spider should |
64 |
|
|
#follow before stopping. A value of 0 configures the spider to |
65 |
|
|
#traverse all links |
66 |
|
|
|
67 |
|
|
Delay 0 |
68 |
|
|
#(default 60) The number of seconds to wait between issuing |
69 |
|
|
#requests to a server. |
70 |
|
|
|
71 |
|
|
SpiderDirectory /data/swish/spider |
72 |
|
|
|
73 |
|
|
#EquivalentServer http://portal.pliva.hr http://www.pliva.hr |
74 |
|
|
#(default nothing) This allows you to deal with |
75 |
|
|
#servers that respond to multiple DNS names. Each line should have |
76 |
|
|
#a list of all the method/names that should be considered equivalent. |
77 |
|
|
#If you have multiple directives, each one defines its own set of equivalent |
78 |
|
|
#servers. |
79 |
|
|
|
80 |
|
|
# documents which are html files |
81 |
|
|
IndexContents HTML .htm .html .php .pl |
82 |
|
|
|
83 |
|
|
# text documents |
84 |
|
|
IndexContents TXT .txt .log .text |
85 |
|
|
|
86 |
|
|
# default |
87 |
|
|
DefaultContents HTML |
88 |
|
|
|
89 |
|
|
# store up to 3000 chars as the document description (taken from <body> for html) |
90 |
dpavlin |
26 |
StoreDescription HTML <body> 3000 |
91 |
|
|
StoreDescription TXT 3000 |
92 |
dpavlin |
11 |
|
93 |
|
|
|
94 |
|
|
# index pdf |
95 |
|
|
IndexContents HTML .pdf |
96 |
|
|
FileFilter .pdf pdftotext "-htmlmeta '%p' -" |
97 |
|
|
|
98 |
|
|
# various other |
99 |
|
|
FileFilter .gz gzip "-cd '%p'" |
100 |
|
|
FileFilter .bz2 bzip2 "-cd '%p'" |
101 |
|
|
|
102 |
dpavlin |
26 |
# store path of document |
103 |
|
|
MetaNames swishdocpath |
104 |
|
|
|
105 |
dpavlin |
11 |
# debug |
106 |
|
|
ParserWarnLevel 3 |
107 |
|
|
IndexReport 2 |
108 |
|
|
|