1 |
################################################### |
2 |
|
3 |
# Don't index the contents of files with these suffixes (filesys only?) |
4 |
NoContents .gif .xbm .au .mov .mpg .ps .bin .exe .wmv |
5 |
|
6 |
# MetaNames automatic |
7 |
#MetaNames first author |
8 |
|
9 |
# 0 to 3 - 0 is totally silent, 3 is the most verbose. |
10 |
#IndexReport 2 |
11 |
|
12 |
#FollowSymLinks yes |
13 |
|
14 |
#UseStemming no |
15 |
|
16 |
IgnoreTotalWordCountWhenRanking yes |
17 |
# Set to yes to ignore the total number of words in the file when calculating |
18 |
# ranking. Often better with merges and small files. Default is no. |
19 |
|
20 |
# The trailing ten high-bit characters are presumably the ISO-8859-2 bytes for |
# the Croatian letters with diacritics, displayed as Latin-1 - confirm the |
# encoding this file is saved in. |
WordCharacters abcdefghijklmnopqrstuvwxyz\&#;0123456789.@|,-'"[](~!@$%^{}_+?¹©ðÐèÈæÆ¾® |
21 |
|
22 |
# Periods and dashes are allowed within words, but they (along with the |
23 |
# quotes, brackets and punctuation listed below) are stripped from the |
24 |
# beginning or end of a word, after WordCharacters is used to split words. |
25 |
|
26 |
IgnoreFirstChar .-"'`([{< |
27 |
IgnoreLastChar .-,!?;"'`)]}> |
28 |
|
29 |
# Finally, resulting words must begin/end with one |
30 |
# of the characters listed here |
31 |
|
32 |
#BeginCharacters abcdefghijklmnopqrstuvwxyz0123456789é |
33 |
#EndCharacters abcdefghijklmnopqrstuvwxyz0123456789é |
34 |
|
35 |
|
36 |
# This automatically omits words that appear too often in the files |
37 |
# (these words are called stopwords). Specify a whole percentage |
38 |
# and a number, such as "80 256". This omits words that occur in |
39 |
# over 80% of the files and appear in over 256 files. Comment out |
40 |
# to turn off auto-stopwording. The setting below uses 90% and 500 files. |
41 |
IgnoreLimit 90 500 |
42 |
|
43 |
# This option lets the user decide whether to index the comments in the files. |
44 |
# The default is 1. Set to 0 if comment indexing is not required. |
45 |
IndexComments 0 |
46 |
|
47 |
#TranslateCharacters string1 string2 |
48 |
# This option causes the characters in string1 to be indexed |
49 |
# as the corresponding characters in string2. |
50 |
# This is done after HTML entities are converted. |
51 |
# This option is useful in languages like Spanish, French, ... |
52 |
# e.g.: |
53 |
# TranslateCharacters _á -a |
54 |
# This will index a_b as a-b and ámo as amo. |
55 |
# The mapping is positional: the ten source characters below (presumably the |
# ISO-8859-2 bytes of the Croatian letters with diacritics, displayed as |
# Latin-1 - confirm) map one-to-one onto ssddcccczz. |
TranslateCharacters ¹©ðÐèÈæÆ¾® ssddcccczz |
56 |
|
57 |
################################ |
58 |
# DIRECTIVES for HTTP METHOD ONLY |
59 |
# Comment out if using FILESYSTEM |
60 |
################################## |
61 |
|
62 |
#MaxDepth 0 |
63 |
#(default 5) This defines how many links the spider should |
64 |
#follow before stopping. A value of 0 configures the spider to |
65 |
#traverse all links |
66 |
|
67 |
#Delay 0 |
68 |
#(default 60) The number of seconds to wait between issuing |
69 |
#requests to a server. |
70 |
|
71 |
#SpiderDirectory /rest/references/swish/spider |
72 |
|
73 |
#EquivalentServer http://portal.pliva.hr http://www.pliva.hr |
74 |
#(default nothing) This allows you to deal with |
75 |
#servers that respond to multiple DNS names. Each line should have |
76 |
#a list of all the method/names that should be considered equivalent. |
77 |
#If you have multiple directives, each one defines its own set of equivalent |
78 |
#servers. |
79 |
|
80 |
# files with these suffixes are parsed as HTML |
81 |
IndexContents HTML* .htm .html .php .pl |
82 |
|
83 |
# files with these suffixes are parsed as plain text |
84 |
IndexContents TXT .txt .log .text |
85 |
|
86 |
# default parser for files not matched by an IndexContents line |
87 |
DefaultContents HTML* |
88 |
|
89 |
# store up to 3000 chars as the document description (taken from <body> for html) |
90 |
StoreDescription HTML* <body> 3000 |
91 |
StoreDescription TXT 3000 |
92 |
|
93 |
|
94 |
# index pdf: pdftotext -htmlmeta writes the file as simple HTML (with meta tags) to stdout |
# NOTE(review): this line uses HTML while the other directives use HTML* - |
# confirm that the StoreDescription HTML* setting still applies to pdf files. |
95 |
IndexContents HTML .pdf |
96 |
FileFilter .pdf pdftotext "-htmlmeta '%p' -" |
97 |
|
98 |
# other filters: index the verbose member listing of .tar.gz archives, |
# and the decompressed contents of .gz and .bz2 files |
99 |
FileFilter .tar.gz tar "tvfz '%p'" |
100 |
FileFilter .gz gzip "-cd '%p'" |
101 |
FileFilter .bz2 bzip2 "-cd '%p'" |
102 |
|
103 |
# regex version of above |
104 |
#FileFilterMatch tar "tvfz '%p'" /\.tar\.gz/i |
105 |
#FileFilterMatch gzip "-cd '%p'" /\.gz/i |
106 |
#FileFilterMatch bzip2 "-cd '%p'" /\.bz2/i |
107 |
|
108 |
# declare document path and title as metanames (presumably so that searches can be limited to them - confirm) |
109 |
MetaNames swishdocpath swishtitle |
110 |
|
111 |
# debug: parser warning level and indexing verbosity (IndexReport ranges from 0, silent, to 3, most verbose) |
112 |
ParserWarnLevel 3 |
113 |
IndexReport 1 |
114 |
|