/[swish]/trunk/common.config
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/common.config

Parent Directory Parent Directory | Revision Log Revision Log


Revision 11 - (show annotations)
Sun Mar 16 21:16:41 2003 UTC (16 years, 6 months ago) by dpavlin
File size: 3261 byte(s)
Initial revision

1 ###################################################
2
3 #IndexDir http://www.rot13.org
4
5 #IndexFile /data/swish/index/rot13
6
7 #IndexName "rot13.org"
8 #IndexDescription "Internet web pages"
9 #IndexPointer "http://www.rot13.org/"
10 #IndexAdmin "dpavlin@rot13.org"
11
12 # don't index files with this suffix (filesys only?)
13 NoContents .gif .xbm .au .mov .mpg ..ps .bin .exe .wmv
14
15 # MetaNames automatic
16 MetaNames first author
17
18 # 0 to 3 - 0 is totally silent, 3 is the most verbose.
19 #IndexReport 2
20
21 #FollowSymLinks yes
22
23 #UseStemming no
24
25 IgnoreTotalWordCountWhenRanking yes
26 # Put yes to ignore the total number of words in the file when calculating
27 # ranking. Often better with merges and small files. Default is no.
28
29 WordCharacters abcdefghijklmnopqrstuvwxyz\&#;0123456789.@|,-'"[](~!@$%^{}_+?¹©ðÐèÈæƾ®
30
31 # We allow a period and a dash within words, but strip them
32 # from the beginning or end of a word. This is done after
33 # WordCharacters above is used to split words.
34
35 IgnoreFirstChar .-"'`([{<
36 IgnoreLastChar .-,!?;"'`)]}>
37
38 # Finally, resulting words must begin/end with one
39 # of the characters listed here
40
41 #BeginCharacters abcdefghijklmnopqrstuvwxyz0123456789é
42 #EndCharacters abcdefghijklmnopqrstuvwxyz0123456789é
43
44
45 # This automatically omits words that appear too often in the files
46 # (these words are called stopwords). Specify a whole percentage
47 # and a number, such as "80 256". This omits words that occur in
48 # over 80% of the files and appear in over 256 files. Comment out
49 # to turn off auto-stopwording.
50 IgnoreLimit 90 500
51
52 # This option lets the user decide whether to index the comments in the files.
53 # The default is 1. Set to 0 if comment indexing is not required.
54 IndexComments 0
55
56 #TranslateCharacters string1 string2
57 # This option causes the characters in string1 to be indexed
58 # as the corresponding characters in string2.
59 # This is done after HTML entities are converted.
60 # This option is useful in languages like spanish, french, ...
61 # eg:
62 # TranslateCharacters _á -a
63 # This will index a_b as a-b and ámo as amo
64 TranslateCharacters ¹©ðÐèÈæƾ® ssddcccczz
65
66 ################################
67 # DIRECTIVES for HTTP METHOD ONLY
68 # Comment out if using FILESYSTEM
69 ##################################
70
71 MaxDepth 0
72 #(default 5) This defines how many links the spider should
73 #follow before stopping. A value of 0 configures the spider to
74 #traverse all links
75
76 Delay 0
77 #(default 60) The number of seconds to wait between issuing
78 #requests to a server.
79
80 SpiderDirectory /data/swish/spider
81
82 #EquivalentServer http://portal.pliva.hr http://www.pliva.hr
83 #(default nothing) This allows you to deal with
84 #servers that respond to multiple DNS names. Each line should have
85 #a list of all the method/names that should be considered equivalent.
86 #If you have multiple directives, each one defines its own set of equivalent
87 #servers.
88
89 # documents which are html files
90 IndexContents HTML .htm .html .php .pl
91
92 # text documents
93 IndexContents TXT .txt .log .text
94
95 # default
96 DefaultContents HTML
97
98 # use 300 chars from html
99 StoreDescription HTML <body> 300
100 StoreDescription TXT 300
101
102
103 # index pdf
104 IndexContents HTML .pdf
105 FileFilter .pdf pdftotext "-htmlmeta '%p' -"
106
107 # various other
108 FileFilter .gz gzip "-cd '%p'"
109 FileFilter .bz2 bzip2 "-cd '%p'"
110
111 # debug
112 ParserWarnLevel 3
113 IndexReport 2
114

Properties

Name Value
cvs2svn:cvs-rev 1.1

  ViewVC Help
Powered by ViewVC 1.1.26