/[SWISH-PlusPlus]/trunk/swish++/config.h
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/swish++/config.h

Parent Directory | Revision Log


Revision 11 - (hide annotations)
Sun Dec 5 13:30:57 2004 UTC (19 years, 3 months ago) by dpavlin
File MIME type: text/plain
File size: 10308 byte(s)
support for meta in data, relaxed swish++ config, distribution cleanup

1 dpavlin 11 /*
2     ** SWISH++
3     ** config.h
4     **
5     ** Copyright (C) 1998 Paul J. Lucas
6     **
7     ** This program is free software; you can redistribute it and/or modify
8     ** it under the terms of the GNU General Public License as published by
9     ** the Free Software Foundation; either version 2 of the License, or
10     ** (at your option) any later version.
11     **
12     ** This program is distributed in the hope that it will be useful,
13     ** but WITHOUT ANY WARRANTY; without even the implied warranty of
14     ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15     ** GNU General Public License for more details.
16     **
17     ** You should have received a copy of the GNU General Public License
18     ** along with this program; if not, write to the Free Software
19     ** Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20     */
21    
22     #ifndef config_H
23     #define config_H
24    
25     ////////// Word determination /////////////////////////////////////////////////
26    
27     int const Word_Hard_Min_Size = 1;
28     int const Word_Hard_Max_Size = 9999;
29     // The minimum and maximum lengths a word must be in order even to
30     // bother doing more aggressive checks on it to determine if it
31     // should be indexed.
32    
33     int const Word_Min_Size = 1;
34     // The minimum length a non-acronym word must be in order to be
35     // considered for indexing.
36    
37     int const Word_Min_Vowels = 0;
38     // The minimum number of vowels a word must have in order to be
39     // indexed.
40    
41     int const Word_Hex_Max_Size = 9999;
42     // The maximum length a string composed entirely of hexadecimal
43     // digits i.e., ASCII hex data, can be before it is discarded.
44     // Note that the word "cafe" is a legitimate English word composed
45     // entirely of hexadecimal digits. This parameter is used only by
46     // extract(1) in extract.c.
47    
48     // I don't think there is a word in English that has more than...
49    
50     int const Word_Max_Consec_Consonants = 9999;
51     // ...this many consecutive consonants (like "symphysis")
52    
53     int const Word_Max_Consec_Vowels = 9999;
54     // ...this many consecutive vowels (like "queueing")
55    
56     int const Word_Max_Consec_Same = 9999;
57     // ...this many of the same alphabetic character consecutively
58    
59     int const Word_Max_Consec_Puncts = 9999;
60     // ...this many punctuation characters in a row
61    
62     // Characters that are permissible in words: letters must be lower case and
63     // upper case letters would be redundant.
64     //
65     char const Word_Chars[] = "&'-0123456789abcdefghijklmnopqrstuvwxyz_";
66     // Characters that may be in a word. Note that '&' is here so
67     // acronyms like "AT&T" are treated as one word. Unlike SWISH-E,
68     // ';' does not need to be here to recognize and convert character
69     // entity references.
70    
71     #define OPTIMIZE_WORD_CHARS 1
72     // If you are using the default set of characters, that is the
73     // alphanumerics and "&'-_" characters, then having this macro set
74     // to 1 will optimize the is_word_char() function yielding about a
75     // 10% performance improvement; alternatively, you can also edit
76     // that function to keep the optimization if you are not using the
77     // default set of characters. See word_util.h for details.
78    
79     char const Word_Begin_Chars[] = "0123456789abcdefghijklmnopqrstuvwxyz";
80     // Characters that may begin a word; should be a subset of the
81     // above.
82    
83     #define OPTIMIZE_WORD_BEGIN_CHARS 1
84     // Same deal as with OPTIMIZE_WORD_CHARS.
85    
86     char const Word_End_Chars[] = "0123456789abcdefghijklmnopqrstuvwxyz";
87     // Characters that may end a word; usually the same as the above.
88    
89     #define OPTIMIZE_WORD_END_CHARS 1
90     // Same deal as with OPTIMIZE_WORD_CHARS.
91    
92     #ifdef SEARCH_DAEMON
93     ////////// Search server daemon parameters ////////////////////////////////////
94    
95     char const SocketFile_Default[] = "/tmp/search.socket";
96     // Default name of the Unix domain socket file; this can be
97     // overridden either in a config. file or on the command line.
98    
99     int const SocketPort_Default = 1967;
100     // Default port number of the TCP socket; this can be overridden
101     // either in a config. file or on the command line.
102    
103     int const SocketQueueSize_Default = 511;
104     // Maximum number of queued connections for a socket. From
105     // [Stevens 1998], p. 96:
106     //
107     // Historically, sample code always shows a backlog of 5,
108     // as that was the maximum value supported by 4.2BSD.
109     // This was adequate in the 1980s when busy servers would
110     // handle only a few hundred connections per day. But
111     // with the growth of the World Wide Web (WWW), where busy
112     // servers handle millions of connections per day, this
113     // small number is completely inadequate. Busy HTTP
114     // servers must specify a much larger backlog, and newer
115     // kernels must support larger values.
116     //
117     // Unfortunately, Stevens doesn't say what a good value is. The
118     // default 511 value is taken from httpd.h in Apache:
119     //
120     // It defaults to 511 instead of 512 because some systems
121     // store it as an 8-bit datatype; 512 truncated to 8-bits
122     // is 0, while 511 is 255 when truncated.
123     //
124     // If it's good enough for Apache, it's good enough for us. This
125     // can be overridden either in a config. file or on the command
126     // line.
127    
128     int const SocketTimeout_Default = 10; // seconds
129     // The number of seconds a client has to complete a search request
130     // before being disconnected. This is to prevent a client from
131     // connecting, not completing a request, and causing the thread
132     // servicing the request to wait forever. This can be overridden
133     // either in a config. file or on the command line.
134    
135     int const ThreadsMin_Default = 5;
136     // The minimum number of simultaneous threads; this can be
137     // overridden either in a config. file or on the command line.
138    
139     int const ThreadsMax_Default = 100;
140     // The maximum number of simultaneous threads; this can be
141     // overridden either in a config. file or on the command line.
142    
143     int const ThreadTimeout_Default = 30; // seconds
144     // The number of seconds until an idle spare thread times out and
145     // destroys itself. This can be overridden either in a config.
146     // file or on the command line.
147    
148     char const User_Default[] = "nobody";
149     char const Group_Default[] = "nobody";
150     // The user and group to switch to after initialization (if root
151     // to begin with). This can be overridden either in a config.
152     // file or on the command line.
153     #endif
154    
155     ////////// Miscellaneous parameters ///////////////////////////////////////////
156    
157     char const ConfigFile_Default[] = "swish++.conf";
158     // Default name of the configuration file; this can be overridden
159     // on the command line.
160    
161     char const ExtractExtension_Default[] = "txt";
162     // Default extension to append to filenames during extraction.
163     // This can be overridden either in a config. file or on the
164     // command line.
165    
166     int const FilesGrow_Default = 100;
167     // Default number of files to grow reserved space for when
168     // incrementally indexing. This can be overridden either in a
169     // config. file or on the command line.
170    
171     int const FilesReserve_Default = 1000;
172     // Default maximum number of files to reserve space for; see
173     // file_info.c for details. This can be overridden either in a
174     // config. file or on the command line.
175    
176     int const Fork_Attempts = 5;
177     // Number of times to try to fork before giving up. This
178     // parameter is used only in filter.c.
179    
180     int const Fork_Sleep = 5; // seconds
181     // Number of seconds to sleep before retrying to fork. This
182     // parameter is used only in filter.c.
183    
184     char const IndexFile_Default[] = "swish++.index";
185     // Default name of the index file generated/searched; can be
186     // overridden either in a config. file or on the command line.
187    
188     int const ResultsMax_Default = 999999;
189     // Default maximum number of search results; this can be
190     // overridden either in a config. file or on the command line.
191    
192     char const ShellFilenameDelimChars[] = " \t&;<>|";
193     // Characters in a Unix shell command that delimit file names.
194     // Note that this says "file" (not "path") names.
195    
196     char const ShellFilenameEscapeChars[] = " !\"#$&'()*/;<>?[\\]^`{|}~";
197     // Characters in a file name that must be escaped when passed to a
198     // Unix shell. This is a superset of what are commonly referred
199     // to as "meta-characters" because the space and tab characters are
200     // included. Note again that this says "file" (not "path") name.
201    
202     #ifdef __CYGWIN__
203     char const TempDirectory_Default[] = "/temp";
204     #else
205    
206     char const TempDirectory_Default[] = "/tmp";
207     #endif
208     // Default directory to use for temporary files during indexing.
209     // If your OS mounts swap space via /tmp (e.g., Solaris), as
210     // indexing progresses and more files get created in /tmp, you
211     // will have less swap space, indexing will get slower, and you
212     // may run out of memory. If this is the case, you can either
213     // change this default here for all users (preferred) or override
214     // it either in a config. file or on the command line to use a
215     // directory on a real filesystem, i.e., one on a physical disk,
216     // e.g., /var/tmp on some OSs. The directory must exist.
217    
218     int const TitleLines_Default = 12;
219     // Specifies the maximum number of lines into a file for its
220     // "title" (whatever that means for a given file format); this can
221     // be overridden either in a config. file or on the command line.
222    
223     int const Title_Max_Size = 200;
224     // Maximum length of a file "title" (whatever that means for a
225     // given file format).
226    
227     #ifdef FEATURE_word_pos
228     int const WordsNear_Default = 10;
229     // The maximum number of words apart two words can be to be
230     // considered "near" each other; this can be overridden either in
231     // a config. file or on the command line.
232     #endif
233    
234     int const WordPercentMax_Default = 100;
235     // Default maximum percentage of files a word may occur in before
236     // it is discarded as being too frequent; this can be overridden
237     // either in a config. file or on the command line.
238    
239     int const WordThreshold_Default = 250000;
240     // The word count past which partial indices are generated and
241     // merged since all the words are too big to fit into memory at
242     // the same time. If you index and your machine begins to swap
243     // like mad, lower this value. The above works OK in a 64MB
244     // machine. A rule of thumb is to add 250000 words for each
245     // additional 64MB of RAM you have. These numbers are for a SPARC
246     // machine running Solaris. Other machines running other
247     // operating systems use memory differently. You simply have to
248     // experiment. Only the super-user can increase this either in a
249     // config. file or on the command line.
250    
251     #endif /* config_H */
252     /* vim:set noet sw=8 ts=8: */

  ViewVC Help
Powered by ViewVC 1.1.26