/[swish]/trunk/make_config.pl
This is a repository of my old source code, which is no longer updated. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/make_config.pl

Parent Directory | Revision Log


Revision 33 - (show annotations)
Sun May 4 01:31:31 2003 UTC (20 years, 11 months ago) by dpavlin
File MIME type: text/plain
File size: 3031 byte(s)
usage for "strip url" option, fix for indexing of whole host (without
path in URL argument)

#!/usr/bin/perl -w
#
# create configuration for my version of swish crawler and web interface
#
# 2003-04-26 Dobrica Pavlinusic <dpavlin@rot13.org>
#
# Usage: make_config.pl name URL [strip from url]
#
# For the given configuration name and start URL this script writes:
#   $dir/$name.config     - indexer configuration (includes $dir/common.config)
#   $dir/html/$name.xml   - configuration for the search web interface
#   $dir/html/$name.cgi   - symlink to $dir/html/swish.cgi
#
# $dir is the directory part of $0 when the script is called with an
# absolute path, otherwise the current working directory.
#

use strict;
use warnings;
use Cwd qw(getcwd);

# Write $content to $path, replacing any existing file.
# Dies with the same "can't open ..." message as before when the file can't
# be opened, and also dies on write/close errors so that a full disk or I/O
# error does not silently leave a truncated file behind.
sub write_file {
    my ($path, $content) = @_;

    # three-argument open with a lexical handle: $path is built from a
    # command line argument and must never be interpreted as an open mode
    open(my $fh, '>', $path) or die "can't open $path: $!";
    print {$fh} $content or die "can't write $path: $!";
    close($fh) or die "can't close $path: $!";
    return;
}

my $name = shift @ARGV;
my $url = shift @ARGV;
my $stripurl = shift @ARGV;

if (!$name || !$url) {
    print "Usage: $0 name URL [strip from url]\n";
    exit;
}

my $dir = $0;
$dir =~ s/\/[^\/]+$//;

# try to deduce working directory if script is not called with absolute path
if ($dir !~ m#^/#) {
    $dir = getcwd() or die "can't determine current directory: $!";
}

my ($host, $urlpath);

if ($url =~ m#(http://[^/]+)(/.*)$#) {
    ($host, $urlpath) = ($1, $2);
    # Expand the "strip from url" argument to the whole leading part of the
    # URL path up to and including its last occurrence. \Q...\E makes the
    # argument match literally, so characters like "+" or "(" in it can't
    # break (or change the meaning of) the pattern.
    if ($stripurl && $urlpath =~ m/^(.*\Q$stripurl\E)/) {
        $stripurl = $1;
    } else {
        $stripurl = "";
    }
} else {
    print "Can't parse URL $url into host and path!\n";
    exit 1;
}

# base URL is the start URL with the last path component (file name) removed
my $baseurl = $url;
$baseurl =~ s#/[^/]+$#/#g;

print "Config name: $name\nCrawling url: $url [strip $stripurl]\n";


my $config = <<"EOF";

###################################################
IncludeConfigFile $dir/common.config

# this is a cludge to implement no parent URL feature in swish indexer
IndexDir "$url $baseurl"
ReplaceRules replace " $baseurl" ""
ReplaceRules replace "${host}${stripurl}" ""

IndexFile $dir/index/$name

StoreDescription HTML <body> 500

# not very usefull, but...
MetaNames keywords description

# store <title>
PropertyNameAlias swishtitle title

EOF

my $xml = <<"EOF";
<config
max_hits="1000"
prog="/usr/bin/swish-e"
index="$dir/index/$name"
charset="iso-8859-2"
title="$name search"
affix="/usr/lib/ispell/american.aff"
url="$stripurl"
>
<labels>
<label value="10"> 10</label>
<label value="100"> 100</label>
<label value="0">unlimited</label>
</labels>
<text>
<search>Search for </search>
<documents> documents containing words: </documents>
<submit>Search</submit>
<no_spell>don't use alternative spellings </no_spell>
<no_properties>don't display results details</no_properties>
<hits>Showing %s documents (of maximum %d)... (%s)</hits>
<no_hits>Can't find any documents (%s, %s)</no_hits>
<footer>Searcher will try to create different variations of words using spelling dictionary. If you enter a word with minus (-) before it, it will exclude documents with this word and plus (+) will do the opposite (e.g. -work +play)</footer>
</text>
<!-- additional data to show summary -->
<properties>swishdescription swishdocsize</properties>
<hit><![CDATA[ <a href="%s">%s</a> [%s]<br>
<font color=gray size=-2>%s ... <i>%s bytes</i></font><br>
]]></hit>

</config>
EOF

print "Creating $dir/$name.config\n";
write_file("$dir/$name.config", $config);

print "Creating $dir/html/$name.xml\n";
write_file("$dir/html/$name.xml", $xml);

my $cgi_target = "$dir/html/swish.cgi";
my $cgi_link = "$dir/html/$name.cgi";

print "Creating symlink to $cgi_link\n";

# Re-running the script for an existing configuration must keep working, so
# a link which already points to swish.cgi is left alone. Any other failure
# is fatal; low-precedence "or" is required here, because "||" would bind to
# the (always true) link name and the die would never be reached.
my $existing_target = -l $cgi_link ? readlink($cgi_link) : undef;
unless (defined $existing_target && $existing_target eq $cgi_target) {
    symlink($cgi_target, $cgi_link)
        or die "can't create symlink $cgi_link: $!";
}

#print "Index this with:\nswish-e -S http -c $dir/$name.config\n";
#print "Search using $name.cgi\n";

Properties

Name Value
cvs2svn:cvs-rev 1.2
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26