/[swish]/trunk/make_config.pl
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/make_config.pl

Parent Directory | Revision Log


Revision 53 - (show annotations)
Tue Jan 20 18:41:38 2004 UTC (16 years, 5 months ago) by dpavlin
File MIME type: text/plain
File size: 3555 byte(s)
configuration moved to config/ directory

#!/usr/bin/perl -w
#
# create configuration for my version of swish crawler and web interface
#
# 2003-04-26 Dobrica Pavlinusic <dpavlin@rot13.org>
#
# Usage: make_config.pl [--stripurl=path] name URL title
#
# For the given config name and start URL this script writes, relative to
# the directory it lives in ($dir):
#
#   $dir/config/$name     - swish indexer configuration
#   $dir/html/$name.xml   - configuration for the web search interface
#   $dir/html/$name.cgi   - symlink to $dir/html/swish.cgi
#
# Exit status is non-zero when the arguments are unusable or when any of
# the files above cannot be created.
#

use strict;
use Getopt::Long;

# Write $content to $path, replacing any previous content.
# Dies if the file cannot be opened or if the buffered write fails at close.
sub write_file {
    my ($path, $content) = @_;

    # three-argument open: the path is never interpreted as a mode/pipe
    open(my $fh, '>', $path) or die "can't open $path: $!";
    print {$fh} $content;
    close($fh) or die "can't write $path: $!";
    return;
}

my $usage = "Usage: $0 [--stripurl=path] name URL title\n";

my $stripurl;

# GetOptions already warns about the offending option; just show usage.
if (!GetOptions("stripurl=s" => \$stripurl)) {
    print $usage;
    exit 1;
}

my $name  = shift @ARGV;
my $url   = shift @ARGV;
my $title = join(" ", @ARGV);    # everything after the URL is the title

if (!$name || !$url) {
    print $usage;
    exit 1;
}

# directory part of the path this script was invoked with
my $dir = $0;
$dir =~ s{/[^/]+$}{};

# try to deduce working directory if script is not called with absolute path
if ($dir !~ m{^/}) {
    chomp($dir = `pwd`);
}

# $host is "http://hostname[:port]" without a trailing slash
my $host;

if ($url =~ m{(http://[^/]+)/.*$}) {
    $host = $1;
    # --stripurl is a literal path fragment, so quote it before matching;
    # the prefix to strip is everything up to and including its last
    # occurrence in the URL. Without a usable --stripurl only the host
    # part is stripped.
    if ($stripurl && $url =~ m/^(.*\Q$stripurl\E)/) {
        $stripurl = $1;
    } else {
        $stripurl = $host;
    }
} else {
    print "Can't parse URL $url into host and path!\n";
    exit 1;
}

# URL with its last path component removed (keeps the trailing slash)
my $baseurl = $url;
$baseurl =~ s{/[^/]+$}{/};

print "Config name: $name\nCrawling url: $url [strip $stripurl]\n";
print "Title: $title\n" if ($title);

# --- swish indexer configuration -----------------------------------------

my $config = <<"EOF";

###################################################
IncludeConfigFile $dir/common.config

# this is a cludge to implement no parent URL feature in swish indexer
IndexDir "$baseurl $url"
ReplaceRules replace "$baseurl " ""
ReplaceRules replace "${stripurl}" ""

IndexFile $dir/index/$name

StoreDescription HTML <body> 500

# not very usefull, but...
MetaNames keywords description

# store <title>
PropertyNameAlias swishtitle title

EOF

# --- web interface configuration -----------------------------------------

# NOTE(review): $title, $name and the URLs are interpolated into XML
# attribute values and HTML below without escaping; values containing
# '"', '&' or '<' will produce malformed output.
my $xml = <<"EOF";
<config
max_hits="1000"
prog="/usr/bin/swish-e"
index="$dir/index/$name"
charset="iso-8859-2"
affix="/usr/lib/ispell/american.aff"
EOF

# fall back to a title derived from the config name
if ($title) {
    $xml .= "\ttitle=\"$title\"\n";
} else {
    $xml .= "\ttitle=\"$name search\"\n";
}

my $xml_title = $title || $url;

# Both values are written host-relative. $host is quoted because it is a
# literal string, not a pattern (it contains dots and may contain
# brackets, e.g. an IPv6 literal).
my $xml_url = $url;
$xml_url =~ s/^\Q$host\E//;
my $xml_urlprefix = $stripurl;
$xml_urlprefix =~ s/^\Q$host\E//;

$xml .= "\turl=\"$xml_urlprefix\"\n" if ($stripurl);
$xml .= <<"EOF";
>
<labels>
<label value="10"> 10</label>
<label value="100"> 100</label>
<label value="0">unlimited</label>
</labels>
<text>
<search>Search for </search>
<documents> documents containing words: </documents>
<submit>Search</submit>
<no_spell>don't use alternative spellings </no_spell>
<no_properties>don't display results details</no_properties>
<hits>Showing %s documents (of maximum %d)... (%s)</hits>
<no_hits>Can't find any documents (%s, %s)</no_hits>
<footer><![CDATA[
Searcher will try to create different variations of words
using spelling dictionary. If you enter a word with minus
(<tt>-</tt>) before it, it will exclude documents with this
word and plus (<tt>+</tt>) will do the opposite
(e.g. <tt>-work +play</tt>)
<p><small>Content indexed:
<a href="$xml_url">$xml_title</a></small></p>
]]></footer>
</text>
<!-- additional data to show summary -->
<properties>swishdescription swishdocsize</properties>
<hit><![CDATA[ <a href="%s">%s</a> [%s]<br>
<font color=gray size=-2>%s ... <i>%s bytes</i></font><br>
]]></hit>

</config>
EOF

# --- write everything out --------------------------------------------------

print "Creating $dir/config/$name\n";
write_file("$dir/config/$name", $config);

print "Creating $dir/html/$name.xml\n";
write_file("$dir/html/$name.xml", $xml);

my $cgi_target = "$dir/html/swish.cgi";
my $cgi_link   = "$dir/html/$name.cgi";

print "Creating symlink to $cgi_link\n";

# A link left over from a previous run for the same name is fine as long as
# it already points at swish.cgi; anything else must be created now.
my $existing = -l $cgi_link ? readlink($cgi_link) : undef;
unless (defined $existing && $existing eq $cgi_target) {
    # parentheses + low-precedence "or": the check applies to symlink()
    # itself, not to its last (always true) string argument
    symlink($cgi_target, $cgi_link)
        or die "can't create symlink $cgi_link: $!";
}

#print "Index this with:\nswish-e -S http -c $dir/config/$name\n";
#print "Search using $name.cgi\n";

Properties

Name Value
cvs2svn:cvs-rev 1.7
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26