/[swish]/trunk/make_config.pl
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/make_config.pl

Parent Directory | Revision Log


Revision 45 - (hide annotations)
Wed Nov 19 12:07:07 2003 UTC (20 years, 5 months ago) by dpavlin
File MIME type: text/plain
File size: 3555 byte(s)
fixes and improvements

#!/usr/bin/perl -w
#
# create configuration for my version of swish crawler and web interface
#
# 2003-04-26 Dobrica Pavlinusic <dpavlin@rot13.org>
#
# Usage: make_config.pl [--stripurl=path] name URL title
#
# For a given config name and start URL this script
#   - writes the swish-e indexer configuration to  $dir/$name.config
#   - writes the web interface configuration to    $dir/html/$name.xml
#   - symlinks $dir/html/$name.cgi to              $dir/html/swish.cgi
# where $dir is the directory part of $0 when the script is invoked with
# an absolute path, and the current working directory otherwise.
#

use strict;
use Cwd qw(getcwd);
use Getopt::Long;

# Print the usage message to STDERR and exit with a failure status.
sub usage {
	print STDERR "Usage: $0 [--stripurl=path] name URL title\n";
	exit 1;
}

# Return $text with the characters that are special inside XML/HTML
# attribute values and text (& < > ") replaced by entities.
sub xml_escape {
	my ($text) = @_;
	$text =~ s/&/&amp;/g;
	$text =~ s/</&lt;/g;
	$text =~ s/>/&gt;/g;
	$text =~ s/"/&quot;/g;
	return $text;
}

# Write $content to $path, replacing any existing file.  Dies when the
# file can't be opened or when the buffered data can't be flushed.
sub write_file {
	my ($path, $content) = @_;
	open(my $fh, '>', $path) or die "can't open $path: $!";
	print $fh $content;
	close($fh) or die "can't write $path: $!";
}

my $stripurl;

# GetOptions returns false (after warning) on an unknown or malformed option
GetOptions("stripurl=s" => \$stripurl) or usage();

my $name = shift @ARGV;
my $url = shift @ARGV;
my $title = join(" ", @ARGV);

usage() if (!$name || !$url);

my $dir = $0;
$dir =~ s{/[^/]+$}{};

# try to deduce working directory if script is not called with absolute path
if ($dir !~ m{^/}) {
	$dir = getcwd();
	die "can't determine current working directory: $!" unless (defined $dir);
}

if ($url !~ m#(http://[^/]+)(/.*)$#) {
	print STDERR "Can't parse URL $url into host and path!\n";
	exit 1;
}
my $host = $1;

# --stripurl is a literal path fragment, so quote it before matching;
# everything in the URL up to and including it becomes the prefix to strip.
# Without the option (or when it isn't part of the URL) only the host part
# is stripped.
if ($stripurl && $url =~ m/^(.*\Q$stripurl\E)/) {
	$stripurl = $1;
} else {
	$stripurl = $host;
}

# URL with the last path component removed (kept as-is if it ends in /)
my $baseurl = $url;
$baseurl =~ s#/[^/]+$#/#;

print "Config name: $name\nCrawling url: $url [strip $stripurl]\n";
print "Title: $title\n" if ($title);


my $config = <<"EOF";

###################################################
IncludeConfigFile $dir/common.config

# this is a cludge to implement no parent URL feature in swish indexer
IndexDir "$baseurl $url"
ReplaceRules replace "$baseurl " ""
ReplaceRules replace "${stripurl}" ""

IndexFile $dir/index/$name

StoreDescription HTML <body> 500

# not very usefull, but...
MetaNames keywords description

# store <title>
PropertyNameAlias swishtitle title

EOF

# host-relative forms of the crawled URL and of the stripped prefix;
# $host is quoted so that dots in the host name only match literally
my $xml_url = $url;
$xml_url =~ s/^\Q$host\E//;
my $xml_urlprefix = $stripurl;
$xml_urlprefix =~ s/^\Q$host\E//;

# attributes of the <config> element, in output order
my @attributes = (
	[ max_hits => "1000" ],
	[ prog     => "/usr/bin/swish-e" ],
	[ index    => "$dir/index/$name" ],
	[ charset  => "iso-8859-2" ],
	[ affix    => "/usr/lib/ispell/american.aff" ],
	[ title    => ($title ? $title : "$name search") ],
	# $stripurl is always set by now (explicit prefix or host), so the
	# url attribute is always written; it is empty when only the host
	# is stripped
	[ url      => $xml_urlprefix ],
);

my $xml = "<config\n";
foreach my $attribute (@attributes) {
	my ($key, $value) = @$attribute;
	# values come from the command line, so escape them for XML
	$xml .= "\t$key=\"" . xml_escape($value) . "\"\n";
}

# link shown in the footer: the indexed URL, labelled with the title
# (or with the URL itself when no title was given)
my $footer_href = xml_escape($xml_url);
my $footer_label = xml_escape($title || $url);

$xml .= <<"EOF";
>
<labels>
<label value="10"> 10</label>
<label value="100"> 100</label>
<label value="0">unlimited</label>
</labels>
<text>
<search>Search for </search>
<documents> documents containing words: </documents>
<submit>Search</submit>
<no_spell>don't use alternative spellings </no_spell>
<no_properties>don't display results details</no_properties>
<hits>Showing %s documents (of maximum %d)... (%s)</hits>
<no_hits>Can't find any documents (%s, %s)</no_hits>
<footer><![CDATA[
Searcher will try to create different variations of words
using spelling dictionary. If you enter a word with minus
(<tt>-</tt>) before it, it will exclude documents with this
word and plus (<tt>+</tt>) will do the opposite
(e.g. <tt>-work +play</tt>)
<p><small>Content indexed:
<a href="$footer_href">$footer_label</a></small></p>
]]></footer>
</text>
<!-- additional data to show summary -->
<properties>swishdescription swishdocsize</properties>
<hit><![CDATA[ <a href="%s">%s</a> [%s]<br>
<font color=gray size=-2>%s ... <i>%s bytes</i></font><br>
]]></hit>

</config>
EOF

print "Creating $dir/$name.config\n";
write_file("$dir/$name.config", $config);

print "Creating $dir/html/$name.xml\n";
write_file("$dir/html/$name.xml", $xml);

my $cgi_target = "$dir/html/swish.cgi";
my $cgi_link = "$dir/html/$name.cgi";

print "Creating symlink to $cgi_link\n";
# A link left over from a previous run that already points at swish.cgi is
# fine; anything else that prevents creating the link is a real error.
# ("or" is required here: "||" would bind to the link name, not to symlink.)
my $existing_target = readlink($cgi_link);
unless (defined $existing_target && $existing_target eq $cgi_target) {
	symlink($cgi_target, $cgi_link)
		or die "can't create symlink $cgi_link: $!";
}

#print "Index this with:\nswish-e -S http -c $dir/$name.config\n";
#print "Search using $name.cgi\n";

Properties

Name Value
cvs2svn:cvs-rev 1.6
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26