/[swish]/trunk/make_config.pl
This is a repository of my old source code, which is no longer updated. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/make_config.pl

Parent Directory | Revision Log


Revision 44 - (hide annotations)
Mon Aug 4 16:41:14 2003 UTC (20 years, 8 months ago) by dpavlin
File MIME type: text/plain
File size: 3380 byte(s)
added some html and URI of indexed content

#!/usr/bin/perl -w
#
# create configuration for my version of swish crawler and web interface
#
# 2003-04-26 Dobrica Pavlinusic <dpavlin@rot13.org>
#
# Usage: make_config.pl name URL [optional title]
#
# Writes three things relative to the directory this script lives in:
#   <dir>/<name>.config     - swish indexer configuration
#   <dir>/html/<name>.xml   - configuration for the web search interface
#   <dir>/html/<name>.cgi   - symlink to <dir>/html/swish.cgi
#

use strict;
use warnings;

use File::Basename qw(dirname);
use File::Spec;

# Escape the characters that are special in XML/HTML text and attribute
# values. Escaping ">" also keeps "]]>" from ever appearing inside CDATA.
sub xml_escape {
    my ($text) = @_;
    my %entity_for = (
        '&' => '&amp;',
        '<' => '&lt;',
        '>' => '&gt;',
        '"' => '&quot;',
    );
    $text =~ s/([&<>"])/$entity_for{$1}/g;
    return $text;
}

# Write $content to $path, replacing any existing file; dies on any failure
# (including errors that only surface when the buffer is flushed at close).
sub write_file {
    my ($path, $content) = @_;
    open(my $fh, '>', $path) or die "can't open $path: $!";
    print {$fh} $content or die "can't write $path: $!";
    close($fh) or die "can't close $path: $!";
    return;
}

my $name = shift @ARGV;
my $url = shift @ARGV;
# Stripping a prefix from the URL is currently disabled on the command line,
# so $stripurl always starts out undefined.
#my $stripurl = shift @ARGV;
my $stripurl;
my $title = join(" ", @ARGV);

if (!$name || !$url) {
    print STDERR "Usage: $0 name URL [optional title]\n";
    exit 1;
}

# Directory of this script as an absolute path. A relative $0 (e.g.
# "sub/make_config.pl") is resolved against the current working directory
# instead of throwing its directory part away.
my $dir = File::Spec->rel2abs(dirname($0));

my ($host, $urlpath);

if ($url =~ m#^(https?://[^/]+)(/.*)$#) {
    ($host, $urlpath) = ($1, $2);
    # \Q...\E: the strip prefix is literal text, not a pattern
    if ($stripurl && $urlpath =~ m/^(.*\Q$stripurl\E)/) {
        $stripurl = $1;
    } else {
        $stripurl = "";
    }
} else {
    print STDERR "Can't parse URL $url into host and path!\n";
    exit 1;
}

# URL with everything after the last "/" removed
my $baseurl = $url;
$baseurl =~ s#/[^/]+$#/#;

print "Config name: $name\nCrawling url: $url [strip $stripurl]\n";
print "Title: $title\n" if (length $title);


my $config = <<"EOF";

###################################################
IncludeConfigFile $dir/common.config

# this is a cludge to implement no parent URL feature in swish indexer
IndexDir "$baseurl $url"
ReplaceRules replace "$baseurl " ""
ReplaceRules replace "${host}${stripurl}" ""

IndexFile $dir/index/$name

StoreDescription HTML <body> 500

# not very usefull, but...
MetaNames keywords description

# store <title>
PropertyNameAlias swishtitle title

EOF

# Values below end up inside XML attributes or HTML, so escape them first.
my $index_xml = xml_escape("$dir/index/$name");
my $title_xml = xml_escape(length $title ? $title : "$name search");
my $strip_xml = xml_escape($stripurl);
my $url_html  = xml_escape($url);

my $xml = << "EOF";
<config
	max_hits="1000"
	prog="/usr/bin/swish-e"
	index="$index_xml"
	charset="iso-8859-2"
	affix="/usr/lib/ispell/american.aff"
EOF
$xml .= "\ttitle=\"$title_xml\"\n";
$xml .= "\turl=\"$strip_xml\"\n" if ($stripurl);
$xml .= << "EOF";
>
	<labels>
		<label value="10"> 10</label>
		<label value="100"> 100</label>
		<label value="0">unlimited</label>
	</labels>
	<text>
		<search>Search for </search>
		<documents> documents containing words: </documents>
		<submit>Search</submit>
		<no_spell>don't use alternative spellings </no_spell>
		<no_properties>don't display results details</no_properties>
		<hits>Showing %s documents (of maximum %d)... (%s)</hits>
		<no_hits>Can't find any documents (%s, %s)</no_hits>
		<footer><![CDATA[
			Searcher will try to create different variations of words
			using spelling dictionary. If you enter a word with minus
			(<tt>-</tt>) before it, it will exclude documents with this
			word and plus (<tt>+</tt>) will do the opposite
			(e.g. <tt>-work +play</tt>)
			<p><small>URI of indexed content:
			<a href="$url_html">$url_html</a></small></p>
		]]></footer>
	</text>
	<!-- additional data to show summary -->
	<properties>swishdescription swishdocsize</properties>
	<hit><![CDATA[ <a href="%s">%s</a> [%s]<br>
		<font color=gray size=-2>%s ... <i>%s bytes</i></font><br>
	]]></hit>

</config>
EOF

print "Creating $dir/$name.config\n";
write_file("$dir/$name.config", $config);

print "Creating $dir/html/$name.xml\n";
write_file("$dir/html/$name.xml", $xml);

my $cgi_target = "$dir/html/swish.cgi";
my $cgi_link   = "$dir/html/$name.cgi";

print "Creating symlink to $cgi_link\n";
# A link left over from a previous run that already points at the right
# target is fine; anything else that prevents the symlink is a real error.
unless (-l $cgi_link && (readlink($cgi_link) // '') eq $cgi_target) {
    symlink($cgi_target, $cgi_link)
        or die "can't create symlink $cgi_link: $!";
}

#print "Index this with:\nswish-e -S http -c $dir/$name.config\n";
#print "Search using $name.cgi\n";

Properties

Name Value
cvs2svn:cvs-rev 1.5
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26