6 |
# |
# |
7 |
|
|
8 |
use strict; |
use strict; |
9 |
|
use Getopt::Long; |
10 |
|
|
11 |
my ($name,$url,$stripurl) = @ARGV; |
my $stripurl; |
12 |
|
|
13 |
|
my $result = GetOptions ("stripurl=s" => \$stripurl); |
14 |
|
|
15 |
|
my $name = shift @ARGV; |
16 |
|
my $url = shift @ARGV; |
17 |
|
my $title = join(" ",@ARGV); |
18 |
|
|
19 |
if (!$name || !$url) { |
if (!$name || !$url) { |
20 |
print "Usage: $0 name URL\n"; |
print "Usage: $0 [--stripurl=path] name URL title\n"; |
21 |
exit; |
exit; |
22 |
} |
} |
23 |
|
|
30 |
|
|
31 |
my ($host,$urlpath); |
my ($host,$urlpath); |
32 |
|
|
33 |
if ($url =~ m#(http://[^/]+)(/.+)$#) { |
if ($url =~ m#(http://[^/]+)(/.*)$#) { |
34 |
($host,$urlpath) = ($1,$2); |
($host,$urlpath) = ($1,$2); |
35 |
if ($stripurl && $urlpath =~ m/^(.*$stripurl)/) { |
if ($stripurl && $url =~ m/^(.*$stripurl)/) { |
36 |
$stripurl = $1; |
$stripurl = $1; |
37 |
} else { |
} else { |
38 |
$stripurl = ""; |
$stripurl = $host; |
39 |
} |
} |
40 |
} else { |
} else { |
41 |
print "Can't parse URL $url into host and path!\n"; |
print "Can't parse URL $url into host and path!\n"; |
46 |
$baseurl =~ s#/[^/]+$#/#g; |
$baseurl =~ s#/[^/]+$#/#g; |
47 |
|
|
48 |
print "Config name: $name\nCrawling url: $url [strip $stripurl]\n"; |
print "Config name: $name\nCrawling url: $url [strip $stripurl]\n"; |
49 |
|
print "Title: $title\n" if ($title); |
50 |
|
|
51 |
|
|
52 |
my $config = <<"EOF"; |
my $config = <<"EOF"; |
55 |
IncludeConfigFile $dir/common.config |
IncludeConfigFile $dir/common.config |
56 |
|
|
57 |
# this is a cludge to implement no parent URL feature in swish indexer |
# this is a cludge to implement no parent URL feature in swish indexer |
58 |
IndexDir "$url $baseurl" |
IndexDir "$baseurl $url" |
59 |
ReplaceRules replace " $baseurl" "" |
ReplaceRules replace "$baseurl " "" |
60 |
ReplaceRules replace "${host}${stripurl}" "" |
ReplaceRules replace "${stripurl}" "" |
61 |
|
|
62 |
IndexFile $dir/index/$name |
IndexFile $dir/index/$name |
63 |
|
|
77 |
prog="/usr/bin/swish-e" |
prog="/usr/bin/swish-e" |
78 |
index="$dir/index/$name" |
index="$dir/index/$name" |
79 |
charset="iso-8859-2" |
charset="iso-8859-2" |
|
title="$name search" |
|
80 |
affix="/usr/lib/ispell/american.aff" |
affix="/usr/lib/ispell/american.aff" |
81 |
url="$stripurl" |
EOF |
82 |
|
if ($title) { |
83 |
|
$xml .= "\ttitle=\"$title\"\n"; |
84 |
|
} else { |
85 |
|
$xml .= "\ttitle=\"$name search\"\n"; |
86 |
|
} |
87 |
|
|
88 |
|
my $xml_title = $title || $url; |
89 |
|
|
90 |
|
my $xml_url = $url; |
91 |
|
$xml_url =~ s/^$host//; |
92 |
|
my $xml_urlprefix = $stripurl; |
93 |
|
$xml_urlprefix =~ s/^$host//; |
94 |
|
|
95 |
|
$xml .= "\turl=\"$xml_urlprefix\"\n" if ($stripurl); |
96 |
|
$xml .= << "EOF"; |
97 |
> |
> |
98 |
<labels> |
<labels> |
99 |
<label value="10"> 10</label> |
<label value="10"> 10</label> |
108 |
<no_properties>don't display results details</no_properties> |
<no_properties>don't display results details</no_properties> |
109 |
<hits>Showing %s documents (of maximum %d)... (%s)</hits> |
<hits>Showing %s documents (of maximum %d)... (%s)</hits> |
110 |
<no_hits>Can't find any documents (%s, %s)</no_hits> |
<no_hits>Can't find any documents (%s, %s)</no_hits> |
111 |
<footer>Searcher will try to create different variations of words using spelling dictionary. If you enter a word with minus (-) before it, it will exclude documents with this word and plus (+) will do the opposite (e.g. -work +play)</footer> |
<footer><![CDATA[ |
112 |
|
Searcher will try to create different variations of words |
113 |
|
using spelling dictionary. If you enter a word with minus |
114 |
|
(<tt>-</tt>) before it, it will exclude documents with this |
115 |
|
word and plus (<tt>+</tt>) will do the opposite |
116 |
|
(e.g. <tt>-work +play</tt>) |
117 |
|
<p><small>Content indexed: |
118 |
|
<a href="$xml_url">$xml_title</small></p> |
119 |
|
]]></footer> |
120 |
</text> |
</text> |
121 |
<!-- additional data to show summary --> |
<!-- additional data to show summary --> |
122 |
<properties>swishdescription swishdocsize</properties> |
<properties>swishdescription swishdocsize</properties> |
127 |
</config> |
</config> |
128 |
EOF |
EOF |
129 |
|
|
130 |
print "Creating $dir/$name.config\n"; |
print "Creating $dir/config/$name\n"; |
131 |
open(C,"> $dir/$name.config") || die "can't open $dir/$name.config: $!"; |
open(C,"> $dir/config/$name") || die "can't open $dir/config/$name: $!"; |
132 |
print C $config; |
print C $config; |
133 |
close(C); |
close(C); |
134 |
|
|
140 |
print "Creating symlink to $dir/html/$name.cgi\n"; |
print "Creating symlink to $dir/html/$name.cgi\n"; |
141 |
symlink "$dir/html/swish.cgi","$dir/html/$name.cgi" || die "can't create symlink $dir/html/$name.cgi: $!"; |
symlink "$dir/html/swish.cgi","$dir/html/$name.cgi" || die "can't create symlink $dir/html/$name.cgi: $!"; |
142 |
|
|
143 |
#print "Index this with:\nswish-e -S http -c $dir/$name.config\n"; |
#print "Index this with:\nswish-e -S http -c $dir/config/$name\n"; |
144 |
#print "Search using $name.cgi\n"; |
#print "Search using $name.cgi\n"; |