17 |
my $title = join(" ",@ARGV); |
my $title = join(" ",@ARGV); |
18 |
|
|
19 |
if (!$name || !$url) { |
if (!$name || !$url) { |
20 |
print "Usage: $0 [--stripurl=path] name URL title\n"; |
print "Usage: $0 [--stripurl=path] name URL|path title\n"; |
21 |
exit; |
exit; |
22 |
} |
} |
23 |
|
|
37 |
} else { |
} else { |
38 |
$stripurl = $host; |
$stripurl = $host; |
39 |
} |
} |
40 |
|
} elsif (-d $url && $url =~ m#.+?(/references/)#) { |
41 |
|
$stripurl = $1; |
42 |
|
$host = ""; |
43 |
} else { |
} else { |
44 |
print "Can't parse URL $url into host and path!\n"; |
print "Can't parse URL $url into host and path!\n"; |
45 |
exit 1; |
exit 1; |
48 |
my $baseurl = $url; |
my $baseurl = $url; |
49 |
$baseurl =~ s#/[^/]+$#/#g; |
$baseurl =~ s#/[^/]+$#/#g; |
50 |
|
|
51 |
print "Config name: $name\nCrawling url: $url [strip $stripurl]\n"; |
print "Config name: $name\nCrawling: $url [strip $stripurl]\n"; |
52 |
print "Title: $title\n" if ($title); |
print "Title: $title\n" if ($title); |
53 |
|
|
54 |
|
my $indexer_config = << "EOF"; |
|
my $config = <<"EOF"; |
|
|
|
|
|
################################################### |
|
55 |
IncludeConfigFile $dir/common.config |
IncludeConfigFile $dir/common.config |
56 |
|
|
57 |
# this is a cludge to implement no parent URL feature in swish indexer |
# this is a cludge to implement no parent URL feature in swish indexer |
58 |
IndexDir "$baseurl $url" |
IndexDir "$baseurl $url" |
59 |
ReplaceRules replace "$baseurl " "" |
ReplaceRules replace "$baseurl " "" |
60 |
ReplaceRules replace "${stripurl}" "" |
ReplaceRules replace "${stripurl}" "" |
61 |
|
EOF |
62 |
|
|
63 |
|
$indexer_config = << "EOF" if (-d $url); |
64 |
|
IncludeConfigFile $dir/common-progspider.config |
65 |
|
IndexDir $dir/spider/progspider |
66 |
|
SwishProgParameters $url |
67 |
|
ReplaceRules replace "/rest/references/" "/" |
68 |
|
EOF |
69 |
|
|
70 |
|
my $config = <<"EOF"; |
71 |
|
|
72 |
|
################################################### |
73 |
|
$indexer_config |
74 |
IndexFile $dir/index/$name |
IndexFile $dir/index/$name |
75 |
|
|
76 |
StoreDescription HTML <body> 500 |
StoreDescription HTML <body> 500 |