--- trunk/spider/swishspider 2003/06/01 11:45:19 40
+++ trunk/spider/swishspider 2003/07/29 10:40:58 42
@@ -20,7 +20,7 @@
if ($url =~ m/\s/) {
($no_parent_url,$url) = split(/\s/,$url,2);
# old scheme had URL, no parent and new is reverse
- ($url,$no_parent_url) = ($no_parent_url,$url) if ($no_parent_url =~ m/$url/);
+ ($url,$no_parent_url) = ($no_parent_url,$url) if ($no_parent_url =~ m/\Q$url\E/);
}
my $request = new HTTP::Request( "GET", $url );
@@ -66,8 +66,10 @@
# if you don't want content to be indexed, include it in
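# <noindex>foobar</noindex> tags or surround it with comments
# <!-- noindex -->foobar<!-- /noindex -->
+ # <!-- SwishCommand noindex -->foobar<!-- SwishCommand index --> (also supported by swish)
$contents =~ s,<noindex>.+?</noindex>,,isg;
$contents =~ s,<!-- noindex -->.+?<!-- /noindex -->,,isg;
+ $contents =~ s,<!-- SwishCommand noindex -->.+?<!-- SwishCommand index -->,,isg;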
# this will remove all script from indexing content
$contents =~ s,<script.+?</script>,,isg;
# remap Windows charset to ISO-8859-2