--- trunk/spider/swishspider 2003/06/01 11:45:19 40 +++ trunk/spider/swishspider 2003/07/29 10:40:58 42 @@ -20,7 +20,7 @@ if ($url =~ m/\s/) { ($no_parent_url,$url) = split(/\s/,$url,2); # old scheme had URL, no parent and new is reverse - ($url,$no_parent_url) = ($no_parent_url,$url) if ($no_parent_url =~ m/$url/); + ($url,$no_parent_url) = ($no_parent_url,$url) if ($no_parent_url =~ m/\Q$url\E/); } my $request = new HTTP::Request( "GET", $url ); @@ -66,8 +66,10 @@ # if you don't want content to be indexed, include it in # foobar tags or surround it with comments # foobar + # foobar (also supported by swish) $contents =~ s,.+?,,isg; $contents =~ s,.+?,,isg; + $contents =~ s,.+?,,isg; # this will remove all script from indexing content $contents =~ s,,,isg; # remap Windows charset to ISO-8859-2