20 |
if ($url =~ m/\s/) { |
if ($url =~ m/\s/) { |
21 |
($no_parent_url,$url) = split(/\s/,$url,2); |
($no_parent_url,$url) = split(/\s/,$url,2); |
22 |
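# the old scheme was "URL no-parent"; the new scheme is the reverse, so swap if the old order is detected |
# the old scheme was "URL no-parent"; the new scheme is the reverse, so swap if the old order is detected |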
23 |
($url,$no_parent_url) = ($no_parent_url,$url) if ($no_parent_url =~ m/$url/); |
($url,$no_parent_url) = ($no_parent_url,$url) if ($no_parent_url =~ m/\Q$url\E/); |
24 |
} |
} |
25 |
|
|
26 |
my $request = new HTTP::Request( "GET", $url ); |
my $request = new HTTP::Request( "GET", $url ); |
66 |
# if you don't want content to be indexed, include it in |
# if you don't want content to be indexed, include it in |
67 |
# <noindex> foobar </noindex> tags or surround it with comments |
# <noindex> foobar </noindex> tags or surround it with comments |
68 |
# <!-- noindex --> foobar <!-- /noindex --> |
# <!-- noindex --> foobar <!-- /noindex --> |
69 |
|
# <!-- noindex --> foobar <!-- index --> (also supported by swish) |
70 |
$contents =~ s,<noindex>.+?</noindex>,,isg; |
$contents =~ s,<noindex>.+?</noindex>,,isg; |
71 |
$contents =~ s,<!--\s*noindex\s*-->.+?<!--\s*/noindex\s*-->,,isg; |
$contents =~ s,<!--\s*noindex\s*-->.+?<!--\s*/noindex\s*-->,,isg; |
72 |
|
$contents =~ s,<!--\s*noindex\s*-->.+?<!--\s*index\s*-->,,isg; |
73 |
# remove <script>...</script> blocks from the content to be indexed |
# remove <script>...</script> blocks from the content to be indexed |
74 |
$contents =~ s,<script>.+?</script>,,isg; |
$contents =~ s,<script>.+?</script>,,isg; |
75 |
# remap Windows charset to ISO-8859-2 |
# remap Windows charset to ISO-8859-2 |