8 |
$contents =~ s,<!--\s*noindex\s*-->.+?<!--\s*/noindex\s*-->,,isg; |
$contents =~ s,<!--\s*noindex\s*-->.+?<!--\s*/noindex\s*-->,,isg; |
9 |
$contents =~ s,<!--\s*noindex\s*-->.+?<!--\s*index\s*-->,,isg; |
$contents =~ s,<!--\s*noindex\s*-->.+?<!--\s*index\s*-->,,isg; |
10 |
# this will remove all script from indexing content |
# this will remove all script from indexing content |
11 |
$contents =~ s,<script>.+?</script>,,isg; |
$contents =~ s,<script[^>]*>.+?</script>,,isg; |
12 |
# remap Windows charset to ISO-8859-2 |
# remap Windows charset to ISO-8859-2 |
13 |
$contents =~ tr/šðžèæŠÐŽÈÆ/¹ð¾èæ©Ð®ÈÆ/; # 1250 -> iso8859-2 |
$contents =~ tr/šðžèæŠÐŽÈÆ/¹ð¾èæ©Ð®ÈÆ/; # 1250 -> iso8859-2 |
14 |
# this will fix badly formatted html in form: |
# this will fix badly formatted html in form: |
36 |
$contents =~ s,(<title>)([^<]+)(</title>),$1$2: $title$3,gsi if ($title); |
$contents =~ s,(<title>)([^<]+)(</title>),$1$2: $title$3,gsi if ($title); |
37 |
|
|
38 |
} |
} |
39 |
|
|
40 |
return $contents; |
return $contents; |
41 |
} |
} |
42 |
|
|