--- trunk/spider/filter.pm 2004/01/17 23:57:55 46 +++ trunk/spider/filter.pm 2004/01/20 18:40:06 51 @@ -8,7 +8,7 @@ $contents =~ s,.+?,,isg; $contents =~ s,.+?,,isg; # this will remove all script from indexing content - $contents =~ s,,,isg; + $contents =~ s,]*>.+?,,isg; # remap Windows charset to ISO-8859-2 $contents =~ tr/šðžèæŠÐŽÈÆ/¹ð¾èæ©Ð®ÈÆ/; # 1250 -> iso8859-2 # this will fix badly formatted html in form: @@ -36,6 +36,7 @@ $contents =~ s,()([^<]+)(),$1$2: $title$3,gsi if ($title); } + return $contents; }