--- trunk/spider/filter.pm 2004/03/19 09:46:33 70 +++ trunk/spider/filter.pm 2004/04/03 15:15:36 71 @@ -22,6 +22,16 @@ # remove comments between and texi2html inserts them # there and swish can't find document title then (libxml or swish bug?) while ($contents =~ s/(.*)(.*)/$1$2/msi) { }; + + # remove empty lines before/after + $contents =~ s/^\s+()/$1/is; + $contents =~ s/(<\/html>)\s+$/$1/is; + # remove cr + $contents =~ s/\r//gs; + + # remove SQL Magazine header and footer + $contents =~ s/.+?/<\/table>/is; + $contents =~ s/.+?<\/table>/<\/table>/is; # remove TPJ left column if ($contents =~ s,.+?,,isg) {