--- trunk/spider/filter.pm 2004/04/03 15:15:36 71
+++ trunk/spider/filter.pm 2004/04/07 12:54:21 74
@@ -58,21 +58,32 @@
 if ($contents =~ m,,is) {
     $new_title = $1;
-} elsif ($contents =~ m,<(h\d)\sclass="docPartTitle"[^>]*>(.+?)<\1>,is) {
+    print STDERR "using title '$new_title' from \n" if ($verbose);
+} elsif ($contents =~ m,<(h\d)\s+class="docPartTitle"[^>]*>(.+?),is) {
     $new_title = $2;
-} elsif ($contents =~ m,<(h\d)\sclass="docChapterTitle"[^>]*>(.+?)<\1>,is) {
+    print STDERR "using title '$new_title' from docPartTitle\n" if ($verbose);
+} elsif ($contents =~ m,<(h\d)\s+class="docChapterTitle"[^>]*>(.+?),is) {
     $new_title = $2;
-} elsif ($contents =~ m,<(h\d)\sclass="docSection1Title"[^>]*>(.+?)<\1>,is) {
+    print STDERR "using title '$new_title' from docChapterTitle\n" if ($verbose);
+} elsif ($contents =~ m,<(h\d)\s+class="docSection1Title"[^>]*>(.+?),is) {
     $new_title = $2;
-} elsif ($contents =~ m,<(h\d)\sclass="chapter"[^>]*>(.+?)<\1>,is) {
+    print STDERR "using title '$new_title' from docSection1Title\n" if ($verbose);
+} elsif ($contents =~ m,<(h\d)\s+class="doc[^"]*Title"[^>]*>(.+?),is) {
     $new_title = $2;
-} elsif ($contents =~ m,<(h\d)\sclass="sect1"[^>]*>(.+?)<\1>,is) {
+    print STDERR "using title '$new_title' from doc.+Title\n" if ($verbose);
+} elsif ($contents =~ m,<(h\d)\s+class="chapter"[^>]*>(.+?),is) {
     $new_title = $2;
+    print STDERR "using title '$new_title' from chapter\n" if ($verbose);
+} elsif ($contents =~ m,<(h\d)\s+class="sect1"[^>]*>(.+?),is) {
+    $new_title = $2;
+    print STDERR "using title '$new_title' from sect1\n" if ($verbose);
 } else {
     if ($contents =~ m,([^<]+),is) {
         $new_title = $1;
+        print STDERR "using title '$new_title' from \n" if ($verbose);
     } elsif ($contents =~ m,<h\d[^>]*>([^<]+)</h\d>,is) {
         $new_title = $1;
+        print STDERR "using title '$new_title' from <h_>\n" if ($verbose);
     }
 }
@@ -87,6 +98,7 @@
 $b =~ s/([^a-zA-Z])+/ /gs;
 if ($a =~ m/$b/i) {
     $new_title = $collection;
+    print STDERR "new_title and collection are same! [$new_title]\n" if ($verbose);
 } else {
     $new_title = "$collection :: $new_title";
 }