--- trunk/spider/filter.pm 2004/04/03 15:15:36 71
+++ trunk/spider/filter.pm 2004/04/07 12:54:21 74
@@ -58,21 +58,32 @@
 if ($contents =~ m,,is) {
     $new_title = $1;
- } elsif ($contents =~ m,<(h\d)\sclass="docPartTitle"[^>]*>(.+?)<\1>,is) {
+     print STDERR "using title '$new_title' from \n" if ($verbose);
+ } elsif ($contents =~ m,<(h\d)\s+class="docPartTitle"[^>]*>(.+?)\1>,is) {
     $new_title = $2;
- } elsif ($contents =~ m,<(h\d)\sclass="docChapterTitle"[^>]*>(.+?)<\1>,is) {
+     print STDERR "using title '$new_title' from docPartTitle\n" if ($verbose);
+ } elsif ($contents =~ m,<(h\d)\s+class="docChapterTitle"[^>]*>(.+?)\1>,is) {
     $new_title = $2;
- } elsif ($contents =~ m,<(h\d)\sclass="docSection1Title"[^>]*>(.+?)<\1>,is) {
+     print STDERR "using title '$new_title' from docChapterTitle\n" if ($verbose);
+ } elsif ($contents =~ m,<(h\d)\s+class="docSection1Title"[^>]*>(.+?)\1>,is) {
     $new_title = $2;
- } elsif ($contents =~ m,<(h\d)\sclass="chapter"[^>]*>(.+?)<\1>,is) {
+     print STDERR "using title '$new_title' from docSection1Title\n" if ($verbose);
+ } elsif ($contents =~ m,<(h\d)\s+class="doc[^"]*Title"[^>]*>(.+?)\1>,is) {
     $new_title = $2;
- } elsif ($contents =~ m,<(h\d)\sclass="sect1"[^>]*>(.+?)<\1>,is) {
+     print STDERR "using title '$new_title' from doc.+Title\n" if ($verbose);
+ } elsif ($contents =~ m,<(h\d)\s+class="chapter"[^>]*>(.+?)\1>,is) {
     $new_title = $2;
+     print STDERR "using title '$new_title' from chapter\n" if ($verbose);
+ } elsif ($contents =~ m,<(h\d)\s+class="sect1"[^>]*>(.+?)\1>,is) {
+     $new_title = $2;
+     print STDERR "using title '$new_title' from sect1\n" if ($verbose);
 } else {
     if ($contents =~ m,