/[swish]/trunk/spider/filter.pm
This is a repository of my old source code, which isn't updated anymore. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/spider/filter.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 99 by dpavlin, Mon Aug 30 11:14:24 2004 UTC revision 100 by dpavlin, Sat Apr 30 20:21:02 2005 UTC
# Line 66  sub filter { Line 66  sub filter {
66          # construct new title (from various parts of DocBook if available)          # construct new title (from various parts of DocBook if available)
67          my $new_title;          my $new_title;
68    
# Build a fallback title from the first lines of a document.
#
# Only the first 4096 characters of $contents are examined. Markup tags
# and leading whitespace are removed; if the remaining text is longer
# than 50 characters and has a line break after that point, it is cut at
# the first such line break. Whitespace is then normalised so the result
# is a single line without trailing blanks.
#
# Returns nothing (undef in scalar context) when $contents is false
# (undef, '' or '0'). Otherwise returns the title string, which can be
# empty when the examined text held only tags and whitespace.
# Reports the chosen title on STDERR when $verbose is true.
sub create_title($) {
	my $contents = shift || return;

	# look only at the beginning of the document
	my $new_title = substr($contents, 0, 4096);

	# drop markup tags, then leading whitespace
	$new_title =~ s/<[^>]+>//g;
	$new_title =~ s/^\s+//;

	# keep the first 50 characters plus the rest of that line; /s lets
	# "." cross line breaks, so short leading lines are joined together
	$new_title =~ s/^(.{50}.*?)[\n\r].+$/$1/s;

	# collapse every whitespace run to one space (a lone newline or tab
	# used to survive the old s/\s\s+/ / rule) and trim the end, so the
	# title never carries line breaks or trailing blanks
	$new_title =~ s/\s+/ /g;
	$new_title =~ s/\s+$//;

	print STDERR "using title '$new_title' from first lines in document\n" if ($verbose);
	return $new_title;
}
79    
80          if ($contents =~ m,<!--SafTocEntry="([^"]+)"-->,is) {          if ($contents =~ m,<!--SafTocEntry="([^"]+)"-->,is) {
81                  $new_title = $1;                  $new_title = $1;
82                  print STDERR "using title '$new_title' from <!--SafTocEntry-->\n" if ($verbose);                  print STDERR "using title '$new_title' from <!--SafTocEntry-->\n" if ($verbose);
# Line 94  sub filter { Line 105  sub filter {
105                  } elsif ($contents =~ m,<h\d[^>]*>([^<]+)</h\d>,is) {                  } elsif ($contents =~ m,<h\d[^>]*>([^<]+)</h\d>,is) {
106                          $new_title = $1;                          $new_title = $1;
107                          print STDERR "using title '$new_title' from <h_>\n" if ($verbose);                          print STDERR "using title '$new_title' from <h_>\n" if ($verbose);
108                    } else {
109                            $new_title = create_title($contents);
110                  }                  }
111          }          }
112    
# Line 115  sub filter { Line 128  sub filter {
128          } else {          } else {
129                  # fall-back to collection title                  # fall-back to collection title
130                  $new_title = $collection;                  $new_title = $collection;
131                    my $tmp = create_title($contents);
132                    $new_title .= " :: $tmp" if ($tmp);
133          }          }
134    
135          $new_title =~ s/\s\s+/ /g;          $new_title =~ s/\s\s+/ /g;

Legend:
Removed from v.99  
changed lines
  Added in v.100

  ViewVC Help
Powered by ViewVC 1.1.26