/[wait]/trunk/lib/WAIT/Parse/HTML.pm
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/lib/WAIT/Parse/HTML.pm

Parent Directory | Revision Log | View Patch

revision 117 by dpavlin, Mon May 24 13:44:01 2004 UTC revision 118 by dpavlin, Fri Jul 15 18:59:10 2005 UTC
# Line 25  use HTML::Entities qw(decode_entities); Line 25  use HTML::Entities qw(decode_entities);
25  sub split {  sub split {
26    my ($self, $html_source) = @_;    my ($self, $html_source) = @_;
27    
28    my ($title) = $html_source =~ /<title\s*>(.*?)<\/title\s*>/si;    my (undef,$title) = $html_source =~ /<(title|h1|h2|h3|h4)[^>]*>(.*?)<\/\1\s*>/si;
29    
30    my $html = HTML::Parse::parse_html($html_source);    my $html = HTML::Parse::parse_html($html_source);
31    my $formatter = HTML::FormatText->new;    my $formatter = HTML::FormatText->new;
32    
33    {    {
34     'text',  $formatter->format($html),     'text',  $formatter->format($html),
35     'title', $formatter->format(HTML::Parse::parse_html($title)),     'title', $title || 'no title',
36    };    };
37  }  }
38    
# Line 43  sub tag { Line 44  sub tag {
44    my ($pre,$title,$body)    my ($pre,$title,$body)
45        = $html_source =~ /^(.*?<title\s*>)(.*?)(<\/title\s*>.+)/si;        = $html_source =~ /^(.*?<title\s*>)(.*?)(<\/title\s*>.+)/si;
46    
47      $pre .= '';
48      $title .= '';
49      $body .= '';
50    
51    (    (
52     {'text'  => 1},  decode_entities($pre),     {'text'  => 1},  decode_entities($pre),
53     {'title' => 1},  decode_entities($title),     {'title' => 2},  decode_entities($title),
54     {'text'  => 1},  decode_entities($body),     {'text'  => 1},  decode_entities($body),
55    );    );
56  }  }
57    
58    1;

Legend:
Removed from v.117  
changed lines
  Added in v.118

  ViewVC Help
Powered by ViewVC 1.1.26