1 |
#!/usr/bin/perl |
2 |
# -*- Mode: Perl -*- |
3 |
# $Basename: HTML.pm $ |
4 |
# $Revision: 1.2 $ |
5 |
# Author : Ulrich Pfeifer with Andreas König |
6 |
# Created On : Sat Nov 1 1997 |
7 |
# Last Modified By: Ulrich Pfeifer |
8 |
# Last Modified On: Wed Nov 5 16:48:17 1997 |
9 |
# Language : CPerl |
10 |
# Update Count : 1 |
11 |
# Status : Unknown, Use with caution! |
12 |
# |
13 |
# (C) Copyright 1997, Ulrich Pfeifer, all rights reserved. |
14 |
# |
15 |
# |
16 |
|
17 |
package WAIT::Parse::HTML; |
18 |
use vars qw(@ISA); |
19 |
require HTML::Parse; |
20 |
require HTML::FormatText; |
21 |
use HTML::Entities qw(decode_entities); |
22 |
@ISA = qw(WAIT::Parse::Base); |
23 |
|
24 |
|
25 |
# split($html_source)
#
# Break an HTML document into its plain-text fields.
#
# Parameters:
#   $self        - invocant (not used by this method).
#   $html_source - the HTML document as a single string; undef is
#                  treated as an empty document.
#
# Returns a hash reference with two keys:
#   'text'  - the whole document rendered by HTML::FormatText.
#   'title' - the content of the first <title>...</title> element,
#             rendered the same way (rendering of an empty fragment
#             when the document has no title).
sub split {
  my ($self, $html_source) = @_;

  $html_source = '' unless defined $html_source;

  my ($title) = $html_source =~ /<title\s*>(.*?)<\/title\s*>/si;

  # A document without a title must not hand undef to the parser.
  $title = '' unless defined $title;

  my $formatter  = HTML::FormatText->new;
  my $html       = HTML::Parse::parse_html($html_source);
  my $title_tree = HTML::Parse::parse_html($title);

  # Force scalar context so each key is paired with exactly one value,
  # whatever format() would return in list context.
  my %field = (
    'text'  => scalar $formatter->format($html),
    'title' => scalar $formatter->format($title_tree),
  );

  # HTML::Element trees link parents and children in both directions;
  # delete them explicitly so the memory is released after every call.
  $html->delete;
  $title_tree->delete;

  return \%field;
}
37 |
|
38 |
# tag($html_source)
#
# Cut an HTML document into consecutive chunks, each labelled with the
# field it belongs to.
#
# Parameters:
#   $self        - invocant (not used by this method).
#   $html_source - the HTML document as a single string; undef is
#                  treated as an empty document.
#
# Returns a flat list alternating between a tag hash reference and the
# entity-decoded chunk it labels:
#   {'text' => 1},  everything up to and including the opening <title>
#   {'title' => 1}, the title content
#   {'text' => 1},  the closing </title> and everything after it
# When the document contains no complete <title>...</title> pair, the
# whole document is returned as a single 'text' chunk.
sub tag {
  my ($self, $html_source) = @_;

  $html_source = '' unless defined $html_source;

  # Carriage returns are mapped to newlines one-for-one.
  $html_source =~ tr/\r/\n/;

  # The tail is matched with .* (not .+) so a document that ends right
  # after </title> is still recognised.
  my ($pre, $title, $body)
    = $html_source =~ /^(.*?<title\s*>)(.*?)(<\/title\s*>.*)/si;

  # No title element: keep the text instead of returning undef chunks.
  unless (defined $pre) {
    return ({'text' => 1}, scalar decode_entities($html_source));
  }

  # Non-void context makes decode_entities return decoded copies; scalar
  # keeps each label paired with exactly one chunk.
  return (
    {'text'  => 1}, scalar decode_entities($pre),
    {'title' => 1}, scalar decode_entities($title),
    {'text'  => 1}, scalar decode_entities($body),
  );
}