#!/usr/bin/perl
#                              -*- Mode: Perl -*-
# $Basename: HTML.pm $
# $Revision: 1.1 $
# Author          : Ulrich Pfeifer with Andreas König
# Created On      : Sat Nov 1 1997
# Last Modified By: Ulrich Pfeifer
# Last Modified On: Mon Dec 31 14:51:55 2001
# Language        : CPerl
# Update Count    : 7
# Status          : Unknown, Use with caution!
#
# (C) Copyright 1997, Ulrich Pfeifer, all rights reserved.
#
#

17 |
package WAIT::Parse::Ora; |
18 |
use vars qw(@ISA); |
19 |
require HTML::Parse; |
20 |
require HTML::FormatText; |
21 |
use HTML::Entities qw(decode_entities); |
22 |
@ISA = qw(WAIT::Parse::Base); |
23 |
|
24 |
|
25 |
# Split a document record into plain-text fields.
#
# $doc is a hash reference; its 'desc' and 'author' entries are read as
# HTML strings.  Returns a hash reference with three keys:
#   text   - the complete 'desc' HTML rendered through HTML::FormatText
#   title  - the contents of the first <title> element of 'desc', rendered
#   author - the contents of the first <title> element of 'author', rendered
# A missing field or a missing <title> element is treated as an empty string.
sub split {
  my ($self, $doc) = @_;

  my $desc = defined $doc->{desc}   ? $doc->{desc}   : '';
  my $auth = defined $doc->{author} ? $doc->{author} : '';
  my ($title)  = $desc =~ /<title\s*>(.*?)<\/title\s*>/si;
  my ($author) = $auth =~ /<title\s*>(.*?)<\/title\s*>/si;

  my $formatter = HTML::FormatText->new;
  my %field;
  for my $part (['text', $desc], ['title', $title], ['author', $author]) {
    my ($name, $markup) = @$part;

    # Never hand undef to the parser: an unmatched <title> leaves $markup unset.
    my $tree = HTML::Parse::parse_html(defined $markup ? $markup : '');
    $field{$name} = $formatter->format($tree);

    # HTML::Element trees hold parent<->child references and are not freed
    # when they go out of scope; release each one explicitly.
    $tree->delete;
  }

  return \%field;
}
41 |
|
42 |
# Break the HTML source of a document into tagged chunks.
#
# $doc is a hash reference whose 'desc' entry holds the HTML source.
# Returns a flat list of three (tag-hash, string) pairs:
#   {'text'  => 1}, everything up to and including the opening <title>
#   {'title' => 1}, the contents of the <title> element
#   {'text'  => 1}, the closing </title> and everything after it
# Each string is passed through decode_entities.  When the source has no
# complete <title>...</title> element, the whole source is returned as the
# first 'text' chunk and the other two chunks are empty strings.
sub tag {
  my ($self, $doc) = @_;

  my $html_source = defined $doc->{desc} ? $doc->{desc} : '';
  $html_source =~ tr/\r/\n/;    # turn every carriage return into a newline

  # The trailing group uses .* (not .+) so that a page ending directly
  # after </title> still matches.
  my ($pre, $title, $body)
    = $html_source =~ /^(.*?<title\s*>)(.*?)(<\/title\s*>.*)/si;

  # No match means no usable <title>; keep the page text instead of
  # returning three undefined chunks.
  ($pre, $title, $body) = ($html_source, '', '') unless defined $pre;

  return (
    {'text'  => 1}, decode_entities($pre),
    {'title' => 1}, decode_entities($title),
    {'text'  => 1}, decode_entities($body),
  );
}