/[wait]/cvs-head/lib/WAIT/Parse/Ora.pm
This is a repository of my old source code, which is no longer updated. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /cvs-head/lib/WAIT/Parse/Ora.pm

Parent Directory | Revision Log


Revision 61 - (hide annotations)
Fri Jan 4 15:11:26 2002 UTC (22 years, 5 months ago) by ulpfr
File size: 2134 byte(s)
Look for the ISBN in index.html

1 ulpfr 54 #!/usr/bin/perl
2     # -*- Mode: Perl -*-
3     # $Basename: HTML.pm $
4 ulpfr 61 # $Revision: 1.3 $
5 ulpfr 54 # Author : Ulrich Pfeifer with Andreas König
6     # Created On : Sat Nov 1 1997
7     # Last Modified By: Ulrich Pfeifer
8 ulpfr 61 # Last Modified On: Fri Jan 4 16:06:14 2002
9 ulpfr 54 # Language : CPerl
10 ulpfr 61 # Update Count : 14
11 ulpfr 54 # Status : Unknown, Use with caution!
12     #
13     # (C) Copyright 1997, Ulrich Pfeifer, all rights reserved.
14     #
15     #
16    
package WAIT::Parse::Ora;

use strict;
use HTML::Parser;
use vars qw(@ISA);

# Inherit from WAIT::Parse::Base.
# NOTE(review): this file does not load that module itself; presumably the
# caller has already done so -- confirm.
@ISA = qw(WAIT::Parse::Base);

# Set to a true value to print a trace of the tracked start/end tags.
my $debug = 0;

# Maps each HTML element whose text is collected to the key of %result that
# receives it.  Elements that are not listed are ignored by the handlers.
my %text = (
    p     => 'text',
    # h1  => 'text',
    # h2  => 'text',
    # h3  => 'text',
    title => 'title',
);

# A single event-driven parser shared by every call to split().  Its
# callbacks communicate through the file-level lexicals declared below.
my $p = HTML::Parser->new(
    api_version     => 3,
    marked_sections => 1,
    start_h         => [\&handle_start, "tagname, attr"],
    end_h           => [\&handle_end, "tagname"],
    text_h          => [\&handle_text, "dtext"],
);

# State shared between the callbacks; split() resets all three before each
# parse.
my %result;    # collected text, keyed by the values of %text
my $text;      # text gathered since the last tracked end tag
my $open;      # number of tracked elements currently open
42    
# Start-tag callback for the shared parser.  It is registered with the
# argspec "tagname, attr"; only the tag name is used.  For an element listed
# in %text the nesting counter $open is incremented, which makes
# handle_text() start collecting text.  Any other tag is ignored.
sub handle_start {
    my ($tagname) = @_;

    if ($text{$tagname}) {
        $open++;
        print ">" x $open, $tagname, "\n" if $debug;
    }
}
50    
# End-tag callback for the shared parser (argspec "tagname").
#
# When a tracked element (one listed in %text) closes, the text gathered
# since the previous flush is whitespace-normalised, appended -- followed by
# a single space -- to the %result entry named by %text, and the buffer is
# emptied.  End tags of untracked elements are ignored.
#
# An end tag that arrives while no tracked element is open is ignored as
# well.  Without that guard a stray "</p>" would push $open below zero; a
# negative counter is still true, so handle_text() would collect text
# outside any tracked element, and the next real start tag would only bring
# the counter back to zero and lose that element's text.
sub handle_end {
    my ($tagname) = @_;

    my $field = $text{$tagname};
    return unless $field;

    # Unbalanced end tag: nothing is open, so there is nothing to flush.
    return unless $open && $open > 0;

    print "<" x $open, $tagname, "\n" if $debug;
    $open--;

    # Trim both ends and collapse internal runs of whitespace.
    $text = '' unless defined $text;
    $text =~ s/^\s+//;
    $text =~ s/\s+$//;
    $text =~ s/\s+/ /g;

    $result{$field} .= $text . ' ';
    $text = '';
}
63    
64    
# Text callback for the shared parser (argspec "dtext").  The chunk is
# appended to the $text buffer only while $open is true, i.e. while a
# tracked element is open; otherwise it is dropped.
sub handle_text {
    my ($chunk) = @_;

    return unless $open;
    $text .= $chunk;
}
68    
# Split one document record into the fields that get indexed.
#
# Arguments:
#   $self - invocant (not used here).
#   $doc  - hash reference; the keys read are:
#             desc   - HTML of the description page
#             author - HTML of the author page (optional)
#             index  - text of index.html, searched for the ISBN (optional)
#
# Returns a reference to a new hash with these keys:
#   isbn   - digits and hyphens following "ISBN" in $doc->{index}, with an
#            optional trailing X check digit; '' if nothing matched
#   author - <title> text of the author page, '' if unavailable
#   about  - <p> text of the author page, '' if unavailable
#   text   - <p> text of the description page (undef if none was found)
#   title  - <title> text of the description page (undef if none was found)
#
# NOTE: the parser and its state are file-level lexicals shared by all
# calls, so this method is not reentrant.
sub split {
    my ($self, $doc) = @_;
    my %doc = ( isbn => '', author => '', about => '' );

    if ($doc->{author}) {
        my $parsed = _parse_html($doc->{author});

        # Keep the '' defaults when the author page lacks the element.
        $doc{author} = $parsed->{title} if defined $parsed->{title};
        $doc{about}  = $parsed->{text}  if defined $parsed->{text};
    }

    # The optional X covers ISBN-10 numbers whose check digit is "X"; \b
    # keeps an ordinary word that follows the number from being swallowed.
    if ($doc->{index} and $doc->{index} =~ /ISBN\s*([-\d]+(?:[Xx]\b)?)/) {
        $doc{isbn} = $1;
    }

    my $parsed = _parse_html($doc->{desc});
    $doc{text}  = $parsed->{text};
    $doc{title} = $parsed->{title};

    return \%doc;
}

# Reset the shared parser state, run $html through the parser and return a
# copy of what the handlers collected, as a hash reference keyed by the
# values of %text.  An undefined $html yields an empty hash.
sub _parse_html {
    my ($html) = @_;

    %result = ();
    $text   = '';
    $open   = 0;

    if (defined $html) {
        $p->parse($html);
        $p->eof;    # also resets the parser so it can be reused
    }

    return { %result };
}
99 ulpfr 58
100     1;

Properties

Name Value
cvs2svn:cvs-rev 1.3

  ViewVC Help
Powered by ViewVC 1.1.26