/[wait]/cvs-head/lib/WAIT/Parse/Ora.pm
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /cvs-head/lib/WAIT/Parse/Ora.pm

Parent Directory | Revision Log | View Patch

revision 68 by laperla, Wed Jan 23 12:22:54 2002 UTC revision 69 by laperla, Fri Jan 25 07:27:30 2002 UTC
# Line 1  Line 1 
1  #!/usr/bin/perl  #!/usr/bin/perl
2  #                              -*- Mode: Perl -*-  #                              -*- Mode: Perl -*-
3  # $Basename: HTML.pm $  # $Basename: HTML.pm $
4  # $Revision: 1.4 $  # $Revision: 1.5 $
5  # Author          : Ulrich Pfeifer with Andreas König  # Author          : Ulrich Pfeifer with Andreas König
6  # Created On      : Sat Nov 1 1997  # Created On      : Sat Nov 1 1997
7  # Last Modified By: Ulrich Pfeifer  # Last Modified By: Ulrich Pfeifer
# Line 16  Line 16 
16    
17  package WAIT::Parse::Ora;  package WAIT::Parse::Ora;
18  use HTML::Parser;  use HTML::Parser;
19    use Encode;
20  use strict;  use strict;
21  use vars qw(@ISA);  use vars qw(@ISA);
22  @ISA = qw(WAIT::Parse::Base);  @ISA = qw(WAIT::Parse::Base);
# Line 66  sub handle_text { Line 67  sub handle_text {
67    $text .= $_[0] if $open;    $text .= $_[0] if $open;
68  }  }
69    
70    sub my_parse ($) {
71      my($s) = @_;
72      my $ls = Encode::encode("ISO-8859-1", $s, 1); # HTML::Parser returns
73                                                    # LATIN for entities
74                                                    # and we would get
75                                                    # mixed content in
76                                                    # result
77      $p->parse($ls);
78      $p->eof;
79    }
80    
81  sub split {  sub split {
82    my ($self, $doc) = @_;    my ($self, $doc) = @_;
83    my %doc = ( isbn => '', author => '', about => '', colophon => '' );    my %doc = ( isbn => '', author => '', about => '', colophon => '' );
# Line 77  sub split { Line 89  sub split {
89      %result = ();      %result = ();
90      $text = '';      $text = '';
91      $open = 0;      $open = 0;
92      $p->parse($doc->{author});      my_parse($doc->{author});
     $p->eof;  
93      $doc{author} = $result{title};      $doc{author} = $result{title};
94      $doc{author} =~ s/^By\s+//;      $doc{author} =~ s/^By\s+//;
95      $doc{about}  = $result{text};      $doc{about}  = $result{text};
# Line 90  sub split { Line 101  sub split {
101      %result = ();      %result = ();
102      $text = '';      $text = '';
103      $open = 0;      $open = 0;
104      $p->parse($doc->{colophon});      my_parse($doc->{colophon});
     $p->eof;  
105      $doc{colophon} = $result{text};      $doc{colophon} = $result{text};
106    }    }
107    %result = ();    %result = ();
108    $text = '';    $text = '';
109    $open = 0;    $open = 0;
110    
111    $p->parse($doc->{desc});    my_parse($doc->{desc});
   $p->eof;  
112    
113    $doc{text}  = $result{text};    $doc{text}  = $result{text};
114    $doc{title} = $result{title};    $doc{title} = $result{title};
115    
116      while (my($k,$v) = each %doc) {
117        my $utf8v = Encode::decode("ISO-8859-1",$v);
118        $doc{$k} = $utf8v;
119      }
120    
121    return \%doc;    return \%doc;
122  }  }
123    

Legend:
Removed from v.68  
changed lines
  Added in v.69

  ViewVC Help
Powered by ViewVC 1.1.26