1 |
# -*- Mode: Cperl -*- |
2 |
# Nroff.pm -- |
3 |
# ITIID : $ITI$ $Header $__Header$ |
4 |
# Author : Ulrich Pfeifer |
5 |
# Created On : Mon Sep 16 19:04:37 1996 |
6 |
# Last Modified By: Ulrich Pfeifer |
7 |
# Last Modified On: Fri Jan 4 15:56:11 2002 |
8 |
# Language : CPerl |
9 |
# Update Count : 86 |
10 |
# Status : Unknown, Use with caution! |
11 |
# |
12 |
# Copyright (c) 1996-1997, Ulrich Pfeifer |
13 |
# |
14 |
|
15 |
package WAIT::Document::Ora; |
16 |
@ISA = qw(WAIT::Document::Base); |
17 |
use WAIT::Document::Base; |
18 |
|
19 |
use IO::File; |
20 |
use Encode; |
21 |
use strict; |
22 |
use Carp; |
23 |
|
24 |
# Constructor used by tie().
#
# Arguments:
#   $type - class name, or an object whose class is reused
#   $dir  - directory to scan
#
# Every entry of $dir that holds a "desc.html" and whose "index.html"
# contains the XHTML 1.0 Transitional preamble followed by a
# "<!-- product id:" comment becomes one key of the tied hash.
#
# Returns the blessed object with the slots "Dir" (the directory) and
# "Files" (array reference with the accepted entry names).
#
# Croaks if $dir cannot be opened; confesses if an "index.html" that sits
# next to a "desc.html" cannot be opened.
sub TIEHASH {
    my $type = shift;
    my $dir  = shift;
    my @files;

    # Lexical handles: the former barewords DIR and F were package globals
    # shared with every other caller, and F was never closed.
    opendir(my $dh, $dir) or croak "Could not open '$dir': $!";
    DIRENT: for my $entry (readdir $dh) {
        # next unless $entry =~ /nt/;
        next DIRENT unless -f "$dir/$entry/desc.html";

        my $index = "$dir/$entry/index.html";
        # Three-argument open: the path is taken literally, so leading or
        # trailing whitespace and mode/pipe characters in it are harmless.
        open(my $fh, '<', $index)
            or Carp::confess("Could not open $index: $!");
        my $content = do { local $/; <$fh> };
        # CORE:: because this package defines its own close() method.
        CORE::close($fh);

        next DIRENT unless defined $content;
        next DIRENT unless $content =~ m|<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"\s+"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">\s+<html xmlns="http://www.w3.org/1999/xhtml" lang="en-US" xml:lang="en-US">\s+<head>\s+<!-- product id:|s;
        push @files, $entry;
    }
    closedir $dh;

    my $self = {
        Dir   => $dir,
        Files => \@files,
    };
    bless $self, ref($type) || $type;
}
48 |
|
49 |
# Tied-hash read access.
#
# Arguments:
#   $self - the tied object; its "Dir" slot names the base directory
#   $file - the key, i.e. the name of one entry below "Dir"
#
# Returns a hash reference with the keys "desc", "author", "index" and
# "colophon".  Each value is the result of conv_getline() on
# "<Dir>/<file>/<key>.html", or undef when that file cannot be opened.
sub FETCH {
    my $self = shift;
    my $file = shift;

    my %part;
    for my $name (qw(desc author index colophon)) {
        my $fh = IO::File->new(join('/', $self->{Dir}, $file, "$name.html"));
        # Plain assignment with a ternary instead of "my $x = ... if $fh":
        # a "my" guarded by a statement modifier has undefined behaviour.
        # The check now also covers desc.html, which used to be handed to
        # conv_getline() without testing whether the open succeeded.
        $part{$name} = $fh ? conv_getline($fh) : undef;
    }
    return \%part;
}
70 |
|
71 |
# WAIT::Document::Ora::conv_getline
#
# Read everything from the filehandle $fh and return it as one decoded
# character string in which every run of whitespace is collapsed to a
# single blank.
#
# The source encoding is taken from an XML declaration on the first line
# (<?xml ... encoding="..."?>); without one, ISO-8859-1 is assumed.  The
# decoding is done by Encode::decode, which croaks on an encoding name it
# does not know.
#
# Returns undef when $fh is undefined.
sub conv_getline ($) {
    my($fh) = shift;
    # Always hand back exactly one value, as the normal path does.
    return undef unless defined $fh;

    my $firstline = do { local $/ = "\n"; <$fh> };
    my $src_enc = "ISO-8859-1";
    # \042 is double quote, \047 is single quote. Single quotes are avoided
    # here just for easier copy and paste to the terminal when debugging.
    # XML allows whitespace on both sides of "=" and dots in encoding
    # names, so both are accepted.
    if (defined $firstline
        and $firstline =~ /<\?xml[^>]+encoding\s*=\s*([\042\047])([\w.\-]+)\1/) {
        $src_enc = $2;
    }

    my $rewound = seek $fh, 0, 0;
    my $content = do { local $/; <$fh> };
    # On a handle that cannot seek, the first line has already been
    # consumed: put it back in front of the remainder.
    if (!$rewound and defined $firstline) {
        $content = $firstline . (defined $content ? $content : '');
    }
    $content =~ s/\s+/ /gs # eliminate TABs and CRs for easier debugging
        if defined $content;
    my $dcontent = Encode::decode($src_enc, $content);
    $dcontent;
}
92 |
|
93 |
# Tied-hash iteration start: rewind the cursor kept in the "fno" slot and
# hand back whatever NEXTKEY delivers for the first position.
sub FIRSTKEY {
    my ($self) = @_;
    $self->{fno} = 0;
    return $self->NEXTKEY;
}
98 |
|
99 |
# Tied-hash iteration step.
#
# Returns the entry name at the cursor position stored in the "fno" slot
# and advances the cursor, or undef once all names in "Files" have been
# delivered.  Also returns undef when the "Files" slot is gone (close()
# deletes it) instead of dereferencing an undefined value.
sub NEXTKEY {
    my $self  = shift;
    my $files = $self->{Files} || [];
    my $fno   = $self->{fno}   || 0;

    # ">=": index scalar(@$files) is already one past the last element.
    # Explicit undef keeps the single-value return of the normal path.
    return undef if $fno >= @$files;

    $self->{fno} = $fno + 1;
    $files->[$fno];
}
104 |
|
105 |
# Drop the iteration state: removes the "fno" cursor and the "Files" list
# from the object (the list is not needed at query time).  The value of the
# removed "Files" slot is the return value.
sub close {
    my ($self) = @_;

    delete $self->{fno};
    return delete $self->{Files};
}
111 |
|
112 |
1; |