11 |
my $isbn; |
my $isbn; |
12 |
if ($href =~ m/xmlid=([^&]+)&/) { |
if ($href =~ m/xmlid=([^&]+)&/) { |
13 |
$isbn = $1; |
$isbn = $1; |
14 |
} elsif ($href =~ m/xmlid=([^&]+)$/) { |
} elsif ($href =~ m/xmlid=([^&]+)$/i) { |
15 |
$isbn = $1; |
$isbn = $1; |
16 |
} else { |
} else { |
17 |
print STDERR "skipping $href\n"; |
print STDERR "skipping $href\n"; |
58 |
} |
} |
59 |
# NOTE(review): every statement below appears twice, with bare numbers and a
# trailing "|" in between -- this looks like both sides of a side-by-side diff
# rather than runnable Perl; confirm which copy is the live one.
# Close the IN handle.
close(IN); |
close(IN); |
60 |


61 |
# Strip the "O'Reilly Network Safari Bookshelf - " prefix that follows <title>
# (case-insensitive, every occurrence); die if nothing was replaced.
$html =~ s,(<title>)O'Reilly Network Safari Bookshelf\s+-\s+,$1,gsi || die "$infile: title";
$html =~ s!(<title>)O'Reilly Network Safari Bookshelf\s+-\s+!$1!gsi || die "$infile: title";
62 |


63 |
# Swap the zero-margin <body> tag for one with 10-unit margins; die if the
# exact zero-margin tag is not present.
$html =~ s,<body leftmargin="0" topmargin="0" marginwidth="0" marginheight="0">,<body leftmargin="10" topmargin="10" marginwidth="10" marginheight="10">,s || die "$infile: margins";
$html =~ s!<body leftmargin="0" topmargin="0" marginwidth="0" marginheight="0">!<body leftmargin="10" topmargin="10" marginwidth="10" marginheight="10">!s || die "$infile: margins";
64 |
# Delete everything from <a name="toppage"> through a "<!--Copyright ... -->"
# comment. The first ".*" is greedy and /s lets it cross newlines, so the
# match runs to the LAST such comment in $html.
$html =~ s,<a name="toppage">.*<!--Copyright.*?-->,,s || die "$infile: surround layout";
$html =~ s;<a name="toppage">.*<!--Copyright.*?-->;;s || die "$infile: surround layout";
65 |


66 |
# Remove the first <td valign="top" class="v2"> cell and what follows it, up
# to (but not including) the next right-aligned cell, which $1 puts back.
$html =~ s,<td valign="top" class="v2">.*?(<td valign="top" class="v2" align="right">),$1,s || die "$infile: top buttons";
$html =~ s!<td valign="top" class="v2">.*?(<td valign="top" class="v2" align="right">)!$1!s || die "$infile: top buttons";
67 |
# Same removal for the cell that opens with an <a target="_new" link. A miss
# here only warns, so processing continues without it.
$html =~ s,<td valign="top" class="v2"><a target="_new".*?(<td valign="top" class="v2" align="right">),$1,s || warn "bottom buttons";
$html =~ s!<td valign="top" class="v2"><a target="_new".*?(<td valign="top" class="v2" align="right">)!$1!s || warn "bottom buttons";
68 |


69 |
# Replace everything from the first "<p><b>URL</b>" to the end of the string
# with the closing </body></html> tags.
$html =~ s,<p><b>URL</b>.*$,</body></html>,s || die "$infile: footer";
$html =~ s!<p><b>URL</b>.*$!</body></html>!s || die "$infile: footer";
70 |


71 |
# Strip all remaining HTML comments (non-greedy, may span lines). No failure
# check: having no comments left is acceptable.
$html =~ s,<!--.+?-->,,gs;
$html =~ s;<!--.+?-->;;gs;
72 |


73 |
# Rewrite every <a ... href="..."> tag. /e evaluates the replacement as Perl,
# calling xmlid2file(href value, tag text before it, tag text after it).
# xmlid2file is not visible in this section. Dies if no link matched at all.
$html =~ s!(<a\s+[^>]*href=")([^"]+)("[^>]*>)!xmlid2file("$2","$1","$3")!iegs || die "$infile: links";
$html =~ s!(<a\s+[^>]*href=")([^"]+)("[^>]*>)!xmlid2file("$2","$1","$3")!iegs || die "$infile: links";
74 |


78 |
# NOTE(review): the markers jump from 74 to 78, so statements (presumably the
# open of OUT) are missing from this view -- verify against the full file.
# Report the output file name on STDOUT, then write the cleaned HTML to OUT.
print "$outfile\n"; |
print "$outfile\n"; |
79 |
print OUT $html; |
print OUT $html; |
80 |
close(OUT); |
close(OUT); |
81 |
|
|
82 |
|
|
83 |
|
# Fix timestamp: copy the input file's access and modification times onto the
# output file.
# stat() list indices: atime = 8, mtime = 9 (ctime is index 10 and cannot be
# set with utime).
#
# Low-precedence "or" is required on both statements. With "||", stat() is
# evaluated in scalar context (so @s would hold a single true/false value and
# $s[8]/$s[9] would be undef), and on the utime line "||" would bind to
# $outfile, so a failed utime would go unnoticed.
my @s = stat($infile) or die "stat $infile: $!";

# utime returns the number of files it changed; 0 means the call failed and
# $! holds the reason.
utime($s[8], $s[9], $outfile) or die "touch $outfile: $!";