9 |
$pre |= ''; |
$pre |= ''; |
10 |
$post |= ''; |
$post |= ''; |
11 |
my $isbn; |
my $isbn; |
12 |
if ($href =~ m/xmlid=([^&]+)&/) { |
if ($href =~ m/xmlid=([^&]+)&/i) { |
13 |
$isbn = $1; |
$isbn = $1; |
14 |
} elsif ($href =~ m/xmlid=([^&]+)$/i) { |
} elsif ($href =~ m/xmlid=([^&]+)$/i) { |
15 |
$isbn = $1; |
$isbn = $1; |
29 |
$mode .= "_"; |
$mode .= "_"; |
30 |
|
|
31 |
my $view; |
my $view; |
32 |
if ($href =~ m/view=([^&]+)&/) { |
if ($isbn =~ m/_index$/ && $href =~ m/view=([^&]+)&/) { |
33 |
$isbn .= "_".$1; |
$isbn .= "_".$1; |
34 |
} |
} |
35 |
|
|
39 |
$isbn .= ".html"; |
$isbn .= ".html"; |
40 |
|
|
41 |
# anchor |
# anchor |
42 |
if ($href =~ m/(#.+)$/) { |
if ($href =~ m/(#[^&]+)/) { |
43 |
$isbn .= $1; |
$isbn .= $1; |
44 |
} |
} |
45 |
|
|
61 |
$html =~ s!(<title>)O'Reilly Network Safari Bookshelf\s+-\s+!$1!gsi || die "$infile: title"; |
$html =~ s!(<title>)O'Reilly Network Safari Bookshelf\s+-\s+!$1!gsi || die "$infile: title"; |
62 |
|
|
63 |
$html =~ s!<body leftmargin="0" topmargin="0" marginwidth="0" marginheight="0">!<body leftmargin="10" topmargin="10" marginwidth="10" marginheight="10">!s || die "$infile: margins"; |
$html =~ s!<body leftmargin="0" topmargin="0" marginwidth="0" marginheight="0">!<body leftmargin="10" topmargin="10" marginwidth="10" marginheight="10">!s || die "$infile: margins"; |
64 |
$html =~ s;<a name="toppage">.*<!--Copyright.*?-->;;s || die "$infile: surround layout"; |
$html =~ s;<a name="toppage">.*<!--Copyright.*?-->;<a name="toppage"></a>;s || die "$infile: surround layout"; |
65 |
|
|
66 |
$html =~ s!<td valign="top" class="v2">.*?(<td valign="top" class="v2" align="right">)!$1!s || die "$infile: top buttons"; |
$html =~ s!<td valign="top" class="v2">.*?(<td valign="top" class="v2" align="right">)!$1!si || warn "$infile: top buttons"; |
67 |
$html =~ s!<td valign="top" class="v2"><a target="_new".*?(<td valign="top" class="v2" align="right">)!$1!s || warn "bottom buttons"; |
$html =~ s!<td valign="top" class="v2"><a target="_new".*?(<td valign="top" class="v2" align="right">)!$1!si || warn "bottom buttons"; |
68 |
|
|
69 |
$html =~ s!<p><b>URL</b>.*$!</body></html>!s || die "$infile: footer"; |
$html =~ s!<p><b>URL</b>.*$!</body></html>!si || die "$infile: footer"; |
70 |
|
|
71 |
$html =~ s;<!--.+?-->;;gs; |
$html =~ s;<!--.+?-->;;gs; |
72 |
|
|
74 |
|
|
75 |
$html =~ s!<a target="_new"[^>]*href="http://[^>]+>(.+?)</a>!$1!gs; |
$html =~ s!<a target="_new"[^>]*href="http://[^>]+>(.+?)</a>!$1!gs; |
76 |
|
|
77 |
|
$html =~ s!<img[^>]+Buy Print Version[^>]+>!!gs; |
78 |
|
$html =~ s!<a[^>]+onclick="OpenWin[^>]+mode=downloadPDF[^>]+>\s*<img[^>]+Download this chapter[^>]+>\s*</a>!!gs; |
79 |
|
|
80 |
open(OUT,"> $outfile") || die "$outfile: $!"; |
open(OUT,"> $outfile") || die "$outfile: $!"; |
81 |
print "$outfile\n"; |
print "$outfile\n"; |
82 |
print OUT $html; |
print OUT $html; |