/[safari]/filter.pl
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /filter.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.6 - (show annotations)
Tue Feb 1 14:34:55 2005 UTC (19 years, 1 month ago) by dpavlin
Branch: MAIN
CVS Tags: HEAD
Changes since 1.5: +3 -3 lines
File MIME type: text/plain
fixed header/footer

1 #!/usr/bin/perl -w
2
3 use strict;
# The page to filter is the first command-line argument; without one the
# script stops with a short usage hint.
my $infile = shift(@ARGV)
    or die "$0 [filename]";

# Name of the rewritten file; it is assigned further down from
# xmlid2file($infile).
my $outfile;
# Translate a URL (or filename) carrying an "xmlid=" query parameter into
# the flat local filename used for the filtered copy of that page.
#
# Arguments:
#   $href - URL or filename to translate
#   $pre  - optional text prepended to the result (defaults to '')
#   $post - optional text appended to the result (defaults to '')
#
# Returns $pre . "<mode>_<xmlid>.html" . $post, where
#   * <xmlid> is the xmlid value with "/" and "%2f" replaced by "_",
#   * <mode> is the value of a "mode=" parameter, or "section" if none
#     is found,
#   * for xmlids ending in "_index" the "view=" value is appended,
#   * a "#anchor" found in $href is re-attached after ".html".
# When $href has no xmlid parameter a "skipping" note is written to STDERR
# and $pre . $href . $post is returned untouched.
sub xmlid2file {
    my ($href, $pre, $post) = @_;

    # Optional arguments default to the empty string.  A defined-check is
    # used here instead of a bitwise "|=", which is not a default operator.
    $pre  = '' unless defined $pre;
    $post = '' unless defined $post;

    # The xmlid value runs up to the next "&" or to the end of the string.
    my $isbn;
    if ($href =~ m/xmlid=([^&]+)(?:&|$)/i) {
        $isbn = $1;
    } else {
        print STDERR "skipping $href\n";
        return $pre.$href.$post;
    }

    # Path separators (literal or URL-encoded) must not end up in the
    # filename.
    $isbn =~ s!%2f!_!gi;
    $isbn =~ s!/!_!g;

    # NOTE(review): "mode=" and "view=" are only recognised when followed
    # by "&"; a parameter at the very end of the URL is ignored.  Kept as
    # is because existing filenames depend on it -- confirm if intended.
    my $mode = ($href =~ m/mode=([^&]+)&/) ? $1 : "section";
    $mode .= "_";

    if ($isbn =~ m/_index$/ && $href =~ m/view=([^&]+)&/) {
        $isbn .= "_".$1;
    }

    # remove anchor from absolute URLs (it was captured as part of xmlid
    # when xmlid is the last parameter)
    $isbn =~ s/#.+$//;

    $isbn .= ".html";

    # re-attach the anchor, if any, after the extension
    if ($href =~ m/(#[^&]+)/) {
        $isbn .= $1;
    }

    return $pre.$mode.$isbn.$post;
}
49
# The output filename is derived from the input filename with the same
# mapping that is applied to the links inside the page.
# NOTE(review): if $infile contains no "xmlid=" parameter, xmlid2file()
# returns it unchanged, so $outfile equals $infile and the input file is
# overwritten below -- confirm that this is acceptable.
$outfile = xmlid2file($infile);

# Read the whole page into one string, normalising every line to end in
# exactly one "\n".
my $html = '';
open(my $in, '<', $infile) or die "$infile: $!";
while (my $line = <$in>) {
    chomp $line;
    $html .= $line."\n";
}
close($in);

# Strip the site name prefix from the <title>.
$html =~ s!(<title>)O'Reilly Network Safari Bookshelf\s+-\s+!$1!gsi || die "$infile: title";

# Replace the zero page margins with 10-unit margins.
$html =~ s!<body leftmargin="0" topmargin="0" marginwidth="0" marginheight="0">!<body leftmargin="10" topmargin="10" marginwidth="10" marginheight="10">!s || die "$infile: margins";
# Drop everything from the "toppage" anchor through the last Copyright
# comment, leaving only an empty anchor.
$html =~ s;<a name="toppage">.*<!--Copyright.*?-->;<a name="toppage"></a>;s || die "$infile: surround layout";

# Remove the left-hand button cells, keeping the right-aligned cell that
# follows them.  Missing buttons are only worth a warning.
$html =~ s!<td valign="top" class="v2">.*?(<td valign="top" class="v2" align="right">)!$1!si || warn "$infile: top buttons";
$html =~ s!<td valign="top" class="v2"><a target="_new".*?(<td valign="top" class="v2" align="right">)!$1!si || warn "bottom buttons";

# Cut the page off at the "URL" paragraph and close the document.
$html =~ s!<p><b>URL</b>.*$!</body></html>!si || die "$infile: footer";

# Remove all remaining HTML comments.
$html =~ s;<!--.+?-->;;gs;

# Rewrite every link target through xmlid2file(), passing the text before
# and after the href value so the rest of the tag is preserved.
$html =~ s!(<a\s+[^>]*href=")([^"]+)("[^>]*>)!xmlid2file("$2","$1","$3")!iegs || die "$infile: links";

# Unwrap links that open absolute http:// URLs in a new window, keeping
# only their content.
$html =~ s!<a target="_new"[^>]*href="http://[^>]+>(.+?)</a>!$1!gs;

# Remove the "Buy Print Version" image and the PDF download link.
$html =~ s!<img[^>]+Buy Print Version[^>]+>!!gs;
$html =~ s!<a[^>]+onclick="OpenWin[^>]+mode=downloadPDF[^>]+>\s*<img[^>]+Download this chapter[^>]+>\s*</a>!!gs;

# Write the filtered page and report its name on STDOUT.
open(my $out, '>', $outfile) or die "$outfile: $!";
print "$outfile\n";
print {$out} $html;
# Buffered write errors only surface at close, so check it.
close($out) or die "$outfile: $!";


# fix timestamp
# stat() fields: atime = 8, mtime = 9 -- exactly what utime() expects.
# "or" (not "||") keeps stat() in list context; with "||" it was evaluated
# in scalar context and @s never held the real timestamps.
my @s = stat($infile) or die "stat $infile: $!";
utime($s[8], $s[9], $outfile) or die "touch $outfile: $!";

  ViewVC Help
Powered by ViewVC 1.1.26