/[safari]/filter.pl
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /filter.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (show annotations)
Sun Dec 14 19:11:30 2003 UTC (20 years, 4 months ago) by dpavlin
Branch: MAIN
Branch point for: foo
File MIME type: text/plain
Initial revision

1 #!/usr/bin/perl -w
2
3 use strict;
4 my $infile = shift @ARGV || die "$0 [filename]";
5
6 my $outfile;
7 sub xmlid2file {
8 my ($href,$pre,$post) = @_;
9 $pre |= '';
10 $post |= '';
11 my $isbn;
12 if ($href =~ m/xmlid=([^&]+)&/) {
13 $isbn = $1;
14 } elsif ($href =~ m/xmlid=([^&]+)$/) {
15 $isbn = $1;
16 } else {
17 print STDERR "skipping $href\n";
18 return $pre.$href.$post;
19 }
20 $isbn =~ s!%2f!_!gi;
21 $isbn =~ s!/!_!g;
22
23 my $mode = "";
24 if ($href =~ m/mode=([^&]+)&/) {
25 $mode = $1
26 } else {
27 $mode = "section";
28 }
29 $mode .= "_";
30
31 my $view;
32 if ($href =~ m/view=([^&]+)&/) {
33 $isbn .= "_".$1;
34 }
35
36 # remove anchor from absolute URLs
37 $isbn =~ s/#.+$//;
38
39 $isbn .= ".html";
40
41 # anchor
42 if ($href =~ m/(#.+)$/) {
43 $isbn .= $1;
44 }
45
46
47 return $pre.$mode.$isbn.$post;
48 }
49
50 $outfile = xmlid2file($infile);
51 my $html;
52
53 open(IN,"$infile") || die "$infile: $!";
54 while(<IN>) {
55 chomp;
56 chomp;
57 $html .= $_."\n";
58 }
59 close(IN);
60
61 $html =~ s,(<title>)O'Reilly Network Safari Bookshelf\s+-\s+,$1,gsi || die "$infile: title";
62
63 $html =~ s,<body leftmargin="0" topmargin="0" marginwidth="0" marginheight="0">,<body leftmargin="10" topmargin="10" marginwidth="10" marginheight="10">,s || die "$infile: margins";
64 $html =~ s,<a name="toppage">.*<!--Copyright.*?-->,,s || die "$infile: surround layout";
65
66 $html =~ s,<td valign="top" class="v2">.*?(<td valign="top" class="v2" align="right">),$1,s || die "$infile: top buttons";
67 $html =~ s,<td valign="top" class="v2"><a target="_new".*?(<td valign="top" class="v2" align="right">),$1,s || warn "bottom buttons";
68
69 $html =~ s,<p><b>URL</b>.*$,</body></html>,s || die "$infile: footer";
70
71 $html =~ s,<!--.+?-->,,gs;
72
73 $html =~ s!(<a\s+[^>]*href=")([^"]+)("[^>]*>)!xmlid2file("$2","$1","$3")!iegs || die "$infile: links";
74
75 $html =~ s!<a target="_new"[^>]*href="http://[^>]+>(.+?)</a>!$1!gs;
76
77 open(OUT,"> $outfile") || die "$outfile: $!";
78 print "$outfile\n";
79 print OUT $html;
80 close(OUT);

  ViewVC Help
Powered by ViewVC 1.1.26