/[safari]/filter.pl
This is a repository of my old source code, which is no longer updated. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /filter.pl

Parent Directory | Revision Log


Revision 1.1.1.1 - (hide annotations) (vendor branch)
Sun Dec 14 19:11:30 2003 UTC (20 years, 4 months ago) by dpavlin
Branch: foo
CVS Tags: bar0
Changes since 1.1: +0 -0 lines
File MIME type: text/plain
initial import (working? :-)

#!/usr/bin/perl -w

use strict;

# Saved page to clean up: the single required command-line argument.
# A missing, empty or "0" argument aborts with the usage message.
my $infile = shift @ARGV;
die "$0 [filename]" unless $infile;

# Name of the file the cleaned-up page is written to; assigned further
# down from the input name.
my $outfile;
# Translate a link carrying an "xmlid" query parameter into the name of
# the local HTML file for that content.
#
# Arguments:
#   $href - link target (or file name) to translate
#   $pre  - optional text placed in front of the result (default '')
#   $post - optional text placed after the result (default '')
#
# Returns $pre . "<mode>_<xmlid>[_<view>].html[#anchor]" . $post, where
# "/" and "%2f" in the xmlid are turned into "_" and <mode> falls back
# to "section".  If $href has no xmlid parameter, a "skipping" notice
# is printed to STDERR and $pre.$href.$post is returned with $href
# unchanged.
sub xmlid2file {
    my ($href, $pre, $post) = @_;

    # Default the optional wrappers.  An explicit defined-check replaces
    # the former "|= ''", which is a bitwise OR and only behaved like a
    # default because OR-ing a string with '' leaves it unchanged.
    $pre  = '' unless defined $pre;
    $post = '' unless defined $post;

    # The xmlid value runs up to the next '&' or to the end of the link.
    my ($isbn) = $href =~ m/xmlid=([^&]+)(?:&|$)/;
    unless (defined $isbn) {
        print STDERR "skipping $href\n";
        return $pre.$href.$post;
    }

    # Slashes, literal or URL-encoded, must not end up in a file name.
    $isbn =~ s!%2f!_!gi;
    $isbn =~ s!/!_!g;

    # NOTE(review): mode= and view= are only recognised when another
    # parameter follows (trailing '&'); as the last parameter they are
    # ignored.  Kept as-is so generated file names do not change.
    my $mode = ($href =~ m/mode=([^&]+)&/) ? $1 : "section";
    $mode .= "_";

    if ($href =~ m/view=([^&]+)&/) {
        $isbn .= "_".$1;
    }

    # remove anchor from absolute URLs
    $isbn =~ s/#.+$//;

    $isbn .= ".html";

    # re-attach the anchor after the file extension
    if ($href =~ m/(#.+)$/) {
        $isbn .= $1;
    }

    return $pre.$mode.$isbn.$post;
}
49    
# Derive the output file name from the input name, then load the page.
# If the input name has no xmlid, xmlid2file() returns it unchanged and
# the input file is overwritten in place.
$outfile = xmlid2file($infile);
my $html = '';

# Three-argument open with a lexical handle, so characters in the file
# name can never be taken for an open mode or a pipe.
open(my $in, '<', $infile) or die "$infile: $!";
while (my $line = <$in>) {
    # Re-terminate every line with "\n", including a final line that
    # had no newline of its own.
    chomp $line;
    $html .= $line."\n";
}
close($in);

# Drop the bookshelf prefix from the page title.
$html =~ s,(<title>)O'Reilly Network Safari Bookshelf\s+-\s+,$1,gsi || die "$infile: title";

# Give the body a 10-pixel margin instead of none.
$html =~ s,<body leftmargin="0" topmargin="0" marginwidth="0" marginheight="0">,<body leftmargin="10" topmargin="10" marginwidth="10" marginheight="10">,s || die "$infile: margins";
# Cut everything from the "toppage" anchor through the Copyright comment
# (greedy: up to the last such comment).
$html =~ s,<a name="toppage">.*<!--Copyright.*?-->,,s || die "$infile: surround layout";

# Remove the left-hand cell in front of the right-aligned cell: once
# unconditionally (top), once for the cell starting with a "_new" link
# (bottom).  A missing bottom cell is only a warning.
$html =~ s,<td valign="top" class="v2">.*?(<td valign="top" class="v2" align="right">),$1,s || die "$infile: top buttons";
$html =~ s,<td valign="top" class="v2"><a target="_new".*?(<td valign="top" class="v2" align="right">),$1,s || warn "bottom buttons";

# Replace everything from the "URL" paragraph to the end of the page
# with the closing tags.
$html =~ s,<p><b>URL</b>.*$,</body></html>,s || die "$infile: footer";

# Strip all remaining HTML comments.
$html =~ s,<!--.+?-->,,gs;

# Rewrite every link target through xmlid2file(); the text before and
# after the href value is passed along as $pre/$post.
$html =~ s!(<a\s+[^>]*href=")([^"]+)("[^>]*>)!xmlid2file("$2","$1","$3")!iegs || die "$infile: links";

# Unwrap "_new"-window links to http:// URLs, keeping only their text.
$html =~ s!<a target="_new"[^>]*href="http://[^>]+>(.+?)</a>!$1!gs;

open(my $out, '>', $outfile) or die "$outfile: $!";
print "$outfile\n";
# Buffered write errors may only surface at close, so check both calls.
print {$out} $html or die "$outfile: $!";
close($out) or die "$outfile: $!";

  ViewVC Help
Powered by ViewVC 1.1.26