/[perl]/mirror_cpan.pl
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /mirror_cpan.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.5 - (hide annotations)
Tue Jan 20 19:48:05 2004 UTC (20 years, 2 months ago) by dpavlin
Branch: MAIN
Changes since 1.4: +12 -7 lines
File MIME type: text/plain
create gziped readme files (to save disk space)

1 dpavlin 1.1 #!/usr/bin/perl -w
2     use strict;
3     $|++;
4    
5     # Mirror CPAN latest archive. Based on article "Mirroring your own mini-CPAN"
6     # by Randal L. Schwartz for Linux Magazine Column 42 (Nov 2002) available on
7     # http://www.stonehenge.com/merlyn/LinuxMag/col42.html
8     # http://www.stonehenge.com/merlyn/LinuxMag/col42.listing.txt
9    
10     # TODO:
11     # - support for ZIP archives (and fix .tar.gz cludges)
12     # - add version requirement for Archive::Tar (0.22 has a bug which
13     # prevents it to extract some tars)
14    
15     ### CONFIG
16    
17     #my $REMOTE = "http://ftp.linux.hr/CPAN/";
18 dpavlin 1.3 #my $REMOTE = "http://www.cpan.org/";
19     my $REMOTE = "http://cpan.pliva.hr/";
20 dpavlin 1.1 # my $REMOTE = "http://fi.cpan.org/";
21     # my $REMOTE = "http://au.cpan.org/";
22     # my $REMOTE = "file://Users/merlyn/MIRROR/CPAN/";
23    
24     ## warning: unknown files below this dir are deleted!
25 dpavlin 1.2 #my $LOCAL = "/mirrors/cpan/CPAN/";
26     my $LOCAL = "/rest/cpan/CPAN/";
27 dpavlin 1.1
28     my $TRACE = 0;
29    
30     ### END CONFIG
31    
32     ## core -
33     use File::Path qw(mkpath);
34     use File::Basename qw(dirname);
35     use File::Spec::Functions qw(catfile devnull);
36     use File::Find qw(find);
37 dpavlin 1.4 use Getopt::Long;
38 dpavlin 1.5 use IO::Zlib;
39 dpavlin 1.1
40     ## LWP -
41     use URI ();
42     use LWP::Simple qw(mirror RC_OK RC_NOT_MODIFIED);
43    
44     ## Compress::Zlib -
45     use Compress::Zlib qw(gzopen $gzerrno);
46    
47     ## Archive::Tar -
48     use Archive::Tar qw();
49 dpavlin 1.4
50     ## process command-line arguments
51     my $result = GetOptions(
52     "local=s" => \$LOCAL,
53     "remote=s" => \$REMOTE,
54     "verbose!" => \$TRACE,
55     "debug!" => \$TRACE
56     );
57    
58     print "local path: $LOCAL\nremote URI: $REMOTE\n" if ($TRACE);
59 dpavlin 1.1
60     ## first, get index files
61     my_mirror($_) for qw(
62     authors/01mailrc.txt.gz
63     modules/02packages.details.txt.gz
64     modules/03modlist.data.gz
65     );
66    
67     ## now walk the packages list
68     my $details = catfile($LOCAL, qw(modules 02packages.details.txt.gz));
69     my $gz = gzopen($details, "rb") or die "Cannot open details: $gzerrno";
70     my $inheader = 1;
71     while ($gz->gzreadline($_) > 0) {
72     if ($inheader) {
73     $inheader = 0 unless /\S/;
74     next;
75     }
76    
77     my ($module, $version, $path) = split;
78     next if $path =~ m{/perl-5}; # skip Perl distributions
79     my_mirror("authors/id/$path", 1);
80     }
81    
82     ## finally, clean the files we didn't stick there
83     clean_unmirrored();
84    
85     exit 0;
86    
87     BEGIN {
88     ## %mirrored tracks the already done, keyed by filename
89     ## 1 = local-checked, 2 = remote-mirrored
90     my %mirrored;
91    
92     sub my_mirror {
93     my $path = shift; # partial URL
94     my $skip_if_present = shift; # true/false
95    
96     my $remote_uri = URI->new_abs($path, $REMOTE)->as_string; # full URL
97     my $local_file = catfile($LOCAL, split "/", $path); # native absolute file
98     my $checksum_might_be_up_to_date = 1;
99    
100     if ($skip_if_present and -f $local_file) {
101     ## upgrade to checked if not already
102     $mirrored{$local_file} = 1 unless $mirrored{$local_file};
103     } elsif (($mirrored{$local_file} || 0) < 2) {
104     ## upgrade to full mirror
105     $mirrored{$local_file} = 2;
106    
107     mkpath(dirname($local_file), $TRACE, 0711);
108     print $path if $TRACE;
109     my $status = mirror($remote_uri, $local_file);
110    
111     if ($status == RC_OK) {
112     $checksum_might_be_up_to_date = 0;
113     print " ... updated\n" if $TRACE;
114     } elsif ($status != RC_NOT_MODIFIED) {
115     warn "\n$remote_uri: $status\n";
116     return;
117     } else {
118     print " ... up to date\n" if $TRACE;
119     }
120     }
121    
122     if ($path =~ m{^authors/id}) { # maybe fetch CHECKSUMS
123     my $checksum_path =
124     URI->new_abs("CHECKSUMS", $remote_uri)->rel($REMOTE);
125     if ($path ne $checksum_path) {
126     my_mirror($checksum_path, $checksum_might_be_up_to_date);
127     }
128     }
129     }
130    
131     sub clean_unmirrored {
132     find sub {
133 dpavlin 1.5 return if /\.readme.gz$/; # don't erase readme files
134 dpavlin 1.1 check_readme($File::Find::name);
135     return unless -f and not $mirrored{$File::Find::name};
136     print "$File::Find::name ... removed\n" if $TRACE;
137     unlink $_ or warn "Cannot remove $File::Find::name: $!";
138     my $path = $File::Find::name;
139 dpavlin 1.5 if ($path =~ s/(\.tar\.gz|\.tgz)/.readme.gz/g && -f $path) {
140 dpavlin 1.1 # only if we erase archive also!
141     unlink $path or warn "Cannot remove $path: $!";
142     }
143     }, $LOCAL;
144     }
145    
146     sub check_readme {
147    
148     my $path = shift;
149     # fixup some things
150     my $readme_path = $path;
151 dpavlin 1.5 $readme_path =~ s/\.(tar\.gz|\.tgz)/.readme.gz/g || return; # just .tar.gz is supported!
152 dpavlin 1.1
153     my $at = Archive::Tar->new($path) or die "Archive::Tar failed on $path\n";
154    
155     if (! -f $readme_path) {
156     # create readme file
157 dpavlin 1.5 my @readmes = sort grep m{^[^/]+/README\z}i, $at->list_files();
158 dpavlin 1.1 my $readme;
159    
160     if ($readme = shift @readmes) {
161 dpavlin 1.5 my $fh = IO::Zlib->new($readme_path, "wb");
162     if (defined $fh) {
163     print $fh $at->get_content($readme);
164     $fh->close;
165     } else {
166     die "Cannot create $readme_path: $!";
167     }
168 dpavlin 1.1
169     print "$readme_path ... created\n" if $TRACE;
170    
171     } else {
172    
173     $readme_path =~ s/^.+\/(.+)$/$1/;
174     print "can't find readme for $readme_path ...\n" if $TRACE;
175    
176     }
177    
178     }
179     }
180    
181    
182     }

  ViewVC Help
Powered by ViewVC 1.1.26