/[perl]/mirror_cpan.pl
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /mirror_cpan.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.6 - (hide annotations)
Sat Jan 31 18:25:54 2004 UTC (20 years, 2 months ago) by dpavlin
Branch: MAIN
CVS Tags: HEAD
Changes since 1.5: +19 -4 lines
File MIME type: text/plain
optionally compress readme files

1 dpavlin 1.1 #!/usr/bin/perl -w
2     use strict;
3     $|++;
4    
5     # Mirror CPAN latest archive. Based on article "Mirroring your own mini-CPAN"
6     # by Randal L. Schwartz for Linux Magazine Column 42 (Nov 2002) available on
7     # http://www.stonehenge.com/merlyn/LinuxMag/col42.html
8     # http://www.stonehenge.com/merlyn/LinuxMag/col42.listing.txt
9    
10     # TODO:
11     # - support for ZIP archives (and fix .tar.gz cludges)
12     # - add version requirement for Archive::Tar (0.22 has a bug which
13     # prevents it to extract some tars)
14    
15     ### CONFIG
16    
17     #my $REMOTE = "http://ftp.linux.hr/CPAN/";
18 dpavlin 1.3 #my $REMOTE = "http://www.cpan.org/";
19     my $REMOTE = "http://cpan.pliva.hr/";
20 dpavlin 1.1 # my $REMOTE = "http://fi.cpan.org/";
21     # my $REMOTE = "http://au.cpan.org/";
22     # my $REMOTE = "file://Users/merlyn/MIRROR/CPAN/";
23    
24     ## warning: unknown files below this dir are deleted!
25 dpavlin 1.2 #my $LOCAL = "/mirrors/cpan/CPAN/";
26     my $LOCAL = "/rest/cpan/CPAN/";
27 dpavlin 1.1
28     my $TRACE = 0;
29    
30 dpavlin 1.6 # This may or may not save you some disks space (depending on filesystem
31     # that you use to store CPAN mirror)
32     #
33     # If you want to create gziped readme files, change $readme_ext to
34     # my $readme_ext = '.readme.gz';
35     #
36     # I found out that gziping readme files doesn't save any
37     # space at one server and saves space on my laptop. YMMV
38     my $readme_ext = '.readme.gz';
39    
40 dpavlin 1.1 ### END CONFIG
41    
42     ## core -
43     use File::Path qw(mkpath);
44     use File::Basename qw(dirname);
45     use File::Spec::Functions qw(catfile devnull);
46     use File::Find qw(find);
47 dpavlin 1.4 use Getopt::Long;
48 dpavlin 1.5 use IO::Zlib;
49 dpavlin 1.1
50     ## LWP -
51     use URI ();
52     use LWP::Simple qw(mirror RC_OK RC_NOT_MODIFIED);
53    
54     ## Compress::Zlib -
55     use Compress::Zlib qw(gzopen $gzerrno);
56    
57     ## Archive::Tar -
58     use Archive::Tar qw();
59 dpavlin 1.4
60     ## process command-line arguments
61     my $result = GetOptions(
62     "local=s" => \$LOCAL,
63     "remote=s" => \$REMOTE,
64     "verbose!" => \$TRACE,
65     "debug!" => \$TRACE
66     );
67    
68     print "local path: $LOCAL\nremote URI: $REMOTE\n" if ($TRACE);
69 dpavlin 1.1
70     ## first, get index files
71     my_mirror($_) for qw(
72     authors/01mailrc.txt.gz
73     modules/02packages.details.txt.gz
74     modules/03modlist.data.gz
75     );
76    
77     ## now walk the packages list
78     my $details = catfile($LOCAL, qw(modules 02packages.details.txt.gz));
79     my $gz = gzopen($details, "rb") or die "Cannot open details: $gzerrno";
80     my $inheader = 1;
81     while ($gz->gzreadline($_) > 0) {
82     if ($inheader) {
83     $inheader = 0 unless /\S/;
84     next;
85     }
86    
87     my ($module, $version, $path) = split;
88     next if $path =~ m{/perl-5}; # skip Perl distributions
89     my_mirror("authors/id/$path", 1);
90     }
91    
92     ## finally, clean the files we didn't stick there
93     clean_unmirrored();
94    
95     exit 0;
96    
97     BEGIN {
98     ## %mirrored tracks the already done, keyed by filename
99     ## 1 = local-checked, 2 = remote-mirrored
100     my %mirrored;
101    
102     sub my_mirror {
103     my $path = shift; # partial URL
104     my $skip_if_present = shift; # true/false
105    
106     my $remote_uri = URI->new_abs($path, $REMOTE)->as_string; # full URL
107     my $local_file = catfile($LOCAL, split "/", $path); # native absolute file
108     my $checksum_might_be_up_to_date = 1;
109    
110     if ($skip_if_present and -f $local_file) {
111     ## upgrade to checked if not already
112     $mirrored{$local_file} = 1 unless $mirrored{$local_file};
113     } elsif (($mirrored{$local_file} || 0) < 2) {
114     ## upgrade to full mirror
115     $mirrored{$local_file} = 2;
116    
117     mkpath(dirname($local_file), $TRACE, 0711);
118     print $path if $TRACE;
119     my $status = mirror($remote_uri, $local_file);
120    
121     if ($status == RC_OK) {
122     $checksum_might_be_up_to_date = 0;
123     print " ... updated\n" if $TRACE;
124     } elsif ($status != RC_NOT_MODIFIED) {
125     warn "\n$remote_uri: $status\n";
126     return;
127     } else {
128     print " ... up to date\n" if $TRACE;
129     }
130     }
131    
132     if ($path =~ m{^authors/id}) { # maybe fetch CHECKSUMS
133     my $checksum_path =
134     URI->new_abs("CHECKSUMS", $remote_uri)->rel($REMOTE);
135     if ($path ne $checksum_path) {
136     my_mirror($checksum_path, $checksum_might_be_up_to_date);
137     }
138     }
139     }
140    
141     sub clean_unmirrored {
142     find sub {
143 dpavlin 1.6 return if /${readme_ext}$/; # don't erase readme files
144 dpavlin 1.1 check_readme($File::Find::name);
145     return unless -f and not $mirrored{$File::Find::name};
146     print "$File::Find::name ... removed\n" if $TRACE;
147     unlink $_ or warn "Cannot remove $File::Find::name: $!";
148     my $path = $File::Find::name;
149 dpavlin 1.6 if ($path =~ s/(\.tar\.gz|\.tgz)/${readme_ext}/g && -f $path) {
150 dpavlin 1.1 # only if we erase archive also!
151     unlink $path or warn "Cannot remove $path: $!";
152     }
153     }, $LOCAL;
154     }
155    
156     sub check_readme {
157    
158     my $path = shift;
159     # fixup some things
160     my $readme_path = $path;
161 dpavlin 1.6 $readme_path =~ s/\.(tar\.gz|\.tgz)/${readme_ext}/g || return; # just .tar.gz is supported!
162 dpavlin 1.1
163     my $at = Archive::Tar->new($path) or die "Archive::Tar failed on $path\n";
164    
165     if (! -f $readme_path) {
166     # create readme file
167 dpavlin 1.5 my @readmes = sort grep m{^[^/]+/README\z}i, $at->list_files();
168 dpavlin 1.1 my $readme;
169    
170     if ($readme = shift @readmes) {
171 dpavlin 1.6 my $fh;
172     if ($readme_ext =~ m/\.gz/) {
173     $fh = IO::Zlib->new($readme_path, "wb");
174     } else {
175     $fh = IO::File->new($readme_path, "w");
176     }
177 dpavlin 1.5 if (defined $fh) {
178     print $fh $at->get_content($readme);
179     $fh->close;
180     } else {
181     die "Cannot create $readme_path: $!";
182     }
183 dpavlin 1.1
184     print "$readme_path ... created\n" if $TRACE;
185    
186     } else {
187    
188     $readme_path =~ s/^.+\/(.+)$/$1/;
189     print "can't find readme for $readme_path ...\n" if $TRACE;
190    
191     }
192    
193     }
194     }
195    
196    
197     }

  ViewVC Help
Powered by ViewVC 1.1.26