/[perl]/mirror_cpan.pl
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /mirror_cpan.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.3 - (hide annotations)
Wed Aug 6 17:55:19 2003 UTC (20 years, 8 months ago) by dpavlin
Branch: MAIN
Changes since 1.2: +4 -1 lines
File MIME type: text/plain
support -v and -d switch

1 dpavlin 1.1 #!/usr/bin/perl -w
2     use strict;
3     $|++;
4    
5     # Mirror CPAN latest archive. Based on article "Mirroring your own mini-CPAN"
6     # by Randal L. Schwartz for Linux Magazine Column 42 (Nov 2002) available on
7     # http://www.stonehenge.com/merlyn/LinuxMag/col42.html
8     # http://www.stonehenge.com/merlyn/LinuxMag/col42.listing.txt
9    
10     # TODO:
11     # - support for ZIP archives (and fix .tar.gz cludges)
12     # - add version requirement for Archive::Tar (0.22 has a bug which
13     # prevents it to extract some tars)
14    
15     ### CONFIG
16    
17     #my $REMOTE = "http://ftp.linux.hr/CPAN/";
18 dpavlin 1.3 #my $REMOTE = "http://www.cpan.org/";
19     my $REMOTE = "http://cpan.pliva.hr/";
20 dpavlin 1.1 # my $REMOTE = "http://fi.cpan.org/";
21     # my $REMOTE = "http://au.cpan.org/";
22     # my $REMOTE = "file://Users/merlyn/MIRROR/CPAN/";
23    
24     ## warning: unknown files below this dir are deleted!
25 dpavlin 1.2 #my $LOCAL = "/mirrors/cpan/CPAN/";
26     my $LOCAL = "/rest/cpan/CPAN/";
27 dpavlin 1.1
28     my $TRACE = 0;
29 dpavlin 1.3 $TRACE = 1 if grep(/-v/,@ARGV);
30     $TRACE = 1 if grep(/-d/,@ARGV);
31 dpavlin 1.1
32     ### END CONFIG
33    
34     ## core -
35     use File::Path qw(mkpath);
36     use File::Basename qw(dirname);
37     use File::Spec::Functions qw(catfile devnull);
38     use File::Find qw(find);
39    
40     ## LWP -
41     use URI ();
42     use LWP::Simple qw(mirror RC_OK RC_NOT_MODIFIED);
43    
44     ## Compress::Zlib -
45     use Compress::Zlib qw(gzopen $gzerrno);
46    
47     ## Archive::Tar -
48     use Archive::Tar qw();
49    
50     ## first, get index files
51     my_mirror($_) for qw(
52     authors/01mailrc.txt.gz
53     modules/02packages.details.txt.gz
54     modules/03modlist.data.gz
55     );
56    
57     ## now walk the packages list
58     my $details = catfile($LOCAL, qw(modules 02packages.details.txt.gz));
59     my $gz = gzopen($details, "rb") or die "Cannot open details: $gzerrno";
60     my $inheader = 1;
61     while ($gz->gzreadline($_) > 0) {
62     if ($inheader) {
63     $inheader = 0 unless /\S/;
64     next;
65     }
66    
67     my ($module, $version, $path) = split;
68     next if $path =~ m{/perl-5}; # skip Perl distributions
69     my_mirror("authors/id/$path", 1);
70     }
71    
72     ## finally, clean the files we didn't stick there
73     clean_unmirrored();
74    
75     exit 0;
76    
77     BEGIN {
78     ## %mirrored tracks the already done, keyed by filename
79     ## 1 = local-checked, 2 = remote-mirrored
80     my %mirrored;
81    
82     sub my_mirror {
83     my $path = shift; # partial URL
84     my $skip_if_present = shift; # true/false
85    
86     my $remote_uri = URI->new_abs($path, $REMOTE)->as_string; # full URL
87     my $local_file = catfile($LOCAL, split "/", $path); # native absolute file
88     my $checksum_might_be_up_to_date = 1;
89    
90     if ($skip_if_present and -f $local_file) {
91     ## upgrade to checked if not already
92     $mirrored{$local_file} = 1 unless $mirrored{$local_file};
93     } elsif (($mirrored{$local_file} || 0) < 2) {
94     ## upgrade to full mirror
95     $mirrored{$local_file} = 2;
96    
97     mkpath(dirname($local_file), $TRACE, 0711);
98     print $path if $TRACE;
99     my $status = mirror($remote_uri, $local_file);
100    
101     if ($status == RC_OK) {
102     $checksum_might_be_up_to_date = 0;
103     print " ... updated\n" if $TRACE;
104     } elsif ($status != RC_NOT_MODIFIED) {
105     warn "\n$remote_uri: $status\n";
106     return;
107     } else {
108     print " ... up to date\n" if $TRACE;
109     }
110     }
111    
112     if ($path =~ m{^authors/id}) { # maybe fetch CHECKSUMS
113     my $checksum_path =
114     URI->new_abs("CHECKSUMS", $remote_uri)->rel($REMOTE);
115     if ($path ne $checksum_path) {
116     my_mirror($checksum_path, $checksum_might_be_up_to_date);
117     }
118     }
119     }
120    
121     sub clean_unmirrored {
122     find sub {
123     return if /\.readme$/; # don't erase readme files
124     check_readme($File::Find::name);
125     return unless -f and not $mirrored{$File::Find::name};
126     print "$File::Find::name ... removed\n" if $TRACE;
127     unlink $_ or warn "Cannot remove $File::Find::name: $!";
128     my $path = $File::Find::name;
129 dpavlin 1.2 if ($path =~ s/(\.tar\.gz|\.tgz)/.readme/g && -f $path) {
130 dpavlin 1.1 # only if we erase archive also!
131     unlink $path or warn "Cannot remove $path: $!";
132     }
133     }, $LOCAL;
134     }
135    
136     sub check_readme {
137    
138     my $path = shift;
139     # fixup some things
140     my $readme_path = $path;
141 dpavlin 1.2 $readme_path =~ s/\.(tar\.gz|\.tgz)/.readme/g || return; # just .tar.gz is supported!
142 dpavlin 1.1
143     my $at = Archive::Tar->new($path) or die "Archive::Tar failed on $path\n";
144    
145     if (! -f $readme_path) {
146     # create readme file
147     my @readmes = sort grep m{^[^/]+/README\z}, $at->list_files();
148     my $readme;
149    
150     if ($readme = shift @readmes) {
151     open(R, "> $readme_path") || die "Cannot create $readme_path: $!";
152     print R $at->get_content($readme);
153     close(R);
154    
155     print "$readme_path ... created\n" if $TRACE;
156    
157     } else {
158    
159     $readme_path =~ s/^.+\/(.+)$/$1/;
160     print "can't find readme for $readme_path ...\n" if $TRACE;
161    
162     }
163    
164     }
165     }
166    
167    
168     }

  ViewVC Help
Powered by ViewVC 1.1.26