1 |
dpavlin |
1.1 |
#!/usr/bin/perl -w |
2 |
|
|
use strict; |
3 |
|
|
$|++; |
4 |
|
|
|
5 |
|
|
# Mirror CPAN latest archive. Based on article "Mirroring your own mini-CPAN" |
6 |
|
|
# by Randal L. Schwartz for Linux Magazine Column 42 (Nov 2002) available on |
7 |
|
|
# http://www.stonehenge.com/merlyn/LinuxMag/col42.html |
8 |
|
|
# http://www.stonehenge.com/merlyn/LinuxMag/col42.listing.txt |
9 |
|
|
|
10 |
|
|
# TODO: |
11 |
|
|
# - support for ZIP archives (and fix .tar.gz cludges) |
12 |
|
|
# - add version requirement for Archive::Tar (0.22 has a bug which |
13 |
|
|
# prevents it to extract some tars) |
14 |
|
|
|
15 |
|
|
### CONFIG |
16 |
|
|
|
17 |
|
|
#my $REMOTE = "http://ftp.linux.hr/CPAN/"; |
18 |
dpavlin |
1.3 |
#my $REMOTE = "http://www.cpan.org/"; |
19 |
|
|
my $REMOTE = "http://cpan.pliva.hr/"; |
20 |
dpavlin |
1.1 |
# my $REMOTE = "http://fi.cpan.org/"; |
21 |
|
|
# my $REMOTE = "http://au.cpan.org/"; |
22 |
|
|
# my $REMOTE = "file://Users/merlyn/MIRROR/CPAN/"; |
23 |
|
|
|
24 |
|
|
## warning: unknown files below this dir are deleted! |
25 |
dpavlin |
1.2 |
#my $LOCAL = "/mirrors/cpan/CPAN/"; |
26 |
|
|
my $LOCAL = "/rest/cpan/CPAN/"; |
27 |
dpavlin |
1.1 |
|
28 |
|
|
my $TRACE = 0; |
29 |
dpavlin |
1.3 |
$TRACE = 1 if grep(/-v/,@ARGV); |
30 |
|
|
$TRACE = 1 if grep(/-d/,@ARGV); |
31 |
dpavlin |
1.1 |
|
32 |
|
|
### END CONFIG |
33 |
|
|
|
34 |
|
|
## core - |
35 |
|
|
use File::Path qw(mkpath); |
36 |
|
|
use File::Basename qw(dirname); |
37 |
|
|
use File::Spec::Functions qw(catfile devnull); |
38 |
|
|
use File::Find qw(find); |
39 |
|
|
|
40 |
|
|
## LWP - |
41 |
|
|
use URI (); |
42 |
|
|
use LWP::Simple qw(mirror RC_OK RC_NOT_MODIFIED); |
43 |
|
|
|
44 |
|
|
## Compress::Zlib - |
45 |
|
|
use Compress::Zlib qw(gzopen $gzerrno); |
46 |
|
|
|
47 |
|
|
## Archive::Tar - |
48 |
|
|
use Archive::Tar qw(); |
49 |
|
|
|
50 |
|
|
## first, get index files |
51 |
|
|
my_mirror($_) for qw( |
52 |
|
|
authors/01mailrc.txt.gz |
53 |
|
|
modules/02packages.details.txt.gz |
54 |
|
|
modules/03modlist.data.gz |
55 |
|
|
); |
56 |
|
|
|
57 |
|
|
## now walk the packages list |
58 |
|
|
my $details = catfile($LOCAL, qw(modules 02packages.details.txt.gz)); |
59 |
|
|
my $gz = gzopen($details, "rb") or die "Cannot open details: $gzerrno"; |
60 |
|
|
my $inheader = 1; |
61 |
|
|
while ($gz->gzreadline($_) > 0) { |
62 |
|
|
if ($inheader) { |
63 |
|
|
$inheader = 0 unless /\S/; |
64 |
|
|
next; |
65 |
|
|
} |
66 |
|
|
|
67 |
|
|
my ($module, $version, $path) = split; |
68 |
|
|
next if $path =~ m{/perl-5}; # skip Perl distributions |
69 |
|
|
my_mirror("authors/id/$path", 1); |
70 |
|
|
} |
71 |
|
|
|
72 |
|
|
## finally, clean the files we didn't stick there |
73 |
|
|
clean_unmirrored(); |
74 |
|
|
|
75 |
|
|
exit 0; |
76 |
|
|
|
77 |
|
|
BEGIN { |
78 |
|
|
## %mirrored tracks the already done, keyed by filename |
79 |
|
|
## 1 = local-checked, 2 = remote-mirrored |
80 |
|
|
my %mirrored; |
81 |
|
|
|
82 |
|
|
sub my_mirror { |
83 |
|
|
my $path = shift; # partial URL |
84 |
|
|
my $skip_if_present = shift; # true/false |
85 |
|
|
|
86 |
|
|
my $remote_uri = URI->new_abs($path, $REMOTE)->as_string; # full URL |
87 |
|
|
my $local_file = catfile($LOCAL, split "/", $path); # native absolute file |
88 |
|
|
my $checksum_might_be_up_to_date = 1; |
89 |
|
|
|
90 |
|
|
if ($skip_if_present and -f $local_file) { |
91 |
|
|
## upgrade to checked if not already |
92 |
|
|
$mirrored{$local_file} = 1 unless $mirrored{$local_file}; |
93 |
|
|
} elsif (($mirrored{$local_file} || 0) < 2) { |
94 |
|
|
## upgrade to full mirror |
95 |
|
|
$mirrored{$local_file} = 2; |
96 |
|
|
|
97 |
|
|
mkpath(dirname($local_file), $TRACE, 0711); |
98 |
|
|
print $path if $TRACE; |
99 |
|
|
my $status = mirror($remote_uri, $local_file); |
100 |
|
|
|
101 |
|
|
if ($status == RC_OK) { |
102 |
|
|
$checksum_might_be_up_to_date = 0; |
103 |
|
|
print " ... updated\n" if $TRACE; |
104 |
|
|
} elsif ($status != RC_NOT_MODIFIED) { |
105 |
|
|
warn "\n$remote_uri: $status\n"; |
106 |
|
|
return; |
107 |
|
|
} else { |
108 |
|
|
print " ... up to date\n" if $TRACE; |
109 |
|
|
} |
110 |
|
|
} |
111 |
|
|
|
112 |
|
|
if ($path =~ m{^authors/id}) { # maybe fetch CHECKSUMS |
113 |
|
|
my $checksum_path = |
114 |
|
|
URI->new_abs("CHECKSUMS", $remote_uri)->rel($REMOTE); |
115 |
|
|
if ($path ne $checksum_path) { |
116 |
|
|
my_mirror($checksum_path, $checksum_might_be_up_to_date); |
117 |
|
|
} |
118 |
|
|
} |
119 |
|
|
} |
120 |
|
|
|
121 |
|
|
sub clean_unmirrored { |
122 |
|
|
find sub { |
123 |
|
|
return if /\.readme$/; # don't erase readme files |
124 |
|
|
check_readme($File::Find::name); |
125 |
|
|
return unless -f and not $mirrored{$File::Find::name}; |
126 |
|
|
print "$File::Find::name ... removed\n" if $TRACE; |
127 |
|
|
unlink $_ or warn "Cannot remove $File::Find::name: $!"; |
128 |
|
|
my $path = $File::Find::name; |
129 |
dpavlin |
1.2 |
if ($path =~ s/(\.tar\.gz|\.tgz)/.readme/g && -f $path) { |
130 |
dpavlin |
1.1 |
# only if we erase archive also! |
131 |
|
|
unlink $path or warn "Cannot remove $path: $!"; |
132 |
|
|
} |
133 |
|
|
}, $LOCAL; |
134 |
|
|
} |
135 |
|
|
|
136 |
|
|
sub check_readme { |
137 |
|
|
|
138 |
|
|
my $path = shift; |
139 |
|
|
# fixup some things |
140 |
|
|
my $readme_path = $path; |
141 |
dpavlin |
1.2 |
$readme_path =~ s/\.(tar\.gz|\.tgz)/.readme/g || return; # just .tar.gz is supported! |
142 |
dpavlin |
1.1 |
|
143 |
|
|
my $at = Archive::Tar->new($path) or die "Archive::Tar failed on $path\n"; |
144 |
|
|
|
145 |
|
|
if (! -f $readme_path) { |
146 |
|
|
# create readme file |
147 |
|
|
my @readmes = sort grep m{^[^/]+/README\z}, $at->list_files(); |
148 |
|
|
my $readme; |
149 |
|
|
|
150 |
|
|
if ($readme = shift @readmes) { |
151 |
|
|
open(R, "> $readme_path") || die "Cannot create $readme_path: $!"; |
152 |
|
|
print R $at->get_content($readme); |
153 |
|
|
close(R); |
154 |
|
|
|
155 |
|
|
print "$readme_path ... created\n" if $TRACE; |
156 |
|
|
|
157 |
|
|
} else { |
158 |
|
|
|
159 |
|
|
$readme_path =~ s/^.+\/(.+)$/$1/; |
160 |
|
|
print "can't find readme for $readme_path ...\n" if $TRACE; |
161 |
|
|
|
162 |
|
|
} |
163 |
|
|
|
164 |
|
|
} |
165 |
|
|
} |
166 |
|
|
|
167 |
|
|
|
168 |
|
|
} |