/[perl]/mirror_cpan.pl
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /mirror_cpan.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.5 - (show annotations)
Tue Jan 20 19:48:05 2004 UTC (20 years, 2 months ago) by dpavlin
Branch: MAIN
Changes since 1.4: +12 -7 lines
File MIME type: text/plain
create gziped readme files (to save disk space)

1 #!/usr/bin/perl -w
2 use strict;
3 $|++;
4
5 # Mirror CPAN latest archive. Based on article "Mirroring your own mini-CPAN"
6 # by Randal L. Schwartz for Linux Magazine Column 42 (Nov 2002) available on
7 # http://www.stonehenge.com/merlyn/LinuxMag/col42.html
8 # http://www.stonehenge.com/merlyn/LinuxMag/col42.listing.txt
9
10 # TODO:
11 # - support for ZIP archives (and fix .tar.gz cludges)
12 # - add version requirement for Archive::Tar (0.22 has a bug which
13 # prevents it to extract some tars)
14
15 ### CONFIG
16
17 #my $REMOTE = "http://ftp.linux.hr/CPAN/";
18 #my $REMOTE = "http://www.cpan.org/";
19 my $REMOTE = "http://cpan.pliva.hr/";
20 # my $REMOTE = "http://fi.cpan.org/";
21 # my $REMOTE = "http://au.cpan.org/";
22 # my $REMOTE = "file://Users/merlyn/MIRROR/CPAN/";
23
24 ## warning: unknown files below this dir are deleted!
25 #my $LOCAL = "/mirrors/cpan/CPAN/";
26 my $LOCAL = "/rest/cpan/CPAN/";
27
28 my $TRACE = 0;
29
30 ### END CONFIG
31
32 ## core -
33 use File::Path qw(mkpath);
34 use File::Basename qw(dirname);
35 use File::Spec::Functions qw(catfile devnull);
36 use File::Find qw(find);
37 use Getopt::Long;
38 use IO::Zlib;
39
40 ## LWP -
41 use URI ();
42 use LWP::Simple qw(mirror RC_OK RC_NOT_MODIFIED);
43
44 ## Compress::Zlib -
45 use Compress::Zlib qw(gzopen $gzerrno);
46
47 ## Archive::Tar -
48 use Archive::Tar qw();
49
50 ## process command-line arguments
51 my $result = GetOptions(
52 "local=s" => \$LOCAL,
53 "remote=s" => \$REMOTE,
54 "verbose!" => \$TRACE,
55 "debug!" => \$TRACE
56 );
57
58 print "local path: $LOCAL\nremote URI: $REMOTE\n" if ($TRACE);
59
60 ## first, get index files
61 my_mirror($_) for qw(
62 authors/01mailrc.txt.gz
63 modules/02packages.details.txt.gz
64 modules/03modlist.data.gz
65 );
66
67 ## now walk the packages list
68 my $details = catfile($LOCAL, qw(modules 02packages.details.txt.gz));
69 my $gz = gzopen($details, "rb") or die "Cannot open details: $gzerrno";
70 my $inheader = 1;
71 while ($gz->gzreadline($_) > 0) {
72 if ($inheader) {
73 $inheader = 0 unless /\S/;
74 next;
75 }
76
77 my ($module, $version, $path) = split;
78 next if $path =~ m{/perl-5}; # skip Perl distributions
79 my_mirror("authors/id/$path", 1);
80 }
81
82 ## finally, clean the files we didn't stick there
83 clean_unmirrored();
84
85 exit 0;
86
87 BEGIN {
88 ## %mirrored tracks the already done, keyed by filename
89 ## 1 = local-checked, 2 = remote-mirrored
90 my %mirrored;
91
92 sub my_mirror {
93 my $path = shift; # partial URL
94 my $skip_if_present = shift; # true/false
95
96 my $remote_uri = URI->new_abs($path, $REMOTE)->as_string; # full URL
97 my $local_file = catfile($LOCAL, split "/", $path); # native absolute file
98 my $checksum_might_be_up_to_date = 1;
99
100 if ($skip_if_present and -f $local_file) {
101 ## upgrade to checked if not already
102 $mirrored{$local_file} = 1 unless $mirrored{$local_file};
103 } elsif (($mirrored{$local_file} || 0) < 2) {
104 ## upgrade to full mirror
105 $mirrored{$local_file} = 2;
106
107 mkpath(dirname($local_file), $TRACE, 0711);
108 print $path if $TRACE;
109 my $status = mirror($remote_uri, $local_file);
110
111 if ($status == RC_OK) {
112 $checksum_might_be_up_to_date = 0;
113 print " ... updated\n" if $TRACE;
114 } elsif ($status != RC_NOT_MODIFIED) {
115 warn "\n$remote_uri: $status\n";
116 return;
117 } else {
118 print " ... up to date\n" if $TRACE;
119 }
120 }
121
122 if ($path =~ m{^authors/id}) { # maybe fetch CHECKSUMS
123 my $checksum_path =
124 URI->new_abs("CHECKSUMS", $remote_uri)->rel($REMOTE);
125 if ($path ne $checksum_path) {
126 my_mirror($checksum_path, $checksum_might_be_up_to_date);
127 }
128 }
129 }
130
131 sub clean_unmirrored {
132 find sub {
133 return if /\.readme.gz$/; # don't erase readme files
134 check_readme($File::Find::name);
135 return unless -f and not $mirrored{$File::Find::name};
136 print "$File::Find::name ... removed\n" if $TRACE;
137 unlink $_ or warn "Cannot remove $File::Find::name: $!";
138 my $path = $File::Find::name;
139 if ($path =~ s/(\.tar\.gz|\.tgz)/.readme.gz/g && -f $path) {
140 # only if we erase archive also!
141 unlink $path or warn "Cannot remove $path: $!";
142 }
143 }, $LOCAL;
144 }
145
146 sub check_readme {
147
148 my $path = shift;
149 # fixup some things
150 my $readme_path = $path;
151 $readme_path =~ s/\.(tar\.gz|\.tgz)/.readme.gz/g || return; # just .tar.gz is supported!
152
153 my $at = Archive::Tar->new($path) or die "Archive::Tar failed on $path\n";
154
155 if (! -f $readme_path) {
156 # create readme file
157 my @readmes = sort grep m{^[^/]+/README\z}i, $at->list_files();
158 my $readme;
159
160 if ($readme = shift @readmes) {
161 my $fh = IO::Zlib->new($readme_path, "wb");
162 if (defined $fh) {
163 print $fh $at->get_content($readme);
164 $fh->close;
165 } else {
166 die "Cannot create $readme_path: $!";
167 }
168
169 print "$readme_path ... created\n" if $TRACE;
170
171 } else {
172
173 $readme_path =~ s/^.+\/(.+)$/$1/;
174 print "can't find readme for $readme_path ...\n" if $TRACE;
175
176 }
177
178 }
179 }
180
181
182 }

  ViewVC Help
Powered by ViewVC 1.1.26