/[perl]/mirror_cpan.pl
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /mirror_cpan.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.4 - (hide annotations)
Thu Sep 4 14:39:17 2003 UTC (20 years, 7 months ago) by dpavlin
Branch: MAIN
Changes since 1.3: +11 -2 lines
File MIME type: text/plain
added command-line argument

1 dpavlin 1.1 #!/usr/bin/perl -w
2     use strict;
3     $|++;
4    
5     # Mirror CPAN latest archive. Based on article "Mirroring your own mini-CPAN"
6     # by Randal L. Schwartz for Linux Magazine Column 42 (Nov 2002) available on
7     # http://www.stonehenge.com/merlyn/LinuxMag/col42.html
8     # http://www.stonehenge.com/merlyn/LinuxMag/col42.listing.txt
9    
10     # TODO:
11     # - support for ZIP archives (and fix .tar.gz cludges)
12     # - add version requirement for Archive::Tar (0.22 has a bug which
13     # prevents it to extract some tars)
14    
15     ### CONFIG
16    
17     #my $REMOTE = "http://ftp.linux.hr/CPAN/";
18 dpavlin 1.3 #my $REMOTE = "http://www.cpan.org/";
19     my $REMOTE = "http://cpan.pliva.hr/";
20 dpavlin 1.1 # my $REMOTE = "http://fi.cpan.org/";
21     # my $REMOTE = "http://au.cpan.org/";
22     # my $REMOTE = "file://Users/merlyn/MIRROR/CPAN/";
23    
24     ## warning: unknown files below this dir are deleted!
25 dpavlin 1.2 #my $LOCAL = "/mirrors/cpan/CPAN/";
26     my $LOCAL = "/rest/cpan/CPAN/";
27 dpavlin 1.1
28     my $TRACE = 0;
29    
30     ### END CONFIG
31    
32     ## core -
33     use File::Path qw(mkpath);
34     use File::Basename qw(dirname);
35     use File::Spec::Functions qw(catfile devnull);
36     use File::Find qw(find);
37 dpavlin 1.4 use Getopt::Long;
38 dpavlin 1.1
39     ## LWP -
40     use URI ();
41     use LWP::Simple qw(mirror RC_OK RC_NOT_MODIFIED);
42    
43     ## Compress::Zlib -
44     use Compress::Zlib qw(gzopen $gzerrno);
45    
46     ## Archive::Tar -
47     use Archive::Tar qw();
48 dpavlin 1.4
49     ## process command-line arguments
50     my $result = GetOptions(
51     "local=s" => \$LOCAL,
52     "remote=s" => \$REMOTE,
53     "verbose!" => \$TRACE,
54     "debug!" => \$TRACE
55     );
56    
57     print "local path: $LOCAL\nremote URI: $REMOTE\n" if ($TRACE);
58 dpavlin 1.1
59     ## first, get index files
60     my_mirror($_) for qw(
61     authors/01mailrc.txt.gz
62     modules/02packages.details.txt.gz
63     modules/03modlist.data.gz
64     );
65    
66     ## now walk the packages list
67     my $details = catfile($LOCAL, qw(modules 02packages.details.txt.gz));
68     my $gz = gzopen($details, "rb") or die "Cannot open details: $gzerrno";
69     my $inheader = 1;
70     while ($gz->gzreadline($_) > 0) {
71     if ($inheader) {
72     $inheader = 0 unless /\S/;
73     next;
74     }
75    
76     my ($module, $version, $path) = split;
77     next if $path =~ m{/perl-5}; # skip Perl distributions
78     my_mirror("authors/id/$path", 1);
79     }
80    
81     ## finally, clean the files we didn't stick there
82     clean_unmirrored();
83    
84     exit 0;
85    
86     BEGIN {
87     ## %mirrored tracks the already done, keyed by filename
88     ## 1 = local-checked, 2 = remote-mirrored
89     my %mirrored;
90    
91     sub my_mirror {
92     my $path = shift; # partial URL
93     my $skip_if_present = shift; # true/false
94    
95     my $remote_uri = URI->new_abs($path, $REMOTE)->as_string; # full URL
96     my $local_file = catfile($LOCAL, split "/", $path); # native absolute file
97     my $checksum_might_be_up_to_date = 1;
98    
99     if ($skip_if_present and -f $local_file) {
100     ## upgrade to checked if not already
101     $mirrored{$local_file} = 1 unless $mirrored{$local_file};
102     } elsif (($mirrored{$local_file} || 0) < 2) {
103     ## upgrade to full mirror
104     $mirrored{$local_file} = 2;
105    
106     mkpath(dirname($local_file), $TRACE, 0711);
107     print $path if $TRACE;
108     my $status = mirror($remote_uri, $local_file);
109    
110     if ($status == RC_OK) {
111     $checksum_might_be_up_to_date = 0;
112     print " ... updated\n" if $TRACE;
113     } elsif ($status != RC_NOT_MODIFIED) {
114     warn "\n$remote_uri: $status\n";
115     return;
116     } else {
117     print " ... up to date\n" if $TRACE;
118     }
119     }
120    
121     if ($path =~ m{^authors/id}) { # maybe fetch CHECKSUMS
122     my $checksum_path =
123     URI->new_abs("CHECKSUMS", $remote_uri)->rel($REMOTE);
124     if ($path ne $checksum_path) {
125     my_mirror($checksum_path, $checksum_might_be_up_to_date);
126     }
127     }
128     }
129    
130     sub clean_unmirrored {
131     find sub {
132     return if /\.readme$/; # don't erase readme files
133     check_readme($File::Find::name);
134     return unless -f and not $mirrored{$File::Find::name};
135     print "$File::Find::name ... removed\n" if $TRACE;
136     unlink $_ or warn "Cannot remove $File::Find::name: $!";
137     my $path = $File::Find::name;
138 dpavlin 1.2 if ($path =~ s/(\.tar\.gz|\.tgz)/.readme/g && -f $path) {
139 dpavlin 1.1 # only if we erase archive also!
140     unlink $path or warn "Cannot remove $path: $!";
141     }
142     }, $LOCAL;
143     }
144    
145     sub check_readme {
146    
147     my $path = shift;
148     # fixup some things
149     my $readme_path = $path;
150 dpavlin 1.2 $readme_path =~ s/\.(tar\.gz|\.tgz)/.readme/g || return; # just .tar.gz is supported!
151 dpavlin 1.1
152     my $at = Archive::Tar->new($path) or die "Archive::Tar failed on $path\n";
153    
154     if (! -f $readme_path) {
155     # create readme file
156     my @readmes = sort grep m{^[^/]+/README\z}, $at->list_files();
157     my $readme;
158    
159     if ($readme = shift @readmes) {
160     open(R, "> $readme_path") || die "Cannot create $readme_path: $!";
161     print R $at->get_content($readme);
162     close(R);
163    
164     print "$readme_path ... created\n" if $TRACE;
165    
166     } else {
167    
168     $readme_path =~ s/^.+\/(.+)$/$1/;
169     print "can't find readme for $readme_path ...\n" if $TRACE;
170    
171     }
172    
173     }
174     }
175    
176    
177     }

  ViewVC Help
Powered by ViewVC 1.1.26