/[wait]/branches/CPAN/script/cpanwait
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /branches/CPAN/script/cpanwait

Parent Directory Parent Directory | Revision Log Revision Log


Revision 11 - (show annotations)
Fri Apr 28 15:41:10 2000 UTC (23 years, 11 months ago) by unknown
File size: 19070 byte(s)
This commit was manufactured by cvs2svn to create branch 'CPAN'.
1 #!/usr/local/perl5.005_56.Mar06/bin/perl -w
2 ######################### -*- Mode: Perl -*- #########################
3 ##
4 ## $Basename: cpanwait $
5 ## $Revision: 1.7 $
6 ##
7 ## Author : Ulrich Pfeifer
8 ## Created On : Sat Jan 4 18:09:28 1997
9 ##
10 ## Last Modified By : Ulrich Pfeifer
11 ## Last Modified On : Sun Nov 22 18:44:36 1998
12 ##
13 ## Copyright (c) 1996-1997, Ulrich Pfeifer
14 ##
15 ##
16 ######################################################################
17
18 eval 'exec perl -S $0 "$@"'
19 if 0;
20
21
22 use strict;
23
24 use File::Path;
25 use DB_File;
26 use Getopt::Long;
27 use File::Find;
28 use File::Basename;
29 use IO::File;
30
31 require WAIT::Config;
32 require WAIT::Database;
33 require WAIT::Parse::Pod;
34 require WAIT::Document::Tar;
35
36
37 my %OPT = (database => 'DB',
38 dir => $WAIT::Config->{WAIT_home} || '/tmp',
39 table => 'cpan',
40 clean => 0,
41 remove => [],
42 force => 0,
43 cpan => '/usr/src/perl/CPAN/sources',
44 trust_mtime => 1,
45 match => 'authors/id/',
46 test => 0,
47 # cpan => 'ftp://ftp.rz.ruhr-uni-bochum.de/pub/CPAN',
48 cpan => 'ftp://ftp.uni-hamburg.de:/pub/soft/lang/perl/CPAN',
49 keep => '/app/unido-i06/src/share/lang/perl/96a/CPAN/sources',
50 );
51
52 GetOptions(\%OPT,
53 'database=s',
54 'dir=s',
55 'cpan=s',
56 'table=s',
57 'keep=s',
58 'match=s',
59 'clean!',
60 'test=i', # test level 0: normal
61 # 1: don't change db
62 # 2: don't look at archives even
63
64 'remove=s@',
65 'force!', # force indexing even if seen
66 'trust_mtime!', # use mtime instead of version number
67 ) || die "Usage: ...\n";
68
69
70 clean_database(
71 database => $OPT{database},
72 dir => $OPT{dir},
73 table => $OPT{table},
74 ) if $OPT{clean};
75
76 my $db = WAIT::Database->open(
77 name => $OPT{database},
78 'directory' => $OPT{dir},
79 )
80 || WAIT::Database->create(
81 name => $OPT{database},
82 'directory' => $OPT{dir},
83 )
84 or die "Could not open/create database '$OPT{dir}/$OPT{database}': $@";
85
86 my $layout= new WAIT::Parse::Pod;
87
88 my $tb = $db->table(name => $OPT{table})
89 || create_table(db => $db, table => $OPT{table}, layout => $layout);
90
91 # Map e.g. '.../latest' to 'perl'. Used in wanted(). Effects version
92 # considerations. Value *must* match common prefix. Aliasing should be
93 # used if CPAN contains serveral distributions with different name but
94 # same root directory.
95 # We still have a problem if there are different root directories!
96
97 my %ALIAS = (# tar name real (root) name
98 'latest' => 'perl',
99 'perl5db-kit' => 'DB',
100 'SGI-FM' => 'FM',
101 'net-ext' => 'Net',
102 'VelocisSQL' => 'Velocis',
103 'Net-ext' => 'Net',
104 'Curses-DevKit' => 'Cdk',
105 'PostgresPerl' => 'Postgres',
106 'perlpdf' => 'PERLPDF',
107 'Des-perl' => 'Des',
108 'SGI-GL' => 'GL',
109 'DBD-DB2' => 'DB2',
110 );
111 my %NEW_ALIAS; # found in this pass
112
113 # Map module names to pathes. Generated by wanted() doing alisaing.
114 my %ARCHIVE;
115
116 # Map module names to latest version. Generated by wanted()
117 my %VERSION;
118
119
120 # Mapping for modules with common root not matching modules name that
121 # are not aliased. This is just for prefix stripping and not strictly
122 # necessary. Takes effect after version considerations.
123 my %TR = (# tar name root to strip
124 'Net_SSLeay.pm' => 'SSLeay/',
125 'EventDrivenServer' => 'Server/',
126 'bio_lib.pl.' => '',
127 'AlarmCall' => 'Sys/',
128 'Cdk-ext' => 'Cdk/',
129 'Sx' => '\d.\d/',
130 'DumpStack' => 'Devel/',
131 'StatisticsDescriptive'=> 'Statistics/',
132 'Term-Gnuplot' => 'Gnuplot/',
133 'iodbc_ext' => 'iodbc-ext-\d.\d/',
134 'UNIVERSAL' => '',
135 'Term-Query' => 'Query/',
136 'SelfStubber' => 'Devel/',
137 'CallerItem' => 'Devel/',
138 );
139
140 my $DIR = $tb->dir;
141 my $DATA = $tb->dir . "/data";
142 my $LWP;
143
144
145 if (@{$OPT{remove}}) {
146 my $pod;
147 for $pod (@{$OPT{remove}}) {
148 unless (-e $pod) {
149 $pod = "$DIR/$pod";
150 }
151 index_pod(file => $pod, remove => 1) if -f $pod;
152 unlink $pod or warn "Could not unlink '$pod': $!\n";
153 #$tb->sync;
154 }
155 $tb->close;
156 $db->close;
157 exit;
158 }
159
160 # Now get the beef
161 if ($OPT{cpan} =~ /^(http|ftp):/) {
162 $LWP = 1;
163 require LWP::Simple;
164 LWP::Simple->import();
165
166 mkpath($DATA,1,0755) or
167 die "Could not generate '$DATA/': $!"
168 unless -d $DATA;
169
170 if (! -f "$DATA/find-ls.gz" or -M "$DATA/find-ls.gz" > 0.5) {
171 my $status = mirror("$OPT{cpan}/indices/find-ls.gz", "$DATA/find-ls.gz");
172 if ($status != &RC_OK and $status != &RC_NOT_MODIFIED) {
173 # we could use Net:FTP here ...
174 die "Was unable to mirror '$OPT{cpan}/indices/find-ls.gz'\n";
175 }
176 }
177 my $fh = new IO::File "gzip -cd $DATA/find-ls.gz |";
178 die "Could not open 'gzip -cd $DATA/find-ls.gz': !$\n" unless $fh;
179
180 my $line;
181 while (defined ($line = <$fh>)) {
182 chomp($line);
183 my ($mon, $mday, $time, $file, $is_link) = (split ' ', $line)[7..11];
184
185 next if defined $is_link;
186 my $mtime = mtime($mon, $mday, $time);
187
188 $file =~ s:^\./::;
189 ($_) = fileparse($file);
190 $File::Find::name = $file;
191 wanted($mtime);
192 }
193 } else {
194 find(sub {&wanted((stat($_))[9])}, $OPT{cpan});
195 }
196
197 ARCHIVE:
198 for my $tar (sort keys %ARCHIVE) {
199 next if $OPT{match} and $ARCHIVE{$tar} !~ /$OPT{match}/o;
200 my $base = (split /\//, $ARCHIVE{$tar})[-1];
201 my $parent;
202
203 # logging
204 if ($OPT{trust_mtime}) {
205 printf "%-20s %10s %s\t", $tar,
206 substr(scalar(localtime($VERSION{$tar})),0,10), $base;
207 } else {
208 printf "%-20s %10.5f %s\t", $tar, $VERSION{$tar}, $base;
209 }
210
211 # Remember the archive
212 # We should have an extra table for the tar file data ...
213 if (!$OPT{force} and $tb->have(docid => $base)) {
214 print "skipping\n";
215 next ARCHIVE;
216 } else {
217 $parent = $tb->insert(docid => $base,
218 headline => $ARCHIVE{$tar}) unless $OPT{test};
219 print "indexing\n";
220 }
221
222 next ARCHIVE if $OPT{test} > 1;
223
224 my $TAR = myget($tar);
225
226 next ARCHIVE unless $TAR; # not able to fetch it
227
228 my %tar;
229 tie (%tar,
230 'WAIT::Document::Tar',
231 sub { $_[0] =~ /\.(pm|pod|PL)$/ or $_[0] =~ /readme/i},
232 #sub { $_[0] !~ m:/$: },
233 $TAR)
234 or warn "Could not tie '$TAR'\n";
235
236 my $sloppy;
237 my ($key, $val);
238
239 FILE:
240 while (($key, $val) = each %tar) {
241 my $file = fname($key);
242
243 # don't index directories
244 next if $file =~ /\/$/;
245
246 # is it a POD file?
247 next FILE unless $file =~ /readme/i or $val =~ /\n=head/;
248
249 # remove directory prefix
250 unless ($sloppy # no common root
251 or $file =~ s:^\Q$tar\E[^/]*/:: # common root, maybe alias
252 or ($TR{$tar} # common root, not aliased
253 and $file =~ s:^\Q$TR{$tar}\E::)
254 ) {
255 # try to determine an alias
256 warn "Bad directory prefix: '$file'\n";
257 my ($prefix) = split /\//, $file;
258
259 while ($key = (tied %tar)->NEXTKEY) {
260 my $file = fname($key);
261
262 next if $file =~ /\/$/;
263 unless ($file =~ m:^$prefix/: or $file eq $prefix) {
264 warn "Archive contains different prefixes: $prefix,$file\n";
265 $prefix = '';
266 last;
267 }
268 }
269 if ($prefix) {
270 print "Please alias '$tar' to '$prefix' next time!\n";
271 print "See alias table later.\n";
272 $NEW_ALIAS{$tar} = $prefix;
273 $tb->delete_by_key($parent);
274 next ARCHIVE;
275 } else {
276 print "Assuming that tar file name $tar is a valid prefix\n";
277 $sloppy = 1;
278
279 # We may reset too much here! But that this is not exact
280 # science anyway. Maybe we should ignore using 'next ARCHIVE'.
281
282 $key = (tied %tar)->FIRSTKEY;
283 redo FILE;
284 }
285 }
286
287 # remove /lib prefix
288 $file =~ s:^lib/::;
289
290 # generate new path
291 my $path = "$DATA/$tar/$file";
292
293 my ($sbase, $sdir) = fileparse($path);
294 my $fh;
295
296 unless ($OPT{test}) {
297 if (-f $path) {
298 index_pod(file => $path, remove => 1);
299 unlink $path or warn "Could not unlink '$path' $!\n";
300 } elsif (!-d $sdir) {
301 mkpath($sdir,1,0755) or die "Could not mkpath($sdir): $!\n";
302 }
303 $fh = new IO::File "> $path";
304 die "Could not write '$path': $!\n" unless $fh;
305 }
306
307 if ($file =~ /readme|install/i) { # make READMEs verbatim pods
308 $val =~ s/\n/\n /g;
309 $val = "=head1 NAME\n\n$tar $file\n\n=head1 DESCRIPTION\n\n $val"
310 unless $val =~ /^=head/m;
311 } else { # remove non-pod stuff
312 my $nval = $val; $val = '';
313 my $cutting = 1;
314
315 for (split /\n/, $nval) {
316 if (/^=cut|!NO!SUBS!/) {
317 $cutting = 1;
318 } elsif ($cutting and /^=head/) {
319 $cutting = 0;
320 }
321 unless ($cutting) {
322 $val .= $_ . "\n";
323 }
324 }
325 }
326 unless ($OPT{test}) {
327 $fh->print($val);
328 index_pod(file => $path, parent => $parent,
329 text => $val, source => $ARCHIVE{$tar});
330 }
331 }
332
333 if ($LWP and !$OPT{keep}) {
334 unlink $TAR or warn
335 "Could not unlink '$TAR': $!\n";
336 }
337 }
338
339 if (%NEW_ALIAS) {
340 print "\%ALIAS = (\n";
341 for (keys %NEW_ALIAS) {
342 print "\t'$_'\t=> '$NEW_ALIAS{$_}',\n";
343 }
344 print "\t);\n";
345 }
346
347 # we are done
348 $db->close();
349 exit;
350
351 sub fname ($) {
352 my $key = shift;
353 my ($ntar, $file) = split $;, $key;
354
355 # remove leading './' - shudder
356 $file =~ s/^\.\///;
357
358 return($file);
359 }
360
361 sub myget {
362 my $tar = shift;
363 my $TAR;
364
365 if ($LWP) { # fetch the archive
366 if ($OPT{keep}) {
367 $TAR = "$OPT{keep}/$ARCHIVE{$tar}";
368 print "Keeping in '$TAR'\n" unless -e $TAR;
369 my ($base, $path) = fileparse($TAR);
370 unless (-d $path) {
371 mkpath($path,1,0755) or
372 die "Could not mkpath($path)\n";
373 }
374 } else {
375 $TAR = "/tmp/$tar.tar.gz";
376 }
377 unless (-e $TAR) { # lwp mirror seems to fetch ftp: in any case?
378 print "Fetching $OPT{cpan}/$ARCHIVE{$tar}\n";
379 my $status = mirror("$OPT{cpan}/$ARCHIVE{$tar}", $TAR);
380 if ($status != &RC_OK and $status != &RC_NOT_MODIFIED) {
381 warn "Was unable to mirror '$ARCHIVE{$tar}, skipping'\n";
382 return;
383 }
384 }
385 }
386 $TAR;
387 }
388
389 sub index_pod {
390 my %parm = @_;
391 my $did = $parm{file};
392 my $rel_did = $did;
393 my $abs_did = $did;
394
395 if ($rel_did =~ s:$DIR/::) {
396 $abs_did = "$DIR/$rel_did";
397 }
398
399 undef $did;
400
401 # check for both variants
402 if ($tb->have('docid' => $rel_did)) {
403 $did = $rel_did;
404 } elsif ($tb->have('docid' => $abs_did)) {
405 $did = $abs_did;
406 }
407 if ($did) { # have it version
408 if (!$parm{remove}) {
409 warn "duplicate: $did\n";
410 return;
411 }
412 } else { # not seen yet
413 $did = $rel_did;
414 if ($parm{remove}) {
415 print "missing: $did\n";
416 return;
417 }
418 }
419
420 $parm{'text'} ||= WAIT::Document::Find->FETCH($abs_did);
421
422 unless (defined $parm{'text'}) {
423 print "unavailable: $did\n";
424 return;
425 }
426
427 my $record = $layout->split($parm{'text'});
428 $record->{size} = length($parm{'text'});
429 my $headline = $record->{name} || $did;
430
431 $headline =~ s/^$DATA//o; # $did
432 $headline =~ s/\s+/ /g; $headline =~ s/^\s+//;
433
434 printf "%s %s\n", ($parm{remove})?'-':'+', substr($headline,0,70);
435 if ($parm{remove}) {
436 $tb->delete('docid' => $did,
437 headline => $headline,
438 %{$record});
439 } else {
440 $tb->insert('docid' => $did,
441 headline => $headline,
442 source => $parm{source},
443 parent => $parm{parent},
444 %{$record});
445 }
446 }
447
448 # This *must* remove the version in *any* case. It should compute a
449 # resonable version number - but usually mtimes should be used.
450 sub version {
451 local ($_) = @_;
452
453 # remove alpha/beta postfix
454 s/([-_\d])(a|b|alpha|beta|src)$/$1/;
455
456 # jperl1.3@4.019.tar.gz
457 s/@\d.\d+//;
458
459 # oraperl-v2.4-gk.tar.gz
460 s/-v(\d)/$1/;
461
462 # lettered versions - shudder
463 s/([-_\d\.])([a-z])([\d\._])/sprintf "$1%02d$3", ord(lc $2) - ord('a') /ei;
464 s/([-_\d\.])([a-z])$/sprintf "$1%02d", ord(lc $2) - ord('a') /ei;
465
466 # thanks libwww-5b12 ;-)
467 s/(\d+)b/($1-1).'.'/e;
468 s/(\d+)a/($1-2).'.'/e;
469
470 # replace '-pre' by '0.'
471 s/-pre([\.\d])/-0.$1/;
472 s/\.\././g;
473 s/(\d)_(\d)/$1$2/g;
474
475 # chop '[-.]' and thelike
476 s/\W$//;
477
478 # ram's versions Storable-0.4@p
479 s/\@/./;
480
481 if (s/[-_]?(\d+)\.(0\d+)\.(\d+)$//) {
482 return($_, $1 + "0.$2" + $3 / 1000000);
483 } elsif (s/[-_]?(\d+)\.(\d+)\.(\d+)$//) {
484 return($_, $1 + $2/1000 + $3 / 1000000);
485 } elsif (s/[-_]?(\d+\.[\d_]+)$//) {
486 return($_, $1);
487 } elsif (s/[-_]?([\d_]+)$//) {
488 return($_, $1);
489 } elsif (s/-(\d+.\d+)-/-/) { # perl-4.019-ref-guide
490 return($_, $1);
491 } else {
492 if ($_ =~ /\d/) { # smells like an unknown scheme
493 warn "Odd version Numbering: '$File::Find::name'\n";
494 return($_, undef);
495 } else { # assume version 0
496 warn "No version Numbering: '$File::Find::name'\n";
497 return($_, 0);
498 }
499
500 }
501 }
502
503 sub wanted {
504 my $mtime = shift; # called by parse_file_ls();
505
506 return unless /^(.*)\.tar(\.gz|\.Z)$/;
507 my ($archive, $version) = version($1);
508
509 unless (defined $version) {
510 warn "Skipping $1\n";
511 return;
512 }
513
514 # Check for file alias
515 $archive = $ALIAS{$archive} if $ALIAS{$archive};
516
517 # Check for path alias.
518 if ($File::Find::name =~ m(/CPAN/(?:source/)?(.*\Q$archive\E))) {
519 if ($ALIAS{$1}) {
520 $archive = $ALIAS{$1};
521 }
522 }
523
524 if ($OPT{trust_mtime}) {
525 $version = $mtime;
526 } else {
527 $version =~ s/(\d)_/$1/;
528 $version ||= $mtime; # mtime
529 }
530
531 if (!exists $ARCHIVE{$archive}
532 or $VERSION{$archive} < $version) {
533 $ARCHIVE{$archive} = $File::Find::name;
534 $VERSION{$archive} = $version;
535 }
536 }
537
538 sub clean_database {
539 my %parm = @_;
540
541 my $db = WAIT::Database->open(
542 name => $parm{database},
543 'directory' => $parm{dir},
544 )
545 or die "Could not open database '$parm{dir}/$parm{database}': $@";
546 my $tbl = $db->table(name => $parm{table});
547 if ($tbl) {
548 $tbl->drop or
549 die "Could not open table '$parm{tabel}': $@";
550 }
551
552 $db->close;
553 }
554
555 sub create_table {
556 my %parm = @_;
557
558 my $access = bless {}, 'WAIT::Document::Find';
559
560 my $stem = [{
561 'prefix' => ['isotr', 'isolc'],
562 'intervall' => ['isotr', 'isolc'],
563 }, 'isotr', 'isolc', 'split2', 'stop', 'Stem'];
564 my $text = [{
565 'prefix' => ['isotr', 'isolc'],
566 'intervall' => ['isotr', 'isolc'],
567 },
568 'isotr', 'isolc', 'split2', 'stop'];
569 my $sound = ['isotr', 'isolc', 'split2', 'Soundex'],;
570
571 my $tb =
572 $parm{db}->create_table
573 (name => $parm{table},
574 attr => ['docid', 'headline', 'source', 'size', 'parent'],
575 keyset => [['docid']],
576 layout => $parm{layout},
577 access => $access,
578 invindex =>
579 [
580 'name' => $stem,
581 'synopsis' => $stem,
582 'bugs' => $stem,
583 'description' => $stem,
584 'text' => $stem,
585 'environment' => $text,
586 'example' => $text, 'example' => $stem,
587 'author' => $sound, 'author' => $stem,
588 ]
589 );
590 die "Could not create table '$parm{table}'" unless $tb;
591 $tb;
592 }
593
594 my %MON;
595 my $YEAR;
596
597 BEGIN {
598 my $i = 1;
599 for (qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec)) {
600 $MON{$_} = $i++;
601 }
602 $YEAR = (localtime(time))[5];
603 }
604
605 # We could/should use Date::GetDate here
606 use Time::Local;
607 sub mtime {
608 my ($mon, $mday, $time) = @_;
609 my ($hour, $min, $year, $monn) = (0,0);
610
611 if ($time =~ /(\d+):(\d+)/) {
612 ($hour, $min) = ($1, $2);
613 $year = $YEAR;
614 } else {
615 $year = $time;
616 }
617 $monn = $MON{$mon} || $MON{ucfirst lc $mon} || warn "Unknown month: '$mon'";
618 my $guess = timelocal(0,$min,$hour,$mday,$monn-1,$year);
619 if ($guess > time) {
620 $guess = timelocal(0,$min,$hour,$mday,$monn-1,$year-1);
621 }
622 $guess;
623 }
624
625
626 __END__
627 ## ###################################################################
628 ## pod
629 ## ###################################################################
630
631 =head1 NAME
632
633 cpan - generate an WAIT index for CPAN
634
635 =head1 SYNOPSIS
636
637 B<cpan>
638 [B<-clean>] [B<-noclean>]
639 [B<-cpan> I<url or directory>]
640 [B<-database> I<dbname>]
641 [B<-dir> I<directory>]
642 [B<-force>] [B<-noforce>]
643 [B<-keep> I<directory>]
644 [B<-match> I<regexp>]
645 [B<-table> I<table name>]
646 [B<-test> I<level>]
647 [B<-trust_mtime>] [B<-notrust_mtime>]
648
649 =head1 DESCRIPTION
650
651 TBS
652
653 =head1 OPTIONS
654
655 =over 5
656
657 =item B<-clean> / B<-noclean>
658
659 Clean the table befor indexing. Default is B<off>.
660
661 =item B<-cpan> I<url or directory>
662
663 Default directory or URL for indexing. If an URL is given, there
664 currently must be a file F<indices/find-ls.gz> relative to it which
665 contains the output of C<find . -ls | gzip>.
666 Default is F<ftp://ftp.rz.ruhr-uni-bochum.de/pub/CPAN>.
667
668
669 =item B<-database> I<dbname>
670
671 Specify database name. Default is F<DB>.
672
673 =item B<-dir> I<directory>
674
675 Alternate directory were databases are located. Default is the
676 directory specified during configuration of WAIT.
677
678 =item B<-force>
679
680 Force reindexing, even if B<cpan> thinks files are up to date.
681 Default is B<off>
682
683 =item B<-keep> I<directory>
684
685 If fetching from a remote server, keep files in I<directory>. Default is
686 F</app/unido-i06/src/share/lang/perl/96a/CPAN/sources>.
687
688 =item B<-match> I<regexp>
689
690 Limit to patches matching I<regexp>. Default is F<authors/id/>.
691
692 =item B<-table> I<table name>
693
694 Specify an alternate table name. Default is C<cpan>.
695
696 =item B<-test> I<level>
697
698 Set test level, were B<0> means normal operation, B<1> means, don't
699 really index and B<2> means, don't even get archives and examine them.
700
701 =item B<-trust_mtime> / B<-notrust_mtime>
702
703 If B<on>, the files mtimes are used to decide, which version of an
704 archive is the newest. If b<off>, the version extracted is used
705 (beware, there are far more version numbering schemes than B<cpan> can
706 parse).
707
708 =head1 AUTHOR
709
710 Ulrich Pfeifer E<lt>F<pfeifer@ls6.informatik.uni-dortumund.de>E<gt>

Properties

Name Value
cvs2svn:cvs-rev 1.1
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26