/[wait]/trunk/script/cpanwait
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/script/cpanwait

Parent Directory Parent Directory | Revision Log Revision Log


Revision 109 - (show annotations)
Tue Jul 13 17:50:27 2004 UTC (19 years, 9 months ago) by dpavlin
File size: 26352 byte(s)
pod fixes

1 #!/usr/bin/perl -w
2 ######################### -*- Mode: Perl -*- #########################
3 ##
4 ## $Basename: cpanwait $
5 ## $Revision: 1.7 $
6 ##
7 ## Author : Ulrich Pfeifer
8 ## Created On : Sat Jan 4 18:09:28 1997
9 ##
10 ## Last Modified By : Ulrich Pfeifer
11 ## Last Modified On : Sun Nov 22 18:44:36 1998
12 ##
13 ## Copyright (c) 1996-1997, Ulrich Pfeifer
14 ##
15 ##
16 ######################################################################
17
18 use strict;
19
20 use File::Path;
21 use DB_File;
22 use Getopt::Long;
23 use File::Find;
24 use File::Basename;
25 use IO::File;
26 use IO::Zlib;
27
28 #use lib '/data/wait/lib';
29 use blib;
30
31 require WAIT::Config;
32 require WAIT::Database;
33 require WAIT::Parse::Pod;
34 require WAIT::Document::Tar;
35
36 sub fname($);
37
38 # maximum number of archives to index (set to -1 for unlimited)
39 my $max = -1;
40
41 my %OPT = (database => 'DB',
42 dir => $WAIT::Config->{WAIT_home} || '/tmp',
43 table => 'cpan',
44 clean => 0,
45 remove => [],
46 force => 0,
47 # cpan => '/usr/src/perl/CPAN/sources',
48 cpan => '/rest/cpan/CPAN/',
49 trust_mtime => 1,
50 match => 'authors/id/',
51 test => 0,
52 # cpan => 'ftp://ftp.rz.ruhr-uni-bochum.de/pub/CPAN',
53 # cpan => 'ftp://ftp.uni-hamburg.de:/pub/soft/lang/perl/CPAN',
54 keep => '/tmp/CPAN/',
55 );
56
57 GetOptions(\%OPT,
58 'database=s',
59 'dir=s',
60 'cpan=s',
61 'table=s',
62 'keep=s',
63 'match=s',
64 'clean!',
65 'test=i', # test level 0: normal
66 # 1: don't change db
67 # 2: don't look at archives even
68
69 'remove=s@',
70 'force!', # force indexing even if seen
71 'trust_mtime!', # use mtime instead of version number
72 'max=i',
73 'reorg!',
74 ) || die "Usage: ...\n";
75
76 $max ||= $OPT{max};
77
78 clean_database(
79 database => $OPT{database},
80 dir => $OPT{dir},
81 table => $OPT{table},
82 ) if $OPT{clean};
83
84 my $db = WAIT::Database->open(
85 name => $OPT{database},
86 'directory' => $OPT{dir},
87 )
88 || WAIT::Database->create(
89 name => $OPT{database},
90 'directory' => $OPT{dir},
91 )
92 or die "Could not open/create database '$OPT{dir}/$OPT{database}': $@";
93
94 my $layout= new WAIT::Parse::Pod;
95
96 my $tb = $db->table(name => $OPT{table})
97 || create_table(db => $db, table => $OPT{table}, layout => $layout);
98
99 # Map e.g. '.../latest' to 'perl'. Used in wanted(). Effects version
100 # considerations. Value *must* match common prefix. Aliasing should be
101 # used if CPAN contains serveral distributions with different name but
102 # same root directory.
103 # We still have a problem if there are different root directories!
104
105 my %ALIAS = (# tar name real (root) name
106 'Games-Scrabble' => 'Games',
107 'HTML-ParseBrowser' => 'HTML',
108 'iodbc_ext' => 'iodbc-ext-0.1',
109 'sol-inst' => 'Solaris',
110 'WebService-Validator-CSS-223C' => 'WebService-Validator-CSS-W3C-0.02',
111 'MPEG-ID3212Tag' => 'MPEG-ID3v2Tag-0.36',
112 'WebService-GoogleHack' => 'WebService',
113 'Db-Mediasurface-ReadConfig' => 'ReadConfig',
114 'Tie-Array-RestrictUpdates' => 'Tie',
115 'HTML-Lister' => 'HTML',
116 'Net-253950-AsyncZ' => 'Net-Z3950-AsyncZ-0.08',
117 'ChildExit_0' => 'ChildExit-0.1',
118 'Tie-TieConstant' => 'TieConstant.pm',
119 'Crypt-OpenSSL-23509' => 'Crypt-OpenSSL-X509-0.2',
120 'subclustv' => 'blib',
121 'finance-yahooquote' => 'Finance-YahooQuote-0.20',
122 'HPUX-FS' => 'FS',
123 'Business-DE-Konto' => 'Business',
124 'Digest-MD5-124p' => 'Digest-MD5-M4p-0.01',
125 'AKDB_Okewo_de' => 'AKDB',
126 'ExtUtils-0577' => 'ExtUtils-F77-1.14',
127 'LispFmt' => 'Lisp::Fmt-0.00',
128 'Acme-Stegano' => 'Acme',
129 'Acme-RTB' => 'Acme',
130 'WWW-Search-PRWire' => 'work',
131 'Video-Capture-214l' => 'Video-Capture-V4l-0.224',
132 'Tie-DirHandle' => 'Tie',
133 'DB2' => 'DBD-DB2-0.71a',
134 'Tie-Scalar-RestrictUpdates' => 'Tie',
135 'Math-MVPoly' => 'MVPoly',
136 'PlugIn' => 'PlugIn.pm',
137 'Lingua-ID-Nums2Words' => 'Nums2Words-0.01',
138 'chronos-1.' => 'Chronos',
139 'jp_beta' => 'jperl_beta_r1',
140 'Bundle-223C-Validator' => 'Bundle-W3C-Validator-0.6.5',
141 'Text-199' => 'Text-T9-1.0',
142 'Games-Literati' => 'Games',
143 'VMS-IndexedFile' => 'VMS',
144 'authen-rbac' => 'Authen',
145 'Graphics-EPS' => 'EPS.pm',
146 'new.spirit-2.' => 'new.spirit',
147 'Tk-MListbox' => 'MListbox-1.11',
148 'DBD-SQLrelay' => 'SQLRelay.pm',
149 'Tie-RDBM-Cached' => 'RDBM',
150 'PDL_IO_HDF' => 'HDF',
151 'HPUX-LVM' => 'LVM',
152 'Parse-Nibbler' => 'Parse',
153 'Digest-Perl-MD4' => 'MD4',
154 'Crypt-Imail' => 'Imail',
155 'ubertext' => 'Text-UberText-0.95',
156 'MP3-123U' => 'M3U',
157 'Qmail-Control' => 'Qmail',
158 'T-LXS' => 'Text-LevenshteinXS-0.02',
159 'HTML-Paginator' => 'HTML',
160 'swig' => 'SWIG1.1p5',
161 'MIDI-Realtime' => 'MIDI',
162 'sparky-public' => 'Sparky-Public-1.06',
163 'Chemistry-MolecularMass' => 'Chemistry',
164 'Net-253950-SimpleServer' => 'Net-Z3950-SimpleServer-0.08',
165 'NewsClipper-OpenSource' => 'NewsClipper-1.32-OpenSource',
166 'Win32API-Resources' => 'Resources.pm',
167 'Unicode-Collate-Standard-2131_1' => 'Unicode-Collate-Standard-V3_1_1-0.1',
168 'Net-026Term' => 'Net-C6Term-0.11',
169 'BitArray1' => 'BitArray',
170 'Audio-Radio-214L' => 'Audio-Radio-V4L-0.01',
171 'Devel-AutoProfiler' => 'Devel',
172 'Brasil-Checar-CGC' => 'Brasil',
173 'AI-NeuralNet-SOM' => 'SOM.pm',
174 'Net-BitTorrent-File-fix' => 'Net-BitTorrent-File-1.01',
175 'VMS-FindFile' => 'VMS',
176 'LoadHtml.' => 'README',
177 'Time-Compare' => 'Time',
178 'ShiftJIS-230213-MapUTF' => 'ShiftJIS-X0213-MapUTF-0.21',
179 'Image-WMF' => 'Image',
180 'sdf-2.0.eta' => 'sdf-2.001beta1',
181 'Math-Expr-LATEST' => 'Math-Expr-0.4',
182 'MP3-Player-PktConcert' => 'MP3',
183 'Apache-OWA' => 'OWA',
184 'Audio-Gramofile' => 'Audio',
185 'DBIx-Copy' => 'Copy',
186 'P4-024' => 'P4-C4-2.021',
187 'Disassemble-2386' => 'Disassemble-X86-0.13',
188 'Proc-Swarm' => 'Swarm-0.5',
189 'Smil' => 'perlysmil',
190 'Net-SSH-2232Perl' => 'Net-SSH-W32Perl-0.05',
191 'Win32-SerialPort' => 'SerialPort-0.19',
192 'Lingua-ID-Words2Nums' => 'Words2Nums-0.01',
193 'Parse-Text' => 'Text',
194 'DBIx-HTMLView-LATEST' => 'DBIx-HTMLView-0.9',
195 'Apache-NNTPGateway' => 'NNTPGateway-0.9',
196 'XPathToXML' => 'XPathToXML.pm',
197 'XML-WMM-ASX' => 'XML',
198 'CGISession' => 'CGI',
199 'Net-SMS-142' => 'Net-SMS-O2-0.019',
200 'Search-253950' => 'Search-Z3950-0.05',
201 'Date-Christmas' => 'Christmas',
202 'Win32-InternetExplorer-Window' => 'Win32',
203 'Apache-WAP-MailPeek' => 'MailPeek',
204 'Statistics-Table-F' => 'Statistics',
205 'BerkeleyDB_Locks' => 'BerkeleyDB-Locks-0_2',
206 'HookPrePostCall' => 'PrePostCall-1.2',
207 'Oak-AAS-Service-DBI_13_PAM' => 'Oak-AAS-Service-DBI_N_PAM-1.8',
208 'Math-Vector' => 'Vector.pm',
209 'Audio-124pDecrypt' => 'Audio-M4pDecrypt-0.04',
210 'libao-perl_0.03' => 'libao-perl-0.03',
211 'CGI-EZForm' => 'EZForm',
212 'Data-Locations-fixed' => 'Data-Locations-5.2-fixed',
213 'HTML-Template-Filter-Dreamweaver' => 'Dreamweaver',
214 'LineByLine' => 'LineByLine.pm',
215 'Geo-0400' => 'Geo-E00-0.05',
216 'WebService-Validator-HTML-223C' => 'WebService-Validator-HTML-W3C-0.03',
217 'DateTime-Format-223CDTF' => 'DateTime-Format-W3CDTF-0.04',
218 'DBD_SQLFLEX' => 'DBD-Sqlflex',
219 'Text-Number' => 'Number',
220 'DBIx-DataLookup' => 'DBIx',
221 'MP3-ID3211Tag' => 'MP3-ID3v1Tag-1.11',
222 'Text-Striphigh' => 'Striphigh-0.02',
223 'Tie-SortHash' => 'SortHash',
224 'Apache-AccessAbuse' => 'AccessAbuse',
225 'MP3-123U-Parser' => 'MP3-M3U-Parser',
226 'Net-253950' => 'Net-Z3950-0.44',
227 'Net-RBLClient' => 'RBLCLient-0.2',
228 'CGI-EasyCGI' => 'CGI',
229 'http-handle' => 'HTTP::Handle',
230 'JPEG-Comment' => 'JPEG',
231 'router-lg' => 'Router',
232 'Db-Mediasurface' => 'Mediasurface',
233 'Text-BarGraph' => 'bargraph',
234 'TL' => 'Text-Levenshtein-0.04',
235 'Config-Vars' => 'Config-0.01',
236 'Tie-PerfectHash' => 'Tie',
237 'DNS-TinyDNS' => 'DNS',
238 'DesignPattern-Factory' => 'Factory',
239 'WWW-01_Rail' => 'WWW-B_Rail-0.01',
240 'Win32-Exchange' => 'blib',
241 'Math-RPN' => 'Math',
242 'Db-Mediasurface-Cache' => 'Cache',
243 'perl_archie.' => 'Archie.pm',
244 'Acme-PGPSign' => 'Acme',
245 'HTML-Widget-sideBar' => 'HTML-Widget-SideBar-1.00',
246 'log' => 'Games',
247 'File-List' => 'File',
248 'Schedule-Cronchik' => 'Schedule',
249 'Curses-Devkit' => 'Cdk',
250 'Pod-PalmDoc' => 'Pod',
251 'Easy-WML' => 'Easy WML 0.1',
252 'Interval.' => 'Date',
253 'Brasil-Checar-CPF' => 'Brasil',
254 'Apache-WAP-AutoIndex' => 'AutoIndex',
255
256 'SOM.pm' => 'SOM.pm',
257 'PlugIn.pm' => 'PlugIn.pm',
258 'XPathToXML.pm' => 'XPathToXML.pm',
259 'Vector.pm' => 'Vector.pm',
260 'LineByLine.pm' => 'LineByLine.pm',
261 'Archie.pm' => 'Archie.pm',
262 'TieConstant.pm' => 'TieConstant.pm',
263 'EPS.pm' => 'EPS.pm',
264 'SQLRelay.pm' => 'SQLRelay.pm',
265 'Resources.pm' => 'Resources.pm',
266 'README' => 'README',
267
268 );
269 my %NEW_ALIAS; # found in this pass
270
271 # Map module names to pathes. Generated by wanted() doing alisaing.
272 my %ARCHIVE;
273
274 # Map module names to latest version. Generated by wanted()
275 my %VERSION;
276
277
278 # Mapping for modules with common root not matching modules name that
279 # are not aliased. This is just for prefix stripping and not strictly
280 # necessary. Takes effect after version considerations.
281 my %TR = (# tar name root to strip
282 'Net_SSLeay.pm' => 'SSLeay/',
283 'EventDrivenServer' => 'Server/',
284 'bio_lib.pl.' => '',
285 'AlarmCall' => 'Sys/',
286 'Cdk-ext' => 'Cdk/',
287 'Sx' => '\d.\d/',
288 'DumpStack' => 'Devel/',
289 'StatisticsDescriptive'=> 'Statistics/',
290 'Term-Gnuplot' => 'Gnuplot/',
291 'iodbc_ext' => 'iodbc-ext-\d.\d/',
292 'UNIVERSAL' => '',
293 'Term-Query' => 'Query/',
294 'SelfStubber' => 'Devel/',
295 'CallerItem' => 'Devel/',
296 );
297
298 my $DIR = $tb->dir;
299 my $DATA = $tb->dir . "/data";
300 my $LWP;
301
302
303 if (@{$OPT{remove}}) {
304 my $pod;
305 for $pod (@{$OPT{remove}}) {
306 unless (-e $pod) {
307 $pod = "$DIR/$pod";
308 }
309 index_pod(file => $pod, remove => 1) if -f $pod;
310 unlink $pod or warn "Could not unlink '$pod': $!\n";
311 #$tb->sync;
312 }
313 $tb->close;
314 $db->close;
315 exit;
316 }
317
318 # Now get the beef
319 if ($OPT{cpan} =~ /^(http|ftp):/) {
320 $LWP = 1;
321 require LWP::Simple;
322 LWP::Simple->import();
323
324 mkpath($DATA,1,0755) or
325 die "Could not generate '$DATA/': $!"
326 unless -d $DATA;
327
328 if (! -f "$DATA/find-ls.gz" or -M "$DATA/find-ls.gz" > 0.5) {
329 my $status = mirror("$OPT{cpan}/indices/find-ls.gz", "$DATA/find-ls.gz");
330 if ($status != &RC_OK and $status != &RC_NOT_MODIFIED) {
331 # we could use Net:FTP here ...
332 die "Was unable to mirror '$OPT{cpan}/indices/find-ls.gz'\n";
333 }
334 }
335 my $fh = new IO::File "gzip -cd $DATA/find-ls.gz |";
336 die "Could not open 'gzip -cd $DATA/find-ls.gz': !$\n" unless $fh;
337
338 my $line;
339 while (defined ($line = <$fh>)) {
340 chomp($line);
341 my ($mon, $mday, $time, $file, $is_link) = (split ' ', $line)[7..11];
342
343 next if defined $is_link;
344 my $mtime = mtime($mon, $mday, $time);
345
346 $file =~ s:^\./::;
347 ($_) = fileparse($file);
348 $File::Find::name = $file;
349 wanted($mtime);
350 }
351 } else {
352 find(sub {&wanted((stat($_))[9])}, $OPT{cpan});
353 }
354
355 ARCHIVE:
356 for my $tar (sort keys %ARCHIVE) {
357 next if $OPT{match} and $ARCHIVE{$tar} !~ /$OPT{match}/o;
358 my $base = (split /\//, $ARCHIVE{$tar})[-1];
359 my $parent;
360
361 # logging
362 if ($OPT{trust_mtime}) {
363 printf "%-20s %10s %s\t", $tar,
364 substr(scalar(localtime($VERSION{$tar})),0,10), $base;
365 } else {
366 printf "%-20s %10.5f %s\t", $tar, $VERSION{$tar}, $base;
367 }
368
369 # Remember the archive
370 # We should have an extra table for the tar file data ...
371 if (!$OPT{force} and $tb->have(docid => $base)) {
372 print "skipping\n";
373 next ARCHIVE;
374 } else {
375 $parent = $tb->insert(docid => $base,
376 headline => $ARCHIVE{$tar}) unless $OPT{test};
377 print "indexing\n";
378 }
379
380 next ARCHIVE if $OPT{test} > 1;
381
382 my $TAR = myget($tar);
383
384 next ARCHIVE unless $TAR; # not able to fetch it
385
386 my %tar;
387 tie (%tar,
388 'WAIT::Document::Tar',
389 sub { $_[0] =~ /\.(pm|pod|PL)$/i or $_[0] =~ /readme/i},
390 #sub { $_[0] !~ m:/$: },
391 $TAR)
392 or warn "Could not tie '$TAR'\n";
393
394 my $sloppy;
395 my ($key, $val);
396
397 FILE:
398 while (($key, $val) = each %tar) {
399 my $file = fname($key);
400
401 # don't index directories
402 next if $file =~ /\/$/;
403
404 # is it a POD file?
405 next FILE unless $file =~ /readme/i or $val =~ /\n=head/;
406
407 # remove directory prefix
408 unless ($sloppy # no common root
409 or $file =~ s:^\Q$tar\E[^/]*/:: # common root, maybe alias
410 or ($TR{$tar} # common root, not aliased
411 and $file =~ s:^\Q$TR{$tar}\E::)
412 ) {
413 # try to determine an alias
414 warn "Bad directory prefix: '$file'\n";
415 my ($prefix) = split /\//, $file;
416
417 while ($key = (tied %tar)->NEXTKEY) {
418 my $file = fname($key);
419
420 next if $file =~ /\/$/;
421 unless ($file =~ m:^$prefix/: or $file eq $prefix) {
422 warn "Archive contains different prefixes: $prefix,$file\n";
423 $prefix = '';
424 last;
425 }
426 }
427 if ($prefix) {
428 print "Please alias '$tar' to '$prefix' next time!\n";
429 print "See alias table later.\n";
430 $NEW_ALIAS{$tar} = $prefix;
431 $tb->delete_by_key($parent);
432 next ARCHIVE;
433 } else {
434 print "Assuming that tar file name $tar is a valid prefix\n";
435 $sloppy = 1;
436
437 # We may reset too much here! But that this is not exact
438 # science anyway. Maybe we should ignore using 'next ARCHIVE'.
439
440 $key = (tied %tar)->FIRSTKEY;
441 redo FILE;
442 }
443 }
444
445 # remove /lib prefix
446 $file =~ s:^lib/::;
447
448 # generate new path
449 my $path = "$DATA/$tar/$file";
450
451 my ($sbase, $sdir) = fileparse($path);
452 my $fh;
453
454 unless ($OPT{test}) {
455 if (-f $path) {
456 index_pod(file => $path, remove => 1);
457 unlink $path or warn "Could not unlink '$path' $!\n";
458 } elsif (!-d $sdir) {
459 mkpath($sdir,1,0755) or die "Could not mkpath($sdir): $!\n";
460 }
461 # $fh = new IO::File "> $path";
462 $fh = new IO::Zlib "$path.gz","wb";
463 die "Could not write '$path': $!\n" unless $fh;
464 }
465
466 if ($file =~ /readme|install/i) { # make READMEs verbatim pods
467 $val =~ s/\n/\n /g;
468 $val = "=head1 NAME\n\n$tar $file\n\n=head1 DESCRIPTION\n\n $val"
469 unless $val =~ /^=head/m;
470 } else { # remove non-pod stuff
471 my $nval = $val; $val = '';
472 my $cutting = 1;
473
474 for (split /\n/, $nval) {
475 if (/^=cut|!NO!SUBS!/) {
476 $cutting = 1;
477 } elsif ($cutting and /^=head/) {
478 $cutting = 0;
479 }
480 unless ($cutting) {
481 $val .= $_ . "\n";
482 }
483 }
484 }
485 unless ($OPT{test}) {
486 $fh->print($val);
487 index_pod(file => $path, parent => $parent,
488 text => $val, source => $ARCHIVE{$tar});
489 }
490 }
491
492 if ($LWP and !$OPT{keep}) {
493 unlink $TAR or warn
494 "Could not unlink '$TAR': $!\n";
495 }
496 }
497
498 if (%NEW_ALIAS) {
499 print "\%ALIAS = (\n";
500 for (keys %NEW_ALIAS) {
501 print "\t'$_'\t=> '$NEW_ALIAS{$_}',\n";
502 }
503 print "\t);\n";
504 }
505
506 if ($OPT{reorg}) {
507 my $now = time;
508 warn "Starting reorg\n";
509 $tb->set(top=>1);
510 warn sprintf "Finished reorg %d seconds\n", time - $now;
511 }
512
513 # we are done
514 $db->close();
515 exit;
516
517 sub fname ($) {
518 my $key = shift;
519 my ($ntar, $file) = split $;, $key;
520
521 # remove leading './' - shudder
522 $file =~ s/^\.\///;
523
524 return($file);
525 }
526
527 sub myget {
528 my $tar = shift;
529 my $TAR;
530
531 if ($LWP) { # fetch the archive
532 if ($OPT{keep}) {
533 $TAR = "$OPT{keep}/$ARCHIVE{$tar}";
534 print "Keeping in '$TAR'\n" unless -e $TAR;
535 my ($base, $path) = fileparse($TAR);
536 unless (-d $path) {
537 mkpath($path,1,0755) or
538 die "Could not mkpath($path)\n";
539 }
540 } else {
541 $TAR = "/tmp/$tar.tar.gz";
542 }
543 unless (-e $TAR) { # lwp mirror seems to fetch ftp: in any case?
544 print "Fetching $OPT{cpan}/$ARCHIVE{$tar}\n";
545 my $status = mirror("$OPT{cpan}/$ARCHIVE{$tar}", $TAR);
546 if ($status != &RC_OK and $status != &RC_NOT_MODIFIED) {
547 warn "Was unable to mirror '$ARCHIVE{$tar}, skipping'\n";
548 return;
549 }
550 }
551 } else {
552 $TAR = $ARCHIVE{$tar};
553 }
554 $TAR;
555 }
556
557 sub index_pod {
558 my %parm = @_;
559 my $did = $parm{file};
560 my $rel_did = $did;
561 my $abs_did = $did;
562
563 if ($rel_did =~ s:$DIR/::) {
564 $abs_did = "$DIR/$rel_did";
565 }
566
567 undef $did;
568
569 # check for both variants
570 if ($tb->have('docid' => $rel_did)) {
571 $did = $rel_did;
572 } elsif ($tb->have('docid' => $abs_did)) {
573 $did = $abs_did;
574 }
575 if ($did) { # have it version
576 if (!$parm{remove}) {
577 warn "duplicate: $did\n";
578 return;
579 }
580 } else { # not seen yet
581 $did = $rel_did;
582 if ($parm{remove}) {
583 print "missing: $did\n";
584 return;
585 }
586 }
587
588 $parm{'text'} ||= WAIT::Document::Find->FETCH($abs_did);
589
590 unless (defined $parm{'text'}) {
591 print "unavailable: $did\n";
592 return;
593 }
594
595 my $record = $layout->split($parm{'text'});
596
597 if (! $record) {
598 print "empty pod: $did\n";
599 return;
600 }
601
602 $record->{size} = length($parm{'text'});
603 my $headline = $record->{name} || $did;
604
605 $headline =~ s/^$DATA//o; # $did
606 $headline =~ s/\s+/ /g; $headline =~ s/^\s+//;
607
608 printf "%s %s\n", ($parm{remove})?'-':'+', substr($headline,0,70);
609 if ($parm{remove}) {
610 $tb->delete('docid' => $did,
611 headline => $headline,
612 %{$record});
613 } else {
614 $tb->insert('docid' => $did,
615 headline => $headline,
616 source => $parm{source},
617 parent => $parm{parent},
618 %{$record});
619 }
620 }
621
622 # This *must* remove the version in *any* case. It should compute a
623 # resonable version number - but usually mtimes should be used.
624 sub version {
625 local ($_) = @_;
626
627 # remove alpha/beta postfix
628 s/([-_\d])(a|b|alpha|beta|src)$/$1/;
629
630 # jperl1.3@4.019.tar.gz
631 s/@\d.\d+//;
632
633 # oraperl-v2.4-gk.tar.gz
634 s/-v(\d)/$1/;
635
636 # lettered versions - shudder
637 s/([-_\d\.])([a-z])([\d\._])/sprintf "$1%02d$3", ord(lc $2) - ord('a') /ei;
638 s/([-_\d\.])([a-z])$/sprintf "$1%02d", ord(lc $2) - ord('a') /ei;
639
640 # thanks libwww-5b12 ;-)
641 s/(\d+)b/($1-1).'.'/e;
642 s/(\d+)a/($1-2).'.'/e;
643
644 # replace '-pre' by '0.'
645 s/-pre([\.\d])/-0.$1/;
646 s/\.\././g;
647 s/(\d)_(\d)/$1$2/g;
648
649 # chop '[-.]' and thelike
650 s/\W$//;
651
652 # ram's versions Storable-0.4@p
653 s/\@/./;
654
655 if (s/[-_]?(\d+)\.(0\d+)\.(\d+)$//) {
656 return($_, $1 + "0.$2" + $3 / 1000000);
657 } elsif (s/[-_]?(\d+)\.(\d+)\.(\d+)$//) {
658 return($_, $1 + $2/1000 + $3 / 1000000);
659 } elsif (s/[-_]?(\d+\.[\d_]+)$//) {
660 return($_, $1);
661 } elsif (s/[-_]?([\d_]+)$//) {
662 return($_, $1);
663 } elsif (s/-(\d+.\d+)-/-/) { # perl-4.019-ref-guide
664 return($_, $1);
665 } else {
666 if ($_ =~ /\d/) { # smells like an unknown scheme
667 warn "Odd version Numbering: '$File::Find::name'\n";
668 return($_, undef);
669 } else { # assume version 0
670 warn "No version Numbering: '$File::Find::name'\n";
671 return($_, 0);
672 }
673
674 }
675 }
676
677 sub wanted {
678 my $mtime = shift; # called by parse_file_ls();
679
680 return unless /^(.*)\.tar(\.gz|\.Z)$/;
681
682 return if (! $max);
683 $max--;
684
685 my ($archive, $version) = version($1);
686
687 unless (defined $version) {
688 warn "Skipping $1\n";
689 return;
690 }
691
692 # Check for file alias
693 $archive = $ALIAS{$archive} if $ALIAS{$archive};
694
695 # Check for path alias.
696 if ($File::Find::name =~ m(/CPAN/(?:source/)?(.*\Q$archive\E))) {
697 if ($ALIAS{$1}) {
698 $archive = $ALIAS{$1};
699 }
700 }
701
702 if ($OPT{trust_mtime}) {
703 $version = $mtime;
704 } else {
705 $version =~ s/(\d)_/$1/;
706 $version ||= $mtime; # mtime
707 }
708
709 if (!exists $ARCHIVE{$archive}
710 or $VERSION{$archive} < $version) {
711 $ARCHIVE{$archive} = $File::Find::name;
712 $VERSION{$archive} = $version;
713 }
714 }
715
716 sub clean_database {
717 my %parm = @_;
718
719 my $db = WAIT::Database->open(
720 name => $parm{database},
721 'directory' => $parm{dir},
722 )
723 or die "Could not open database '$parm{dir}/$parm{database}': $@";
724 my $tbl = $db->table(name => $parm{table});
725 if ($tbl) {
726 $tbl->drop or
727 die "Could not open table '$parm{tabel}': $@";
728 }
729
730 $db->close;
731 }
732
733 sub create_table {
734 my %parm = @_;
735
736 my $access = bless {}, 'WAIT::Document::Find';
737
738 my $stem = [{
739 'prefix' => ['isotr', 'isolc'],
740 'intervall' => ['isotr', 'isolc'],
741 }, 'isotr', 'isolc', 'split2', 'stop', 'Stem'];
742 my $text = [{
743 'prefix' => ['isotr', 'isolc'],
744 'intervall' => ['isotr', 'isolc'],
745 },
746 'isotr', 'isolc', 'split2', 'stop'];
747 my $sound = ['isotr', 'isolc', 'split2', 'Soundex'],;
748
749 my $tb =
750 $parm{db}->create_table
751 (name => $parm{table},
752 attr => ['docid', 'headline', 'source', 'size', 'parent'],
753 keyset => [['docid']],
754 layout => $parm{layout},
755 access => $access,
756 invindex =>
757 [
758 'name' => $stem,
759 'synopsis' => $stem,
760 'bugs' => $stem,
761 'description' => $stem,
762 'text' => $stem,
763 'environment' => $text,
764 'example' => $text, 'example' => $stem,
765 'author' => $sound, 'author' => $stem,
766 ]
767 );
768 die "Could not create table '$parm{table}'" unless $tb;
769 $tb;
770 }
771
772 my %MON;
773 my $YEAR;
774
775 BEGIN {
776 my $i = 1;
777 for (qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec)) {
778 $MON{$_} = $i++;
779 }
780 $YEAR = (localtime(time))[5];
781 }
782
783 # We could/should use Date::GetDate here
784 use Time::Local;
785 sub mtime {
786 my ($mon, $mday, $time) = @_;
787 my ($hour, $min, $year, $monn) = (0,0);
788
789 if ($time =~ /(\d+):(\d+)/) {
790 ($hour, $min) = ($1, $2);
791 $year = $YEAR;
792 } else {
793 $year = $time;
794 }
795 $monn = $MON{$mon} || $MON{ucfirst lc $mon} || warn "Unknown month: '$mon'";
796 my $guess = timelocal(0,$min,$hour,$mday,$monn-1,$year);
797 if ($guess > time) {
798 $guess = timelocal(0,$min,$hour,$mday,$monn-1,$year-1);
799 }
800 $guess;
801 }
802
803 $WAIT::Config = $WAIT::Config;
804
805 __END__
806 ## ###################################################################
807 ## pod
808 ## ###################################################################
809
810 =head1 NAME
811
812 cpanwait - generate an WAIT index for CPAN
813
814 =head1 SYNOPSIS
815
816 B<cpanwait>
817 [B<-clean>] [B<-noclean>]
818 [B<-cpan> I<url or directory>]
819 [B<-database> I<dbname>]
820 [B<-dir> I<directory>]
821 [B<-force>] [B<-noforce>]
822 [B<-keep> I<directory>]
823 [B<-match> I<regexp>]
824 [B<-table> I<table name>]
825 [B<-test> I<level>]
826 [B<-trust_mtime>] [B<-notrust_mtime>]
827
828 =head1 DESCRIPTION
829
830 TBS
831
832 =head1 OPTIONS
833
834 =over 5
835
836 =item B<-clean> / B<-noclean>
837
838 Clean the table befor indexing. Default is B<off>.
839
840 =item B<-cpan> I<url or directory>
841
842 Default directory or URL for indexing. If an URL is given, there
843 currently must be a file F<indices/find-ls.gz> relative to it which
844 contains the output of C<find . -ls | gzip>.
845 Default is F<ftp://ftp.rz.ruhr-uni-bochum.de/pub/CPAN>.
846
847
848 =item B<-database> I<dbname>
849
850 Specify database name. Default is F<DB>.
851
852 =item B<-dir> I<directory>
853
854 Alternate directory were databases are located. Default is the
855 directory specified during configuration of WAIT.
856
857 =item B<-force>
858
859 Force reindexing, even if B<cpan> thinks files are up to date.
860 Default is B<off>
861
862 =item B<-keep> I<directory>
863
864 If fetching from a remote server, keep files in I<directory>. Default is
865 F</app/unido-i06/src/share/lang/perl/96a/CPAN/sources>.
866
867 =item B<-match> I<regexp>
868
869 Limit to patches matching I<regexp>. Default is F<authors/id/>.
870
871 =item B<-table> I<table name>
872
873 Specify an alternate table name. Default is C<cpan>.
874
875 =item B<-test> I<level>
876
877 Set test level, were B<0> means normal operation, B<1> means, don't
878 really index and B<2> means, don't even get archives and examine them.
879
880 =item B<-trust_mtime> / B<-notrust_mtime>
881
882 If B<on>, the files mtimes are used to decide, which version of an
883 archive is the newest. If b<off>, the version extracted is used
884 (beware, there are far more version numbering schemes than B<cpan> can
885 parse).
886
887 =back
888
889 =head1 AUTHOR
890
891 Ulrich Pfeifer E<lt>F<pfeifer@ls6.informatik.uni-dortumund.de>E<gt>

Properties

Name Value
cvs2svn:cvs-rev 1.1
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26