--- trunk/perl/scripts/cpanest 2005/09/16 17:29:53 22 +++ trunk/perl/scripts/cpanest 2005/12/29 20:08:46 34 @@ -91,12 +91,12 @@ use strict; use File::Path; -use DB_File; use Getopt::Long; use File::Find; use File::Basename; use IO::File; use IO::Zlib; +use POSIX qw/strftime/; use lib '/data/wait/lib'; @@ -153,7 +153,7 @@ my $tb = new HyperEstraier::WAIT::Table( uri => $OPT{node}, - attr => ['docid', 'headline', 'source', 'size', 'parent'], + attr => ['docid', 'headline', 'source', 'size', 'parent', 'version'], key => 'docid', invindex => [ qw/name synopsis bugs description text environment example author/ ], debug => $OPT{debug}, @@ -422,12 +422,16 @@ my $base = (split /\//, $ARCHIVE{$tar})[-1]; my $parent; + my %attr; + # logging if ($OPT{trust_mtime}) { - printf "%-20s %10s %s\t", $tar, - substr(scalar(localtime($VERSION{$tar})),0,10), $base; + $attr{'@mdate'} = strftime('%Y-%m-%dT%H:%M:%S+00:00', gmtime($VERSION{$tar})); + $parent->{'@mdate'} = $attr{'@mdate'}; + printf "%-20s %10s %s\t", $tar, $attr{'@mdate'}, $base; } else { - printf "%-20s %10.5f %s\t", $tar, $VERSION{$tar}, $base; + $attr{'version'} = $VERSION{$tar}; + printf "%-20s %10.5f %s\t", $tar, $attr{'version'}, $base; } # Remember the archive @@ -436,8 +440,10 @@ print "skipping\n"; next ARCHIVE; } else { - $parent = $tb->insert(docid => $base, - headline => $ARCHIVE{$tar}) unless $OPT{test}; + $parent->{_id} = $tb->insert(docid => $base, + headline => $ARCHIVE{$tar}, + %attr + ) unless $OPT{test}; print "indexing\n"; } @@ -492,7 +498,7 @@ print "Please alias '$tar' to '$prefix' next time!\n"; print "See alias table later.\n"; $NEW_ALIAS{$tar} = $prefix; - $tb->delete_by_key($parent); + $tb->delete_by_key($parent->{_id}); next ARCHIVE; } else { print "Assuming that tar file name $tar is a valid prefix\n"; @@ -549,7 +555,8 @@ unless ($OPT{test}) { $fh->print($val); index_pod(file => $path, parent => $parent, - text => $val, source => $ARCHIVE{$tar}); + text => $val, source => $ARCHIVE{$tar}, + ); } } @@ -628,7 +635,7 @@ $did = $abs_did; } if ($did) { # have it version - if (!$parm{remove}) { + if (!$parm{remove} and !$OPT{force}) { warn "duplicate: $did\n"; return; } @@ -657,6 +664,9 @@ $record->{size} = length($parm{'text'}); my $headline = $record->{name} || $did; + # additional fields for Hyper Estraier + $record->{'@mdate'} = $parm{'mdate'} if ($parm{'mdate'}); + $headline =~ s/^$DATA//o; # $did $headline =~ s/\s+/ /g; $headline =~ s/^\s+//; @@ -666,10 +676,14 @@ headline => $headline, %{$record}); } else { + foreach (keys %{$parm{parent}}) { + next if (/^_/); + $record->{$_} = $parm{parent}->{$_} if ($parm{parent}->{$_}); + } $tb->insert('docid' => $did, headline => $headline, source => $parm{source}, - parent => $parm{parent}, + parent => $parm{parent}->{_id}, %{$record}); } } @@ -735,7 +749,7 @@ return if (! $max); $max--; - return unless /^(.*)\.tar(\.gz|\.Z)$/; + return unless /^(.*)\.(tar\.(gz|Z)|tgz)$/; my ($archive, $version) = version($1); @@ -768,45 +782,6 @@ } } -sub create_table { - my %parm = @_; - - my $access = bless {}, 'WAIT::Document::Find'; - - my $stem = [{ - 'prefix' => ['isotr', 'isolc'], - 'intervall' => ['isotr', 'isolc'], - }, 'isotr', 'isolc', 'split2', 'stop', 'Stem']; - my $text = [{ - 'prefix' => ['isotr', 'isolc'], - 'intervall' => ['isotr', 'isolc'], - }, - 'isotr', 'isolc', 'split2', 'stop']; - my $sound = ['isotr', 'isolc', 'split2', 'Soundex'],; - - my $tb = - $parm{db}->create_table - (name => $parm{table}, - attr => ['docid', 'headline', 'source', 'size', 'parent'], - keyset => [['docid']], - layout => $parm{layout}, - access => $access, - invindex => - [ - 'name' => $stem, - 'synopsis' => $stem, - 'bugs' => $stem, - 'description' => $stem, - 'text' => $stem, - 'environment' => $text, - 'example' => $text, 'example' => $stem, - 'author' => $sound, 'author' => $stem, - ] - ); - die "Could not create table '$parm{table}'" unless $tb; - $tb; -} - my %MON; my $YEAR; @@ -943,7 +918,7 @@ my @invindex = $self->{'invindex'} || die "no invindex in object"; foreach my $attr (keys %{$args}) { - if (grep(/^$attr$/, @{ $self->{'attr'} })) { + if (grep(/^$attr$/, @{ $self->{'attr'} }) or $attr =~ m/^@/o) { $doc->add_attr($attr, $args->{$attr}); } if (grep(/^$attr$/, @{ $self->{'invindex'} })) { @@ -954,10 +929,15 @@ print STDERR $doc->dump_draft if ($self->{'debug'}); my $id; - unless ($id = $self->{'node'}->put_doc($doc)) { + unless ($self->{'node'}->put_doc($doc)) { printf STDERR "ERROR: %d\n", $self->{'node'}->status; - #} else { - # print STDERR "id: $id\n"; + } else { + $id = $self->{'node'}->uri_to_id( $uri ); + if ($id != -1) { + print STDERR "id: $id\n" if ($self->{'debug'}) + } else { + print STDERR "ERROR: can't find id for newly insrted document $uri\n"; + } } return $id;