91 |
use strict; |
use strict; |
92 |
|
|
93 |
use File::Path; |
use File::Path; |
|
use DB_File; |
|
94 |
use Getopt::Long; |
use Getopt::Long; |
95 |
use File::Find; |
use File::Find; |
96 |
use File::Basename; |
use File::Basename; |
97 |
use IO::File; |
use IO::File; |
98 |
use IO::Zlib; |
use IO::Zlib; |
99 |
|
use POSIX qw/strftime/; |
100 |
|
|
101 |
use lib '/data/wait/lib'; |
use lib '/data/wait/lib'; |
102 |
|
|
153 |
|
|
154 |
my $tb = new HyperEstraier::WAIT::Table( |
my $tb = new HyperEstraier::WAIT::Table( |
155 |
uri => $OPT{node}, |
uri => $OPT{node}, |
156 |
attr => ['docid', 'headline', 'source', 'size', 'parent'], |
attr => ['docid', 'headline', 'source', 'size', 'parent', 'version'], |
157 |
key => 'docid', |
key => 'docid', |
158 |
invindex => [ qw/name synopsis bugs description text environment example author/ ], |
invindex => [ qw/name synopsis bugs description text environment example author/ ], |
159 |
debug => $OPT{debug}, |
debug => $OPT{debug}, |
422 |
my $base = (split /\//, $ARCHIVE{$tar})[-1]; |
my $base = (split /\//, $ARCHIVE{$tar})[-1]; |
423 |
my $parent; |
my $parent; |
424 |
|
|
425 |
|
my %attr; |
426 |
|
|
427 |
# logging |
# logging |
428 |
if ($OPT{trust_mtime}) { |
if ($OPT{trust_mtime}) { |
429 |
printf "%-20s %10s %s\t", $tar, |
$attr{'@mdate'} = strftime('%Y-%m-%dT%H:%M:%S+00:00', gmtime($VERSION{$tar})); |
430 |
substr(scalar(localtime($VERSION{$tar})),0,10), $base; |
$parent->{'@mdate'} = $attr{'@mdate'}; |
431 |
|
printf "%-20s %10s %s\t", $tar, $attr{'@mdate'}, $base; |
432 |
} else { |
} else { |
433 |
printf "%-20s %10.5f %s\t", $tar, $VERSION{$tar}, $base; |
$attr{'version'} = $VERSION{$tar}; |
434 |
|
printf "%-20s %10.5f %s\t", $tar, $attr{'version'}, $base; |
435 |
} |
} |
436 |
|
|
437 |
# Remember the archive |
# Remember the archive |
440 |
print "skipping\n"; |
print "skipping\n"; |
441 |
next ARCHIVE; |
next ARCHIVE; |
442 |
} else { |
} else { |
443 |
$parent = $tb->insert(docid => $base, |
$parent->{_id} = $tb->insert(docid => $base, |
444 |
headline => $ARCHIVE{$tar}) unless $OPT{test}; |
headline => $ARCHIVE{$tar}, |
445 |
|
%attr |
446 |
|
) unless $OPT{test}; |
447 |
print "indexing\n"; |
print "indexing\n"; |
448 |
} |
} |
449 |
|
|
498 |
print "Please alias '$tar' to '$prefix' next time!\n"; |
print "Please alias '$tar' to '$prefix' next time!\n"; |
499 |
print "See alias table later.\n"; |
print "See alias table later.\n"; |
500 |
$NEW_ALIAS{$tar} = $prefix; |
$NEW_ALIAS{$tar} = $prefix; |
501 |
$tb->delete_by_key($parent); |
$tb->delete_by_key($parent->{_id}); |
502 |
next ARCHIVE; |
next ARCHIVE; |
503 |
} else { |
} else { |
504 |
print "Assuming that tar file name $tar is a valid prefix\n"; |
print "Assuming that tar file name $tar is a valid prefix\n"; |
555 |
unless ($OPT{test}) { |
unless ($OPT{test}) { |
556 |
$fh->print($val); |
$fh->print($val); |
557 |
index_pod(file => $path, parent => $parent, |
index_pod(file => $path, parent => $parent, |
558 |
text => $val, source => $ARCHIVE{$tar}); |
text => $val, source => $ARCHIVE{$tar}, |
559 |
|
); |
560 |
} |
} |
561 |
} |
} |
562 |
|
|
635 |
$did = $abs_did; |
$did = $abs_did; |
636 |
} |
} |
637 |
if ($did) { # have it version |
if ($did) { # have it version |
638 |
if (!$parm{remove}) { |
if (!$parm{remove} and !$OPT{force}) { |
639 |
warn "duplicate: $did\n"; |
warn "duplicate: $did\n"; |
640 |
return; |
return; |
641 |
} |
} |
664 |
$record->{size} = length($parm{'text'}); |
$record->{size} = length($parm{'text'}); |
665 |
my $headline = $record->{name} || $did; |
my $headline = $record->{name} || $did; |
666 |
|
|
667 |
|
# additional fields for Hyper Estraier |
668 |
|
$record->{'@mdate'} = $parm{'mdate'} if ($parm{'mdate'}); |
669 |
|
|
670 |
$headline =~ s/^$DATA//o; # $did |
$headline =~ s/^$DATA//o; # $did |
671 |
$headline =~ s/\s+/ /g; $headline =~ s/^\s+//; |
$headline =~ s/\s+/ /g; $headline =~ s/^\s+//; |
672 |
|
|
676 |
headline => $headline, |
headline => $headline, |
677 |
%{$record}); |
%{$record}); |
678 |
} else { |
} else { |
679 |
|
foreach (keys %{$parm{parent}}) { |
680 |
|
next if (/^_/); |
681 |
|
$record->{$_} = $parm{parent}->{$_} if ($parm{parent}->{$_}); |
682 |
|
} |
683 |
$tb->insert('docid' => $did, |
$tb->insert('docid' => $did, |
684 |
headline => $headline, |
headline => $headline, |
685 |
source => $parm{source}, |
source => $parm{source}, |
686 |
parent => $parm{parent}, |
parent => $parm{parent}->{_id}, |
687 |
%{$record}); |
%{$record}); |
688 |
} |
} |
689 |
} |
} |
749 |
return if (! $max); |
return if (! $max); |
750 |
$max--; |
$max--; |
751 |
|
|
752 |
return unless /^(.*)\.tar(\.gz|\.Z)$/; |
return unless /^(.*)\.(tar\.(gz|Z)|tgz)$/; |
753 |
|
|
754 |
my ($archive, $version) = version($1); |
my ($archive, $version) = version($1); |
755 |
|
|
782 |
} |
} |
783 |
} |
} |
784 |
|
|
|
sub create_table { |
|
|
my %parm = @_; |
|
|
|
|
|
my $access = bless {}, 'WAIT::Document::Find'; |
|
|
|
|
|
my $stem = [{ |
|
|
'prefix' => ['isotr', 'isolc'], |
|
|
'intervall' => ['isotr', 'isolc'], |
|
|
}, 'isotr', 'isolc', 'split2', 'stop', 'Stem']; |
|
|
my $text = [{ |
|
|
'prefix' => ['isotr', 'isolc'], |
|
|
'intervall' => ['isotr', 'isolc'], |
|
|
}, |
|
|
'isotr', 'isolc', 'split2', 'stop']; |
|
|
my $sound = ['isotr', 'isolc', 'split2', 'Soundex'],; |
|
|
|
|
|
my $tb = |
|
|
$parm{db}->create_table |
|
|
(name => $parm{table}, |
|
|
attr => ['docid', 'headline', 'source', 'size', 'parent'], |
|
|
keyset => [['docid']], |
|
|
layout => $parm{layout}, |
|
|
access => $access, |
|
|
invindex => |
|
|
[ |
|
|
'name' => $stem, |
|
|
'synopsis' => $stem, |
|
|
'bugs' => $stem, |
|
|
'description' => $stem, |
|
|
'text' => $stem, |
|
|
'environment' => $text, |
|
|
'example' => $text, 'example' => $stem, |
|
|
'author' => $sound, 'author' => $stem, |
|
|
] |
|
|
); |
|
|
die "Could not create table '$parm{table}'" unless $tb; |
|
|
$tb; |
|
|
} |
|
|
|
|
785 |
my %MON; |
my %MON; |
786 |
my $YEAR; |
my $YEAR; |
787 |
|
|
918 |
my @invindex = $self->{'invindex'} || die "no invindex in object"; |
my @invindex = $self->{'invindex'} || die "no invindex in object"; |
919 |
|
|
920 |
foreach my $attr (keys %{$args}) { |
foreach my $attr (keys %{$args}) { |
921 |
if (grep(/^$attr$/, @{ $self->{'attr'} })) { |
if (grep(/^$attr$/, @{ $self->{'attr'} }) or $attr =~ m/^@/o) { |
922 |
$doc->add_attr($attr, $args->{$attr}); |
$doc->add_attr($attr, $args->{$attr}); |
923 |
} |
} |
924 |
if (grep(/^$attr$/, @{ $self->{'invindex'} })) { |
if (grep(/^$attr$/, @{ $self->{'invindex'} })) { |
929 |
print STDERR $doc->dump_draft if ($self->{'debug'}); |
print STDERR $doc->dump_draft if ($self->{'debug'}); |
930 |
|
|
931 |
my $id; |
my $id; |
932 |
unless ($id = $self->{'node'}->put_doc($doc)) { |
unless ($self->{'node'}->put_doc($doc)) { |
933 |
printf STDERR "ERROR: %d\n", $self->{'node'}->status; |
printf STDERR "ERROR: %d\n", $self->{'node'}->status; |
934 |
#} else { |
} else { |
935 |
# print STDERR "id: $id\n"; |
$id = $self->{'node'}->uri_to_id( $uri ); |
936 |
|
if ($id != -1) { |
937 |
|
print STDERR "id: $id\n" if ($self->{'debug'}) |
938 |
|
} else { |
939 |
|
print STDERR "ERROR: can't find id for newly insrted document $uri\n"; |
940 |
|
} |
941 |
} |
} |
942 |
|
|
943 |
return $id; |
return $id; |