97 |
use File::Basename; |
use File::Basename; |
98 |
use IO::File; |
use IO::File; |
99 |
use IO::Zlib; |
use IO::Zlib; |
100 |
|
use POSIX qw/strftime/; |
101 |
|
|
102 |
use lib '/data/wait/lib'; |
use lib '/data/wait/lib'; |
103 |
|
|
154 |
|
|
155 |
my $tb = new HyperEstraier::WAIT::Table( |
my $tb = new HyperEstraier::WAIT::Table( |
156 |
uri => $OPT{node}, |
uri => $OPT{node}, |
157 |
attr => ['docid', 'headline', 'source', 'size', 'parent'], |
attr => ['docid', 'headline', 'source', 'size', 'parent', 'version'], |
158 |
key => 'docid', |
key => 'docid', |
159 |
invindex => [ qw/name synopsis bugs description text environment example author/ ], |
invindex => [ qw/name synopsis bugs description text environment example author/ ], |
160 |
debug => $OPT{debug}, |
debug => $OPT{debug}, |
423 |
my $base = (split /\//, $ARCHIVE{$tar})[-1]; |
my $base = (split /\//, $ARCHIVE{$tar})[-1]; |
424 |
my $parent; |
my $parent; |
425 |
|
|
426 |
|
my %attr; |
427 |
|
|
428 |
# logging |
# logging |
429 |
if ($OPT{trust_mtime}) { |
if ($OPT{trust_mtime}) { |
430 |
printf "%-20s %10s %s\t", $tar, |
$attr{'@mdate'} = strftime('%Y-%m-%dT%H:%M:%S+00:00', gmtime($VERSION{$tar})); |
431 |
substr(scalar(localtime($VERSION{$tar})),0,10), $base; |
$parent->{'@mdate'} = $attr{'@mdate'}; |
432 |
|
printf "%-20s %10s %s\t", $tar, $attr{'@mdate'}, $base; |
433 |
} else { |
} else { |
434 |
printf "%-20s %10.5f %s\t", $tar, $VERSION{$tar}, $base; |
$attr{'version'} = $VERSION{$tar}; |
435 |
|
printf "%-20s %10.5f %s\t", $tar, $attr{'version'}, $base; |
436 |
} |
} |
437 |
|
|
438 |
# Remember the archive |
# Remember the archive |
441 |
print "skipping\n"; |
print "skipping\n"; |
442 |
next ARCHIVE; |
next ARCHIVE; |
443 |
} else { |
} else { |
444 |
$parent = $tb->insert(docid => $base, |
$parent->{_id} = $tb->insert(docid => $base, |
445 |
headline => $ARCHIVE{$tar}) unless $OPT{test}; |
headline => $ARCHIVE{$tar}, |
446 |
|
%attr |
447 |
|
) unless $OPT{test}; |
448 |
print "indexing\n"; |
print "indexing\n"; |
449 |
} |
} |
450 |
|
|
499 |
print "Please alias '$tar' to '$prefix' next time!\n"; |
print "Please alias '$tar' to '$prefix' next time!\n"; |
500 |
print "See alias table later.\n"; |
print "See alias table later.\n"; |
501 |
$NEW_ALIAS{$tar} = $prefix; |
$NEW_ALIAS{$tar} = $prefix; |
502 |
$tb->delete_by_key($parent); |
$tb->delete_by_key($parent->{_id}); |
503 |
next ARCHIVE; |
next ARCHIVE; |
504 |
} else { |
} else { |
505 |
print "Assuming that tar file name $tar is a valid prefix\n"; |
print "Assuming that tar file name $tar is a valid prefix\n"; |
556 |
unless ($OPT{test}) { |
unless ($OPT{test}) { |
557 |
$fh->print($val); |
$fh->print($val); |
558 |
index_pod(file => $path, parent => $parent, |
index_pod(file => $path, parent => $parent, |
559 |
text => $val, source => $ARCHIVE{$tar}); |
text => $val, source => $ARCHIVE{$tar}, |
560 |
|
); |
561 |
} |
} |
562 |
} |
} |
563 |
|
|
636 |
$did = $abs_did; |
$did = $abs_did; |
637 |
} |
} |
638 |
if ($did) { # have it version |
if ($did) { # have it version |
639 |
if (!$parm{remove}) { |
if (!$parm{remove} and !$OPT{force}) { |
640 |
warn "duplicate: $did\n"; |
warn "duplicate: $did\n"; |
641 |
return; |
return; |
642 |
} |
} |
665 |
$record->{size} = length($parm{'text'}); |
$record->{size} = length($parm{'text'}); |
666 |
my $headline = $record->{name} || $did; |
my $headline = $record->{name} || $did; |
667 |
|
|
668 |
|
# additional fields for Hyper Estraier |
669 |
|
$record->{'@mdate'} = $parm{'mdate'} if ($parm{'mdate'}); |
670 |
|
|
671 |
$headline =~ s/^$DATA//o; # $did |
$headline =~ s/^$DATA//o; # $did |
672 |
$headline =~ s/\s+/ /g; $headline =~ s/^\s+//; |
$headline =~ s/\s+/ /g; $headline =~ s/^\s+//; |
673 |
|
|
677 |
headline => $headline, |
headline => $headline, |
678 |
%{$record}); |
%{$record}); |
679 |
} else { |
} else { |
680 |
|
foreach (keys %{$parm{parent}}) { |
681 |
|
next if (/^_/); |
682 |
|
$record->{$_} = $parm{parent}->{$_} if ($parm{parent}->{$_}); |
683 |
|
} |
684 |
$tb->insert('docid' => $did, |
$tb->insert('docid' => $did, |
685 |
headline => $headline, |
headline => $headline, |
686 |
source => $parm{source}, |
source => $parm{source}, |
687 |
parent => $parm{parent}, |
parent => $parm{parent}->{_id}, |
688 |
%{$record}); |
%{$record}); |
689 |
} |
} |
690 |
} |
} |
750 |
return if (! $max); |
return if (! $max); |
751 |
$max--; |
$max--; |
752 |
|
|
753 |
return unless /^(.*)\.tar(\.gz|\.Z)$/; |
return unless /^(.*)\.(tar\.(gz|Z)|tgz)$/; |
754 |
|
|
755 |
my ($archive, $version) = version($1); |
my ($archive, $version) = version($1); |
756 |
|
|
783 |
} |
} |
784 |
} |
} |
785 |
|
|
|
sub create_table { |
|
|
my %parm = @_; |
|
|
|
|
|
my $access = bless {}, 'WAIT::Document::Find'; |
|
|
|
|
|
my $stem = [{ |
|
|
'prefix' => ['isotr', 'isolc'], |
|
|
'intervall' => ['isotr', 'isolc'], |
|
|
}, 'isotr', 'isolc', 'split2', 'stop', 'Stem']; |
|
|
my $text = [{ |
|
|
'prefix' => ['isotr', 'isolc'], |
|
|
'intervall' => ['isotr', 'isolc'], |
|
|
}, |
|
|
'isotr', 'isolc', 'split2', 'stop']; |
|
|
my $sound = ['isotr', 'isolc', 'split2', 'Soundex'],; |
|
|
|
|
|
my $tb = |
|
|
$parm{db}->create_table |
|
|
(name => $parm{table}, |
|
|
attr => ['docid', 'headline', 'source', 'size', 'parent'], |
|
|
keyset => [['docid']], |
|
|
layout => $parm{layout}, |
|
|
access => $access, |
|
|
invindex => |
|
|
[ |
|
|
'name' => $stem, |
|
|
'synopsis' => $stem, |
|
|
'bugs' => $stem, |
|
|
'description' => $stem, |
|
|
'text' => $stem, |
|
|
'environment' => $text, |
|
|
'example' => $text, 'example' => $stem, |
|
|
'author' => $sound, 'author' => $stem, |
|
|
] |
|
|
); |
|
|
die "Could not create table '$parm{table}'" unless $tb; |
|
|
$tb; |
|
|
} |
|
|
|
|
786 |
my %MON; |
my %MON; |
787 |
my $YEAR; |
my $YEAR; |
788 |
|
|
919 |
my @invindex = $self->{'invindex'} || die "no invindex in object"; |
my @invindex = $self->{'invindex'} || die "no invindex in object"; |
920 |
|
|
921 |
foreach my $attr (keys %{$args}) { |
foreach my $attr (keys %{$args}) { |
922 |
if (grep(/^$attr$/, @{ $self->{'attr'} })) { |
if (grep(/^$attr$/, @{ $self->{'attr'} }) or $attr =~ m/^@/o) { |
923 |
$doc->add_attr($attr, $args->{$attr}); |
$doc->add_attr($attr, $args->{$attr}); |
924 |
} |
} |
925 |
if (grep(/^$attr$/, @{ $self->{'invindex'} })) { |
if (grep(/^$attr$/, @{ $self->{'invindex'} })) { |
930 |
print STDERR $doc->dump_draft if ($self->{'debug'}); |
print STDERR $doc->dump_draft if ($self->{'debug'}); |
931 |
|
|
932 |
my $id; |
my $id; |
933 |
unless ($id = $self->{'node'}->put_doc($doc)) { |
unless ($self->{'node'}->put_doc($doc)) { |
934 |
printf STDERR "ERROR: %d\n", $self->{'node'}->status; |
printf STDERR "ERROR: %d\n", $self->{'node'}->status; |
935 |
#} else { |
} else { |
936 |
# print STDERR "id: $id\n"; |
$id = $self->{'node'}->uri_to_id( $uri ); |
937 |
|
if ($id != -1) { |
938 |
|
print STDERR "id: $id\n" if ($self->{'debug'}) |
939 |
|
} else { |
940 |
|
print STDERR "ERROR: can't find id for newly insrted document $uri\n"; |
941 |
|
} |
942 |
} |
} |
943 |
|
|
944 |
return $id; |
return $id; |
954 |
my $self = shift; |
my $self = shift; |
955 |
my $key_v = shift || die "no key?"; |
my $key_v = shift || die "no key?"; |
956 |
|
|
957 |
my $doc = HyperEstraier::Document->new; |
unless ($self->{'node'}->out_doc_by_uri( 'file://' . $key_v )) { |
958 |
$doc->add_attr('@uri', 'file:///' . $key_v); |
print STDERR "WARNING: can't delete document $key_v\n"; |
959 |
|
} |
|
# $self->{'node'}->edit_doc($doc); |
|
|
$self->{'node'}->put_doc($doc) or printf STDERR "ERROR: %d\n", $self->{'node'}->status; |
|
960 |
} |
} |
961 |
|
|
962 |
=head2 delete |
=head2 delete |