91 |
use strict; |
use strict; |
92 |
|
|
93 |
use File::Path; |
use File::Path; |
|
use DB_File; |
|
94 |
use Getopt::Long; |
use Getopt::Long; |
95 |
use File::Find; |
use File::Find; |
96 |
use File::Basename; |
use File::Basename; |
153 |
|
|
154 |
my $tb = new HyperEstraier::WAIT::Table( |
my $tb = new HyperEstraier::WAIT::Table( |
155 |
uri => $OPT{node}, |
uri => $OPT{node}, |
156 |
attr => ['docid', 'headline', 'source', 'size', 'parent'], |
attr => ['docid', 'headline', 'source', 'size', 'parent', 'version'], |
157 |
key => 'docid', |
key => 'docid', |
158 |
invindex => [ qw/name synopsis bugs description text environment example author/ ], |
invindex => [ qw/name synopsis bugs description text environment example author/ ], |
159 |
debug => $OPT{debug}, |
debug => $OPT{debug}, |
421 |
next if $OPT{match} and $ARCHIVE{$tar} !~ /$OPT{match}/o; |
next if $OPT{match} and $ARCHIVE{$tar} !~ /$OPT{match}/o; |
422 |
my $base = (split /\//, $ARCHIVE{$tar})[-1]; |
my $base = (split /\//, $ARCHIVE{$tar})[-1]; |
423 |
my $parent; |
my $parent; |
|
my $parent_mdate; |
|
424 |
|
|
425 |
my %attr; |
my %attr; |
426 |
|
|
427 |
# logging |
# logging |
428 |
if ($OPT{trust_mtime}) { |
if ($OPT{trust_mtime}) { |
429 |
$attr{'@mdate'} = strftime('%Y-%m-%dT%H:%M:%S+00:00', gmtime($VERSION{$tar})); |
$attr{'@mdate'} = strftime('%Y-%m-%dT%H:%M:%S+00:00', gmtime($VERSION{$tar})); |
430 |
$parent_mdate = $attr{'@mdate'}; |
$parent->{'@mdate'} = $attr{'@mdate'}; |
431 |
printf "%-20s %10s %s\t", $tar, $attr{'@mdate'}, $base; |
printf "%-20s %10s %s\t", $tar, $attr{'@mdate'}, $base; |
432 |
} else { |
} else { |
433 |
$attr{'version'} = $VERSION{$tar}; |
$attr{'version'} = $VERSION{$tar}; |
440 |
print "skipping\n"; |
print "skipping\n"; |
441 |
next ARCHIVE; |
next ARCHIVE; |
442 |
} else { |
} else { |
443 |
$parent = $tb->insert(docid => $base, |
$parent->{_id} = $tb->insert(docid => $base, |
444 |
headline => $ARCHIVE{$tar}, |
headline => $ARCHIVE{$tar}, |
445 |
%attr |
%attr |
446 |
) unless $OPT{test}; |
) unless $OPT{test}; |
498 |
print "Please alias '$tar' to '$prefix' next time!\n"; |
print "Please alias '$tar' to '$prefix' next time!\n"; |
499 |
print "See alias table later.\n"; |
print "See alias table later.\n"; |
500 |
$NEW_ALIAS{$tar} = $prefix; |
$NEW_ALIAS{$tar} = $prefix; |
501 |
$tb->delete_by_key($parent); |
$tb->delete_by_key($parent->{_id}); |
502 |
next ARCHIVE; |
next ARCHIVE; |
503 |
} else { |
} else { |
504 |
print "Assuming that tar file name $tar is a valid prefix\n"; |
print "Assuming that tar file name $tar is a valid prefix\n"; |
556 |
$fh->print($val); |
$fh->print($val); |
557 |
index_pod(file => $path, parent => $parent, |
index_pod(file => $path, parent => $parent, |
558 |
text => $val, source => $ARCHIVE{$tar}, |
text => $val, source => $ARCHIVE{$tar}, |
|
mdate => $parent_mdate, |
|
559 |
); |
); |
560 |
} |
} |
561 |
} |
} |
664 |
$record->{size} = length($parm{'text'}); |
$record->{size} = length($parm{'text'}); |
665 |
my $headline = $record->{name} || $did; |
my $headline = $record->{name} || $did; |
666 |
|
|
667 |
|
# additional fields for Hyper Estraier |
668 |
$record->{'@mdate'} = $parm{'mdate'} if ($parm{'mdate'}); |
$record->{'@mdate'} = $parm{'mdate'} if ($parm{'mdate'}); |
669 |
|
|
670 |
$headline =~ s/^$DATA//o; # $did |
$headline =~ s/^$DATA//o; # $did |
676 |
headline => $headline, |
headline => $headline, |
677 |
%{$record}); |
%{$record}); |
678 |
} else { |
} else { |
679 |
|
foreach (keys %{$parm{parent}}) { |
680 |
|
next if (/^_/); |
681 |
|
$record->{$_} = $parm{parent}->{$_} if ($parm{parent}->{$_}); |
682 |
|
} |
683 |
$tb->insert('docid' => $did, |
$tb->insert('docid' => $did, |
684 |
headline => $headline, |
headline => $headline, |
685 |
source => $parm{source}, |
source => $parm{source}, |
686 |
parent => $parm{parent}, |
parent => $parm{parent}->{_id}, |
687 |
%{$record}); |
%{$record}); |
688 |
} |
} |
689 |
} |
} |
782 |
} |
} |
783 |
} |
} |
784 |
|
|
|
sub create_table { |
|
|
my %parm = @_; |
|
|
|
|
|
my $access = bless {}, 'WAIT::Document::Find'; |
|
|
|
|
|
my $stem = [{ |
|
|
'prefix' => ['isotr', 'isolc'], |
|
|
'intervall' => ['isotr', 'isolc'], |
|
|
}, 'isotr', 'isolc', 'split2', 'stop', 'Stem']; |
|
|
my $text = [{ |
|
|
'prefix' => ['isotr', 'isolc'], |
|
|
'intervall' => ['isotr', 'isolc'], |
|
|
}, |
|
|
'isotr', 'isolc', 'split2', 'stop']; |
|
|
my $sound = ['isotr', 'isolc', 'split2', 'Soundex'],; |
|
|
|
|
|
my $tb = |
|
|
$parm{db}->create_table |
|
|
(name => $parm{table}, |
|
|
attr => ['docid', 'headline', 'source', 'size', 'parent'], |
|
|
keyset => [['docid']], |
|
|
layout => $parm{layout}, |
|
|
access => $access, |
|
|
invindex => |
|
|
[ |
|
|
'name' => $stem, |
|
|
'synopsis' => $stem, |
|
|
'bugs' => $stem, |
|
|
'description' => $stem, |
|
|
'text' => $stem, |
|
|
'environment' => $text, |
|
|
'example' => $text, 'example' => $stem, |
|
|
'author' => $sound, 'author' => $stem, |
|
|
] |
|
|
); |
|
|
die "Could not create table '$parm{table}'" unless $tb; |
|
|
$tb; |
|
|
} |
|
|
|
|
785 |
my %MON; |
my %MON; |
786 |
my $YEAR; |
my $YEAR; |
787 |
|
|
929 |
print STDERR $doc->dump_draft if ($self->{'debug'}); |
print STDERR $doc->dump_draft if ($self->{'debug'}); |
930 |
|
|
931 |
my $id; |
my $id; |
932 |
unless ($id = $self->{'node'}->put_doc($doc)) { |
unless ($self->{'node'}->put_doc($doc)) { |
933 |
printf STDERR "ERROR: %d\n", $self->{'node'}->status; |
printf STDERR "ERROR: %d\n", $self->{'node'}->status; |
934 |
#} else { |
} else { |
935 |
# print STDERR "id: $id\n"; |
$id = $self->{'node'}->uri_to_id( $uri ); |
936 |
|
if ($id != -1) { |
937 |
|
print STDERR "id: $id\n" if ($self->{'debug'}) |
938 |
|
} else { |
939 |
|
print STDERR "ERROR: can't find id for newly insrted document $uri\n"; |
940 |
|
} |
941 |
} |
} |
942 |
|
|
943 |
return $id; |
return $id; |