/[hyperestraier_wrappers]/trunk/perl/scripts/cpanest
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/perl/scripts/cpanest

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 22 by dpavlin, Fri Sep 16 17:29:53 2005 UTC revision 34 by dpavlin, Thu Dec 29 20:08:46 2005 UTC
# Line 91  Copyright (c) 2005, Dobrica Pavlinusic Line 91  Copyright (c) 2005, Dobrica Pavlinusic
91  use strict;  use strict;
92    
93  use File::Path;  use File::Path;
 use DB_File;  
94  use Getopt::Long;  use Getopt::Long;
95  use File::Find;  use File::Find;
96  use File::Basename;  use File::Basename;
97  use IO::File;  use IO::File;
98  use IO::Zlib;  use IO::Zlib;
99    use POSIX qw/strftime/;
100    
101  use lib '/data/wait/lib';  use lib '/data/wait/lib';
102    
# Line 153  if ($OPT{max}) { Line 153  if ($OPT{max}) {
153    
154  my $tb = new HyperEstraier::WAIT::Table(  my $tb = new HyperEstraier::WAIT::Table(
155          uri     => $OPT{node},          uri     => $OPT{node},
156          attr    => ['docid', 'headline', 'source', 'size', 'parent'],          attr    => ['docid', 'headline', 'source', 'size', 'parent', 'version'],
157          key     => 'docid',          key     => 'docid',
158          invindex => [ qw/name synopsis bugs description text environment example author/ ],          invindex => [ qw/name synopsis bugs description text environment example author/ ],
159          debug   => $OPT{debug},          debug   => $OPT{debug},
# Line 422  for my $tar (sort keys %ARCHIVE) { Line 422  for my $tar (sort keys %ARCHIVE) {
422    my $base = (split /\//, $ARCHIVE{$tar})[-1];    my $base = (split /\//, $ARCHIVE{$tar})[-1];
423    my $parent;    my $parent;
424    
425      my %attr;
426    
427    # logging    # logging
428    if ($OPT{trust_mtime}) {    if ($OPT{trust_mtime}) {
429      printf "%-20s %10s %s\t", $tar,      $attr{'@mdate'} = strftime('%Y-%m-%dT%H:%M:%S+00:00', gmtime($VERSION{$tar}));
430          substr(scalar(localtime($VERSION{$tar})),0,10), $base;      $parent->{'@mdate'} = $attr{'@mdate'};
431        printf "%-20s %10s %s\t", $tar, $attr{'@mdate'}, $base;
432    } else {    } else {
433      printf "%-20s %10.5f %s\t", $tar, $VERSION{$tar}, $base;      $attr{'version'} = $VERSION{$tar};
434        printf "%-20s %10.5f %s\t", $tar, $attr{'version'}, $base;
435    }    }
436    
437    # Remember the archive    # Remember the archive
# Line 436  for my $tar (sort keys %ARCHIVE) { Line 440  for my $tar (sort keys %ARCHIVE) {
440      print "skipping\n";      print "skipping\n";
441      next ARCHIVE;      next ARCHIVE;
442    } else {    } else {
443      $parent = $tb->insert(docid    => $base,      $parent->{_id} = $tb->insert(docid    => $base,
444                            headline => $ARCHIVE{$tar}) unless $OPT{test};                            headline => $ARCHIVE{$tar},
445                              %attr
446        ) unless $OPT{test};
447      print "indexing\n";      print "indexing\n";
448    }    }
449    
# Line 492  for my $tar (sort keys %ARCHIVE) { Line 498  for my $tar (sort keys %ARCHIVE) {
498          print "Please alias '$tar' to '$prefix' next time!\n";          print "Please alias '$tar' to '$prefix' next time!\n";
499          print "See alias table later.\n";          print "See alias table later.\n";
500          $NEW_ALIAS{$tar} = $prefix;          $NEW_ALIAS{$tar} = $prefix;
501          $tb->delete_by_key($parent);          $tb->delete_by_key($parent->{_id});
502          next ARCHIVE;          next ARCHIVE;
503        } else {        } else {
504          print "Assuming that tar file name $tar is a valid prefix\n";          print "Assuming that tar file name $tar is a valid prefix\n";
# Line 549  for my $tar (sort keys %ARCHIVE) { Line 555  for my $tar (sort keys %ARCHIVE) {
555      unless ($OPT{test}) {      unless ($OPT{test}) {
556        $fh->print($val);        $fh->print($val);
557        index_pod(file => $path, parent => $parent,        index_pod(file => $path, parent => $parent,
558                  text => $val,  source => $ARCHIVE{$tar});                  text => $val,  source => $ARCHIVE{$tar},
559          );
560      }      }
561    }    }
562    
# Line 628  sub index_pod { Line 635  sub index_pod {
635      $did = $abs_did;      $did = $abs_did;
636    }    }
637    if ($did) {                   # have it version    if ($did) {                   # have it version
638      if (!$parm{remove}) {      if (!$parm{remove} and !$OPT{force}) {
639        warn "duplicate: $did\n";        warn "duplicate: $did\n";
640        return;        return;
641      }      }
# Line 657  sub index_pod { Line 664  sub index_pod {
664    $record->{size} =  length($parm{'text'});    $record->{size} =  length($parm{'text'});
665    my $headline    =  $record->{name} || $did;    my $headline    =  $record->{name} || $did;
666    
667      # additional fields for Hyper Estraier
668      $record->{'@mdate'} = $parm{'mdate'} if ($parm{'mdate'});
669    
670    $headline =~ s/^$DATA//o;     # $did    $headline =~ s/^$DATA//o;     # $did
671    $headline =~ s/\s+/ /g; $headline =~ s/^\s+//;    $headline =~ s/\s+/ /g; $headline =~ s/^\s+//;
672    
# Line 666  sub index_pod { Line 676  sub index_pod {
676                  headline => $headline,                  headline => $headline,
677                  %{$record});                  %{$record});
678    } else {    } else {
679        foreach (keys %{$parm{parent}}) {
680            next if (/^_/);
681            $record->{$_} = $parm{parent}->{$_} if ($parm{parent}->{$_});
682        }
683      $tb->insert('docid'  => $did,      $tb->insert('docid'  => $did,
684                  headline => $headline,                  headline => $headline,
685                  source   => $parm{source},                  source   => $parm{source},
686                  parent   => $parm{parent},                  parent   => $parm{parent}->{_id},
687                  %{$record});                  %{$record});
688    }    }
689  }  }
# Line 735  sub wanted { Line 749  sub wanted {
749    return if (! $max);    return if (! $max);
750    $max--;    $max--;
751    
752    return unless /^(.*)\.tar(\.gz|\.Z)$/;    return unless /^(.*)\.(tar\.(gz|Z)|tgz)$/;
753    
754    my ($archive, $version) = version($1);    my ($archive, $version) = version($1);
755        
# Line 768  sub wanted { Line 782  sub wanted {
782    }    }
783  }  }
784    
 sub create_table {  
   my %parm = @_;  
   
   my $access = bless {}, 'WAIT::Document::Find';  
   
   my $stem = [{  
                'prefix'    => ['isotr', 'isolc'],  
                'intervall' => ['isotr', 'isolc'],  
               }, 'isotr', 'isolc', 'split2', 'stop', 'Stem'];  
   my $text = [{  
                'prefix'    => ['isotr', 'isolc'],  
                'intervall' => ['isotr', 'isolc'],  
               },  
               'isotr', 'isolc', 'split2', 'stop'];  
   my $sound = ['isotr', 'isolc', 'split2', 'Soundex'],;  
   
   my $tb =  
     $parm{db}->create_table  
       (name     => $parm{table},  
        attr     => ['docid', 'headline', 'source', 'size', 'parent'],  
        keyset   => [['docid']],  
        layout   => $parm{layout},  
        access   => $access,  
        invindex =>  
        [  
         'name'         => $stem,  
         'synopsis'     => $stem,  
         'bugs'         => $stem,  
         'description'  => $stem,  
         'text'         => $stem,  
         'environment'  => $text,  
         'example'      => $text,  'example' => $stem,  
         'author'       => $sound, 'author'  => $stem,  
        ]  
       );  
   die "Could not create table '$parm{table}'" unless $tb;  
   $tb;  
 }  
   
785  my %MON;  my %MON;
786  my $YEAR;  my $YEAR;
787    
# Line 943  sub insert { Line 918  sub insert {
918          my @invindex = $self->{'invindex'} || die "no invindex in object";          my @invindex = $self->{'invindex'} || die "no invindex in object";
919    
920          foreach my $attr (keys %{$args}) {          foreach my $attr (keys %{$args}) {
921                  if (grep(/^$attr$/, @{ $self->{'attr'} })) {                  if (grep(/^$attr$/, @{ $self->{'attr'} }) or $attr =~ m/^@/o) {
922                          $doc->add_attr($attr, $args->{$attr});                          $doc->add_attr($attr, $args->{$attr});
923                  }                  }
924                  if (grep(/^$attr$/, @{ $self->{'invindex'} })) {                  if (grep(/^$attr$/, @{ $self->{'invindex'} })) {
# Line 954  sub insert { Line 929  sub insert {
929          print STDERR $doc->dump_draft if ($self->{'debug'});          print STDERR $doc->dump_draft if ($self->{'debug'});
930    
931          my $id;          my $id;
932          unless ($id = $self->{'node'}->put_doc($doc)) {          unless ($self->{'node'}->put_doc($doc)) {
933                  printf STDERR "ERROR: %d\n", $self->{'node'}->status;                  printf STDERR "ERROR: %d\n", $self->{'node'}->status;
934          #} else {          } else {
935          #       print STDERR "id: $id\n";                  $id = $self->{'node'}->uri_to_id( $uri );
936                    if ($id != -1) {
937                            print STDERR "id: $id\n" if ($self->{'debug'})
938                    } else {
939                            print STDERR "ERROR: can't find id for newly insrted document $uri\n";
940                    }
941          }          }
942    
943          return $id;          return $id;

Legend:
Removed from v.22  
changed lines
  Added in v.34

  ViewVC Help
Powered by ViewVC 1.1.26