/[Grep]/bin/reindex.pl
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /bin/reindex.pl

Parent Directory | Revision Log | View Patch

revision 128 by dpavlin, Sun Apr 29 00:48:04 2007 UTC | revision 129 by dpavlin, Sun Apr 29 11:37:28 2007 UTC
# Line 12  use Data::Dump qw/dump/; Line 12  use Data::Dump qw/dump/;
12  use Text::DeDuper;  use Text::DeDuper;
13  use Encode;  use Encode;
14    
15  my $remove_duplicate = 1;  my $remove_duplicates = 1;
16    
17  $|=1;  $|=1;
18    
# Line 31  my ( $total, $duplicates ) = ( 0, 0 ); Line 31  my ( $total, $duplicates ) = ( 0, 0 );
31    
32  while ( my $i = $coll->next ) {  while ( my $i = $coll->next ) {
33    
34          my $c = encode('utf-8', $i->content);          print $i->id;
35    
36            if ( $remove_duplicates ) {
37    
38                    my $c = encode('utf-8', $i->content);
39    
40                    my @s = sort $deduper->find_similar( $c );
41                    if ( @s ) {
42                            #warn " similar = ",dump( @s );
43    
44                            foreach my $id ( @s ) {
45                                    next if $id == $i->id;  # keep current
46                                    my $si = Grep::Model::Item->new();
47                                    $si->load( $id ) or die "can't find similar item $id";
48                                    print " -$id-";
49                                    $si->delete;
50                                    $duplicates++;
51                            }
52                    }
53    
         if ( $remove_duplicate && $deduper->find_similar( $c ) ) {  
                 $i->delete;  
                 print "-",$i->id,"- ";  
                 $duplicates++;  
         } else {  
                 $search->add( $i, $i->in_feed->owner->id );  
                 print $i->id;  
54                  $deduper->add_doc( $i->id, $c );                  $deduper->add_doc( $i->id, $c );
                 print ' ';  
55          }          }
56    
57            $search->add( $i, $i->in_feed->owner->id );
58            print ' ';
59          $total++;          $total++;
60  }  }
61    
62  print "$total records indexed", $remove_duplicate ? " ($duplicates duplicates)" : "", "\n";  print "$total records indexed", $remove_duplicates ? " ($duplicates duplicates)" : "", "\n";
63    
64  $search->finish;  $search->finish;

Legend:
Removed from v.128  
changed lines
  Added in v.129

  ViewVC Help
Powered by ViewVC 1.1.26