/[Grep]/bin/reindex.pl
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /bin/reindex.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 128 - (show annotations)
Sun Apr 29 00:48:04 2007 UTC (16 years, 11 months ago) by dpavlin
File MIME type: text/plain
File size: 996 byte(s)
use superuser when reindex-ing
#!/usr/bin/perl

# Helper script to re-index the full text index.
#
# Walks every item in an unlimited Grep::Model::ItemCollection (loaded with
# the superuser as current user), optionally deletes items whose content
# Text::DeDuper reports as similar to one seen earlier in this run, and adds
# all remaining items to the Grep::Search index.
#
# Progress is printed per item: a plain id for an indexed item, "-id-" for
# an item deleted as a duplicate.

use strict;
use warnings;

use lib 'lib';

use Jifty;
use Grep::Search;
use Data::Dump qw/dump/;
use Text::DeDuper;
use Encode;

# When true, items that look like duplicates are deleted instead of indexed.
my $remove_duplicate = 1;

# Autoflush the selected output handle so per-item progress appears at once.
$| = 1;

BEGIN { Jifty->new; };

my $system_user = Grep::CurrentUser->superuser;
my $coll        = Grep::Model::ItemCollection->new(
    results_are_readable => 1,
    current_user         => $system_user,
);
$coll->unlimit;

print "indexing ", $coll->count, " items ";

my $search  = Grep::Search->new();
my $deduper = Text::DeDuper->new();

# $total counts every item visited (indexed or deleted);
# $duplicates counts only the ones deleted as duplicates.
my ( $total, $duplicates ) = ( 0, 0 );

while ( my $i = $coll->next ) {

    # Capture the id up front so that progress output does not depend on
    # the record object still answering ->id after it has been deleted.
    my $id = $i->id;

    # The de-duper is handed UTF-8 encoded bytes of the item content.
    my $c = encode( 'utf-8', $i->content );

    if ( $remove_duplicate && $deduper->find_similar($c) ) {
        $i->delete;
        print "-", $id, "- ";
        $duplicates++;
    } else {
        # Index the item under the id of the owner of the feed it is in.
        $search->add( $i, $i->in_feed->owner->id );
        print $id;

        # Remember this content so later items can be compared against it.
        $deduper->add_doc( $id, $c );
        print ' ';
    }
    $total++;
}

print "$total records indexed", $remove_duplicate ? " ($duplicates duplicates)" : "", "\n";

# Tell the search object that no more items will be added.
$search->finish;

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26