/[Grep]/lib/Grep/Search/KinoSearch.pm
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /lib/Grep/Search/KinoSearch.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 98 - (hide annotations)
Sat Feb 24 12:16:57 2007 UTC (17 years, 2 months ago) by dpavlin
Original Path: lib/Grep/Search.pm
File size: 4749 byte(s)
code cleanup, now isa Jifty::Object, more debug logging
1 dpavlin 47 package Grep::Search;
2    
3     use strict;
4     use warnings;
5 dpavlin 98 use base 'Jifty::Object';
6 dpavlin 47
7     use Data::Dump qw/dump/;
8     use Lucene;
9     use Jifty::Util;
10    
# Directory of the on-disk Lucene index: the application root plus /var/lucene.
11     my $index_path = Jifty::Util->app_root . '/var/lucene';
12    
# Lucene objects shared by all methods in this file. Each one is built on
# first use by the accessor of the same name and reset to undef by finish().
13     my ( $analyzer, $store, $writer );
14    
# When true, add() traces every field it indexes with warn.
15 dpavlin 53 my $debug = 1;
# Cached answer of create(): undef until first asked, then 1 (index has to be
# created) or 0 (index already exists). Reset to undef by finish().
16 dpavlin 58 my $create;
17 dpavlin 47
# Decide whether the Lucene index has to be created from scratch.
#
# Returns 1 when "$index_path/segments" does not exist yet and 0 when it
# does. The answer is computed once and kept in the file-level $create, so
# later calls return the same value until $create is reset to undef.
sub create {
    my ($self) = @_;

    # an earlier call already decided; reuse its answer
    if ( defined $create ) {
        $self->log->debug("using previous create $create");
        return $create;
    }

    if ( -e "$index_path/segments" ) {
        $create = 0;
        $self->log->debug("open index: $index_path");
    }
    else {
        $create = 1;
        $self->log->debug("create index $index_path");
    }

    return $create;
}
35    
# Return the shared Lucene StandardAnalyzer, building it on first use.
#
# The instance is kept in the file-level $analyzer so that indexing and
# searching use the same analyzer object.
sub analyzer {
    my $self = shift;

    if ( !defined $analyzer ) {

        # explicit Class->new call; the indirect object form ("new Class()")
        # depends on how perl guesses the meaning of the bareword "new"
        $analyzer = Lucene::Analysis::Standard::StandardAnalyzer->new();
        $self->log->debug("$analyzer created");
    }

    return $analyzer;
}
44    
# Return the shared Lucene FSDirectory for $index_path, opening it on first
# use. The result of $self->create is passed as the second argument of
# getDirectory.
sub store {
    my ($self) = @_;

    # already opened by an earlier call
    return $store if defined $store;

    $store = Lucene::Store::FSDirectory->getDirectory( $index_path, $self->create );
    $self->log->debug("$store created");

    return $store;
}
53    
# Return the shared Lucene IndexWriter, building it on first use from the
# shared store and analyzer. The result of $self->create is passed as the
# third constructor argument.
sub writer {
    my $self = shift;

    if ( !defined $writer ) {

        # explicit Class->new call instead of indirect object syntax
        $writer = Lucene::Index::IndexWriter->new( $self->store, $self->analyzer, $self->create );
        $self->log->debug("$writer created");
    }

    return $writer;
}
62    
=head2 add

  Grep::Search->add( $record, $owner_id );

Add one C<Jifty::Record> to the Lucene index.

Every column of the record is visited:

=over 4

=item *

a column whose value is an object contributes the fields
C<< <column>_id >>, C<< <column>_name >> and C<< <column>_title >> for each
of those methods the object supports and that yields a true value

=item *

a C<Jifty::DateTime> value is stored stringified as a keyword field

=item *

any other reference is skipped

=item *

plain values have C<< <...> >> markup replaced by a space; primary key
columns are stored as keyword fields, all other columns as text fields;
undefined or empty values are skipped

=back

A keyword field C<_owner_id> is added from C<$owner_id>, falling back to the
id of the current user when C<$owner_id> is not given.

Dies when no record is passed or when it is not a C<Jifty::Record> object.

=cut

sub add {
    my $self = shift;

    my $i   = shift or die "no record to add";
    my $uid = shift;

    # core module, loaded here so this sub does not depend on the file header
    require Scalar::Util;

    # blessed() first: calling ->isa on an unblessed reference is itself fatal
    # and would hide the intended error message
    die "record not Jifty::Record but ", ref $i
        unless ( Scalar::Util::blessed($i) && $i->isa('Jifty::Record') );

    my $pk = { $i->primary_keys };

    my $doc = Lucene::Document->new;

    my @columns = map { $_->name } $i->columns;

    foreach my $c (@columns) {

        my $v = $i->$c;

        if ( ref($v) ne '' ) {

            # only objects can answer ->can / ->isa; a plain array or hash
            # reference falls through to the "skipped" trace below
            my $is_object = Scalar::Util::blessed($v);

            if ($is_object) {
                foreach my $f_c (qw/id name title/) {
                    next unless $v->can($f_c);

                    # reuse $v instead of calling the column accessor again
                    my $f_v = $v->$f_c || $v->{values}->{$f_c};
                    my $col = $c . '_' . $f_c;
                    if ($f_v) {
                        warn " # $col = $f_v\n" if ($debug);
                        $doc->add( Lucene::Document::Field->Text( $col, $f_v ) );
                    }
                    else {
                        warn " . $col is NULL\n" if ($debug);
                    }
                }
            }

            if ( $is_object && $v->isa('Jifty::DateTime') ) {
                warn " d $c = $v\n" if ($debug);
                $doc->add( Lucene::Document::Field->Keyword( $c, "$v" ) );
            }
            else {
                warn " s $c = $v [", ref($v), "]\n" if ($debug);
            }
            next;
        }

        next if ( !defined($v) || $v eq '' );

        # replace anything that looks like a markup tag with a space
        $v =~ s/<[^>]+>/ /gs;

        if ( defined( $pk->{$c} ) ) {
            $doc->add( Lucene::Document::Field->Keyword( $c, $v ) );
            warn " * $c = $v\n" if ($debug);
        }
        else {
            $doc->add( Lucene::Document::Field->Text( $c, $v ) );
            warn " + $c = ", $self->snippet( 50, $v ), "\n" if ($debug);
        }
    }

    # add _owner_id to speed up filtering of search results
    $uid ||= Jifty->web->current_user->id;
    $doc->add( Lucene::Document::Field->Keyword( '_owner_id', $uid ) );

    $self->writer->addDocument($doc);

    $self->log->debug( "added ", $i->id, " for user $uid to index" );
}
132    
=head2 collection

  my $ItemCollection = Grep::Search->collection( 'search query' );

Run C<'search query'> against the Lucene index, restricted to documents
whose C<_owner_id> is the id of the current user, and return a
C<Grep::Model::ItemCollection> holding every hit whose item could be loaded
by its C<id>.

Returns nothing when the index does not exist yet. Dies when no query is
given.

=cut

sub collection {
    my $self = shift;

    my $q = shift or die "no q?";

    # create() is true when there is no index on disk, so nothing to search
    return if ( $self->create );

    my $searcher = Lucene::Search::IndexSearcher->new( $self->store );
    $self->log->debug("$searcher created");
    my $parser = Lucene::QueryParser->new( "content", $self->analyzer );
    $self->log->debug("$parser created");

    # NOTE(review): $q is interpolated verbatim in front of the owner
    # restriction; presumably a query containing its own parentheses or
    # boolean operators could change how that restriction binds -- confirm
    # whether $q needs escaping or the filter should be applied separately
    my $full_q = "($q) AND _owner_id:" . Jifty->web->current_user->id;

    my $query = $parser->parse($full_q);

    $self->log->debug( "searching for '$q' using ", $query->toString );

    my $hits     = $searcher->search($query);
    my $num_hits = $hits->length();

    $self->log->debug("found $num_hits results");

    my $collection = Grep::Model::ItemCollection->new();

    for ( my $i = 0 ; $i < $num_hits ; $i++ ) {

        my $doc = $hits->doc($i);

        my $score = $hits->score($i);
        my $title = $doc->get("title");
        my $id    = $doc->get("id");

        # trace only in debug mode; a document may have no title field, so
        # avoid interpolating undef
        if ($debug) {
            my $shown_title = defined $title ? $title : '';
            warn "## $i $score $shown_title\n";
        }

        my $item = Grep::Model::Item->new();
        my ( $ok, $msg ) = $item->load_by_cols( id => $id );

        if ($ok) {
            $collection->add_record($item);
        }
        else {
            warn "can't load item $id\n";
        }

    }

    # release the Lucene objects in this order before closing the searcher
    undef $hits;
    undef $query;
    undef $parser;
    $searcher->close;
    undef $searcher;

    return $collection;
}
195    
=head2 finish

  Grep::Search->finish

Close the index writer, if one was opened, and forget the shared writer,
store, create flag and analyzer so that the next use builds them again.
Returns nothing.

=cut

sub finish {
    my ($self) = @_;

    if ($writer) {
        warn "closing index\n";
        $writer->close;
    }

    # drop the shared state one variable at a time, writer first
    undef $_ for ( $writer, $store, $create, $analyzer );

    return;
}
215    
216 dpavlin 57 =for TODO
217    
218 dpavlin 47 sub _signal {
219     my $s = shift;
220     warn "catched SIG $s\n";
221     finish();
222     exit(0);
223     }
224    
225     $SIG{'__DIE__'} = \&_signal;
226     $SIG{'INT'} = \&_signal;
227     $SIG{'QUIT'} = \&_signal;
228    
229 dpavlin 57 =cut
230 dpavlin 53
=head2 snippet

  my $short = $self->snippet( 50, $text );

Join all text arguments with a single space, collapse every run of
whitespace into one space and return the result. When the result is longer
than the given length it is cut to that length and C<...> is appended.

Dies when no (or a false) length is given.

=cut

sub snippet {
    my ( $self, $len, @chunks ) = @_;

    die "no len?" unless $len;

    # one string, with newlines/tabs/multiple blanks squeezed to one space
    ( my $text = join( " ", @chunks ) ) =~ s/\s+/ /gs;

    return $text if length($text) <= $len;

    return substr( $text, 0, $len ) . '...';
}
252    
253 dpavlin 47 1;

  ViewVC Help
Powered by ViewVC 1.1.26