/[webpac]/openisis/current/fulltext
This is a repository of my old source code, which is no longer updated. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /openisis/current/fulltext

Parent Directory Parent Directory | Revision Log Revision Log


Revision 237 - (hide annotations)
Mon Mar 8 17:43:12 2004 UTC (20 years, 1 month ago) by dpavlin
File size: 871 byte(s)
initial import of openisis 0.9.0 vendor drop

1 dpavlin 237 #!/usr/bin/perl
2    
3     # read files for fulltext index
4     # filelist is on stdin (e.g. by find)
5     # index entries go to stdout
6    
7     # usage:
8     # find /foo -name \*.html | ./fulltext >/tmp/idx 2>/tmp/mst
9     # find /usr/share/doc -type f -a \! -name \*.htm\* | ./fulltext >/tmp/idx 2>/tmp/mst
10     # sort -o /tmp/idx /tmp/idx
11     # time ./openisis -write db/test/ft -stream -fmt mfn </tmp/mst
12     # time ./openisis -db db/test/ft -ifload 0 -v i </tmp/idx
13     # time ./openisis -db db/test/ft -ifchk -v i
14     # time ./openisis -db db/test/ft -search Descriptive -ifdump
15    
use strict;
use warnings;

# Build full-text index input from a list of file names (one name per line,
# read from stdin or from files named on the command line).
#
# stdout: one tab-separated index entry per word:
#           WORD <TAB> file-number <TAB> 800 <TAB> line-number <TAB> word-number
#         WORD is upper-cased; file numbers count successfully opened files
#         starting at 1; line numbers count non-blank lines; word numbers
#         restart at 1 on every line.
# stderr: one record per opened file: "100\t<file name>" followed by a line
#         holding a single form feed.  stderr is a DATA stream here (the usage
#         notes redirect it with 2>/tmp/mst), so this script deliberately
#         writes no diagnostics to it.

# Only the first 255 non-blank lines of each file are indexed.
# NOTE(review): presumably a limit of the index entry format -- confirm.
my $MAX_LINES = 255;

# Constant third column of every index entry.
# NOTE(review): its meaning is not visible in this script (looks like a
# field/tag number) -- confirm against the openisis index loader.
my $ENTRY_TAG = 800;

my $file_no = 0;

FILE:
while ( my $path = <> ) {
    chomp $path;

    # Three-argument open with an explicit read mode: the names come from
    # external input, and two-argument open would interpret names such as
    # "cmd |" or ">file" as a pipe or a write mode.
    # Files that cannot be opened are skipped silently on purpose: a warning
    # would end up inside the record stream on stderr.
    open my $fh, '<', $path or next FILE;

    $file_no++;
    print STDERR "100\t$path\n";

    my $line_no = 0;

    LINE:
    while ( my $text = <$fh> ) {
        next LINE if $text =~ /^\s*$/;             # blank lines are not counted
        last LINE if ++$line_no > $MAX_LINES;
        chomp $text;

        my $word_no = 0;
        for my $word ( split /\W+/, $text ) {
            # split yields an empty leading field when the line starts with a
            # non-word character.  Test the length rather than the truth
            # value so that the token "0" is indexed like any other word.
            next unless length $word;
            printf "%s\t%d\t%d\t%d\t%d\n",
                uc($word), $file_no, $ENTRY_TAG, $line_no, ++$word_no;
        }
    }
    close $fh;

    # Form feed terminates this file's record on the stderr stream.
    print STDERR "\f\n";
}

Properties

Name Value
svn:executable

  ViewVC Help
Powered by ViewVC 1.1.26