# Revision 238 (by dpavlin, 2004/03/08 17:46:16) tagging openisis 0.9.0
#!/usr/bin/perl

#	read files for fulltext index
#	file list is on stdin (e.g. produced by find)
#	index entries go to stdout, one record per file goes to stderr

#	usage:
# find /foo -name \*.html | ./fulltext >/tmp/idx 2>/tmp/mst
# find /usr/share/doc -type f -a \! -name \*.htm\* | ./fulltext >/tmp/idx 2>/tmp/mst
#	sort -o /tmp/idx /tmp/idx
# time ./openisis -write db/test/ft -stream -fmt mfn </tmp/mst
# time ./openisis -db db/test/ft -ifload 0 -v i </tmp/idx
# time ./openisis -db db/test/ft -ifchk -v i
# time ./openisis -db db/test/ft -search Descriptive -ifdump

use strict;
use warnings;

#	line_terms( $text )
#	returns the upper-cased words of one line, in order of appearance.
#	splitting on runs of non-word characters yields an empty leading field
#	when the line starts with a separator; only empty fields are dropped,
#	so the term "0" is kept (a plain truth test would discard it).
sub line_terms {
	my ($text) = @_;
	return map { uc $_ } grep { length $_ } split /\W+/, $text;
}

#	main loop: one file name per input line.
#	for every file that can be opened:
#	- stderr gets "100<TAB>name", closed by a line holding a form feed
#	- stdout gets one entry per word:
#	  TERM <TAB> file number <TAB> 800 <TAB> line number <TAB> word number
#	files are numbered from 1 in the order they were opened, names that
#	can not be opened are skipped without consuming a number.
#	blank lines are not counted and only the first 255 non-blank lines
#	of each file are indexed.
#	NOTE(review): 100 and 800 look like openisis field tags -- confirm.
my $fn = 0;
while ( my $f = <> ) {
	chomp $f;
	# three-argument open: the name is data read from input and must never
	# be interpreted as an open mode or a pipe command
	open( my $fh, '<', $f ) or next;
	$fn++;
	print STDERR "100\t$f\n";
	my $line = 0;
	while ( my $text = <$fh> ) {
		next if $text =~ /^\s*$/;
		last if 255 < ++$line;
		chomp $text;
		my $w = 0;
		for my $word ( line_terms($text) ) {
			printf "%s\t%d\t%d\t%d\t%d\n", $word, $fn, 800, $line, ++$w;
		}
	}
	close $fh;
	print STDERR "\f\n";
}