Revision 16 (by dpavlin, 2007/06/28 10:38:34) new calling StemHR->stem('word')
#!/usr/bin/perl -w

use lib '.';
use StemHR;

# Tallies filled in by the main loop and reported at the end of the run:
#   %rules      - rule number => how many lines carried that rule number
#   %stem_words - stem        => how many lines produced that stem
my (%rules, %stem_words);

#   $words - input lines processed (comment and blank lines excluded)
#   $stems - lines that matched the "<stem> <rule number>" pattern
my ($words, $stems) = (0, 0);


# Stem expected for the current group of words.  The empty string means
# that no stem has been recorded yet for this group.
my $last_stem = '';

# Number of stems that differed from their group's expected stem.
my $errors = 0;

# check_stem($stem)
#
# Compare $stem with the stem recorded for the current group.
#   - An undefined or empty $stem is ignored.
#   - If no stem is recorded yet, $stem becomes the expected stem.
#   - If $stem differs from the expected stem, "ERROR==> " is printed
#     (without a newline) and $errors is incremented; the expected stem
#     is left unchanged.
# Returns nothing useful.
sub check_stem {
	my $s = shift;

	# Test definedness and length instead of truth, so that a stem which
	# is the string "0" is checked like any other stem.
	return unless defined $s && length $s;

	if (length $last_stem) {
		if ($last_stem ne $s) {
			# No trailing newline: the marker prefixes whatever is
			# printed next on the same output line.
			print "ERROR==> ";
			$errors++;
		}
	} else {
		$last_stem = $s;
	}

	return;
}

# Main loop: read the word list (files named on the command line, or
# STDIN), one entry per line, and print a two-column report line for each.
while (<>) {
	chomp;

	# Lines starting with '#' are comments: not counted, not printed.
	next if /^#/;

	# An empty line ends the current group of words: echo it and forget
	# the stem that the group was expected to share.
	if (/^$/) {
		print "\n";
		$last_stem = '';
		next;
	}

	$words++;
	my $orig = $_;

	# NOTE(review): the value returned here is never used below.  The
	# pattern match that follows only sees a stem if stem() rewrites $_
	# in place -- confirm against StemHR.
	my $stem = StemHR->stem($_);

	# A line of the form "<text><whitespace><digits>" is taken as a stem
	# followed by a rule number; the separating whitespace character is
	# replaced by a tab for the report.
	if (s/^(.+)\s(\d+)$/$1\t$2/g) {
		my ($found_stem, $rule_nr) = ($1, $2);

		$stems++;
		$rules{$rule_nr}++;
		$stem_words{$found_stem}++;

		# May print the "ERROR==> " prefix in front of the report line.
		check_stem($found_stem);
	} else {
		# No rule number: the whole line becomes the stem that the
		# following lines are checked against.
		$last_stem = $_;
	}

	printf "%-15s %s\n", $orig, $_;
}
# Final report: overall totals, then one "#stem" line per distinct stem
# and one "#rule" line per rule number, each with its occurrence count.
my $nr_stems = keys(%stem_words);

# Distinct stems as a percentage of processed words.  Guard against input
# with no words at all (empty or comment-only), which would otherwise die
# with "Illegal division by zero".
my $size_pct = $words ? $nr_stems * 100 / $words : 0;

printf "\n# %d words, %d stems in %d ops, %.2f%% size [%d errors]\n",
	$words, $nr_stems, $stems, $size_pct, $errors;

# Sorted so that the report is reproducible from run to run.
foreach my $s (sort keys %stem_words) {
	print "#stem $stem_words{$s} $s\n";
}

# Rule numbers are captured with \d+, so order them numerically
# (a plain sort would put rule 10 before rule 2).
foreach my $r (sort { $a <=> $b } keys %rules) {
	print "#rule $rules{$r} $r\n";
}