| Revision 16 (by dpavlin, 2007/06/28 10:38:34) |
Switch to the new calling convention: StemHR->stem('word')
|
#!/usr/bin/perl -w
use lib '.';
use StemHR;
# Shared state for the whole run; updated by the main loop and check_stem().

# Occurrence count per trailing number captured from a processed line
# (presumably the stemmer's rule id -- confirm against StemHR).
my %rules;
# Occurrence count per text captured in front of that trailing number.
my %stem_words;
# Number of input lines processed (comment and blank lines are not counted).
my $words = 0;
# Number of processed lines that matched the "<text> <digits>" pattern.
my $stems = 0;
# Reference stem of the current word group; reset to '' on a blank input line.
my $last_stem = '';
# Number of times check_stem() saw a stem that differed from $last_stem.
my $errors = 0;
# Compare one stem against the reference stem of the current word group.
#
# The first true stem seen while $last_stem is unset becomes the reference.
# Any later stem that differs from the reference prints "ERROR==> " (without
# a newline, so it ends up in front of whatever the caller prints next) and
# increments $errors.  A false argument (undef, '' or '0') is ignored.
# The return value carries no meaning.
sub check_stem {
    my $candidate = shift;
    return unless $candidate;

    # No reference yet: adopt this stem and stop.
    unless ($last_stem) {
        $last_stem = $candidate;
        return;
    }

    return if $last_stem eq $candidate;

    print "ERROR==> ";
    $errors++;
}
# Main loop: read lines from the files named in @ARGV (or STDIN).
# Lines starting with '#' are skipped; an empty line closes the current
# group of words and is echoed as an empty line in the output.
while (<>) {
    chomp;

    next if /^#/;

    if (/^$/) {
        # Group separator: forget the reference stem.
        $last_stem = '';
        print "\n";
        next;
    }

    $words++;
    my $orig = $_;

    # NOTE(review): the return value is never read; everything below works
    # on $_.  Presumably StemHR->stem() leaves its result in $_ -- confirm
    # against StemHR before changing this call or naming the loop variable.
    my $stem = StemHR->stem($_);

    # Rewrite "<text> <digits>" as "<text>\t<digits>" and remember whether
    # the line had that shape.
    my $has_rule = s/^(.+)\s(\d+)$/$1\t$2/g;

    if (!$has_rule) {
        # No trailing number: the whole line becomes the reference stem and
        # is not compared with the previous one.
        $last_stem = $_;
    }
    else {
        my ($text, $rule_nr) = ($1, $2);
        $rules{$rule_nr}++;
        $stems++;
        $stem_words{$text}++;
        # May print "ERROR==> " in front of the result line printed below.
        check_stem($text);
    }

    printf("%-15s %s\n", $orig, $_);
}
# Final report: one totals line, then one "#stem" line per distinct stem and
# one "#rule" line per distinct rule number, each with its occurrence count.
my $nr_stems = keys(%stem_words);

# Distinct stems as a percentage of the words read.  With no words at all
# (empty input, or only comments and blank lines) the ratio is undefined, so
# report 0 instead of dying with "Illegal division by zero".
my $size_pct = $words ? $nr_stems * 100 / $words : 0;

printf "\n# %d words, %d stems in %d ops, %.2f%% size [%d errors]\n",
    $words, $nr_stems, $stems, $size_pct, $errors;

# Sorted so that the report is the same from run to run.
foreach my $s (sort keys %stem_words) {
    print "#stem $stem_words{$s} $s\n";
}

# Rule keys come from a \d+ capture, so order them numerically (a plain sort
# would put "10" before "2"); the string comparison only breaks ties between
# keys such as "01" and "1".
foreach my $r (sort { $a <=> $b or $a cmp $b } keys %rules) {
    print "#rule $rules{$r} $r\n";
}