/[stem-hr]/stem.pl
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /stem.pl

Parent Directory | Revision Log


Revision 16 - (hide annotations)
Thu Jun 28 10:38:34 2007 UTC (16 years, 10 months ago) by dpavlin
File MIME type: text/plain
File size: 932 byte(s)
new calling StemHR->stem('word')

1 dpavlin 12 #!/usr/bin/perl -w
2    
3     use lib '.';
4 dpavlin 15 use StemHR;
5 dpavlin 12
# Run-wide tallies, filled in by the main read loop and printed in the summary at the end of the script.
6     my %rules;	# trailing number captured from a matched line => how many lines carried it (reported as "#rule"; presumably a stemming rule id -- confirm against StemHR)
7     my %stem_words;	# leading text captured from a matched line => how many lines produced it (reported as "#stem")
8     my $words = 0;	# input lines processed, not counting '#' comment lines and blank lines
9     my $stems = 0;	# lines that matched the "<text> <digits>" pattern (reported as "ops")
10    
11    
# Reference stem for the current group of words.  The empty string means
# "no reference recorded yet"; the main loop resets it to '' on a blank line.
my $last_stem = '';

# Number of stems that differed from their group's reference stem.
my $errors = 0;

# check_stem($stem)
#
# Compares $stem with the reference stem in $last_stem.
#   - If no reference is recorded yet, $stem becomes the reference.
#   - If a reference exists and $stem differs from it, prints the marker
#     "ERROR==> " (no newline) to the currently selected output handle and
#     increments $errors.
#   - A missing or empty $stem is ignored.
#
# The return value is not meaningful; callers ignore it.
sub check_stem {
    my $s = shift;

    # Test definedness/length rather than truth: a plain boolean test would
    # silently skip a legitimate stem of "0".
    return unless defined $s && length $s;

    if (length $last_stem) {
        if ($last_stem ne $s) {
            # No newline on purpose: the marker is emitted in front of the
            # result line that the caller prints right after this call.
            print "ERROR==> ";
            $errors++;
        }
    } else {
        $last_stem = $s;
    }
}
25    
# Main loop: read input (files named on the command line, or STDIN) one line
# at a time.  For every non-comment, non-blank line, print the original line
# next to its processed form and update the run-wide tallies.
#
# The record is kept in a lexical instead of the global $_: while(<>) does
# not localize $_, so any code called from inside the loop could clobber it.
while (my $line = <>) {
    chomp $line;

    # Lines starting with '#' are comments.
    next if $line =~ /^#/;

    # A blank line ends the current group of words: echo it and forget the
    # reference stem so the next group starts fresh.
    if ($line =~ /^$/) {
        print "\n";
        $last_stem = '';
        next;
    }

    $words++;

    my $orig = $line;

    # NOTE(review): the value returned by StemHR->stem() is never read in
    # this script; only $line is inspected below.  Unless stem() modifies its
    # argument in place, the pattern below only matches input lines that
    # already look like "<text> <digits>".  Confirm against StemHR before
    # changing this.
    my $stem = StemHR->stem($line);

    # "<text><whitespace><digits>": rewrite the separator as a tab and tally.
    if ($line =~ s/^(.+)\s(\d+)$/$1\t$2/) {
        # Copy the captures right away so later code cannot invalidate them.
        my ($stem_text, $rule) = ($1, $2);

        $rules{$rule}++;
        $stems++;
        $stem_words{$stem_text}++;
        check_stem($stem_text);
    } else {
        # No trailing number: the line itself becomes the reference stem.
        $last_stem = $line;
    }

    printf("%-15s %s\n", $orig, $line);
}
# _percent_of($part, $whole)
#
# Returns $part expressed as a percentage of $whole.  Returns 0 when $whole
# is zero or undefined, so an input without any word lines produces a
# summary instead of dying with "Illegal division by zero".
sub _percent_of {
    my ($part, $whole) = @_;

    return 0 unless $whole;
    return $part * 100 / $whole;
}

# Summary: totals first, then one "#stem" line per distinct stem and one
# "#rule" line per distinct rule number, each with its occurrence count.
my $nr_stems = keys %stem_words;
printf "\n# %d words, %d stems in %d ops, %.2f%% size [%d errors]\n",
    $words, $nr_stems, $stems, _percent_of($nr_stems, $words), $errors;

# Sorted so that the report is reproducible from run to run.
foreach my $s (sort keys %stem_words) {
    print "#stem $stem_words{$s} $s\n";
}

# Rule keys are digit strings (captured with \d+), so order them numerically;
# a plain sort would place "10" before "2".
foreach my $r (sort { $a <=> $b } keys %rules) {
    print "#rule $rules{$r} $r\n";
}

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26