/[nn]/affix.pl
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /affix.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1.1.1 - (hide annotations) (vendor branch)
Wed Jan 30 10:07:32 2002 UTC (22 years, 2 months ago) by dpavlin
Branch: dbp, MAIN
CVS Tags: r0, HEAD
Changes since 1.1: +0 -0 lines
File MIME type: text/plain
initial import

1 dpavlin 1.1 #!/usr/bin/perl -w
2    
3     #--------------------------------------------------------------------
4    
5     my @affix_regexp;
6     my @affix_add;
7     my @affix_sub;
8    
9     sub load_affix {
10     my ($filename) = @_;
11    
12     my $suffixes=0;
13    
14     my ($regexp,$add,$sub);
15    
16     open (A,$filename) || die "$filename: $!";
17     while(<A>) {
18     chomp;
19     next if (/^#|^[\s\t\n\r]*$/);
20    
21     if (/^suffixes/i) {
22     $suffixes++;
23     next;
24     }
25    
26     next if (! $suffixes);
27    
28     if (/^flag[\s\t]+\*{0,1}(.):/i) {
29     undef $regexp;
30     undef $add;
31     undef $sub;
32     next;
33     }
34    
35     if (/^[\s\t]*([^>#]+)>[\s\t]+-([^\,\s\t]+),([^\s\t]+)/) {
36     $regexp = $1;
37     $add = $2;
38     $sub = $3 if ($3 ne "-");
39     } elsif (/^[\s\t]*([^>#]+)>[\s\t]+([^\s\t\#]+)/) {
40     $regexp = $1;
41     $sub = $2;
42     }
43    
44     sub nuke_s {
45     my $tmp = $_[0];
46     return if (!$tmp);
47     $tmp=~s/^ *//g;
48     $tmp=~s/ *$//g;
49     $tmp=~s/ *//g;
50     return $tmp;
51     }
52    
53     push @affix_regexp,nuke_s($regexp);
54     push @affix_add,nuke_s($add);
55     push @affix_sub,nuke_s($sub);
56     }
57     }
58    
59     sub normalize_word {
60     my @out;
61     foreach my $word (@_) {
62     push @out,$word; # save original word
63     next if (length($word) < 3); # cludge: preskoci kratke
64     for(my $i=0; $i<=$#affix_regexp; $i++) {
65     my $regexp = $affix_regexp[$i];
66     my $add = $affix_add[$i];
67     my $sub = $affix_sub[$i];
68     next if length($word) < length($sub);
69     my $tmp_word = $word;
70     if ($sub) {
71     next if ($word !~ m/$sub$/i);
72     $tmp_word =~ s/$sub$/$add/i if ($add);
73     } else {
74     $tmp_word = $word.$add;
75     }
76     if ($tmp_word =~ m/$regexp/ix) {
77     # print "$word -> $tmp_word\t-$sub, +$add, regexp: $regexp\n";
78     push @out,lc($tmp_word);
79     }
80     }
81     }
82     return @out;
83     }
84    
85    
86    
87     #--------------------------------------------------------------------
88    
89     load_affix("/tmp/dpavlin/docs/nn/search/croatian.aff");
90    
91     print "elements: ",$#affix_regexp,"\n";
92    
93     print join(", ",normalize_word( ("zakoni","oprostu") )),"\n";
94    
95     print join(", ",normalize_word( "preirez" )),"\n";
96     print join(", ",normalize_word( "preireza" )),"\n";
97    

  ViewVC Help
Powered by ViewVC 1.1.26