/[nn]/affix.pl
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /affix.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1.1.1 - (show annotations) (vendor branch)
Wed Jan 30 10:07:32 2002 UTC (17 years, 2 months ago) by dpavlin
Branch: dbp, MAIN
CVS Tags: r0, HEAD
Changes since 1.1: +0 -0 lines
File MIME type: text/plain
initial import

1 #!/usr/bin/perl -w
2
3 #--------------------------------------------------------------------
4
5 my @affix_regexp;
6 my @affix_add;
7 my @affix_sub;
8
9 sub load_affix {
10 my ($filename) = @_;
11
12 my $suffixes=0;
13
14 my ($regexp,$add,$sub);
15
16 open (A,$filename) || die "$filename: $!";
17 while(<A>) {
18 chomp;
19 next if (/^#|^[\s\t\n\r]*$/);
20
21 if (/^suffixes/i) {
22 $suffixes++;
23 next;
24 }
25
26 next if (! $suffixes);
27
28 if (/^flag[\s\t]+\*{0,1}(.):/i) {
29 undef $regexp;
30 undef $add;
31 undef $sub;
32 next;
33 }
34
35 if (/^[\s\t]*([^>#]+)>[\s\t]+-([^\,\s\t]+),([^\s\t]+)/) {
36 $regexp = $1;
37 $add = $2;
38 $sub = $3 if ($3 ne "-");
39 } elsif (/^[\s\t]*([^>#]+)>[\s\t]+([^\s\t\#]+)/) {
40 $regexp = $1;
41 $sub = $2;
42 }
43
44 sub nuke_s {
45 my $tmp = $_[0];
46 return if (!$tmp);
47 $tmp=~s/^ *//g;
48 $tmp=~s/ *$//g;
49 $tmp=~s/ *//g;
50 return $tmp;
51 }
52
53 push @affix_regexp,nuke_s($regexp);
54 push @affix_add,nuke_s($add);
55 push @affix_sub,nuke_s($sub);
56 }
57 }
58
59 sub normalize_word {
60 my @out;
61 foreach my $word (@_) {
62 push @out,$word; # save original word
63 next if (length($word) < 3); # cludge: preskoci kratke
64 for(my $i=0; $i<=$#affix_regexp; $i++) {
65 my $regexp = $affix_regexp[$i];
66 my $add = $affix_add[$i];
67 my $sub = $affix_sub[$i];
68 next if length($word) < length($sub);
69 my $tmp_word = $word;
70 if ($sub) {
71 next if ($word !~ m/$sub$/i);
72 $tmp_word =~ s/$sub$/$add/i if ($add);
73 } else {
74 $tmp_word = $word.$add;
75 }
76 if ($tmp_word =~ m/$regexp/ix) {
77 # print "$word -> $tmp_word\t-$sub, +$add, regexp: $regexp\n";
78 push @out,lc($tmp_word);
79 }
80 }
81 }
82 return @out;
83 }
84
85
86
87 #--------------------------------------------------------------------
88
89 load_affix("/tmp/dpavlin/docs/nn/search/croatian.aff");
90
91 print "elements: ",$#affix_regexp,"\n";
92
93 print join(", ",normalize_word( ("zakoni","oprostu") )),"\n";
94
95 print join(", ",normalize_word( "preirez" )),"\n";
96 print join(", ",normalize_word( "preireza" )),"\n";
97

  ViewVC Help
Powered by ViewVC 1.1.26