/[nn]/ispell.php
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /ispell.php

Parent Directory | Revision Log


Revision 1.1 - (show annotations)
Fri Jan 25 18:31:46 2002 UTC (22 years, 2 months ago) by dpavlin
Branch point for: dbp, MAIN
Initial revision

1 #!/usr/local/php/bin/php
2
3 <?
4
// One suffix rule, as filled in by load_affix() and read by normalize_word().
// Every field starts out as null until the loader assigns it.
class C_AFFIX {
    // Language the rule belongs to (key of $affix_file, or the "lang" column).
    var $lang = null;
    // Affix flag the rule was declared under.
    var $flag = null;
    // Ending that an inflected word must have; it is cut off when normalizing.
    var $sub = null;
    // Text put back in place of $sub to obtain the candidate normal form.
    var $add = null;
    // Condition (regular expression) the candidate normal form must match.
    var $regexp = null;
}
12
// One candidate normal form produced by normalize_word() for an input word.
// Every field starts out as null until normalize_word() assigns it.
class C_NORM_FORM {
    // The candidate normal (dictionary) form.
    var $word = null;
    // Language of the affix rule that produced the candidate.
    var $lang = null;
    // Flag of the affix rule that produced the candidate; check_words()
    // looks for it among the flags of the dictionary entry.
    var $flag = null;
}
18
19 // ---------------------------------------------------
20 // load_affix()
21 // ---------------------------------------------------
// Loads the suffix rules into the global $affix_array (a list of C_AFFIX
// objects) and resets the global $normalized_array to an empty array.
//
// The source is selected by the global $ispellmode:
//   'db'   - rows of the "affix" table with type='s'
//            (mask -> regexp, find -> add, repl -> sub);
//   'text' - every file in the global $affix_file (language => path), read
//            as an ispell-style affix file. Only rules inside a "suffixes"
//            section are loaded.
// Any other mode leaves both arrays empty.
//
// Takes no parameters and returns nothing; the result is in the globals.
function load_affix() {
    global $DEBUG;

    global $ispellmode;

    global $affix_file;
    global $affix_array;
    global $normalized_array;

    $affix_array = array();
    $normalized_array = array();

    if ($ispellmode == 'db') {
        $query = "SELECT flag,lang,mask,find,repl
            FROM affix
            WHERE type='s'";

        if ($DEBUG) echo "load_affix(): ", $query;

        if (!($res = db_query($query))) {
            print_error_local('Query error: '.$query."\n".db_error());
            // Only reached if print_error_local() returns: there is no
            // result set to read, so stop instead of fetching from false.
            return;
        }

        while ($row = db_fetchrow($res)) {
            $affix_array[] = load_affix_make_rule(
                trim($row[0]),   // flag
                trim($row[1]),   // lang
                trim($row[2]),   // mask
                trim($row[3]),   // find
                trim($row[4])    // repl
            );
        }
        db_freeresult($res);
    } elseif ($ispellmode == 'text') {
        foreach ($affix_file as $lang => $file) {
            $fd = fopen($file, "r");
            if (! $fd) {
                print ("Load affix error: cannot open file $file");
                exit_local(1);
                // Only reached if exit_local() returns: feof() on a failed
                // handle never becomes true, so skip this file.
                continue;
            }

            // Parser state is per file: no section entered, no flag seen yet.
            $suffixes = 0;
            $flag = '';

            while (! feof($fd)) {
                $str = strtolower(trim(fgets($fd, 1024)));

                // Skip comments and blank lines.
                if (preg_match("/^#|^[\s\t\n\r]*$/", $str)) continue;

                if (strncmp($str, 'suffixes', 8) == 0) {
                    $suffixes = 1;
                    continue;
                }

                // A prefix section ends the suffix section; without this,
                // prefix rules that follow would be loaded as suffix rules.
                if (strncmp($str, 'prefixes', 8) == 0) {
                    $suffixes = 0;
                    continue;
                }

                if ($suffixes != 1) continue;

                // "flag *X:" (the '*' is optional) starts the rules of flag X.
                if (preg_match("/^flag[\s\t]+\*{0,1}(.):/i", $str, $param)) {
                    $flag = trim($param[1]);
                    continue;
                }

                if (preg_match("/^[\s\t]*([^>#]+)>[\s\t]+-([^\,\s\t]+),([^\s\t]+)/", $str, $param)) {
                    // "condition > -strip,append": normalizing removes the
                    // appended text and puts the stripped text back.
                    $regexp  = trim($param[1]);
                    $add_str = trim($param[2]);
                    $sub_str = trim($param[3]);

                    // A lone '-' in the append position means "nothing".
                    if ($sub_str == '-') $sub_str = '';
                } elseif (preg_match("/^[\s\t]*([^>#]+)>[\s\t]+([^\s\t\#]+)/", $str, $param)) {
                    // "condition > append": nothing to put back.
                    $regexp  = trim($param[1]);
                    $add_str = '';
                    $sub_str = trim($param[2]);
                } else {
                    continue;
                }

                // Remove the blanks that may separate the parts of a rule.
                $affix_array[] = load_affix_make_rule(
                    $flag,
                    $lang,
                    str_replace(" ", "", $regexp),
                    str_replace(" ", "", $add_str),
                    str_replace(" ", "", $sub_str)
                );
            }
            fclose($fd);
        }
    }
}

// Builds one C_AFFIX record from its five fields (helper of load_affix()).
function load_affix_make_rule($flag, $lang, $regexp, $add, $sub) {
    $rule = new C_AFFIX;
    $rule->flag   = $flag;
    $rule->lang   = $lang;
    $rule->regexp = $regexp;
    $rule->add    = $add;
    $rule->sub    = $sub;
    return $rule;
}
123
124 // ---------------------------------------------------
125 // normalize_word($word)
126 // ---------------------------------------------------
// Derives the candidate normal forms of $word from the suffix rules in the
// global $affix_array (see load_affix()).
//
// For every rule whose "sub" ending terminates $word, that ending is cut off
// and the rule's "add" text is appended; a rule with an empty "sub" just
// appends "add". The candidate is kept when it matches the rule's condition
// (case-insensitively).
//
// Parameter: $word - the word to normalize.
// Returns nothing. Results are stored in globals:
//   $normalized_array[$word] - list of C_NORM_FORM candidates (set only when
//                              at least one candidate was found);
//   $final_word[$word][0]    - always set to $word itself.
// When the global $DEBUG is true, each candidate is also echoed.
function normalize_word($word) {
    global $DEBUG;

    global $affix_array;
    global $normalized_array;

    global $final_word;

    $candidates = array();
    $word_len = strlen($word);
    // Treat "rules not loaded" as an empty rule list.
    $rule_count = is_array($affix_array) ? count($affix_array) : 0;

    for ($i = 0; $i < $rule_count; $i++) {
        $rule = $affix_array[$i];
        $sub = (string)$rule->sub;
        $sub_len = strlen($sub);
        if ($word_len < $sub_len) continue;

        if ($sub !== '') {
            if (strcmp($sub, substr($word, $word_len - $sub_len)) != 0) continue;
            // Cut the ending off literally; using it as a regular expression
            // would misbehave for endings containing pattern characters.
            $temp_word = substr($word, 0, $word_len - $sub_len).$rule->add;
        } else {
            $temp_word = $word.$rule->add;
        }

        // An empty condition is a malformed rule and never matches. '/' is
        // the pattern delimiter here, so escape it inside the condition.
        $mask = (string)$rule->regexp;
        if ($mask === '') continue;
        if (!preg_match('/'.str_replace('/', '\\/', $mask).'/i', $temp_word)) continue;

        if ($DEBUG) echo "Possible norm form \"$temp_word\" for \"$word\":".
            " - ".$rule->sub.
            " + ".$rule->add.
            " exp ".$rule->regexp.
            " fl ".$rule->flag.
            " lng ".$rule->lang.
            "\n";

        $form = new C_NORM_FORM;
        $form->word = $temp_word;
        $form->flag = $rule->flag;
        $form->lang = $rule->lang;
        $candidates[] = $form;
    }

    // Replace the whole list so a repeated call leaves no stale entries.
    if (count($candidates) > 0) {
        $normalized_array["$word"] = $candidates;
    }

    $final_word["$word"][0] = $word;
}
167
168 // ---------------------------------------------------
169 // check_words()
170 // ---------------------------------------------------
// Confirms the candidate normal forms in the global $normalized_array
// against the dictionary and appends each confirmed form to the global
// $final_word[$word].
//
// A candidate is confirmed when the dictionary has an entry for the same
// word and language whose flag string contains the candidate's flag.
// The dictionary is selected by the global $ispellmode:
//   'db'   - the "spell" table (columns word, lang, flag);
//   'text' - the files in the global $spell_file (language => list of
//            paths), holding "word/flags" lines, searched with the program
//            named by the global $grep.
//
// Takes no parameters and returns nothing.
function check_words() {
    global $DEBUG;

    global $ispellmode;
    global $spell_file;
    global $grep;

    global $final_word;
    global $normalized_array;

    if (!is_array($normalized_array)) return;

    foreach ($normalized_array as $word => $forms) {
        $form_count = count($forms);
        for ($j = 0; $j < $form_count; $j++) {
            $norm_word = (string)$forms[$j]->word;
            $norm_lang = $forms[$j]->lang;
            $norm_flag = (string)$forms[$j]->flag;

            if ($ispellmode == 'db') {
                // NOTE(review): $norm_word and $norm_lang are interpolated
                // into the SQL text unescaped. The database driver behind
                // db_query() is not visible here, so the right quoting
                // function cannot be chosen in this block - confirm and fix
                // in the db layer.
                $query = "SELECT flag
                    FROM spell
                    WHERE word='$norm_word'
                    AND lang='$norm_lang'";

                if ($DEBUG) echo "check_words(): ", $query;

                if (!($res = db_query($query))) {
                    print_error_local('Query error: '.$query."\n".db_error());
                    // Only reached if print_error_local() returns.
                    continue;
                }

                while ($row = db_fetchrow($res)) {
                    $flag = trim($row[0]);
                    if ($flag == '') continue;
                    if (check_words_has_flag($flag, $norm_flag)) {
                        $final_word[$word][] = $norm_word;
                        if ($DEBUG) echo "check_words(): ","Norm form found: \"$norm_word\" for \"$word\"\n";
                    }
                }
                db_freeresult($res);
            } elseif ($ispellmode == 'text') {
                foreach ($spell_file as $lang => $lang_file) {
                    if ($lang != $norm_lang) continue;
                    $file_count = count($lang_file);
                    for ($i = 0; $i < $file_count; $i++) {
                        $file = $lang_file[$i];
                        // Quote the pattern and the path so that shell
                        // metacharacters in a word cannot run commands.
                        // grep only pre-filters; the exact comparison
                        // happens below.
                        $pipe = $grep.' -i '.check_words_shell_quote("^$norm_word/").
                            ' '.check_words_shell_quote($file);
                        if ($DEBUG) echo "check_words(): ", $pipe;

                        $fd = popen($pipe, "r");

                        if (! $fd) {
                            print "Cannot pipe to $grep\n";
                            exit_local(1);
                            // Only reached if exit_local() returns.
                            continue;
                        }

                        while (! feof($fd)) {
                            $str = trim(strtolower(fgets($fd, 1024)));
                            if ($str == '') continue;
                            // Dictionary line: "word/flags".
                            $str_array = explode("/", $str);
                            if (count($str_array) < 2) continue;

                            if (($str_array[0] === $norm_word) &&
                                check_words_has_flag($str_array[1], $norm_flag)) {
                                $final_word[$word][] = $norm_word;
                                if ($DEBUG) echo "check_words(): ","Norm form found: \"$norm_word\" for \"$word\"\n";
                            }
                        } // while

                        pclose($fd);
                    } // for
                }
            } // if
        } // for
    } // foreach
}

// True when the flag string $flags contains $wanted. The flag is compared
// literally, never as a regular expression, so a flag such as '.' cannot
// match every entry. An empty $wanted never matches.
function check_words_has_flag($flags, $wanted) {
    if ($wanted === '') return false;
    return strpos($flags, $wanted) !== false;
}

// Wraps $arg in single quotes for a POSIX shell. Done by hand rather than
// with escapeshellarg() because that function can drop non-ASCII bytes
// depending on the current locale, which would corrupt words written in an
// 8-bit national encoding.
function check_words_shell_quote($arg) {
    return "'".str_replace("'", "'\\''", $arg)."'";
}
242
// Demo driver: load the Croatian suffix rules from a text affix file,
// report how many rules were read, then normalize two sample words.
$ispellmode = 'text';
$affix_file = array('/tmp/dpavlin/docs/nn/search/croatian.aff');

// Turn on the debug output of the functions above.
$DEBUG = 1;

load_affix();
echo 'size: ' . count($affix_array) . "\n";

normalize_word('zakoni');
normalize_word('laws');
252
253 ?>
254

  ViewVC Help
Powered by ViewVC 1.1.26