1 |
ulpfr |
10 |
/**************************************************************************** |
2 |
|
|
***************************************************************************** |
3 |
|
|
FILE : soundex.c |
4 |
|
|
FUNCTION : This module contains the two algorithms SOUNDEX and PHONIX as |
5 |
|
|
they were defined by Gadd. |
6 |
|
|
NOTES : This is an ANSI-C version of the original C++ file. |
7 |
|
|
LITERATURE: T.N. Gadd: 'Fishing fore Werds': Phonetic Retrieval of written |
8 |
|
|
text in Information Retrieval Systems, Program 22/3, 1988, |
9 |
|
|
p.222-237. |
10 |
|
|
T.N. Gadd: PHONIX --- The Algorithm, Program 24/4, 1990, |
11 |
|
|
p.363-366. |
12 |
|
|
***************************************************************************** |
13 |
|
|
****************************************************************************/ |
14 |
|
|
|
15 |
|
|
/* #define TEST */ /* activates procedures main() and PrintCode() */ |
16 |
|
|
/* #define PHONIX_DEBUG */ /* activates some debug information */ |
17 |
|
|
|
18 |
|
|
/**************************************************************************** |
19 |
|
|
NAME : StrDel |
20 |
|
|
INPUT : char *DelPos --- pointer to first char to be deleted |
21 |
|
|
int DelSize --- number of chars to be deleted |
22 |
|
|
OUTPUT : char *DelPos |
23 |
|
|
FUNCTION: This procedure deletes DelSize chars at position DelPos and moves |
24 |
|
|
the remaining chars left to DelPos. |
25 |
|
|
EXAMPLE : If Del is pointing at the L of the string "DELETE" the call |
26 |
|
|
StrDel(Del, 2) will return Del pointing at "TE". |
27 |
|
|
****************************************************************************/ |
28 |
|
|
|
29 |
|
|
#ifdef __cplusplus |
30 |
|
|
extern "C" { |
31 |
|
|
#endif |
32 |
|
|
#include "EXTERN.h" |
33 |
|
|
#include "perl.h" |
34 |
|
|
#include "XSUB.h" |
35 |
|
|
#ifdef __cplusplus |
36 |
|
|
} |
37 |
|
|
#endif |
38 |
|
|
|
39 |
|
|
/*
 * StrDel --- delete DelSize chars at DelPos, in place.
 *
 * The chars behind the deleted ones (and the trailing \0) are moved left so
 * that they start at DelPos.  If fewer than DelSize chars are left in the
 * string, it is cut off at DelPos; a DelSize of zero or less changes nothing.
 */
void StrDel (char *DelPos, int DelSize)
{
  char *Help = DelPos;

  /* skip the chars to be deleted, but never step over the trailing \0 */
  while ((DelSize > 0) && *Help)
  {
    Help++;
    DelSize--;
  }

  /* move chars left */
  while (*Help)
    *DelPos++ = *Help++;

  /* move trailing \0 */
  *DelPos = *Help;
}
51 |
|
|
|
52 |
|
|
|
53 |
|
|
/****************************************************************************
  NAME    : StrIns
  INPUT   : char *InsPos --- pointer to insert position
            char *InsStr --- new string to be inserted
  OUTPUT  : char *InsPos --- now pointing at the inserted string
  FUNCTION: StrIns moves the chars at position InsPos (including the trailing
            \0) right and copies the string InsStr into this free space.
            The buffer holding InsPos needs room for strlen(InsStr) more
            chars; StrIns has no way to check this.
  EXAMPLE : If Ins is pointing at the S of the string "INSERT" the call
            StrIns(Ins, "NEW") will return Ins pointing at "NEWSERT".
 ****************************************************************************/
void StrIns (char *InsPos, char *InsStr)
{
  int MoveSize = strlen(InsStr);
  int i;

  /* move chars right, starting with the trailing \0 (index strlen(InsPos)) */
  /* and working backwards so that no char is overwritten before it moved   */
  for (i = strlen(InsPos); i >= 0; i--)
    InsPos[i+MoveSize] = InsPos[i];

  /* copy InsStr to InsPos (without its \0) */
  while (*InsStr)
    *InsPos++ = *InsStr++;
}
77 |
|
|
|
78 |
|
|
extern bool IsVowel(); |
79 |
|
|
#if 0 |
80 |
|
|
/****************************************************************************
  NAME    : IsVowel
  INPUT   : char c --- char to be examined
  OUTPUT  : int    --- 1 or 0
  FUNCTION: IsVowel tells whether c is treated as a vowel. This is the case
            for the uppercase letters A, E, I, O, U and Y and for the byte
            values 0304, 0326, 0334, 0344, 0366 and 0374 (presumably the
            ISO 8859-1 umlauts --- the encoding is not stated here).
            For these chars IsVowel returns 1, for all others 0.
 ****************************************************************************/
int IsVowel (c)
unsigned char c;
{
  switch (c)
  {
    /* plain uppercase vowels and Y */
    case 'A': case 'E': case 'I':
    case 'O': case 'U': case 'Y':
    /* 8-bit vowel codes */
    case 0304: case 0344: case 0334:
    case 0366: case 0326: case 0374:
      return 1;

    default:
      return 0;
  }
}
96 |
|
|
|
97 |
|
|
|
98 |
|
|
/**************************************************************************** |
99 |
|
|
NAME : SoundexCode |
100 |
|
|
INPUT : char *Name --- string to be calculated |
101 |
|
|
OUTPUT : char *Key --- soundex code for Name |
102 |
|
|
FUNCTION: This procedure calculates a four-letter soundex code for the string |
103 |
|
|
Name and returns this code in the string Key. |
104 |
|
|
****************************************************************************/ |
105 |
|
|
#define SoundexLen 4 /* length of a soundex code */ |
106 |
|
|
#define SoundexKey "Z000" /* default key for soundex code */ |
107 |
|
|
|
108 |
|
|
/*
 * SCode --- soundex value of the char c.
 *
 * Returns the numeric value (0..6, not the digit character) assigned to the
 * letter c; upper- and lowercase letters give the same value.  The byte 0337
 * (german sz) gives 2.  Every char that toupper() does not map into 'A'..'Z'
 * gives 0, the value that SoundexCode ignores.
 *
 * NOTE(review): the call is traced to stderr unconditionally; this looks like
 * left-over debug output --- consider guarding it like the PHONIX_DEBUG code.
 */
char SCode(c)
unsigned char c;
{
  /* set letter values: Code[0] belongs to 'A', Code[25] to 'Z' */
  static const int Code[] = {0, 1, 2, 3, 0, 1, 2, 0, 0, 2, 2, 4, 5, 5, 0,
                             1, 2, 6, 2, 3, 0, 1, 0, 2, 0, 2};
  int Upper;

  fprintf(stderr, "SCode(%c)\n", c);
  if (c == 0337) return(2); /* german sz */

  /* only letters have an entry in Code[]; do not index outside the table */
  Upper = toupper(c);
  if ((Upper < 'A') || (Upper > 'Z'))
    return(0);

  return(Code[Upper-'A']);
}
119 |
|
|
|
120 |
|
|
void SoundexCode (Name, Key) |
121 |
|
|
unsigned char *Name; |
122 |
|
|
unsigned char *Key; |
123 |
|
|
{ |
124 |
|
|
unsigned char LastLetter; |
125 |
|
|
int Index; |
126 |
|
|
|
127 |
|
|
fprintf(stderr, "SoundexCode(%s)\n", Name); |
128 |
|
|
/* set default key */ |
129 |
|
|
strcpy(Key, SoundexKey); |
130 |
|
|
|
131 |
|
|
/* keep first letter */ |
132 |
|
|
Key[0] = *Name; |
133 |
|
|
LastLetter = *Name; |
134 |
|
|
Name++; |
135 |
|
|
|
136 |
|
|
/* scan rest of string */ |
137 |
|
|
for (Index = 1; (Index < SoundexLen) && *Name; Name++) |
138 |
|
|
{ |
139 |
|
|
/* use only letters */ |
140 |
|
|
if (isalpha(*Name)) |
141 |
|
|
{ |
142 |
|
|
/* ignore duplicate successive chars */ |
143 |
|
|
if (LastLetter != *Name) |
144 |
|
|
{ |
145 |
|
|
/* new LastLetter */ |
146 |
|
|
LastLetter = *Name; |
147 |
|
|
|
148 |
|
|
/* ignore letters with code 0 */ |
149 |
|
|
if (!IsVowel(*Name) && (SCode(*Name) != 0)) |
150 |
|
|
{ |
151 |
|
|
Key[Index] = '0' + SCode(*Name); |
152 |
|
|
Index++; |
153 |
|
|
} |
154 |
|
|
} |
155 |
|
|
} |
156 |
|
|
} |
157 |
|
|
} |
158 |
|
|
#endif /* 0 */ |
159 |
|
|
|
160 |
|
|
/****************************************************************************
  NAME    : PhonixCode
  INPUT   : char *Name --- string to be calculated
  OUTPUT  : char *Key  --- phonix code for Name
  FUNCTION: This procedure calculates an eight-letter phonix code for the
            string Name and returns this code in the string Key. Key must
            have room for the eight letters and the trailing \0.
 ****************************************************************************/
167 |
|
|
#define PhonixLen 8 /* length of a phonix code */ |
168 |
|
|
#define PhonixKey "Z0000000" /* default key for phonix code */ |
169 |
|
|
|
170 |
|
|
extern bool IsAlpha(); |
171 |
|
|
extern char PCode(); |
172 |
|
|
|
173 |
|
|
void PhonixCode (Name, Key) |
174 |
|
|
char *Name; |
175 |
|
|
char *Key; |
176 |
|
|
{ |
177 |
|
|
char LastLetter; |
178 |
|
|
int Index; |
179 |
|
|
|
180 |
|
|
/* set default key */ |
181 |
|
|
strcpy(Key, PhonixKey); |
182 |
|
|
|
183 |
|
|
/* keep first letter or replace it with '$' */ |
184 |
|
|
Key[0] = IsVowel(*Name) ? '$' : *Name; |
185 |
|
|
LastLetter = *Name; |
186 |
|
|
Name++; |
187 |
|
|
|
188 |
|
|
/* NOTE: Gadd replaces vowels being the first letter of the */ |
189 |
|
|
/* word with a 'v'. Due to the implementation of WAIS all */ |
190 |
|
|
/* letters will be lowercased. Therefore '$' is used instead */ |
191 |
|
|
/* of 'v'. */ |
192 |
|
|
|
193 |
|
|
/* scan rest of string */ |
194 |
|
|
for (Index = 1; (Index < PhonixLen) && *Name; Name++) |
195 |
|
|
{ |
196 |
|
|
/* use only letters */ |
197 |
|
|
if (IsAlpha(*Name)) |
198 |
|
|
{ |
199 |
|
|
/* ignore duplicate successive chars */ |
200 |
|
|
if (LastLetter != *Name) |
201 |
|
|
{ |
202 |
|
|
LastLetter = *Name; |
203 |
|
|
|
204 |
|
|
/* ignore letters with code 0 except as separators */ |
205 |
|
|
if (PCode(*Name) != '0') |
206 |
|
|
{ |
207 |
|
|
Key[Index] = PCode(*Name); |
208 |
|
|
Index++; |
209 |
|
|
} |
210 |
|
|
} |
211 |
|
|
} |
212 |
|
|
} |
213 |
|
|
} |
214 |
|
|
|
215 |
|
|
|
216 |
|
|
/**************************************************************************** |
217 |
|
|
NAME : PhonixReplace1 |
218 |
|
|
INPUT : int where --- replace OldStr only if it occurs at this position |
219 |
|
|
char *Name --- string to work |
220 |
|
|
char *OldStr --- old letter group to delete |
221 |
|
|
char *NewStr --- new letter group to insert |
222 |
|
|
int CondPre --- condition referring to letter before OldStr |
223 |
|
|
int CondPost --- condition referring to letter after OldStr |
224 |
|
|
OUTPUT : char *Name --- Name with replaced letter group |
225 |
|
|
FUNCTION: This procedure replaces the letter group OldStr with the letter |
226 |
|
|
group NewStr in the string Name, regarding the position of OldStr |
227 |
|
|
where (START, MIDDLE, END, ALL) and the conditions CondPre and |
228 |
|
|
CondPost (NON, VOC, CON). |
229 |
|
|
EXAMPLE : PhonixReplace1(START, "WAWA", "W", "V", NON, NON) replaces only the |
230 |
|
|
first W with a V because of the condition START. |
231 |
|
|
EXAMPLE : PhonixReplace1(START, "WAWA", "W", "V", NON, CON) replaces neither |
232 |
|
|
the first W with a V (because of the condition CON, i.e. a consonant |
233 |
|
|
must follow the W) nor the second W (because of the condition START). |
234 |
|
|
****************************************************************************/ |
235 |
|
|
#define NON 1 /* no condition */ |
236 |
|
|
#define VOC 2 /* vowel needed */ |
237 |
|
|
#define CON 3 /* consonant needed */ |
238 |
|
|
|
239 |
|
|
#define START 1 /* condition refers to beginning of Name */ |
240 |
|
|
#define MIDDLE 2 /* condition refers to middle of Name */ |
241 |
|
|
#define END 3 /* condition refers to EndPos of Name */ |
242 |
|
|
#define ALL 4 /* condition refers to whole Name */ |
243 |
|
|
|
244 |
|
|
void PhonixReplace1 (Where, Name, OldStr, NewStr, CondPre, CondPost) |
245 |
|
|
int Where; |
246 |
|
|
char *Name; |
247 |
|
|
char *OldStr; |
248 |
|
|
char *NewStr; |
249 |
|
|
int CondPre; |
250 |
|
|
int CondPost; |
251 |
|
|
{ |
252 |
|
|
char *OldStrPos; |
253 |
|
|
char *EndPos; |
254 |
|
|
char *NamePtr = Name; |
255 |
|
|
|
256 |
|
|
/* vowels before or after OldStr */ |
257 |
|
|
char LetterPre; /* letter before OldStr */ |
258 |
|
|
char LetterPost; /* letter after OldStr */ |
259 |
|
|
int VowelPre; /* LetterPre is vowel? */ |
260 |
|
|
int VowelPost; /* LetterPost is vowel? */ |
261 |
|
|
int OkayPre; /* pre-condition okay? */ |
262 |
|
|
int OkayPost; /* post-condition okay? */ |
263 |
|
|
|
264 |
|
|
do |
265 |
|
|
{ |
266 |
|
|
/* find OldStr in NamePtr */ |
267 |
|
|
OldStrPos = strstr(NamePtr, OldStr); |
268 |
|
|
|
269 |
|
|
/* find EndPos of Name */ |
270 |
|
|
EndPos = &Name[strlen(Name)-strlen(OldStr)]; |
271 |
|
|
|
272 |
|
|
/* check conditions if OldStrPos != NULL */ |
273 |
|
|
if (OldStrPos) |
274 |
|
|
{ |
275 |
|
|
/* vowel before OldStrPos */ |
276 |
|
|
LetterPre = *(OldStrPos-1); |
277 |
|
|
/* vowel after OldStrPos+strlen(OldStr) */ |
278 |
|
|
LetterPost = *(OldStrPos+strlen(OldStr)); |
279 |
|
|
|
280 |
|
|
/* check conditions */ |
281 |
|
|
switch (CondPre) |
282 |
|
|
{ |
283 |
|
|
case NON: OkayPre = 1; |
284 |
|
|
break; |
285 |
|
|
case VOC: OkayPre = LetterPre ? IsVowel(LetterPre) : 0; |
286 |
|
|
break; |
287 |
|
|
case CON: OkayPre = LetterPre ? !IsVowel(LetterPre) : 0; |
288 |
|
|
break; |
289 |
|
|
default : OkayPre = 0; |
290 |
|
|
break; |
291 |
|
|
} |
292 |
|
|
switch (CondPost) |
293 |
|
|
{ |
294 |
|
|
case NON: OkayPost = 1; |
295 |
|
|
break; |
296 |
|
|
case VOC: OkayPost = LetterPost ? IsVowel(LetterPost) : 0; |
297 |
|
|
break; |
298 |
|
|
case CON: OkayPost = LetterPost ? !IsVowel(LetterPost) : 0; |
299 |
|
|
break; |
300 |
|
|
default : OkayPost = 0; |
301 |
|
|
break; |
302 |
|
|
} |
303 |
|
|
} |
304 |
|
|
|
305 |
|
|
/* replace OldStr with NewStr */ |
306 |
|
|
if (OldStrPos && OkayPre && OkayPost && |
307 |
|
|
((Where == START) && (OldStrPos == Name) || |
308 |
|
|
(Where == MIDDLE) && (OldStrPos != Name) && (OldStrPos != EndPos) || |
309 |
|
|
(Where == END) && (OldStrPos == EndPos) || |
310 |
|
|
(Where == ALL))) |
311 |
|
|
{ |
312 |
|
|
/* replace old letter group with new letter group */ |
313 |
|
|
StrDel(OldStrPos, strlen(OldStr)); |
314 |
|
|
StrIns(OldStrPos, NewStr); |
315 |
|
|
|
316 |
|
|
/* advance NamePtr to the position of OldStr */ |
317 |
|
|
NamePtr = OldStrPos; |
318 |
|
|
|
319 |
|
|
#ifdef PHONIX_DEBUG |
320 |
|
|
printf("Replace = %s-->%s\n", OldStr, NewStr); |
321 |
|
|
#endif /* PHONIX_DEBUG */ |
322 |
|
|
} |
323 |
|
|
else |
324 |
|
|
/* advance NamePtr one char */ |
325 |
|
|
NamePtr++; |
326 |
|
|
} |
327 |
|
|
while (OldStrPos); |
328 |
|
|
} |
329 |
|
|
|
330 |
|
|
|
331 |
|
|
/**************************************************************************** |
332 |
|
|
NAME : PhonixReplace2 |
333 |
|
|
INPUT : int where --- replace OldStr only if it occurs at this position |
334 |
|
|
char *Name --- string to work |
335 |
|
|
char *OldStr --- old letter group to delete |
336 |
|
|
char *NewStr --- new letter group to insert |
337 |
|
|
OUTPUT : char *Name --- Name with replaced letter group |
338 |
|
|
FUNCTION: This procedure replaces the letter group OldStr with the letter |
339 |
|
|
group NewStr in the string Name, regarding the position of OldStr |
340 |
|
|
where (START, MIDDLE, END, ALL). |
341 |
|
|
EXAMPLE : PhonixReplace2(START, "WAWA", "W", "V") replaces only the first W |
342 |
|
|
with a V because of the condition START. |
343 |
|
|
****************************************************************************/ |
344 |
|
|
void PhonixReplace2 (Where, Name, OldStr, NewStr) |
345 |
|
|
int Where; |
346 |
|
|
char *Name; |
347 |
|
|
char *OldStr; |
348 |
|
|
char *NewStr; |
349 |
|
|
{ |
350 |
|
|
char *OldStrPos; |
351 |
|
|
char *EndPos; |
352 |
|
|
char *NamePtr = Name; |
353 |
|
|
|
354 |
|
|
do |
355 |
|
|
{ |
356 |
|
|
/* find OldStr in NamePtr */ |
357 |
|
|
OldStrPos = strstr(NamePtr, OldStr); |
358 |
|
|
|
359 |
|
|
/* find EndPos of Name */ |
360 |
|
|
EndPos = &Name[strlen(Name)-strlen(OldStr)]; |
361 |
|
|
|
362 |
|
|
/* replace OldStr with NewStr */ |
363 |
|
|
if (OldStrPos && |
364 |
|
|
((Where == START) && (OldStrPos == Name) || |
365 |
|
|
(Where == MIDDLE) && (OldStrPos != Name) && (OldStrPos != EndPos) || |
366 |
|
|
(Where == END) && (OldStrPos == EndPos) || |
367 |
|
|
(Where == ALL))) |
368 |
|
|
{ |
369 |
|
|
/* replace old letter group with new letter group */ |
370 |
|
|
StrDel(OldStrPos, strlen(OldStr)); |
371 |
|
|
StrIns(OldStrPos, NewStr); |
372 |
|
|
|
373 |
|
|
/* advance NamePtr to the position of OldStr */ |
374 |
|
|
NamePtr = OldStrPos; |
375 |
|
|
|
376 |
|
|
#ifdef PHONIX_DEBUG |
377 |
|
|
printf("Replace = %s-->%s\n", OldStr, NewStr); |
378 |
|
|
#endif /* PHONIX_DEBUG */ |
379 |
|
|
} |
380 |
|
|
else |
381 |
|
|
/* advance NamePtr one char */ |
382 |
|
|
NamePtr++; |
383 |
|
|
} |
384 |
|
|
while (OldStrPos); |
385 |
|
|
} |
386 |
|
|
|
387 |
|
|
|
388 |
|
|
/**************************************************************************** |
389 |
|
|
NAME : Phonix |
390 |
|
|
INPUT : char *Name --- string to calculate phonix code for |
391 |
|
|
OUTPUT : char *Key --- phonix code of Name |
392 |
|
|
FUNCTION: Phonix calculates the phonix code for the string Name. |
393 |
|
|
****************************************************************************/ |
394 |
|
|
void Phonix (Name, Key) |
395 |
|
|
char *Name; |
396 |
|
|
char *Key; |
397 |
|
|
{ |
398 |
|
|
/* use new variable NewName to remain Name unchanged */ |
399 |
|
|
char NewName[50]; |
400 |
|
|
int i; |
401 |
|
|
|
402 |
|
|
strcpy(NewName, Name); |
403 |
|
|
|
404 |
|
|
/* uppercase NewName */ |
405 |
|
|
for (i=0; i < strlen(NewName); i++) |
406 |
|
|
if (islower(NewName[i])) |
407 |
|
|
NewName[i] = toupper(NewName[i]); |
408 |
|
|
|
409 |
|
|
#ifdef PHONIX_DEBUG |
410 |
|
|
printf("Name = %s\n", NewName); |
411 |
|
|
#endif /* PHONIX_DEBUG */ |
412 |
|
|
|
413 |
|
|
/* replace letter groups according to Gadd's definition */ |
414 |
|
|
PhonixReplace2(ALL , NewName, "DG" , "G" ); |
415 |
|
|
PhonixReplace2(ALL , NewName, "CO" , "KO" ); |
416 |
|
|
PhonixReplace2(ALL , NewName, "CA" , "KA" ); |
417 |
|
|
PhonixReplace2(ALL , NewName, "CU" , "KU" ); |
418 |
|
|
PhonixReplace2(ALL , NewName, "CY" , "SI" ); |
419 |
|
|
PhonixReplace2(ALL , NewName, "CI" , "SI" ); |
420 |
|
|
PhonixReplace2(ALL , NewName, "CE" , "SE" ); |
421 |
|
|
PhonixReplace1(START , NewName, "CL" , "KL" , NON, VOC); |
422 |
|
|
PhonixReplace2(ALL , NewName, "CK" , "K" ); |
423 |
|
|
PhonixReplace2(END , NewName, "GC" , "K" ); |
424 |
|
|
PhonixReplace2(END , NewName, "JC" , "K" ); |
425 |
|
|
PhonixReplace1(START , NewName, "CHR" , "KR" , NON, VOC); |
426 |
|
|
PhonixReplace1(START , NewName, "CR" , "KR" , NON, VOC); |
427 |
|
|
PhonixReplace2(START , NewName, "WR" , "R" ); |
428 |
|
|
PhonixReplace2(ALL , NewName, "NC" , "NK" ); |
429 |
|
|
PhonixReplace2(ALL , NewName, "CT" , "KT" ); |
430 |
|
|
PhonixReplace2(ALL , NewName, "PH" , "F" ); |
431 |
|
|
PhonixReplace2(ALL , NewName, "AA" , "AR" ); |
432 |
|
|
PhonixReplace2(ALL , NewName, "SCH" , "SH" ); |
433 |
|
|
PhonixReplace2(ALL , NewName, "BTL" , "TL" ); |
434 |
|
|
PhonixReplace2(ALL , NewName, "GHT" , "T" ); |
435 |
|
|
PhonixReplace2(ALL , NewName, "AUGH" , "ARF" ); |
436 |
|
|
PhonixReplace1(MIDDLE, NewName, "LJ" , "LD" , VOC, VOC); |
437 |
|
|
PhonixReplace2(ALL , NewName, "LOUGH", "LOW" ); |
438 |
|
|
PhonixReplace2(START , NewName, "Q" , "KW" ); |
439 |
|
|
PhonixReplace2(START , NewName, "KN" , "N" ); |
440 |
|
|
PhonixReplace2(END , NewName, "GN" , "N" ); |
441 |
|
|
PhonixReplace2(ALL , NewName, "GHN" , "N" ); |
442 |
|
|
PhonixReplace2(END , NewName, "GNE" , "N" ); |
443 |
|
|
PhonixReplace2(ALL , NewName, "GHNE" , "NE" ); |
444 |
|
|
PhonixReplace2(END , NewName, "GNES" , "NS" ); |
445 |
|
|
PhonixReplace2(START , NewName, "GN" , "N" ); |
446 |
|
|
PhonixReplace1(MIDDLE, NewName, "GN" , "N" , NON, CON); |
447 |
|
|
PhonixReplace1(END , NewName, "GN" , "N" , NON, NON); /* NON,CON */ |
448 |
|
|
PhonixReplace2(START , NewName, "PS" , "S" ); |
449 |
|
|
PhonixReplace2(START , NewName, "PT" , "T" ); |
450 |
|
|
PhonixReplace2(START , NewName, "CZ" , "C" ); |
451 |
|
|
PhonixReplace1(MIDDLE, NewName, "WZ" , "Z" , VOC, NON); |
452 |
|
|
PhonixReplace2(MIDDLE, NewName, "CZ" , "CH" ); |
453 |
|
|
PhonixReplace2(ALL , NewName, "LZ" , "LSH" ); |
454 |
|
|
PhonixReplace2(ALL , NewName, "RZ" , "RSH" ); |
455 |
|
|
PhonixReplace1(MIDDLE, NewName, "Z" , "S" , NON, VOC); |
456 |
|
|
PhonixReplace2(ALL , NewName, "ZZ" , "TS" ); |
457 |
|
|
PhonixReplace1(MIDDLE, NewName, "Z" , "TS" , CON, NON); |
458 |
|
|
PhonixReplace2(ALL , NewName, "HROUG", "REW" ); |
459 |
|
|
PhonixReplace2(ALL , NewName, "OUGH" , "OF" ); |
460 |
|
|
PhonixReplace1(MIDDLE, NewName, "Q" , "KW" , VOC, VOC); |
461 |
|
|
PhonixReplace1(MIDDLE, NewName, "J" , "Y" , VOC, VOC); |
462 |
|
|
PhonixReplace1(START , NewName, "YJ" , "Y" , NON, VOC); |
463 |
|
|
PhonixReplace2(START , NewName, "GH" , "G" ); |
464 |
|
|
PhonixReplace1(END , NewName, "E" , "GH" , VOC, NON); |
465 |
|
|
PhonixReplace2(START , NewName, "CY" , "S" ); |
466 |
|
|
PhonixReplace2(ALL , NewName, "NX" , "NKS" ); |
467 |
|
|
PhonixReplace2(START , NewName, "PF" , "F" ); |
468 |
|
|
PhonixReplace2(END , NewName, "DT" , "T" ); |
469 |
|
|
PhonixReplace2(END , NewName, "TL" , "TIL" ); |
470 |
|
|
PhonixReplace2(END , NewName, "DL" , "DIL" ); |
471 |
|
|
PhonixReplace2(ALL , NewName, "YTH" , "ITH" ); |
472 |
|
|
PhonixReplace1(START , NewName, "TJ" , "CH" , NON, VOC); |
473 |
|
|
PhonixReplace1(START , NewName, "TSJ" , "CH" , NON, VOC); |
474 |
|
|
PhonixReplace1(START , NewName, "TS" , "T" , NON, VOC); |
475 |
|
|
PhonixReplace1(ALL , NewName, "TCH" , "CH" ); |
476 |
|
|
PhonixReplace1(MIDDLE, NewName, "WSK" , "VSKIE", VOC, NON); |
477 |
|
|
PhonixReplace1(END , NewName, "WSK" , "VSKIE", VOC, NON); |
478 |
|
|
PhonixReplace1(START , NewName, "MN" , "N" , NON, VOC); |
479 |
|
|
PhonixReplace1(START , NewName, "PN" , "N" , NON, VOC); |
480 |
|
|
PhonixReplace1(MIDDLE, NewName, "STL" , "SL" , VOC, NON); |
481 |
|
|
PhonixReplace1(END , NewName, "STL" , "SL" , VOC, NON); |
482 |
|
|
PhonixReplace2(END , NewName, "TNT" , "ENT" ); |
483 |
|
|
PhonixReplace2(END , NewName, "EAUX" , "OH" ); |
484 |
|
|
PhonixReplace2(ALL , NewName, "EXCI" , "ECS" ); |
485 |
|
|
PhonixReplace2(ALL , NewName, "X" , "ECS" ); |
486 |
|
|
PhonixReplace2(END , NewName, "NED" , "ND" ); |
487 |
|
|
PhonixReplace2(ALL , NewName, "JR" , "DR" ); |
488 |
|
|
PhonixReplace2(END , NewName, "EE" , "EA" ); |
489 |
|
|
PhonixReplace2(ALL , NewName, "ZS" , "S" ); |
490 |
|
|
PhonixReplace1(MIDDLE, NewName, "R" , "AH" , VOC, CON); |
491 |
|
|
PhonixReplace1(END , NewName, "R" , "AH" , VOC, NON); /* VOC,CON */ |
492 |
|
|
PhonixReplace1(MIDDLE, NewName, "HR" , "AH" , VOC, CON); |
493 |
|
|
PhonixReplace1(END , NewName, "HR" , "AH" , VOC, NON); /* VOC,CON */ |
494 |
|
|
PhonixReplace1(END , NewName, "HR" , "AH" , VOC, NON); |
495 |
|
|
PhonixReplace2(END , NewName, "RE" , "AR" ); |
496 |
|
|
PhonixReplace1(END , NewName, "R" , "AH" , VOC, NON); |
497 |
|
|
PhonixReplace2(ALL , NewName, "LLE" , "LE" ); |
498 |
|
|
PhonixReplace1(END , NewName, "LE" , "ILE" , CON, NON); |
499 |
|
|
PhonixReplace1(END , NewName, "LES" , "ILES" , CON, NON); |
500 |
|
|
PhonixReplace2(END , NewName, "E" , "" ); |
501 |
|
|
PhonixReplace2(END , NewName, "ES" , "S" ); |
502 |
|
|
PhonixReplace1(END , NewName, "SS" , "AS" , VOC, NON); |
503 |
|
|
PhonixReplace1(END , NewName, "MB" , "M" , VOC, NON); |
504 |
|
|
PhonixReplace2(ALL , NewName, "MPTS" , "MPS" ); |
505 |
|
|
PhonixReplace2(ALL , NewName, "MPS" , "MS" ); |
506 |
|
|
PhonixReplace2(ALL , NewName, "MPT" , "MT" ); |
507 |
|
|
|
508 |
|
|
/* calculate Key for NewName */ |
509 |
|
|
PhonixCode(NewName, Key); |
510 |
|
|
|
511 |
|
|
#ifdef PHONIX_DEBUG |
512 |
|
|
printf("NewName = %s\n", NewName); |
513 |
|
|
printf("Code = %s\n\n", Key); |
514 |
|
|
#endif /* PHONIX_DEBUG */ |
515 |
|
|
} |
516 |
|
|
|
517 |
|
|
|
518 |
|
|
/****************************************************************************
  NAME    : Soundex
  INPUT   : char *Name --- string to calculate soundex code for
  OUTPUT  : char *Key  --- soundex code of Name
  FUNCTION: Soundex calculates the soundex code for the string Name. Name
            itself remains unchanged: an uppercased copy is handed to
            SoundexCode, which writes Key.
            Only the first 49 chars of Name are used.
 ****************************************************************************/

void Soundex (char *Name, char *Key)
{
  /* use new variable NewName to remain Name unchanged */
  char NewName[50];
  unsigned char c;
  int  i;

  /* copy at most as many chars as NewName can hold and uppercase them on */
  /* the way; the <ctype.h> functions need the char converted to unsigned */
  /* char                                                                 */
  for (i = 0; (i < (int) sizeof(NewName) - 1) && Name[i]; i++)
  {
    c = (unsigned char) Name[i];
    NewName[i] = islower(c) ? (char) toupper(c) : Name[i];
  }
  NewName[i] = '\0';

  /* calculate Key for Name */
  SoundexCode(NewName, Key);
  /* fprintf(stderr, "Soundex: %s -> %s\n", Name, Key); */
}
544 |
|
|
|
545 |
|
|
|
546 |
|
|
/**************************************************************************** |
547 |
|
|
Now the two procedures PrintCode() and main() follow which will only be |
548 |
|
|
included if TEST is defined. |
549 |
|
|
****************************************************************************/ |
550 |
|
|
|
551 |
|
|
#ifdef TEST |
552 |
|
|
|
553 |
|
|
void PrintCode (Name) |
554 |
|
|
unsigned char *Name; |
555 |
|
|
{ |
556 |
|
|
unsigned char SoundexName[SoundexLen+1]; |
557 |
|
|
unsigned char PhonixName[PhonixLen+1]; |
558 |
|
|
|
559 |
|
|
Soundex(Name, SoundexName); |
560 |
|
|
Phonix(Name, PhonixName); |
561 |
|
|
printf("%20s --> %s %s\n", Name, SoundexName, PhonixName); |
562 |
|
|
} |
563 |
|
|
|
564 |
|
|
|
565 |
|
|
/*
 * main --- test driver: prints the codes of some sample names and then of
 * every line read from stdin until end of input.
 */
int main (void)
{
  unsigned char s[256];
  int len;

  PrintCode("CLASSEN");
  PrintCode("WRITE");
  PrintCode("WRIGHT");
  PrintCode("RITE");
  PrintCode("WHITE");
  PrintCode("WAIT");
  PrintCode("WEIGHT");
  PrintCode("KNIGHT");
  PrintCode("NIGHT");
  PrintCode("NITE");
  PrintCode("GNOME");
  PrintCode("NOAM");
  PrintCode("SMIDT");
  PrintCode("SMITH");
  PrintCode("SCHMIT");
  PrintCode("CRAFT");
  PrintCode("KRAFT");
  PrintCode("REES");
  PrintCode("REECE");
  PrintCode("YAEGER");
  PrintCode("YOGA");
  PrintCode("EAGER");
  PrintCode("AUGER");
  PrintCode("Krueger");
  PrintCode("Kruger");
  PrintCode("Krüger");

  /* fgets() never writes more than sizeof(s) bytes and returns a null */
  /* pointer at end of input, which ends the loop                      */
  while (fgets((char *) s, sizeof(s), stdin))
  {
    /* fgets() keeps the newline; the name goes to PrintCode without it */
    len = strlen((char *) s);
    if ((len > 0) && (s[len-1] == '\n'))
      s[len-1] = '\0';

    PrintCode(s);
  }

  return 0;
}
598 |
|
|
|
599 |
|
|
#endif /* TEST*/ |