/[wait]/trunk/levenstein.c
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/levenstein.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 88 - (show annotations)
Mon May 24 13:44:01 2004 UTC (19 years, 11 months ago) by dpavlin
File MIME type: text/plain
File size: 3810 byte(s)
move cvs-head to trunk

1 /**** Einfache Levenshtein-Distanz (p0=q0=r0=1) ****/
2 /**** mit Berücksichtigung von Wildcards ****/
3 /**** (geschwindigkeitsoptimiertes C-Programm) ****/
4 /**** Autor : Jörg Michael, Hannover ****/
5 /**** Datum : 22. Dezember 1993 ****/
6
7 /**** modus = ' ': normale Levenshtein-Distanz ****/
8 /**** modus = '+': keine Unterscheidung ****/
9 /**** Klein-/Großschreibung ****/
10 /**** modus = '*': wie '+', aber zusätzlich ****/
11 /**** "symmetrisches" Verhalten ****/
12 /**** gemäß der im Text beschrie- ****/
13 /**** benen Vorformatierung ****/
14
15 #ifdef __cplusplus
16 extern "C" {
17 #endif
18 #include "EXTERN.h"
19 #include "perl.h"
20 #include "XSUB.h"
21 #ifdef __cplusplus
22 }
23
24 #endif
25 #define maxlen 51
26 #ifndef strchr
27 char *strchr();
28 # endif
29
30 int
31 formatierung(char ziel[], char wort[], int n, char modus)
32 /**** Wandelt "wort" in GROSSschreibung ****/
33 /**** um und expandiert Umlaute ****/
34 /**** (n = Zeichenzahl von "ziel") ****/
35 /**** Zurückgegeben wird: strlen (ziel) ****/
36 {
37 int i, k;
38 char c, *s;
39
40 i = 0;
41 k = 0;
42 while ((c = wort[i++]) != 0 && k < n - 1) {
43 if (isupper(c) || isdigit(c)) {
44 ziel[k++] = c;
45 } else if (islower(c)) {
46 ziel[k++] = c - 'a' + 'A';
47 } else {
48 s = strchr("ÄAEäAEÖOEöOEÜUEüUEßSS", c);
49 if (s != NULL) {
50 ziel[k++] = *(s + 1);
51 if (k < n - 1) {
52 ziel[k++] = *(s + 2);
53 }
54 } else if (modus == '*' && c != '?') {
55 /**** Aufeinanderfolgende '*' ****/
56 /**** zu einem zusammenziehen ****/
57 if (k == 0 || ziel[k - 1] != '*') {
58 ziel[k++] = '*';
59 }
60 } else {
61 ziel[k++] = c;
62 }
63 }
64 }
65 ziel[k] = 0;
66 return (k);
67 }
68
69 int
70 WLD(wort, muster, modus, limit)
71 char *wort;
72 char *muster;
73 char modus;
74 int limit;
75 {
76 register int spmin, p, q, r, lm, lw, d1, d2, i, k, x1, x2, x3;
77 char c, mm[maxlen], ww[maxlen];
78 int d[maxlen];
79
80 if (limit == 0) {
81 limit = maxlen;
82 }
83 if (modus == '+' || modus == '*') {
84 lw = formatierung(ww, wort, maxlen, modus);
85 lm = formatierung(mm, muster, maxlen, modus);
86
87 if (modus == '*' && lw < lm - 1
88 && strchr(ww, '*') != NULL) {
89 /**** Wort und Muster tauschen ****/
90 wort = mm;
91 muster = ww;
92 strcpy(ww + lw, "*");
93 i = lw;
94 lw = lm;
95 lm = i + 1;
96 /**** Limit neu setzen ****/
97 i = (int) (i / 3);
98 if (i < limit) {
99 limit = i;
100 }
101 } else {
102 wort = ww;
103 muster = mm;
104 }
105 } else {
106 lw = strlen(wort);
107 lm = strlen(muster);
108 if (lw >= maxlen)
109 lw = (maxlen - 1);
110 if (lm >= maxlen)
111 lm = (maxlen - 1);
112 }
113
114 /**** Anfangswerte berechnen ****/
115 if (*muster == '*') {
116 for (k = 0; k <= lw; k++) {
117 d[k] = 0;
118 }
119 } else {
120 d[0] = (*muster == 0) ? 0 : 1;
121 i = (*muster == '?') ? 0 : 1;
122 for (k = 1; k <= lw; k++) {
123 if (*muster == *(wort + k - 1)) {
124 i = 0;
125 }
126 d[k] = k - 1 + i;
127 }
128 }
129
130 spmin = (d[0] == 0 || lw == 0) ? d[0] : d[1];
131 if (spmin > limit) {
132 return (maxlen);
133 }
134 /**** Distanzmatrix durchrechnen ****/
135 for (i = 2; i <= lm; i++) {
136 c = *(muster + i - 1);
137 p = (c == '*' || c == '?') ? 0 : 1;
138 q = (c == '*') ? 0 : 1;
139 r = (c == '*') ? 0 : 1;
140 d2 = d[0];
141 d[0] = d2 + q;
142 spmin = d[0];
143
144 for (k = 1; k <= lw; k++) {
145 /**** d[k] = Minimum dreier Zahlen ****/
146 d1 = d2;
147 d2 = d[k];
148 x1 = d1 + ((c == *(wort + k - 1)) ? 0 : p);
149 x2 = d2 + q;
150 x3 = d[k - 1] + r;
151
152 if (x1 < x2) {
153 x2 = x1;
154 }
155 d[k] = (x2 < x3) ? x2 : x3;
156
157 if (d[k] < spmin) {
158 spmin = d[k];
159 }
160 }
161
162 if (spmin > limit) {
163 return (maxlen);
164 }
165 }
166 return ((d[lw] <= limit) ? d[lw] : maxlen);
167 }

Properties

Name Value
cvs2svn:cvs-rev 1.1.1.2

  ViewVC Help
Powered by ViewVC 1.1.26