/[wait]/trunk/levenstein.c
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/levenstein.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 11 - (show annotations)
Fri Apr 28 15:41:10 2000 UTC (24 years ago) by unknown
Original Path: branches/CPAN/levenstein.c
File MIME type: text/plain
File size: 3787 byte(s)
This commit was manufactured by cvs2svn to create branch 'CPAN'.
1 /**** Einfache Levenshtein-Distanz (p0=q0=r0=1) ****/
2 /**** mit Berücksichtigung von Wildcards ****/
3 /**** (geschwindigkeitsoptimiertes C-Programm) ****/
4 /**** Autor : Jörg Michael, Hannover ****/
5 /**** Datum : 22. Dezember 1993 ****/
6
7 /**** modus = ' ': normale Levenshtein-Distanz ****/
8 /**** modus = '+': keine Unterscheidung ****/
9 /**** Klein-/Großschreibung ****/
10 /**** modus = '*': wie '+', aber zusätzlich ****/
11 /**** "symmetrisches" Verhalten ****/
12 /**** gemäß der im Text beschrie- ****/
13 /**** benen Vorformatierung ****/
14
15 #ifdef __cplusplus
16 extern "C" {
17 #endif
18 #include "EXTERN.h"
19 #include "perl.h"
20 #include "XSUB.h"
21 #ifdef __cplusplus
22 }
23
24 #endif
25 #define maxlen 51
26 char *strchr();
27
28 int
29 formatierung(char ziel[], char wort[], int n, char modus)
30 /**** Wandelt "wort" in GROSSschreibung ****/
31 /**** um und expandiert Umlaute ****/
32 /**** (n = Zeichenzahl von "ziel") ****/
33 /**** Zurückgegeben wird: strlen (ziel) ****/
34 {
35 int i, k;
36 char c, *s;
37
38 i = 0;
39 k = 0;
40 while ((c = wort[i++]) != 0 && k < n - 1) {
41 if (isupper(c) || isdigit(c)) {
42 ziel[k++] = c;
43 } else if (islower(c)) {
44 ziel[k++] = c - 'a' + 'A';
45 } else {
46 s = strchr("ÄAEäAEÖOEöOEÜUEüUEßSS", c);
47 if (s != NULL) {
48 ziel[k++] = *(s + 1);
49 if (k < n - 1) {
50 ziel[k++] = *(s + 2);
51 }
52 } else if (modus == '*' && c != '?') {
53 /**** Aufeinanderfolgende '*' ****/
54 /**** zu einem zusammenziehen ****/
55 if (k == 0 || ziel[k - 1] != '*') {
56 ziel[k++] = '*';
57 }
58 } else {
59 ziel[k++] = c;
60 }
61 }
62 }
63 ziel[k] = 0;
64 return (k);
65 }
66
67 int
68 WLD(wort, muster, modus, limit)
69 char *wort;
70 char *muster;
71 char modus;
72 int limit;
73 {
74 register int spmin, p, q, r, lm, lw, d1, d2, i, k, x1, x2, x3;
75 char c, mm[maxlen], ww[maxlen];
76 int d[maxlen];
77
78 if (limit == 0) {
79 limit = maxlen;
80 }
81 if (modus == '+' || modus == '*') {
82 lw = formatierung(ww, wort, maxlen, modus);
83 lm = formatierung(mm, muster, maxlen, modus);
84
85 if (modus == '*' && lw < lm - 1
86 && strchr(ww, '*') != NULL) {
87 /**** Wort und Muster tauschen ****/
88 wort = mm;
89 muster = ww;
90 strcpy(ww + lw, "*");
91 i = lw;
92 lw = lm;
93 lm = i + 1;
94 /**** Limit neu setzen ****/
95 i = (int) (i / 3);
96 if (i < limit) {
97 limit = i;
98 }
99 } else {
100 wort = ww;
101 muster = mm;
102 }
103 } else {
104 lw = strlen(wort);
105 lm = strlen(muster);
106 if (lw >= maxlen)
107 lw = (maxlen - 1);
108 if (lm >= maxlen)
109 lm = (maxlen - 1);
110 }
111
112 /**** Anfangswerte berechnen ****/
113 if (*muster == '*') {
114 for (k = 0; k <= lw; k++) {
115 d[k] = 0;
116 }
117 } else {
118 d[0] = (*muster == 0) ? 0 : 1;
119 i = (*muster == '?') ? 0 : 1;
120 for (k = 1; k <= lw; k++) {
121 if (*muster == *(wort + k - 1)) {
122 i = 0;
123 }
124 d[k] = k - 1 + i;
125 }
126 }
127
128 spmin = (d[0] == 0 || lw == 0) ? d[0] : d[1];
129 if (spmin > limit) {
130 return (maxlen);
131 }
132 /**** Distanzmatrix durchrechnen ****/
133 for (i = 2; i <= lm; i++) {
134 c = *(muster + i - 1);
135 p = (c == '*' || c == '?') ? 0 : 1;
136 q = (c == '*') ? 0 : 1;
137 r = (c == '*') ? 0 : 1;
138 d2 = d[0];
139 d[0] = d2 + q;
140 spmin = d[0];
141
142 for (k = 1; k <= lw; k++) {
143 /**** d[k] = Minimum dreier Zahlen ****/
144 d1 = d2;
145 d2 = d[k];
146 x1 = d1 + ((c == *(wort + k - 1)) ? 0 : p);
147 x2 = d2 + q;
148 x3 = d[k - 1] + r;
149
150 if (x1 < x2) {
151 x2 = x1;
152 }
153 d[k] = (x2 < x3) ? x2 : x3;
154
155 if (d[k] < spmin) {
156 spmin = d[k];
157 }
158 }
159
160 if (spmin > limit) {
161 return (maxlen);
162 }
163 }
164 return ((d[lw] <= limit) ? d[lw] : maxlen);
165 }

Properties

Name Value
cvs2svn:cvs-rev 1.1

  ViewVC Help
Powered by ViewVC 1.1.26