1 |
#include <string.h> |
2 |
#include <stdlib.h> |
3 |
/* #define NDEBUG */ |
4 |
#include <assert.h> |
5 |
|
6 |
#include "hash.h" |
7 |
|
8 |
|
9 |
/* |
10 |
** public domain code by Jerry Coffin. |
11 |
** |
12 |
** Tested with Visual C 1.0 and Borland C 3.1. |
13 |
** Compiles without warnings, and seems like it should be pretty |
14 |
** portable. |
15 |
*/ |
16 |
|
17 |
/* HW: HenkJan Wolthuis, 1997, public domain |
18 |
|
19 |
changed functionnames, all public functions now have a 'hash_' prefix |
20 |
minor editing, marked 'm all(?) with a description |
21 |
removed a bug in hash_del and one in hash_enumerate |
22 |
added some assertions |
23 |
added a 'count' member to hold the number of elements |
24 |
added hash_sorted_enum, sometimes useful |
25 |
changed the testmain |
26 |
*/ |
27 |
|
28 |
/* |
29 |
** RBS: Bob Stout, 2003, public domain |
30 |
** |
31 |
** 1. Fixed some problems in hash() static function. |
32 |
** 2. Use unsigned shorts for hash values. This was implicit in the original |
33 |
** which was written for PC's using early Microsoft and Borland compilers. |
34 |
*/ |
35 |
|
36 |
/* HW: #define to allow duplicate keys, they're added before the existing |
37 |
key so hash_lookup finds the last one inserted first (LIFO) |
38 |
when not defined, hash_insert swaps the datapointers, returning a |
39 |
pointer to the old data |
40 |
*/ |
41 |
/* #define DUPLICATE_KEYS */ |
42 |
|
43 |
/* |
44 |
** These are used in freeing a table. Perhaps I should code up |
45 |
** something a little less grungy, but it works, so what the heck. |
46 |
*/ |
47 |
static void (*function) (void *) = NULL; |
48 |
static hash_table *the_table = NULL; |
49 |
|
50 |
|
51 |
/* Initialize the hash_table to the size asked for. Allocates space |
52 |
** for the correct number of pointers and sets them to NULL. If it |
53 |
** can't allocate sufficient memory, signals error by setting the size |
54 |
** of the table to 0. |
55 |
*/ |
56 |
/*HW: changed, now returns NULL on malloc-failure */ |
57 |
hash_table *hash_construct_table(hash_table * table, size_t size) |
58 |
{ |
59 |
size_t i; |
60 |
bucket **temp; |
61 |
|
62 |
table->size = size; |
63 |
table->count = 0; |
64 |
table->table = (bucket **) malloc(sizeof(bucket *) * size); |
65 |
temp = table->table; |
66 |
|
67 |
if (NULL == temp) { |
68 |
table->size = 0; |
69 |
return NULL; /*HW: was 'table' */ |
70 |
} |
71 |
|
72 |
for (i = 0; i < size; i++) |
73 |
temp[i] = NULL; |
74 |
|
75 |
return table; |
76 |
} |
77 |
|
78 |
|
79 |
/* |
80 |
** Hashes a string to produce an unsigned short, which should be |
81 |
** sufficient for most purposes. |
82 |
** RBS: fixed per user feedback from Steve Greenland |
83 |
*/ |
84 |
|
85 |
static unsigned short hash(char *string) |
86 |
{ |
87 |
unsigned short ret_val = 0; |
88 |
int i; |
89 |
|
90 |
while (*string) { |
91 |
/* |
92 |
** RBS: Added conditional to account for strings in which the |
93 |
** length is less than an integral multiple of sizeof(int). |
94 |
** |
95 |
** Note: This fixes the problem of hasing trailing garbage, but |
96 |
** doesn't fix the problem with some CPU's which can't align on |
97 |
** byte boundries. Any decent C compiler *should* fix this, but |
98 |
** it still might extract a performance hit. Also unaddressed is |
99 |
** what happens when using a CPU which addresses data only on |
100 |
** 4-byte boundries when it tries to work with a pointer to a |
101 |
** 2-byte unsigned short. |
102 |
*/ |
103 |
|
104 |
if (strlen(string) >= sizeof(unsigned short)) |
105 |
i = *(unsigned short *) string; |
106 |
else |
107 |
i = (unsigned short) (*string); |
108 |
ret_val ^= i; |
109 |
ret_val <<= 1; |
110 |
string++; |
111 |
} |
112 |
return ret_val; |
113 |
} |
114 |
|
115 |
/* |
116 |
** Insert 'key' into hash table. |
117 |
** Returns pointer to old data associated with the key, if any, or |
118 |
** NULL if the key wasn't in the table previously. |
119 |
*/ |
120 |
/* HW: returns NULL if malloc failed */ |
121 |
void *hash_insert(char *key, void *data, hash_table * table) |
122 |
{ |
123 |
unsigned short val = hash(key) % table->size; |
124 |
bucket *ptr; |
125 |
|
126 |
assert(NULL != key); |
127 |
|
128 |
/* |
129 |
** NULL means this bucket hasn't been used yet. We'll simply |
130 |
** allocate space for our new bucket and put our data there, with |
131 |
** the table pointing at it. |
132 |
*/ |
133 |
|
134 |
if (NULL == (table->table)[val]) { |
135 |
(table->table)[val] = (bucket *) malloc(sizeof(bucket)); |
136 |
if (NULL == (table->table)[val]) |
137 |
return NULL; |
138 |
|
139 |
if (NULL == |
140 |
((table->table)[val]->key = (char *) malloc(strlen(key) + 1))) { |
141 |
free((table->table)[val]); |
142 |
(table->table)[val] = NULL; |
143 |
return NULL; |
144 |
} |
145 |
strcpy((table->table)[val]->key, key); |
146 |
(table->table)[val]->next = NULL; |
147 |
(table->table)[val]->data = data; |
148 |
table->count++; /* HW */ |
149 |
return (table->table)[val]->data; |
150 |
} |
151 |
|
152 |
/* HW: added a #define so the hashtable can accept duplicate keys */ |
153 |
#ifndef DUPLICATE_KEYS |
154 |
/* |
155 |
** This spot in the table is already in use. See if the current string |
156 |
** has already been inserted, and if so, increment its count. |
157 |
*//* HW: ^^^^^^^^ ?? */ |
158 |
for (ptr = (table->table)[val]; NULL != ptr; ptr = ptr->next) |
159 |
if (0 == strcmp(key, ptr->key)) { |
160 |
void *old_data; |
161 |
|
162 |
old_data = ptr->data; |
163 |
ptr->data = data; |
164 |
return old_data; |
165 |
} |
166 |
#endif |
167 |
/* |
168 |
** This key must not be in the table yet. We'll add it to the head of |
169 |
** the list at this spot in the hash table. Speed would be |
170 |
** slightly improved if the list was kept sorted instead. In this case, |
171 |
** this code would be moved into the loop above, and the insertion would |
172 |
** take place as soon as it was determined that the present key in the |
173 |
** list was larger than this one. |
174 |
*/ |
175 |
|
176 |
ptr = (bucket *) malloc(sizeof(bucket)); |
177 |
if (NULL == ptr) |
178 |
return NULL; /*HW: was 0 */ |
179 |
|
180 |
if (NULL == (ptr->key = (char *) malloc(strlen(key) + 1))) { |
181 |
free(ptr); |
182 |
return NULL; |
183 |
} |
184 |
strcpy(ptr->key, key); |
185 |
ptr->data = data; |
186 |
ptr->next = (table->table)[val]; |
187 |
(table->table)[val] = ptr; |
188 |
table->count++; /* HW */ |
189 |
|
190 |
return data; |
191 |
} |
192 |
|
193 |
|
194 |
/* |
195 |
** Look up a key and return the associated data. Returns NULL if |
196 |
** the key is not in the table. |
197 |
*/ |
198 |
void *hash_lookup(char *key, hash_table * table) |
199 |
{ |
200 |
unsigned short val = hash(key) % table->size; |
201 |
bucket *ptr; |
202 |
|
203 |
assert(NULL != key); |
204 |
|
205 |
if (NULL == (table->table)[val]) |
206 |
return NULL; |
207 |
|
208 |
for (ptr = (table->table)[val]; NULL != ptr; ptr = ptr->next) { |
209 |
if (0 == strcmp(key, ptr->key)) |
210 |
return ptr->data; |
211 |
} |
212 |
|
213 |
return NULL; |
214 |
} |
215 |
|
216 |
/* |
217 |
** Delete a key from the hash table and return associated |
218 |
** data, or NULL if not present. |
219 |
*/ |
220 |
|
221 |
void *hash_del(char *key, hash_table * table) |
222 |
{ |
223 |
unsigned short val = hash(key) % table->size; |
224 |
void *data; |
225 |
bucket *ptr, *last = NULL; |
226 |
|
227 |
assert(NULL != key); |
228 |
|
229 |
if (NULL == (table->table)[val]) |
230 |
return NULL; /* HW: was 'return 0' */ |
231 |
|
232 |
/* |
233 |
** Traverse the list, keeping track of the previous node in the list. |
234 |
** When we find the node to delete, we set the previous node's next |
235 |
** pointer to point to the node after ourself instead. We then delete |
236 |
** the key from the present node, and return a pointer to the data it |
237 |
** contains. |
238 |
*/ |
239 |
for (last = NULL, ptr = (table->table)[val]; |
240 |
NULL != ptr; last = ptr, ptr = ptr->next) { |
241 |
if (0 == strcmp(key, ptr->key)) { |
242 |
if (last != NULL) { |
243 |
data = ptr->data; |
244 |
last->next = ptr->next; |
245 |
free(ptr->key); |
246 |
free(ptr); |
247 |
table->count--; /* HW */ |
248 |
return data; |
249 |
} |
250 |
|
251 |
/* If 'last' still equals NULL, it means that we need to |
252 |
** delete the first node in the list. This simply consists |
253 |
** of putting our own 'next' pointer in the array holding |
254 |
** the head of the list. We then dispose of the current |
255 |
** node as above. |
256 |
*/ |
257 |
else { |
258 |
/* HW: changed this bit to match the comments above */ |
259 |
data = ptr->data; |
260 |
(table->table)[val] = ptr->next; |
261 |
free(ptr->key); |
262 |
free(ptr); |
263 |
table->count--; /* HW */ |
264 |
return data; |
265 |
} |
266 |
} |
267 |
} |
268 |
|
269 |
/* |
270 |
** If we get here, it means we didn't find the item in the table. |
271 |
** Signal this by returning NULL. |
272 |
*/ |
273 |
|
274 |
return NULL; |
275 |
} |
276 |
|
277 |
/* |
278 |
** free_table iterates the table, calling this repeatedly to free |
279 |
** each individual node. This, in turn, calls one or two other |
280 |
** functions - one to free the storage used for the key, the other |
281 |
** passes a pointer to the data back to a function defined by the user, |
282 |
** process the data as needed. |
283 |
*/ |
284 |
|
285 |
static void free_node(char *key, void *data) |
286 |
{ |
287 |
(void) data; |
288 |
|
289 |
assert(NULL != key); |
290 |
|
291 |
if (NULL != function) { |
292 |
function(hash_del(key, the_table)); |
293 |
} else |
294 |
hash_del(key, the_table); |
295 |
} |
296 |
|
297 |
/* |
298 |
** Frees a complete table by iterating over it and freeing each node. |
299 |
** the second parameter is the address of a function it will call with a |
300 |
** pointer to the data associated with each node. This function is |
301 |
** responsible for freeing the data, or doing whatever is needed with |
302 |
** it. |
303 |
*/ |
304 |
|
305 |
void hash_free_table(hash_table * table, void (*func) (void *)) |
306 |
{ |
307 |
function = func; |
308 |
the_table = table; |
309 |
|
310 |
hash_enumerate(table, free_node); |
311 |
free(table->table); |
312 |
table->table = NULL; |
313 |
table->size = 0; |
314 |
table->count = 0; /* HW */ |
315 |
|
316 |
the_table = NULL; |
317 |
function = NULL; |
318 |
} |
319 |
|
320 |
/* |
321 |
** Simply invokes the function given as the second parameter for each |
322 |
** node in the table, passing it the key and the associated data. |
323 |
*/ |
324 |
|
325 |
void hash_enumerate(hash_table * table, void (*func) (char *, void *)) |
326 |
{ |
327 |
unsigned i; |
328 |
bucket *temp; |
329 |
bucket *swap; |
330 |
|
331 |
for (i = 0; i < table->size; i++) { |
332 |
if (NULL != (table->table)[i]) { |
333 |
/* HW: changed this loop */ |
334 |
temp = (table->table)[i]; |
335 |
while (NULL != temp) { |
336 |
/* HW: swap trick, in case temp is freed by 'func' */ |
337 |
swap = temp->next; |
338 |
func(temp->key, temp->data); |
339 |
temp = swap; |
340 |
} |
341 |
} |
342 |
} |
343 |
} |
344 |
|
345 |
/* HW: added hash_sorted_enum() |
346 |
|
347 |
hash_sorted_enum is like hash_enumerate but gives |
348 |
sorted output. This is much slower than hash_enumerate, but |
349 |
sometimes nice for printing to a file... |
350 |
*/ |
351 |
|
352 |
typedef struct sort_struct |
353 |
{ |
354 |
char *key; |
355 |
void *data; |
356 |
} |
357 |
sort_struct; |
358 |
static sort_struct *sortmap = NULL; |
359 |
|
360 |
static int counter = 0; |
361 |
|
362 |
/* HW: used as 'func' for hash_enumerate */ |
363 |
static void key_get(char *key, void *data) |
364 |
{ |
365 |
sortmap[counter].key = key; |
366 |
sortmap[counter].data = data; |
367 |
counter++; |
368 |
} |
369 |
|
370 |
/* HW: used for comparing the keys in qsort */ |
371 |
static int key_comp(const void *a, const void *b) |
372 |
{ |
373 |
return strcmp((*(sort_struct *) a).key, (*(sort_struct *) b).key); |
374 |
} |
375 |
|
376 |
/* HW: it's a compromise between speed and space. this one needs |
377 |
table->count * sizeof( sort_struct) memory. |
378 |
Another approach only takes count*sizeof(char*), but needs |
379 |
to hash_lookup the data of every key after sorting the key. |
380 |
returns 0 on malloc failure, 1 on success |
381 |
*/ |
382 |
int hash_sorted_enum(hash_table * table, void (*func) (char *, void *)) |
383 |
{ |
384 |
int i; |
385 |
|
386 |
/* nothing to do ! */ |
387 |
if (NULL == table || 0 == table->count || NULL == func) |
388 |
return 0; |
389 |
|
390 |
/* malloc an pointerarray for all hashkey's and datapointers */ |
391 |
if (NULL == |
392 |
(sortmap = |
393 |
(sort_struct *) malloc(sizeof(sort_struct) * table->count))) |
394 |
return 0; |
395 |
|
396 |
/* copy the pointers to the hashkey's */ |
397 |
counter = 0; |
398 |
hash_enumerate(table, key_get); |
399 |
|
400 |
/* sort the pointers to the keys */ |
401 |
qsort(sortmap, table->count, sizeof(sort_struct), key_comp); |
402 |
|
403 |
/* call the function for each node */ |
404 |
for (i = 0; i < abs((table->count)); i++) { |
405 |
func(sortmap[i].key, sortmap[i].data); |
406 |
} |
407 |
|
408 |
/* free the pointerarray */ |
409 |
free(sortmap); |
410 |
sortmap = NULL; |
411 |
|
412 |
return 1; |
413 |
} |
414 |
|
415 |
/* HW: changed testmain */ |
416 |
#define TEST |
417 |
#ifdef TEST |
418 |
|
419 |
#include <stdio.h> |
420 |
//#include "snip_str.h" /* for strdup() */ |
421 |
|
422 |
FILE *o; |
423 |
|
424 |
void fprinter(char *string, void *data) |
425 |
{ |
426 |
fprintf(o, "%s: %s\n", string, (char *) data); |
427 |
} |
428 |
|
429 |
void printer(char *string, void *data) |
430 |
{ |
431 |
printf("%s: %s\n", string, (char *) data); |
432 |
} |
433 |
|
434 |
/* function to pass to hash_free_table */ |
435 |
void strfree(void *d) |
436 |
{ |
437 |
/* any additional processing goes here (if you use structures as data) */ |
438 |
/* free the datapointer */ |
439 |
free(d); |
440 |
} |
441 |
|
442 |
|
443 |
int main(void) |
444 |
{ |
445 |
hash_table table; |
446 |
|
447 |
char *strings[] = { |
448 |
"The first string", |
449 |
"The second string", |
450 |
"The third string", |
451 |
"The fourth string", |
452 |
"A much longer string than the rest in this example.", |
453 |
"The last string", |
454 |
NULL |
455 |
}; |
456 |
|
457 |
char *junk[] = { |
458 |
"The first data", |
459 |
"The second data", |
460 |
"The third data", |
461 |
"The fourth data", |
462 |
"The fifth datum", |
463 |
"The sixth piece of data" |
464 |
}; |
465 |
|
466 |
int i; |
467 |
void *j; |
468 |
|
469 |
hash_construct_table(&table, 211); |
470 |
|
471 |
/* I know, no checking on strdup ;-)), but using strdup |
472 |
to demonstrate hash_table_free with a functionpointer */ |
473 |
for (i = 0; NULL != strings[i]; i++) |
474 |
hash_insert(strings[i], strdup(junk[i]), &table); |
475 |
|
476 |
/* enumerating to a file */ |
477 |
if (NULL != (o = fopen("HASH.HSH", "wb"))) { |
478 |
fprintf(o, "%d strings in the table:\n\n", table.count); |
479 |
hash_enumerate(&table, fprinter); |
480 |
fprintf(o, "\nsorted by key:\n"); |
481 |
hash_sorted_enum(&table, fprinter); |
482 |
fclose(o); |
483 |
} |
484 |
|
485 |
/* enumerating to screen */ |
486 |
hash_sorted_enum(&table, printer); |
487 |
printf("\n"); |
488 |
|
489 |
/* delete 3 strings, should be 3 left */ |
490 |
for (i = 0; i < 3; i++) { |
491 |
/* hash_del returns a pointer to the data */ |
492 |
strfree(hash_del(strings[i], &table)); |
493 |
} |
494 |
hash_enumerate(&table, printer); |
495 |
|
496 |
for (i = 0; NULL != strings[i]; i++) { |
497 |
j = hash_lookup(strings[i], &table); |
498 |
if (NULL == j) |
499 |
printf("\n'%s' is not in the table", strings[i]); |
500 |
else |
501 |
printf("\n%s is still in the table.", strings[i]); |
502 |
} |
503 |
|
504 |
hash_free_table(&table, strfree); |
505 |
|
506 |
return 0; |
507 |
} |
508 |
#endif /* TEST */ |