1 |
#include <string.h> |
2 |
#include <stdlib.h> |
3 |
/* #define NDEBUG */ |
4 |
#include <assert.h> |
5 |
|
6 |
#include "hash.h" |
7 |
|
8 |
|
9 |
/* |
10 |
** public domain code by Jerry Coffin. |
11 |
** |
12 |
** Tested with Visual C 1.0 and Borland C 3.1. |
13 |
** Compiles without warnings, and seems like it should be pretty |
14 |
** portable. |
15 |
*/ |
16 |
|
17 |
/* HW: HenkJan Wolthuis, 1997, public domain |
18 |
|
19 |
changed functionnames, all public functions now have a 'hash_' prefix |
20 |
minor editing, marked 'm all(?) with a description |
21 |
removed a bug in hash_del and one in hash_enumerate |
22 |
added some assertions |
23 |
added a 'count' member to hold the number of elements |
24 |
added hash_sorted_enum, sometimes useful |
25 |
changed the testmain |
26 |
*/ |
27 |
|
28 |
/* |
29 |
** RBS: Bob Stout, 2003, public domain |
30 |
** |
31 |
** 1. Fixed some problems in hash() static function. |
32 |
** 2. Use unsigned shorts for hash values. This was implicit in the original |
33 |
** which was written for PC's using early Microsoft and Borland compilers. |
34 |
*/ |
35 |
|
36 |
/* HW: #define to allow duplicate keys, they're added before the existing |
37 |
key so hash_lookup finds the last one inserted first (LIFO) |
38 |
when not defined, hash_insert swaps the datapointers, returning a |
39 |
pointer to the old data |
40 |
*/ |
41 |
/* #define DUPLICATE_KEYS */ |
42 |
|
43 |
/* |
44 |
** These are used in freeing a table. Perhaps I should code up |
45 |
** something a little less grungy, but it works, so what the heck. |
46 |
*/ |
47 |
static void ( *function ) ( void * ) = NULL; |
48 |
static hash_table *the_table = NULL; |
49 |
|
50 |
|
51 |
/* Initialize the hash_table to the size asked for. Allocates space |
52 |
** for the correct number of pointers and sets them to NULL. If it |
53 |
** can't allocate sufficient memory, signals error by setting the size |
54 |
** of the table to 0. |
55 |
*/ |
56 |
/*HW: changed, now returns NULL on malloc-failure */ |
57 |
hash_table *hash_construct_table( hash_table * table, size_t size ) |
58 |
{ |
59 |
size_t i; |
60 |
bucket **temp; |
61 |
|
62 |
table->size = size; |
63 |
table->count = 0; |
64 |
table->table = ( bucket ** ) malloc( sizeof( bucket * ) * size ); |
65 |
temp = table->table; |
66 |
|
67 |
if ( NULL == temp ) { |
68 |
table->size = 0; |
69 |
return NULL; /*HW: was 'table' */ |
70 |
} |
71 |
|
72 |
for ( i = 0; i < size; i++ ) |
73 |
temp[ i ] = NULL; |
74 |
|
75 |
return table; |
76 |
} |
77 |
|
78 |
|
79 |
/* |
80 |
** Hashes a string to produce an unsigned short, which should be |
81 |
** sufficient for most purposes. |
82 |
** RBS: fixed per user feedback from Steve Greenland |
83 |
*/ |
84 |
|
85 |
static unsigned short hash( char *string ) |
86 |
{ |
87 |
unsigned short ret_val = 0; |
88 |
int i; |
89 |
|
90 |
while ( *string ) { |
91 |
/* |
92 |
** RBS: Added conditional to account for strings in which the |
93 |
** length is less than an integral multiple of sizeof(int). |
94 |
** |
95 |
** Note: This fixes the problem of hasing trailing garbage, but |
96 |
** doesn't fix the problem with some CPU's which can't align on |
97 |
** byte boundries. Any decent C compiler *should* fix this, but |
98 |
** it still might extract a performance hit. Also unaddressed is |
99 |
** what happens when using a CPU which addresses data only on |
100 |
** 4-byte boundries when it tries to work with a pointer to a |
101 |
** 2-byte unsigned short. |
102 |
*/ |
103 |
|
104 |
if ( strlen( string ) >= sizeof( unsigned short ) ) |
105 |
i = *( unsigned short * ) string; |
106 |
else |
107 |
i = ( unsigned short ) ( *string ); |
108 |
ret_val ^= i; |
109 |
ret_val <<= 1; |
110 |
string++; |
111 |
} |
112 |
return ret_val; |
113 |
} |
114 |
|
115 |
/* |
116 |
** Insert 'key' into hash table. |
117 |
** Returns pointer to old data associated with the key, if any, or |
118 |
** NULL if the key wasn't in the table previously. |
119 |
*/ |
120 |
/* HW: returns NULL if malloc failed */ |
121 |
void *hash_insert( char *key, void *data, hash_table * table ) |
122 |
{ |
123 |
unsigned short val = hash( key ) % table->size; |
124 |
bucket *ptr; |
125 |
|
126 |
assert( NULL != key ); |
127 |
|
128 |
/* |
129 |
** NULL means this bucket hasn't been used yet. We'll simply |
130 |
** allocate space for our new bucket and put our data there, with |
131 |
** the table pointing at it. |
132 |
*/ |
133 |
|
134 |
if ( NULL == ( table->table ) [ val ] ) { |
135 |
( table->table ) [ val ] = ( bucket * ) malloc( sizeof( bucket ) ); |
136 |
if ( NULL == ( table->table ) [ val ] ) |
137 |
return NULL; |
138 |
|
139 |
if ( NULL == |
140 |
( ( table->table ) [ val ] ->key = ( char * ) malloc( strlen( key ) + 1 ) ) ) { |
141 |
free( ( table->table ) [ val ] ); |
142 |
( table->table ) [ val ] = NULL; |
143 |
return NULL; |
144 |
} |
145 |
strcpy( ( table->table ) [ val ] ->key, key ); |
146 |
( table->table ) [ val ] ->next = NULL; |
147 |
( table->table ) [ val ] ->data = data; |
148 |
table->count++; /* HW */ |
149 |
return ( table->table ) [ val ] ->data; |
150 |
} |
151 |
|
152 |
/* HW: added a #define so the hashtable can accept duplicate keys */ |
153 |
#ifndef DUPLICATE_KEYS |
154 |
/* |
155 |
** This spot in the table is already in use. See if the current string |
156 |
** has already been inserted, and if so, increment its count. |
157 |
*/ /* HW: ^^^^^^^^ ?? */ |
158 |
for ( ptr = ( table->table ) [ val ]; NULL != ptr; ptr = ptr->next ) |
159 |
if ( 0 == strcmp( key, ptr->key ) ) { |
160 |
void * old_data; |
161 |
|
162 |
old_data = ptr->data; |
163 |
ptr->data = data; |
164 |
return old_data; |
165 |
} |
166 |
#endif |
167 |
/* |
168 |
** This key must not be in the table yet. We'll add it to the head of |
169 |
** the list at this spot in the hash table. Speed would be |
170 |
** slightly improved if the list was kept sorted instead. In this case, |
171 |
** this code would be moved into the loop above, and the insertion would |
172 |
** take place as soon as it was determined that the present key in the |
173 |
** list was larger than this one. |
174 |
*/ |
175 |
|
176 |
ptr = ( bucket * ) malloc( sizeof( bucket ) ); |
177 |
if ( NULL == ptr ) |
178 |
return NULL; /*HW: was 0 */ |
179 |
|
180 |
if ( NULL == ( ptr->key = ( char * ) malloc( strlen( key ) + 1 ) ) ) { |
181 |
free( ptr ); |
182 |
return NULL; |
183 |
} |
184 |
strcpy( ptr->key, key ); |
185 |
ptr->data = data; |
186 |
ptr->next = ( table->table ) [ val ]; |
187 |
( table->table ) [ val ] = ptr; |
188 |
table->count++; /* HW */ |
189 |
|
190 |
return data; |
191 |
} |
192 |
|
193 |
|
194 |
/* |
195 |
** Look up a key and return the associated data. Returns NULL if |
196 |
** the key is not in the table. |
197 |
*/ |
198 |
void *hash_lookup( char *key, hash_table * table ) |
199 |
{ |
200 |
unsigned short val = hash( key ) % table->size; |
201 |
bucket *ptr; |
202 |
|
203 |
assert( NULL != key ); |
204 |
|
205 |
if ( NULL == ( table->table ) [ val ] ) |
206 |
return NULL; |
207 |
|
208 |
for ( ptr = ( table->table ) [ val ]; NULL != ptr; ptr = ptr->next ) { |
209 |
if ( 0 == strcmp( key, ptr->key ) ) |
210 |
return ptr->data; |
211 |
} |
212 |
|
213 |
return NULL; |
214 |
} |
215 |
|
216 |
/* |
217 |
** Delete a key from the hash table and return associated |
218 |
** data, or NULL if not present. |
219 |
*/ |
220 |
|
221 |
void *hash_del( char *key, hash_table * table ) |
222 |
{ |
223 |
unsigned short val = hash( key ) % table->size; |
224 |
void *data; |
225 |
bucket *ptr, *last = NULL; |
226 |
|
227 |
assert( NULL != key ); |
228 |
|
229 |
if ( NULL == ( table->table ) [ val ] ) |
230 |
return NULL; /* HW: was 'return 0' */ |
231 |
|
232 |
/* |
233 |
** Traverse the list, keeping track of the previous node in the list. |
234 |
** When we find the node to delete, we set the previous node's next |
235 |
** pointer to point to the node after ourself instead. We then delete |
236 |
** the key from the present node, and return a pointer to the data it |
237 |
** contains. |
238 |
*/ |
239 |
for ( last = NULL, ptr = ( table->table ) [ val ]; |
240 |
NULL != ptr; last = ptr, ptr = ptr->next ) { |
241 |
if ( 0 == strcmp( key, ptr->key ) ) { |
242 |
if ( last != NULL ) { |
243 |
data = ptr->data; |
244 |
last->next = ptr->next; |
245 |
free( ptr->key ); |
246 |
free( ptr ); |
247 |
table->count--; /* HW */ |
248 |
return data; |
249 |
} |
250 |
|
251 |
/* If 'last' still equals NULL, it means that we need to |
252 |
** delete the first node in the list. This simply consists |
253 |
** of putting our own 'next' pointer in the array holding |
254 |
** the head of the list. We then dispose of the current |
255 |
** node as above. |
256 |
*/ |
257 |
else { |
258 |
/* HW: changed this bit to match the comments above */ |
259 |
data = ptr->data; |
260 |
( table->table ) [ val ] = ptr->next; |
261 |
free( ptr->key ); |
262 |
free( ptr ); |
263 |
table->count--; /* HW */ |
264 |
return data; |
265 |
} |
266 |
} |
267 |
} |
268 |
|
269 |
/* |
270 |
** If we get here, it means we didn't find the item in the table. |
271 |
** Signal this by returning NULL. |
272 |
*/ |
273 |
|
274 |
return NULL; |
275 |
} |
276 |
|
277 |
/* |
278 |
** free_table iterates the table, calling this repeatedly to free |
279 |
** each individual node. This, in turn, calls one or two other |
280 |
** functions - one to free the storage used for the key, the other |
281 |
** passes a pointer to the data back to a function defined by the user, |
282 |
** process the data as needed. |
283 |
*/ |
284 |
|
285 |
static void free_node( char *key, void *data ) |
286 |
{ |
287 |
( void ) data; |
288 |
|
289 |
assert( NULL != key ); |
290 |
|
291 |
if ( NULL != function ) { |
292 |
function( hash_del( key, the_table ) ); |
293 |
} else |
294 |
hash_del( key, the_table ); |
295 |
} |
296 |
|
297 |
/* |
298 |
** Frees a complete table by iterating over it and freeing each node. |
299 |
** the second parameter is the address of a function it will call with a |
300 |
** pointer to the data associated with each node. This function is |
301 |
** responsible for freeing the data, or doing whatever is needed with |
302 |
** it. |
303 |
*/ |
304 |
|
305 |
void hash_free_table( hash_table * table, void ( *func ) ( void * ) ) |
306 |
{ |
307 |
function = func; |
308 |
the_table = table; |
309 |
|
310 |
hash_enumerate( table, free_node ); |
311 |
free( table->table ); |
312 |
table->table = NULL; |
313 |
table->size = 0; |
314 |
table->count = 0; /* HW */ |
315 |
|
316 |
the_table = NULL; |
317 |
function = NULL; |
318 |
} |
319 |
|
320 |
/* |
321 |
** Simply invokes the function given as the second parameter for each |
322 |
** node in the table, passing it the key and the associated data. |
323 |
*/ |
324 |
|
325 |
void hash_enumerate( hash_table * table, void ( *func ) ( char *, void * ) ) |
326 |
{ |
327 |
unsigned i; |
328 |
bucket *temp; |
329 |
bucket *swap; |
330 |
|
331 |
for ( i = 0; i < table->size; i++ ) { |
332 |
if ( NULL != ( table->table ) [ i ] ) { |
333 |
/* HW: changed this loop */ |
334 |
temp = ( table->table ) [ i ]; |
335 |
while ( NULL != temp ) { |
336 |
/* HW: swap trick, in case temp is freed by 'func' */ |
337 |
swap = temp->next; |
338 |
func( temp->key, temp->data ); |
339 |
temp = swap; |
340 |
} |
341 |
} |
342 |
} |
343 |
} |
344 |
|
345 |
/* HW: added hash_sorted_enum() |
346 |
|
347 |
hash_sorted_enum is like hash_enumerate but gives |
348 |
sorted output. This is much slower than hash_enumerate, but |
349 |
sometimes nice for printing to a file... |
350 |
*/ |
351 |
|
352 |
typedef struct sort_struct |
353 |
{ |
354 |
char *key; |
355 |
void *data; |
356 |
} |
357 |
sort_struct; |
358 |
static sort_struct *sortmap = NULL; |
359 |
|
360 |
static int counter = 0; |
361 |
|
362 |
/* HW: used as 'func' for hash_enumerate */ |
363 |
static void key_get( char *key, void *data ) |
364 |
{ |
365 |
sortmap[ counter ].key = key; |
366 |
sortmap[ counter ].data = data; |
367 |
counter++; |
368 |
} |
369 |
|
370 |
/* HW: used for comparing the keys in qsort */ |
371 |
static int key_comp( const void *a, const void *b ) |
372 |
{ |
373 |
return strcmp( ( *( sort_struct * ) a ).key, ( *( sort_struct * ) b ).key ); |
374 |
} |
375 |
|
376 |
/* HW: it's a compromise between speed and space. this one needs |
377 |
table->count * sizeof( sort_struct) memory. |
378 |
Another approach only takes count*sizeof(char*), but needs |
379 |
to hash_lookup the data of every key after sorting the key. |
380 |
returns 0 on malloc failure, 1 on success |
381 |
*/ |
382 |
int hash_sorted_enum( hash_table * table, void ( *func ) ( char *, void * ) ) |
383 |
{ |
384 |
int i; |
385 |
|
386 |
/* nothing to do ! */ |
387 |
if ( NULL == table || 0 == table->count || NULL == func ) |
388 |
return 0; |
389 |
|
390 |
/* malloc an pointerarray for all hashkey's and datapointers */ |
391 |
if ( NULL == |
392 |
( sortmap = |
393 |
( sort_struct * ) malloc( sizeof( sort_struct ) * table->count ) ) ) |
394 |
return 0; |
395 |
|
396 |
/* copy the pointers to the hashkey's */ |
397 |
counter = 0; |
398 |
hash_enumerate( table, key_get ); |
399 |
|
400 |
/* sort the pointers to the keys */ |
401 |
qsort( sortmap, table->count, sizeof( sort_struct ), key_comp ); |
402 |
|
403 |
/* call the function for each node */ |
404 |
for ( i = 0; i < abs( ( table->count ) ); i++ ) { |
405 |
func( sortmap[ i ].key, sortmap[ i ].data ); |
406 |
} |
407 |
|
408 |
/* free the pointerarray */ |
409 |
free( sortmap ); |
410 |
sortmap = NULL; |
411 |
|
412 |
return 1; |
413 |
} |
414 |
|
415 |
/* HW: changed testmain */ |
416 |
#define TEST |
417 |
#ifdef TEST |
418 |
|
419 |
#include <stdio.h> |
420 |
//#include "snip_str.h" /* for strdup() */ |
421 |
|
422 |
FILE *o; |
423 |
|
424 |
void fprinter( char *string, void *data ) |
425 |
{ |
426 |
fprintf( o, "%s: %s\n", string, ( char * ) data ); |
427 |
} |
428 |
|
429 |
void printer( char *string, void *data ) |
430 |
{ |
431 |
printf( "%s: %s\n", string, ( char * ) data ); |
432 |
} |
433 |
|
434 |
/* function to pass to hash_free_table */ |
435 |
void strfree( void *d ) |
436 |
{ |
437 |
/* any additional processing goes here (if you use structures as data) */ |
438 |
/* free the datapointer */ |
439 |
free( d ); |
440 |
} |
441 |
|
442 |
|
443 |
int main( void ) |
444 |
{ |
445 |
hash_table table; |
446 |
|
447 |
char *strings[] = { |
448 |
"The first string", |
449 |
"The second string", |
450 |
"The third string", |
451 |
"The fourth string", |
452 |
"A much longer string than the rest in this example.", |
453 |
"The last string", |
454 |
NULL |
455 |
}; |
456 |
|
457 |
char *junk[] = { |
458 |
"The first data", |
459 |
"The second data", |
460 |
"The third data", |
461 |
"The fourth data", |
462 |
"The fifth datum", |
463 |
"The sixth piece of data" |
464 |
}; |
465 |
|
466 |
int i; |
467 |
void *j; |
468 |
|
469 |
hash_construct_table( &table, 211 ); |
470 |
|
471 |
/* I know, no checking on strdup ;-)), but using strdup |
472 |
to demonstrate hash_table_free with a functionpointer */ |
473 |
for ( i = 0; NULL != strings[ i ]; i++ ) |
474 |
hash_insert( strings[ i ], strdup( junk[ i ] ), &table ); |
475 |
|
476 |
/* enumerating to a file */ |
477 |
if ( NULL != ( o = fopen( "HASH.HSH", "wb" ) ) ) { |
478 |
fprintf( o, "%d strings in the table:\n\n", table.count ); |
479 |
hash_enumerate( &table, fprinter ); |
480 |
fprintf( o, "\nsorted by key:\n" ); |
481 |
hash_sorted_enum( &table, fprinter ); |
482 |
fclose( o ); |
483 |
} |
484 |
|
485 |
/* enumerating to screen */ |
486 |
hash_sorted_enum( &table, printer ); |
487 |
printf( "\n" ); |
488 |
|
489 |
/* delete 3 strings, should be 3 left */ |
490 |
for ( i = 0; i < 3; i++ ) { |
491 |
/* hash_del returns a pointer to the data */ |
492 |
strfree( hash_del( strings[ i ], &table ) ); |
493 |
} |
494 |
hash_enumerate( &table, printer ); |
495 |
|
496 |
for ( i = 0; NULL != strings[ i ]; i++ ) { |
497 |
j = hash_lookup( strings[ i ], &table ); |
498 |
if ( NULL == j ) |
499 |
printf( "\n'%s' is not in the table", strings[ i ] ); |
500 |
else |
501 |
printf( "\n%s is still in the table.", strings[ i ] ); |
502 |
} |
503 |
|
504 |
hash_free_table( &table, strfree ); |
505 |
|
506 |
return 0; |
507 |
} |
508 |
#endif /* TEST */ |