/* Pure C simple version of python 2.7.8 hash table */ /* Sample usage: see main() */ #include <stdio.h> #include <stdlib.h> #include <assert.h> #include <string.h> #define PyDict_MINSIZE 8 #define PERTURB_SHIFT 5 #define INIT_NONZERO_DICT_SLOTS(mp) do { \ (mp)->ma_table = (mp)->ma_smalltable; (mp)->ma_mask = PyDict_MINSIZE - 1; } while (0) #define EMPTY_TO_MINSIZE(mp) do { \ memset((mp)->ma_smalltable, 0, sizeof((mp)->ma_smalltable)); (mp)->ma_used = (mp)->ma_fill = 0; INIT_NONZERO_DICT_SLOTS(mp); } while (0) typedef void PyObject; typedef struct { size_t me_hash; PyObject *me_key; PyObject *me_value; } PyDictEntry; typedef struct _dictobject PyDictObject; struct _dictobject { size_t ma_fill; /* # Active + # Dummy */ size_t ma_used; /* # Active */ size_t ma_mask; PyDictEntry *ma_table; size_t (*ma_hash)(PyObject *key); int (*ma_keycmp)(PyObject *key1,PyObject *key2); PyObject *(*ma_keydup)(PyObject *key); PyObject *(*ma_valdup)(PyObject *val); PyObject *(*ma_default)(void); PyDictEntry ma_smalltable[PyDict_MINSIZE]; }; /* Object used as dummy key to fill deleted entries */ static PyObject *_dummy_struct; #define dummy (&_dummy_struct) static size_t hash_string(PyObject *_str) { char *str = (char *)_str; size_t hash = 5381; for (; *str; str++) hash = ((hash << 5) + hash) + *str; /* hash * 33 + c */ return hash; } static int keycmp_string(PyObject *_k1, PyObject *_k2) { char *k1 = (char *)_k1; char *k2 = (char *)_k2; for (; *k1 == *k2; k1++, k2++) if (*k1 == ‘\0‘) return 0; return *k1 - *k2; } static PyObject * keydup_string(PyObject *_key){ return (PyObject *)strdup((char *)_key); } static PyObject * valdup_int(PyObject *_val){ int *val = (int*)malloc(sizeof(int)); *val = *(int *)_val; return (PyObject *)val; } static PyObject * get_default_val(void){ int *val=(int*)malloc(sizeof(int)); *val=0; return (PyObject *)val; } PyDictObject * dict_new(void) { register PyDictObject *mp; mp = (PyDictObject *)malloc(sizeof(PyDictObject)); if (mp == NULL) return NULL; EMPTY_TO_MINSIZE(mp); mp->ma_hash = hash_string; mp->ma_keycmp = keycmp_string; mp->ma_keydup = keydup_string; mp->ma_valdup = valdup_int; mp->ma_default = get_default_val; return mp; } static PyDictEntry * lookdict(PyDictObject *mp, PyObject *key, register size_t hash) { register size_t i; register size_t perturb; register PyDictEntry *freeslot; register size_t mask = (size_t)mp->ma_mask; PyDictEntry *ep0 = mp->ma_table; register PyDictEntry *ep; i = (size_t)hash & mask; ep = &ep0[i]; if (ep->me_key == NULL || ep->me_key == key) return ep; if (ep->me_key == dummy) freeslot = ep; else if (ep->me_hash == hash && mp->ma_keycmp(ep->me_key, key) == 0) return ep; else freeslot = NULL; for (perturb = hash;; perturb >>= PERTURB_SHIFT) { i = (i << 2) + i + perturb + 1; ep = &ep0[i & mask]; if (ep->me_key == NULL) return freeslot == NULL ? ep : freeslot; if (ep->me_key == key || (ep->me_hash == hash && ep->me_key != dummy //delete won‘t change me_hash, so this is needed. && mp->ma_keycmp(ep->me_key, key) == 0)) return ep; if (ep->me_key == dummy && freeslot == NULL) freeslot = ep; } assert(0); /* NOT REACHED */ return 0; } static PyDictEntry * lookdict_nodummy(PyDictObject *mp, PyObject *key, register size_t hash) { register size_t i; register size_t perturb; register size_t mask = (size_t)mp->ma_mask; PyDictEntry *ep0 = mp->ma_table; register PyDictEntry *ep; i = (size_t)hash & mask; ep = &ep0[i]; if (ep->me_key == NULL || ep->me_key == key || (ep->me_hash == hash && mp->ma_keycmp(ep->me_key, key)==0)) return ep; for (perturb = hash;; perturb >>= PERTURB_SHIFT) { i = (i << 2) + i + perturb + 1; ep = &ep0[i & mask]; if (ep->me_key == NULL || ep->me_key == key || (ep->me_hash == hash && mp->ma_keycmp(ep->me_key, key)==0)) return ep; } assert(0); /* NOT REACHED */ return 0; } static void insertdict_clean(register PyDictObject *mp, PyObject *key, size_t hash, PyObject *value) { register size_t i; register size_t perturb; register size_t mask = mp->ma_mask; PyDictEntry *ep0 = mp->ma_table; register PyDictEntry *ep; i = hash & mask; ep = &ep0[i]; for (perturb = hash; ep->me_key != NULL; perturb >>= PERTURB_SHIFT) { i = (i << 2) + i + perturb + 1; ep = &ep0[i & mask]; } mp->ma_fill++; ep->me_key = key; ep->me_hash = hash; ep->me_value = value; mp->ma_used++; } /* Restructure the table by allocating a new table and reinserting all items again. When entries have been deleted, the new table may actually be smaller than the old one. */ static int dict_resize(PyDictObject *mp, size_t minused) { //printf("call resize...\n"); size_t newsize; PyDictEntry *oldtable, *newtable, *ep; size_t i; int is_oldtable_malloced; PyDictEntry small_copy[PyDict_MINSIZE]; /* Find the smallest table size > minused. */ for (newsize = PyDict_MINSIZE; newsize <= minused && newsize > 0; newsize <<= 1) ; /* Get space for a new table. */ oldtable = mp->ma_table; is_oldtable_malloced = (oldtable != mp->ma_smalltable); if (newsize == PyDict_MINSIZE) { /* A large table is shrinking, or we can‘t get any smaller. */ newtable = mp->ma_smalltable; if (newtable == oldtable) {//such as: for a new dict, add 5 keys, delete them, add 6th key, then goes here. if (mp->ma_fill == mp->ma_used) { /* No dummies, so no point doing anything. */ return 0; } assert(mp->ma_fill > mp->ma_used); memcpy(small_copy, oldtable, sizeof(small_copy)); oldtable = small_copy; } } else { newtable = (PyDictEntry*)malloc(sizeof(PyDictEntry)*newsize); if (newtable == NULL) return -1; } /* Make the dict empty, using the new table. */ assert(newtable != oldtable); mp->ma_table = newtable; mp->ma_mask = newsize - 1; memset(newtable, 0, sizeof(PyDictEntry)* newsize); mp->ma_used = 0; i = mp->ma_fill; mp->ma_fill = 0; /* Copy the data over; this is refcount-neutral for active entries; dummy entries aren‘t copied over, of course */ for (ep = oldtable; i > 0; ep++) { if (ep->me_value != NULL) { /* active entry */ --i; insertdict_clean(mp, ep->me_key, ep->me_hash, ep->me_value); } else if (ep->me_key != NULL) { /* dummy entry */ --i; assert(ep->me_key == dummy); } /* else key == value == NULL: nothing to do */ } if (is_oldtable_malloced) free(oldtable); return 0; } PyObject * dict_search(PyDictObject *mp, PyObject *key) { size_t hash; PyDictEntry *ep; assert(key); hash = mp->ma_hash(key); ep = lookdict(mp, key, hash); return ep->me_value; } int dict_contain(PyDictObject *mp, PyObject *key) { return dict_search(mp,key)?1:0; } int dict_add(register PyDictObject *mp, PyObject *key, PyObject *value) { register size_t hash; assert(key); assert(value); hash = mp->ma_hash(key); PyDictEntry *ep=lookdict(mp,key,hash); assert(ep->me_value==NULL); //only process for add case. if (ep->me_key == NULL) //unused mp->ma_fill++; ep->me_key = mp->ma_keydup(key); ep->me_hash = hash; ep->me_value = mp->ma_valdup(value); mp->ma_used++; if (!( mp->ma_fill * 3 >= (mp->ma_mask + 1) * 2)) return 0; return dict_resize(mp, (mp->ma_used > 50000 ? 2 : 4) * mp->ma_used); } int dict_update(register PyDictObject *mp, PyObject *key, PyObject *value) { register size_t hash; assert(key); assert(value); hash = mp->ma_hash(key); PyDictEntry *ep=lookdict(mp,key,hash); assert(ep->me_value!=NULL); //only process for update case. PyObject *old_value = ep->me_value; ep->me_value = mp->ma_valdup(value); free(old_value); mp->ma_used++; return 0; } int dict_del(PyDictObject *mp, PyObject *key) { register size_t hash; register PyDictEntry *ep; PyObject *old_value, *old_key; assert(key); hash = mp->ma_hash(key); ep = lookdict(mp, key, hash); if (ep->me_value == NULL) //key does not exist or duplicate deletion return -1; old_key = ep->me_key; ep->me_key = dummy; old_value = ep->me_value; ep->me_value = NULL; mp->ma_used--; free(old_value); free(old_key); return 0; } PyObject * dict_dget(PyDictObject *mp, PyObject *key) { PyDictEntry *ep; size_t h; h = mp->ma_hash(key); ep = lookdict(mp,key,h); if (ep->me_value==NULL) { if (ep->me_key == NULL) mp->ma_fill++; ep->me_key= mp->ma_keydup(key); ep->me_value = mp->ma_default(); ep->me_hash=h; mp->ma_used++; if (mp->ma_fill * 3 >= (mp->ma_mask + 1) * 2){ dict_resize(mp, (mp->ma_used > 50000 ? 2 : 4) * mp->ma_used); ep = lookdict_nodummy(mp,key,h); } } return ep->me_value; } void dict_clear(PyObject *op) { PyDictObject *mp; PyDictEntry *ep, *table; int table_is_malloced; size_t fill; PyDictEntry small_copy[PyDict_MINSIZE]; mp = (PyDictObject *)op; table = mp->ma_table; assert(table != NULL); table_is_malloced = table != mp->ma_smalltable; fill = mp->ma_fill; if (table_is_malloced) EMPTY_TO_MINSIZE(mp); else if (fill > 0) { memcpy(small_copy, table, sizeof(small_copy)); table = small_copy; EMPTY_TO_MINSIZE(mp); } for (ep = table; fill > 0; ep++) { if (ep->me_key) { fill--; free(ep->me_key); free(ep->me_value); } } if (table_is_malloced) free(table); } size_t dict_len(PyDictObject *mp) { return mp->ma_used; } // test function static int hash_cmp(const void *a, const void *b) { return *(int *)(*(PyDictEntry *) a).me_value > *(int *)(*(PyDictEntry *) b).me_value ? -1 : 1; } static void hash_all_members(PyDictObject *t) { PyDictEntry *pdes = t->ma_table; PyDictEntry es[t->ma_used]; size_t i = 0, j = 0, size = t->ma_mask+1; for (; i < size; i++) if (pdes[i].me_value != NULL) es[j++] = pdes[i]; qsort(es, t->ma_used, sizeof(es[0]), hash_cmp); for (i = 0; i < t->ma_used; i++) printf("%s\t%d\n", (char *)es[i].me_key, *(int *)es[i].me_value); } int main(void) { PyDictObject *mp = dict_new(); FILE *fp; char keybuf[100]; int *valbuf = (int*)malloc(sizeof(int)); *valbuf = 1; if ((fp = fopen("u8w.txt", "r")) == NULL ) { fprintf(stdout,"Fail to open file.\n"); exit(0); } while (fscanf(fp, "%s", keybuf) == 1) { if (dict_contain(mp,keybuf)) *(int*)dict_search(mp,keybuf) += 1; else dict_add(mp,keybuf,valbuf); } // PyObject *vp; // while (fscanf(fp, "%s", keybuf) == 1) { // vp = dict_dget(mp,keybuf); // *(int*)vp += 1; // } hash_all_members(mp); dict_clear(mp); free(valbuf); free(mp); return 0; }