Python 2.7的字典实现

/* Pure C simple version of python 2.7.8 hash table */
/* Sample usage: see main() */
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <string.h>
#define PyDict_MINSIZE 8
#define PERTURB_SHIFT 5
#define INIT_NONZERO_DICT_SLOTS(mp) do {                                \
    (mp)->ma_table = (mp)->ma_smalltable;                                   (mp)->ma_mask = PyDict_MINSIZE - 1;                                 } while (0)
#define EMPTY_TO_MINSIZE(mp) do {                                       \
    memset((mp)->ma_smalltable, 0, sizeof((mp)->ma_smalltable));            (mp)->ma_used = (mp)->ma_fill = 0;                                      INIT_NONZERO_DICT_SLOTS(mp);                                        } while (0)

typedef void PyObject;

typedef struct {
    size_t me_hash;
    PyObject *me_key;
    PyObject *me_value;
} PyDictEntry;

typedef struct _dictobject PyDictObject;
struct _dictobject {
    size_t ma_fill;  /* # Active + # Dummy */
    size_t ma_used;  /* # Active */
    size_t ma_mask;
    PyDictEntry *ma_table;
    size_t (*ma_hash)(PyObject *key);
    int (*ma_keycmp)(PyObject *key1,PyObject *key2);
    PyObject *(*ma_keydup)(PyObject *key);
    PyObject *(*ma_valdup)(PyObject *val);
    PyObject *(*ma_default)(void);
    PyDictEntry ma_smalltable[PyDict_MINSIZE];
};

/* Object used as dummy key to fill deleted entries */
static PyObject *_dummy_struct;
#define dummy (&_dummy_struct)

static size_t
hash_string(PyObject *_str) {
    char *str = (char *)_str;
    size_t hash = 5381;
    for (; *str; str++)
        hash = ((hash << 5) + hash) + *str; /* hash * 33 + c */
    return hash;
}

static int
keycmp_string(PyObject *_k1, PyObject *_k2) {
    char *k1 = (char *)_k1;
    char *k2 = (char *)_k2;
    for (; *k1 == *k2; k1++, k2++)
        if (*k1 == \0)
            return 0;
    return *k1 - *k2;
}

static PyObject *
keydup_string(PyObject *_key){
    return (PyObject *)strdup((char *)_key);
}

static PyObject *
valdup_int(PyObject *_val){
    int *val = (int*)malloc(sizeof(int));
    *val = *(int *)_val;
    return (PyObject *)val;
}

static PyObject *
get_default_val(void){
    int *val=(int*)malloc(sizeof(int));
    *val=0;
    return (PyObject *)val;
}

PyDictObject *
dict_new(void) {
    register PyDictObject *mp;
    mp = (PyDictObject *)malloc(sizeof(PyDictObject));
    if (mp == NULL)
        return NULL;
    EMPTY_TO_MINSIZE(mp);
    mp->ma_hash = hash_string;
    mp->ma_keycmp = keycmp_string;
    mp->ma_keydup = keydup_string;
    mp->ma_valdup = valdup_int;
    mp->ma_default = get_default_val;
    return mp;
}

static PyDictEntry *
lookdict(PyDictObject *mp, PyObject *key, register size_t hash) {
    register size_t i;
    register size_t perturb;
    register PyDictEntry *freeslot;
    register size_t mask = (size_t)mp->ma_mask;
    PyDictEntry *ep0 = mp->ma_table;
    register PyDictEntry *ep;

    i = (size_t)hash & mask;
    ep = &ep0[i];
    if (ep->me_key == NULL || ep->me_key == key)
        return ep;
    if (ep->me_key == dummy)
        freeslot = ep;
    else if (ep->me_hash == hash
             && mp->ma_keycmp(ep->me_key, key) == 0)
        return ep;
    else
        freeslot = NULL;
    for (perturb = hash;; perturb >>= PERTURB_SHIFT) {
        i = (i << 2) + i + perturb + 1;
        ep = &ep0[i & mask];
        if (ep->me_key == NULL)
            return freeslot == NULL ? ep : freeslot;
        if (ep->me_key == key
                || (ep->me_hash == hash
                    && ep->me_key != dummy //delete won‘t change me_hash, so this is needed.
                    && mp->ma_keycmp(ep->me_key, key) == 0))
            return ep;
        if (ep->me_key == dummy && freeslot == NULL)
            freeslot = ep;
    }
    assert(0);          /* NOT REACHED */
    return 0;
}

static PyDictEntry *
lookdict_nodummy(PyDictObject *mp, PyObject *key, register size_t hash) {
    register size_t i;
    register size_t perturb;
    register size_t mask = (size_t)mp->ma_mask;
    PyDictEntry *ep0 = mp->ma_table;
    register PyDictEntry *ep;

    i = (size_t)hash & mask;
    ep = &ep0[i];
    if (ep->me_key == NULL
            || ep->me_key == key
            || (ep->me_hash == hash && mp->ma_keycmp(ep->me_key, key)==0))
        return ep;
    for (perturb = hash;; perturb >>= PERTURB_SHIFT) {
        i = (i << 2) + i + perturb + 1;
        ep = &ep0[i & mask];
        if (ep->me_key == NULL
                || ep->me_key == key
                || (ep->me_hash == hash && mp->ma_keycmp(ep->me_key, key)==0))
            return ep;
    }
    assert(0);          /* NOT REACHED */
    return 0;
}

static void
insertdict_clean(register PyDictObject *mp, PyObject *key, size_t hash, PyObject *value) {
    register size_t i;
    register size_t perturb;
    register size_t mask = mp->ma_mask;
    PyDictEntry *ep0 = mp->ma_table;
    register PyDictEntry *ep;

    i = hash & mask;
    ep = &ep0[i];
    for (perturb = hash; ep->me_key != NULL; perturb >>= PERTURB_SHIFT) {
        i = (i << 2) + i + perturb + 1;
        ep = &ep0[i & mask];
    }
    mp->ma_fill++;
    ep->me_key = key;
    ep->me_hash = hash;
    ep->me_value = value;
    mp->ma_used++;
}

/*
Restructure the table by allocating a new table and reinserting all
items again.  When entries have been deleted, the new table may
actually be smaller than the old one.
*/
static int
dict_resize(PyDictObject *mp, size_t minused) {
    //printf("call resize...\n");
    size_t newsize;
    PyDictEntry *oldtable, *newtable, *ep;
    size_t i;
    int is_oldtable_malloced;
    PyDictEntry small_copy[PyDict_MINSIZE];
    /* Find the smallest table size > minused. */
    for (newsize = PyDict_MINSIZE;
            newsize <= minused && newsize > 0;
            newsize <<= 1)
        ;
    /* Get space for a new table. */
    oldtable = mp->ma_table;
    is_oldtable_malloced = (oldtable != mp->ma_smalltable);

    if (newsize == PyDict_MINSIZE) {
        /* A large table is shrinking, or we can‘t get any smaller. */
        newtable = mp->ma_smalltable;
        if (newtable == oldtable) {//such as: for a new dict, add 5 keys, delete them, add 6th key, then goes here.
            if (mp->ma_fill == mp->ma_used) {
                /* No dummies, so no point doing anything. */
                return 0;
            }
            assert(mp->ma_fill > mp->ma_used);
            memcpy(small_copy, oldtable, sizeof(small_copy));
            oldtable = small_copy;
        }
    } else {
        newtable = (PyDictEntry*)malloc(sizeof(PyDictEntry)*newsize);
        if (newtable == NULL)
            return -1;
    }
    /* Make the dict empty, using the new table. */
    assert(newtable != oldtable);
    mp->ma_table = newtable;
    mp->ma_mask = newsize - 1;
    memset(newtable, 0, sizeof(PyDictEntry)* newsize);
    mp->ma_used = 0;
    i = mp->ma_fill;
    mp->ma_fill = 0;
    /* Copy the data over; this is refcount-neutral for active entries;
    dummy entries aren‘t copied over, of course */
    for (ep = oldtable; i > 0; ep++) {
        if (ep->me_value != NULL) {             /* active entry */
            --i;
            insertdict_clean(mp, ep->me_key, ep->me_hash, ep->me_value);
        } else if (ep->me_key != NULL) {      /* dummy entry */
            --i;
            assert(ep->me_key == dummy);
        }
        /* else key == value == NULL:  nothing to do */
    }
    if (is_oldtable_malloced)
        free(oldtable);
    return 0;
}

PyObject *
dict_search(PyDictObject *mp, PyObject *key) {
    size_t hash;
    PyDictEntry *ep;
    assert(key);
    hash = mp->ma_hash(key);
    ep = lookdict(mp, key, hash);
    return ep->me_value;
}

int
dict_contain(PyDictObject *mp, PyObject *key) {
    return dict_search(mp,key)?1:0;
}

int
dict_add(register PyDictObject *mp, PyObject *key, PyObject *value) {
    register size_t hash;
    assert(key);
    assert(value);
    hash = mp->ma_hash(key);
    PyDictEntry *ep=lookdict(mp,key,hash);
    assert(ep->me_value==NULL); //only process for add case.
    if (ep->me_key == NULL) //unused
        mp->ma_fill++;
    ep->me_key = mp->ma_keydup(key);
    ep->me_hash = hash;
    ep->me_value = mp->ma_valdup(value);
    mp->ma_used++;
    if (!( mp->ma_fill * 3 >= (mp->ma_mask + 1) * 2))
        return 0;
    return dict_resize(mp, (mp->ma_used > 50000 ? 2 : 4) * mp->ma_used);
}

int
dict_update(register PyDictObject *mp, PyObject *key, PyObject *value) {
    register size_t hash;
    assert(key);
    assert(value);
    hash = mp->ma_hash(key);
    PyDictEntry *ep=lookdict(mp,key,hash);
    assert(ep->me_value!=NULL); //only process for update case.
    PyObject *old_value = ep->me_value;
    ep->me_value = mp->ma_valdup(value);
    free(old_value);
    mp->ma_used++;
    return 0;
}

int
dict_del(PyDictObject *mp, PyObject *key) {
    register size_t hash;
    register PyDictEntry *ep;
    PyObject *old_value, *old_key;

    assert(key);
    hash = mp->ma_hash(key);
    ep = lookdict(mp, key, hash);
    if (ep->me_value == NULL) //key does not exist or duplicate deletion
        return -1;
    old_key = ep->me_key;
    ep->me_key = dummy;
    old_value = ep->me_value;
    ep->me_value = NULL;
    mp->ma_used--;
    free(old_value);
    free(old_key);
    return 0;
}

PyObject *
dict_dget(PyDictObject *mp, PyObject *key) {
    PyDictEntry *ep;
    size_t h;
    h = mp->ma_hash(key);
    ep = lookdict(mp,key,h);
    if (ep->me_value==NULL) {
        if (ep->me_key == NULL)
            mp->ma_fill++;
        ep->me_key= mp->ma_keydup(key);
        ep->me_value = mp->ma_default();
        ep->me_hash=h;
        mp->ma_used++;
        if (mp->ma_fill * 3 >= (mp->ma_mask + 1) * 2){
            dict_resize(mp, (mp->ma_used > 50000 ? 2 : 4) * mp->ma_used);
            ep = lookdict_nodummy(mp,key,h);
        }
    }
    return ep->me_value;

}

void
dict_clear(PyObject *op) {
    PyDictObject *mp;
    PyDictEntry *ep, *table;
    int table_is_malloced;
    size_t fill;
    PyDictEntry small_copy[PyDict_MINSIZE];
    mp = (PyDictObject *)op;
    table = mp->ma_table;
    assert(table != NULL);
    table_is_malloced = table != mp->ma_smalltable;

    fill = mp->ma_fill;
    if (table_is_malloced)
        EMPTY_TO_MINSIZE(mp);
    else if (fill > 0) {
        memcpy(small_copy, table, sizeof(small_copy));
        table = small_copy;
        EMPTY_TO_MINSIZE(mp);
    }
    for (ep = table; fill > 0; ep++) {
        if (ep->me_key) {
            fill--;
            free(ep->me_key);
            free(ep->me_value);
        }
    }
    if (table_is_malloced)
        free(table);
}

size_t
dict_len(PyDictObject *mp) {
    return mp->ma_used;
}

// test function
static int
hash_cmp(const void *a, const void *b) {
    return *(int *)(*(PyDictEntry *) a).me_value > *(int *)(*(PyDictEntry *) b).me_value ? -1 : 1;
}

static void
hash_all_members(PyDictObject *t) {
    PyDictEntry *pdes = t->ma_table;
    PyDictEntry es[t->ma_used];
    size_t i = 0, j = 0, size = t->ma_mask+1;
    for (; i < size; i++)
        if (pdes[i].me_value != NULL)
            es[j++] = pdes[i];
    qsort(es, t->ma_used, sizeof(es[0]), hash_cmp);
    for (i = 0; i < t->ma_used; i++)
        printf("%s\t%d\n", (char *)es[i].me_key, *(int *)es[i].me_value);
}

int main(void) {
    PyDictObject *mp = dict_new();
    FILE *fp;
    char keybuf[100];
    int *valbuf = (int*)malloc(sizeof(int));
    *valbuf = 1;
    if ((fp = fopen("u8w.txt", "r")) == NULL ) {
        fprintf(stdout,"Fail to open file.\n");
        exit(0);
    }
    while (fscanf(fp, "%s", keybuf) == 1) {
        if (dict_contain(mp,keybuf))
            *(int*)dict_search(mp,keybuf) += 1;
        else
            dict_add(mp,keybuf,valbuf);
    }
//    PyObject *vp;
//    while (fscanf(fp, "%s", keybuf) == 1) {
//        vp = dict_dget(mp,keybuf);
//        *(int*)vp += 1;
//    }
    hash_all_members(mp);
    dict_clear(mp);
    free(valbuf);
    free(mp);
    return 0;
}

Python 2.7的字典实现,布布扣,bubuko.com

Python 2.7的字典实现

上一篇:C#线程同步技术(一) lock 语句


下一篇:2014年辛星完全解读Javascript第八节 json