Python 源码剖析(二)【整数对象】

二、整数对象

1、PyIntObject

2、PyIntObject 对象的创建和维护

3、Hack PyIntObject


1、PyIntObject

PyIntObject的定义:

[intobject.h]

/* Python's plain integer object: the common object header followed by the
   wrapped C long.  PyObject_HEAD is defined elsewhere; code in this file
   reaches ob_refcnt and ob_type through it. */
typedef struct {

PyObject_HEAD

long ob_ival;   /* the C long value this object wraps */

} PyIntObject;

其类型对象为PyInt_Type:

[intobject.c]

/* Type object for Python's int.  This is a positional initializer: the
   meaning of every entry is fixed by its position, and the trailing comment
   names the slot being filled.  Entries given as 0 are not supplied by this
   initializer.
   NOTE(review): the four uncommented entries after PyObject_HEAD_INIT are
   presumably ob_size, tp_name, tp_basicsize and tp_itemsize, following the
   CPython 2.x PyTypeObject layout -- confirm against the matching object.h. */
PyTypeObject PyInt_Type = {

PyObject_HEAD_INIT(&PyType_Type)

0,

"int",

sizeof(PyIntObject),

0,

(destructor)int_dealloc,        /* tp_dealloc */

(printfunc)int_print,           /* tp_print */

0,                  /* tp_getattr */

0,                  /* tp_setattr */

(cmpfunc)int_compare,           /* tp_compare */

(reprfunc)int_repr,         /* tp_repr */

&int_as_number,             /* tp_as_number */

0,                  /* tp_as_sequence */

0,                  /* tp_as_mapping */

(hashfunc)int_hash,         /* tp_hash */

0,                  /* tp_call */

/* tp_str reuses int_repr, the same function installed as tp_repr above */
(reprfunc)int_repr,         /* tp_str */

PyObject_GenericGetAttr,        /* tp_getattro */

0,                  /* tp_setattro */

0,                  /* tp_as_buffer */

Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES | Py_TPFLAGS_BASETYPE, /* tp_flags */

int_doc,                /* tp_doc */

0,                  /* tp_traverse */

0,                  /* tp_clear */

0,                  /* tp_richcompare */

0,                  /* tp_weaklistoffset */

0,                  /* tp_iter */

0,                  /* tp_iternext */

int_methods,        /* tp_methods */

0,                  /* tp_members */

0,                  /* tp_getset */

0,                  /* tp_base */

0,                  /* tp_dict */

0,                  /* tp_descr_get */

0,                  /* tp_descr_set */

0,                  /* tp_dictoffset */

0,                  /* tp_init */

0,                  /* tp_alloc */

int_new,                /* tp_new */

(freefunc)int_free,                 /* tp_free */

};

可见PyInt_Type保存着PyIntObject的元信息,包括这些操作:

int_dealloc

删除PyIntObject对象

int_free

删除PyIntObject对象

int_repr

转化成PyString对象

int_hash

获得HASH值

int_print

打印PyIntObject对象

int_compare

比较操作

int_as_number

数值操作

int_methods

成员函数

比较操作代码,其实就是将包装的long进行比较:

[intobject.c]

/* Three-way comparison of two int objects by the C long each one wraps.
   Returns -1 if v is smaller than w, 1 if it is larger, and 0 if equal. */
static int int_compare(PyIntObject *v, PyIntObject *w)
{
    const long lhs = v->ob_ival;
    const long rhs = w->ob_ival;

    if (lhs < rhs)
        return -1;
    if (lhs > rhs)
        return 1;
    return 0;
}

注意int_as_number,其实是一个PyNumberMethods结构体:

[intobject.c]

/* Numeric-operation table for int; PyInt_Type installs its address in the
   tp_as_number slot.  Like the type object, it is a positional initializer,
   with each trailing comment naming the slot being filled.  The "……" line
   marks entries omitted from this excerpt, so the listing is illustrative
   and not compilable as shown. */
static PyNumberMethods int_as_number = {

(binaryfunc)int_add,    /*nb_add*/

(binaryfunc)int_sub,    /*nb_subtract*/

(binaryfunc)int_mul,    /*nb_multiply*/

……

(binaryfunc)int_div,    /* nb_floor_divide */

int_true_divide,    /* nb_true_divide */

0,          /* nb_inplace_floor_divide */

0,          /* nb_inplace_true_divide */

};

PyNumberMethods定义了38个数值操作,如加法:

[intobject.h]

/* Macro, trading safety for speed: casts op to PyIntObject * and reads
   ob_ival directly, with no check that op really is an int object. */

#define PyInt_AS_LONG(op) (((PyIntObject *)(op))->ob_ival)

[intobject.c]

/* Load the C long held by `obj` into `lng`.  If `obj` is not an int object,
   the macro makes the ENCLOSING FUNCTION return Py_NotImplemented (after
   taking a reference to it), so it may only be used inside functions that
   return PyObject *.

   The body is wrapped in do { ... } while (0) so that an invocation followed
   by a semicolon is exactly one statement and cannot capture a following
   `else`.  The continuation lines must stay contiguous: a blank line after
   a trailing backslash ends the macro definition. */
#define CONVERT_TO_LONG(obj, lng)           \
    do {                                    \
        if (PyInt_Check(obj)) {             \
            (lng) = PyInt_AS_LONG(obj);     \
        }                                   \
        else {                              \
            Py_INCREF(Py_NotImplemented);   \
            return Py_NotImplemented;       \
        }                                   \
    } while (0)

/* Add two int objects.
   Returns a PyInt holding the sum when it fits in a C long; otherwise
   delegates to the long type's nb_add and returns its result.  If either
   operand is not an int object, CONVERT_TO_LONG makes this function return
   Py_NotImplemented. */
static PyObject *
int_add(PyIntObject *v, PyIntObject *w)
{
    long a, b, x;

    CONVERT_TO_LONG(v, a);
    CONVERT_TO_LONG(w, b);

    /* Do the addition in unsigned arithmetic: signed overflow is undefined
       behaviour in C, while unsigned addition is defined to wrap.  The
       overflow test below depends on that wrapped result. */
    x = (long)((unsigned long)a + (unsigned long)b);

    /* The sum overflowed only if its sign differs from the sign of BOTH
       operands; if it matches either one, x is the true result. */
    if ((x ^ a) >= 0 || (x ^ b) >= 0)
        return PyInt_FromLong(x);

    /* Overflow: redo the addition with arbitrary-precision longs. */
    return PyLong_Type.tp_as_number->nb_add((PyObject *)v, (PyObject *)w);
}

如果没有溢出,就返回一个新的PyIntObject,否则返回一个PyLongObject。


2、PyIntObject 对象的创建和维护

2.1、Python创建的途径

有三种途径可获得一个PyIntObject对象:

  /* Build an int object from a C long. */
  PyObject *PyInt_FromLong(long ival)
  /* Build an int object by parsing the text at s in the given base. */
  PyObject* PyInt_FromString(char *s, char **pend, int base)
#ifdef Py_USING_UNICODE
/* Only available when Py_USING_UNICODE is defined; presumably the Unicode
   counterpart of PyInt_FromString -- its body is not shown here. */
PyObject*PyInt_FromUnicode(Py_UNICODE *s, int length, int base)
#endif

其中PyInt_FromString先将字符串解析为C的long值(而不是浮点数),再调用PyInt_FromLong:

[intobject.c]
/* Build an int object from the text at s: parse it into a C long using the
   requested base, then hand that value to PyInt_FromLong.  The "......"
   lines mark code omitted from this excerpt, so the listing is not
   compilable as shown. */
PyObject* PyInt_FromString(char *s, char **pend, int base)
{
    char *end;
    long x;
    ......
/* Convert the string to a long.  With base 0 and a leading '0' the text is
   parsed as unsigned and the result cast to long; every other case goes
   through the signed parser. */
if (base == 0 && s[0] == '0') 
{
        x = (long) PyOS_strtoul(s, &end, base);
    }
else
        x = PyOS_strtol(s, &end, base);
    ......
    return PyInt_FromLong(x);
}

2.2、小整数对象

频繁申请、释放空间会降低运行效率、产生系统堆内存碎片,影响Python性能。因此对于经常使用的整数,Python使用对象池技术,并将小整数(对象池保存范围)定为[-5,100):

[intobject.c]

/* Exclusive upper bound of the cached small-int range.  The #ifndef guard
   lets a definition made earlier (e.g. by the build) take precedence. */
#ifndef NSMALLPOSINTS
#define NSMALLPOSINTS       100
#endif
/* Number of cached negative values: the cached range starts at
   -NSMALLNEGINTS (inclusive).  Overridable in the same way. */
#ifndef NSMALLNEGINTS
#define NSMALLNEGINTS       5
#endif
/* The cache exists only when at least one value is configured to be cached. */
#if NSMALLNEGINTS + NSMALLPOSINTS > 0
/* References to small integers are saved in this array so that they
   can be shared.
   The integers that are saved are those in the range
   -NSMALLNEGINTS (inclusive) to NSMALLPOSINTS (not inclusive).
   The object for value i is stored at index i + NSMALLNEGINTS.
*/
static PyIntObject *small_ints[NSMALLNEGINTS + NSMALLPOSINTS];
#endif

2.3、大整数对象

对于大整数,Python则使用内存池:由一组PyIntBlock提供存储空间,并用free_list把其中空闲的位置串成链表,谁需要就从中取用:

[intobject.c]

#define BLOCK_SIZE  1000    /* 1K less typical malloc overhead */
#define BHEAD_SIZE  8   /* Enough for a 64-bit pointer */
/* Number of int objects that fit in one block once BHEAD_SIZE bytes are
   set aside for the block header (the `next` pointer). */
#define N_INTOBJECTS    ((BLOCK_SIZE - BHEAD_SIZE) / sizeof(PyIntObject))
/* One allocation unit: a link to another block plus an array of
   N_INTOBJECTS int-object slots. */
struct _intblock {
    struct _intblock *next;
    PyIntObject objects[N_INTOBJECTS];
};
typedef struct _intblock PyIntBlock;
/* Head of the list of all allocated blocks; fill_free_list pushes each new
   block at the front. */
static PyIntBlock *block_list = NULL;
/* Head of the list of unused slots.  Free slots are chained through their
   ob_type field, which is reinterpreted as a "next free slot" pointer. */
static PyIntObject *free_list = NULL;

PyIntBlock的单向链表通过block_list维护,而这些block中尚未被使用的PyIntObject空间则通过free_list串成链表来维护(在32位平台上sizeof(PyIntObject)为12字节,(1000-8)/12向下取整为82,即一个block可放82个PyIntObject)。

2.4、添加和删除

看一下产生PyIntObject:

[intobject.c]
/* Return an int object holding ival, or NULL if a new block of slots was
   needed and could not be allocated.
   Values in [-NSMALLNEGINTS, NSMALLPOSINTS) are served from the shared
   small_ints cache with their reference count bumped.  Every other value
   takes the first slot off free_list, refilling the list first if it is
   empty. */
PyObject* PyInt_FromLong(long ival)
{
    PyIntObject *result;

#if NSMALLNEGINTS + NSMALLPOSINTS > 0
    if (ival >= -NSMALLNEGINTS && ival < NSMALLPOSINTS) {
        /* The cache is indexed from 0, so shift by the negative count. */
        result = small_ints[ival + NSMALLNEGINTS];
        Py_INCREF(result);
#ifdef COUNT_ALLOCS
        if (ival < 0)
            quick_neg_int_allocs++;
        else
            quick_int_allocs++;
#endif
        return (PyObject *) result;
    }
#endif

    if (free_list == NULL) {
        free_list = fill_free_list();
        if (free_list == NULL)
            return NULL;
    }

    /* Hand-inlined object creation: pop the head of the free list.  A free
       slot keeps the address of the next free slot in its ob_type field. */
    result = free_list;
    free_list = (PyIntObject *)result->ob_type;
    PyObject_INIT(result, &PyInt_Type);
    result->ob_ival = ival;
    return (PyObject *) result;
}

先判断是否为小整数,是的话返回对象池中的小整数;否则从free_list中取出一块空闲空间来存放该整数,只有当free_list为空时才调用fill_free_list申请新的block:

[intobject.c]
static PyIntObject* fill_free_list(void)
{
    PyIntObject *p, *q;
    /* Python's object allocator isn't appropriate for large blocks. */
    p = (PyIntObject *) PyMem_MALLOC(sizeof(PyIntBlock));
    if (p == NULL)
        return (PyIntObject *) PyErr_NoMemory();
    ((PyIntBlock *)p)->next = block_list;
    block_list = (PyIntBlock *)p;
    /* Link the int objects together, from rear to front, then return
       the address of the last int object in the block. */
    p = &((PyIntBlock *)p)->objects[0];
    q = p + N_INTOBJECTS;
    while (--q > p)
        q->ob_type = (struct _typeobject *)(q-1);
    q->ob_type = NULL;
    return p + N_INTOBJECTS - 1;
}

当一个block没被填满时,不会再调用fill_free_list申请新空间,free_list指向可用空间,block_list指向最新创建的PyIntBlock对象。

注意,当某个对象被删除时,free_list会指向该空间,不会造成空间浪费:

[intobject.c]
/* Release an int object.
   An object whose type is exactly int is not handed back to the allocator:
   its slot is pushed onto the front of free_list for reuse, with ob_type
   serving as the link field.  Any other object is passed to its own type's
   tp_free. */
static void int_dealloc(PyIntObject *v)
{
    if (!PyInt_CheckExact(v)) {
        v->ob_type->tp_free((PyObject *)v);
        return;
    }

    v->ob_type = (struct _typeobject *)free_list;
    free_list = v;
}

2.5、小整数对象池的初始化

小整数对象池small_ints初始化 _PyInt_Init:

[intobject.c]
/* Populate the small-int cache: create one int object for every value in
   [-NSMALLNEGINTS, NSMALLPOSINTS) and store it in small_ints.
   Returns 1 on success, 0 if a block of slots could not be allocated. */
int _PyInt_Init(void)
{
    PyIntObject *obj;
    int value;

#if NSMALLNEGINTS + NSMALLPOSINTS > 0
    for (value = -NSMALLNEGINTS; value < NSMALLPOSINTS; value++) {
        if (free_list == NULL) {
            free_list = fill_free_list();
            if (free_list == NULL)
                return 0;
        }

        /* Hand-inlined object creation: pop the head of the free list. */
        obj = free_list;
        free_list = (PyIntObject *)obj->ob_type;
        PyObject_INIT(obj, &PyInt_Type);
        obj->ob_ival = value;

        /* The cache is indexed from 0, so shift by the negative count. */
        small_ints[value + NSMALLNEGINTS] = obj;
    }
#endif
    return 1;
}

3、Hack PyIntObject

修改int_print源码查看block_list、free_list、小整数缓冲池信息:

/* Debugging replacement for int's tp_print slot.  Instead of printing the
   value of v, it dumps allocator state to fp:
     - the address of v;
     - value and reference count of the last DUMP_COUNT slots of the block
       at the tail of block_list (new blocks are pushed at the head, so the
       tail is the block that was allocated first);
     - the number of blocks in block_list;
     - the current head of free_list.
   flags is unused.  Always returns 0. */
static int int_print(PyIntObject *v, FILE *fp, int flags)
{
    enum { DUMP_COUNT = 10 };
    PyIntBlock *block;
    PyIntBlock *oldest = NULL;
    int count = 0;
    int i;

    (void)flags;

    /* Walk to the tail of the block list, counting blocks on the way. */
    for (block = block_list; block != NULL; block = block->next) {
        ++count;
        oldest = block;
    }

    /* %p requires a void * argument. */
    fprintf(fp, "address @%p\n", (void *)v);
    fprintf(fp, "********** value\trefCount **********\n");

    /* Skip the slot dump when no block has been allocated yet, rather than
       dereferencing a NULL block pointer. */
    if (oldest != NULL) {
        for (i = 0; i < DUMP_COUNT && i < (int)N_INTOBJECTS; ++i) {
            const PyIntObject *slot = &oldest->objects[N_INTOBJECTS - 1 - i];

            /* ob_ival is a long, and the width of ob_refcnt varies between
               Python versions; print both as long so the format specifier
               always matches its argument. */
            fprintf(fp, "%ld\t\t%ld\n",
                    (long)slot->ob_ival, (long)slot->ob_refcnt);
        }
    }

    fprintf(fp, "block_list count : %d\n", count);
    fprintf(fp, "free_list : %p\n\n", (void *)free_list);
    return 0;
}

有兴趣的可以试一下。

上一篇:将html导出到excel或word


下一篇:Hadoop(六)MapReduce的入门与运行原理