6. Python3源码—List对象

6.1. List对象

List对象是“变长对象”。

6.1.1. Python中的创建

Python中List对象最重要的创建方法为PyList_New,如下Python语句最终会调用到PyList_New:

test = [1, 2, 3, 4, 5]

6.1.2. PyList_New的C调用栈

// pystate.c
PyInterpreterState_New

// ceval.c
=>_PyEval_EvalFrameDefault (case BUILD_LIST)

// listobject.c
=> PyList_New

6.1.3. PyList_New源码

// listobject.c
PyObject *
PyList_New(Py_ssize_t size)
{
    PyListObject *op;
#ifdef SHOW_ALLOC_COUNT
    static int initialized = 0;
    if (!initialized) {
        Py_AtExit(show_alloc);
        initialized = 1;
    }
#endif

    if (size < 0) {
        PyErr_BadInternalCall();
        return NULL;
    }
    if (numfree) {
        numfree--;
        op = free_list[numfree];
        _Py_NewReference((PyObject *)op);
#ifdef SHOW_ALLOC_COUNT
        count_reuse++;
#endif
    } else {
        op = PyObject_GC_New(PyListObject, &PyList_Type);
        if (op == NULL)
            return NULL;
#ifdef SHOW_ALLOC_COUNT
        count_alloc++;
#endif
    }
    if (size <= 0)
        op->ob_item = NULL;
    else {
        op->ob_item = (PyObject **) PyMem_Calloc(size, sizeof(PyObject *));
        if (op->ob_item == NULL) {
            Py_DECREF(op);
            return PyErr_NoMemory();
        }
    }
    Py_SIZE(op) = size;
    op->allocated = size;
    _PyObject_GC_TRACK(op);
    return (PyObject *) op;
}

可以看到:

  • List对象数据结构:
// listobject.h
typedef struct {
    PyObject_VAR_HEAD
    PyObject **ob_item;
    Py_ssize_t allocated;
} PyListObject;

ob_size和allocated并不一样,类似于STL中的vector的size和capacity。

  • List对象缓冲池:只缓冲PyListObject对象指针,里面包含的ob_item全部被释放。List对象缓冲区大小为80。
// listobject.c
#ifndef PyList_MAXFREELIST
#define PyList_MAXFREELIST 80
#endif
static PyListObject *free_list[PyList_MAXFREELIST];
static int numfree = 0;

static void
list_dealloc(PyListObject *op)
{
    Py_ssize_t i;
    PyObject_GC_UnTrack(op);
    Py_TRASHCAN_SAFE_BEGIN(op)
    if (op->ob_item != NULL) {
        /* Do it backwards, for Christian Tismer.
           There's a simple test case where somehow this reduces
           thrashing when a *very* large list is created and
           immediately deleted. */
        i = Py_SIZE(op);
        while (--i >= 0) {
            Py_XDECREF(op->ob_item[i]);
        }
        PyMem_FREE(op->ob_item);
    }
    if (numfree < PyList_MAXFREELIST && PyList_CheckExact(op))
        free_list[numfree++] = op;
    else
        Py_TYPE(op)->tp_free((PyObject *)op);
    Py_TRASHCAN_SAFE_END(op)
}

6.2. List对象的维护

6.2.1. 设置元素

test = [0, 1, 2, 3, 4]
test[0] = 100

上面的Python代码会调用C中的list_ass_subscript方法,由于是index case,所以最终会调用list_ass_item方法。需要注意,此处将v的引用计数加1,并且将之前的引用减1。

// listobject.c
static PySequenceMethods list_as_sequence = {
    (lenfunc)list_length,                    /* sq_length */
    (binaryfunc)list_concat,                 /* sq_concat */
    (ssizeargfunc)list_repeat,               /* sq_repeat */
    (ssizeargfunc)list_item,                 /* sq_item */
    0,                                       /* sq_slice */
    (ssizeobjargproc)list_ass_item,          /* sq_ass_item */
    0,                                      /* sq_ass_slice */
    (objobjproc)list_contains,               /* sq_contains */
    (binaryfunc)list_inplace_concat,   /* sq_inplace_concat */
    (ssizeargfunc)list_inplace_repeat, /* sq_inplace_repeat */
};

static int
list_ass_item(PyListObject *a, Py_ssize_t i, PyObject *v)
{   
    if (i < 0 || i >= Py_SIZE(a)) {
        PyErr_SetString(PyExc_IndexError,
                        "list assignment index out of range");
        return -1;
    }
    if (v == NULL)
        return list_ass_slice(a, i, i+1, v);
    Py_INCREF(v);
    Py_SETREF(a->ob_item[i], v);
    return 0;
}

Py_SETREF宏定义如下:

// object.h
#define Py_SETREF(op, op2)                      \
    do {                                        \
        PyObject *_py_tmp = (PyObject *)(op);   \
        (op) = (op2);                           \
        Py_DECREF(_py_tmp);                     \
    } while (0)

6.2.2. 追加元素

test = []
test.append(1000)

上面的Python代码会调用C中的list_append方法,最终会调用app1方法。

// listobject.h
#define LIST_APPEND_METHODDEF    \
    {"append", (PyCFunction)list_append, METH_O, list_append__doc__},

#define PyList_SET_ITEM(op, i, v) (((PyListObject *)(op))->ob_item[i] = (v))
#define PyList_GET_SIZE(op)    (assert(PyList_Check(op)),Py_SIZE(op))

app1方法中获取List对象里的ob_item长度,调整ob_item大小,对传入的v增加引用,并设置第n位为v。

// listobject.c
static PyMethodDef list_methods[] = {
    // sth.
    LIST_APPEND_METHODDEF
    // sth.
};

static PyObject *
list_append(PyListObject *self, PyObject *object)
{
    if (app1(self, object) == 0)
        Py_RETURN_NONE;
    return NULL;
}

static int
app1(PyListObject *self, PyObject *v)
{
    Py_ssize_t n = PyList_GET_SIZE(self);

    assert (v != NULL);
    if (n == PY_SSIZE_T_MAX) {
        PyErr_SetString(PyExc_OverflowError,
            "cannot add more objects to list");
        return -1;
    }

    if (list_resize(self, n+1) < 0)
        return -1;

    Py_INCREF(v);
    PyList_SET_ITEM(self, n, v);
    return 0;
}

6.2.3. 插入元素

test = [1, 2, 3, 4, 5]
test.insert(0, 10)
test.insert(-1, 100)

上面的Python代码会调用C中的list_insert方法,最终会调用ins1方法。

// listobject.h
#define LIST_INSERT_METHODDEF    \
    {"insert", (PyCFunction)list_insert, METH_FASTCALL, list_insert__doc__},

static PyObject *
list_insert_impl(PyListObject *self, Py_ssize_t index, PyObject *object);

static PyObject *
list_insert(PyListObject *self, PyObject *const *args, Py_ssize_t nargs)
{
    PyObject *return_value = NULL;
    Py_ssize_t index;
    PyObject *object;

    if (!_PyArg_ParseStack(args, nargs, "nO:insert",
        &index, &object)) {
        goto exit;
    }
    return_value = list_insert_impl(self, index, object);

exit:
    return return_value;
}

在ins1方法中调整ob_item大小,计算插入位置,对插入位置进行容错处理,将插入位置之后的元素全部向后移一位,对传入的v增加引用,并设置第where位为v。插入元素支持传入负值。

// listobject.c
static PyMethodDef list_methods[] = {
    // sth.
    LIST_INSERT_METHODDEF
    // sth.
};

static PyObject *
list_insert_impl(PyListObject *self, Py_ssize_t index, PyObject *object)
{
    if (ins1(self, index, object) == 0)
        Py_RETURN_NONE;
    return NULL;
}

static int
ins1(PyListObject *self, Py_ssize_t where, PyObject *v)
{
    Py_ssize_t i, n = Py_SIZE(self);
    PyObject **items;
    if (v == NULL) {
        PyErr_BadInternalCall();
        return -1;
    }
    if (n == PY_SSIZE_T_MAX) {
        PyErr_SetString(PyExc_OverflowError,
            "cannot add more objects to list");
        return -1;
    }

    if (list_resize(self, n+1) < 0)
        return -1;

    if (where < 0) {
        where += n;
        if (where < 0)
            where = 0;
    }
    if (where > n)
        where = n;
    items = self->ob_item;
    for (i = n; --i >= where; )
        items[i+1] = items[i];
    Py_INCREF(v);
    items[where] = v;
    return 0;
}

6.2.4. 删除元素

test = [1, 2, 3, 4, 5]
test.remove(3)

上面的Python代码会调用C中的list_remove方法。

// listobject.h
#define LIST_REMOVE_METHODDEF    \
    {"remove", (PyCFunction)list_remove, METH_O, list_remove__doc__},

list_remove方法遍历整个list,发现第一个匹配上的对象,调用list_ass_slice方法进行删除。

// listobject.c
static PyMethodDef list_methods[] = {
    // sth.
    LIST_REMOVE_METHODDEF
    // sth.
};

static PyObject *
list_remove(PyListObject *self, PyObject *value)
{
    Py_ssize_t i;

    for (i = 0; i < Py_SIZE(self); i++) {
        int cmp = PyObject_RichCompareBool(self->ob_item[i], value, Py_EQ);
        if (cmp > 0) {
            if (list_ass_slice(self, i, i+1,
                               (PyObject *)NULL) == 0)
                Py_RETURN_NONE;
            return NULL;
        }
        else if (cmp < 0)
            return NULL;
    }
    PyErr_SetString(PyExc_ValueError, "list.remove(x): x not in list");
    return NULL;
}

list_ass_slice并不是专门用于删除的方法,根据方法的注释可以看出,由于list_remove方法调用list_ass_slice,传入NULL,所以相当于是删除。

a[ilow:ihigh] = v if v != NULL.
del a[ilow:ihigh] if v == NULL.

list_ass_slice的实现主要是用memmove来移动内存。值得注意的是此处使用recycle_on_stack节省对堆上内存的使用。

// listobject.c
static int
list_ass_slice(PyListObject *a, Py_ssize_t ilow, Py_ssize_t ihigh, PyObject *v)
{
    PyObject *recycle_on_stack[8];
    PyObject **recycle = recycle_on_stack;
    PyObject **item;
    PyObject **vitem = NULL;
    PyObject *v_as_SF = NULL;
    Py_ssize_t n; 
    Py_ssize_t norig;
    Py_ssize_t d;
    Py_ssize_t k;
    size_t s;
    int result = -1;
#define b ((PyListObject *)v)
    if (v == NULL)
        n = 0;
    else {
        // do sth.
    }
    if (ilow < 0)
        ilow = 0;
    else if (ilow > Py_SIZE(a))
        ilow = Py_SIZE(a);

    if (ihigh < ilow)
        ihigh = ilow;
    else if (ihigh > Py_SIZE(a))
        ihigh = Py_SIZE(a);

    norig = ihigh - ilow;
    assert(norig >= 0);
    d = n - norig;
    if (Py_SIZE(a) + d == 0) {
        Py_XDECREF(v_as_SF);
        return _list_clear(a);
    }
    item = a->ob_item;
    s = norig * sizeof(PyObject *);
    if (s) {
        if (s > sizeof(recycle_on_stack)) {
            recycle = (PyObject **)PyMem_MALLOC(s);
            if (recycle == NULL) {
                PyErr_NoMemory();
                goto Error;
            }
        }
        memcpy(recycle, &item[ilow], s);
    }

    if (d < 0) { /* Delete -d items */
        Py_ssize_t tail;
        tail = (Py_SIZE(a) - ihigh) * sizeof(PyObject *);
        memmove(&item[ihigh+d], &item[ihigh], tail);
        if (list_resize(a, Py_SIZE(a) + d) < 0) {
            memmove(&item[ihigh], &item[ihigh+d], tail);
            memcpy(&item[ilow], recycle, s);
            goto Error;
        }
        item = a->ob_item;
    }
    else if (d > 0) { /* Insert d items */
        k = Py_SIZE(a);
        if (list_resize(a, k+d) < 0)
            goto Error;
        item = a->ob_item;
        memmove(&item[ihigh+d], &item[ihigh],
            (k - ihigh)*sizeof(PyObject *));
    }
    for (k = 0; k < n; k++, ilow++) {
        PyObject *w = vitem[k];
        Py_XINCREF(w);
        item[ilow] = w;
    }
    for (k = norig - 1; k >= 0; --k)
        Py_XDECREF(recycle[k]);
    result = 0;
 Error:
    if (recycle != recycle_on_stack)
        PyMem_FREE(recycle);
    Py_XDECREF(v_as_SF);
    return result;
#undef b
}

6.2.5. 调整大小

无论是append、insert还是remove都会调用list_resize,重新调整list的allocated属性。list_resize源码如下:

// listobject.c
static int
list_resize(PyListObject *self, Py_ssize_t newsize)
{
    PyObject **items;
    size_t new_allocated, num_allocated_bytes;
    Py_ssize_t allocated = self->allocated;

    if (allocated >= newsize && newsize >= (allocated >> 1)) {
        assert(self->ob_item != NULL || newsize == 0);
        Py_SIZE(self) = newsize;
        return 0;
    }

    new_allocated = (size_t)newsize + (newsize >> 3) + (newsize < 9 ? 3 : 6);
    if (new_allocated > (size_t)PY_SSIZE_T_MAX / sizeof(PyObject *)) {
        PyErr_NoMemory();
        return -1;
    }

    if (newsize == 0)
        new_allocated = 0;
    num_allocated_bytes = new_allocated * sizeof(PyObject *);
    items = (PyObject **)PyMem_Realloc(self->ob_item, num_allocated_bytes);
    if (items == NULL) {
        PyErr_NoMemory();
        return -1;
    }
    self->ob_item = items;
    Py_SIZE(self) = newsize;
    self->allocated = new_allocated;
    return 0;
}

可以看出:

  • 如果new size小于等于allocated,且大于等于allocated/2,则不修改allocated,仅仅将list的size设置为new size;
  • 否则重新计算allocated 值并realloc;
    值得注意的是resize会扩大内存也会收缩内存。

6.3. List对象的特性

支持tp_as_sequence、tp_as_mapping两种操作。

6.3.1. 序列操作

// listobject.c
&list_as_sequence,                        /* tp_as_sequence */
// listobject.c
static PySequenceMethods list_as_sequence = {
    (lenfunc)list_length,              /* sq_length */
    (binaryfunc)list_concat,           /* sq_concat */
    (ssizeargfunc)list_repeat,         /* sq_repeat */
    (ssizeargfunc)list_item,           /* sq_item */
    0,                                 /* sq_slice */
    (ssizeobjargproc)list_ass_item,    /* sq_ass_item */
    0,                                 /* sq_ass_slice */
    (objobjproc)list_contains,         /* sq_contains */
    (binaryfunc)list_inplace_concat,   /* sq_inplace_concat */
    (ssizeargfunc)list_inplace_repeat, /* sq_inplace_repeat */
};

其中:

  • list_length
len([1, 2, 3, 4, 5])
  • list_concat
[0] + [1]
  • list_repeat
[0]*10
  • list_item:暂时没有找到相应Python语句;
  • list_ass_item
test = [0, 1, 2, 3, 4]
test[0] = 100
  • list_contains
test = [0, 1, 2]
0 in test
  • list_inplace_concat
test = [0, 1, 2]
test += [3]
  • list_inplace_repeat
test = [0, 1, 2]
test *= 10

6.3.2. 关联操作

// listobject.c
&list_as_mapping,                          /* tp_as_mapping */
// listobject.c
static PyMappingMethods list_as_mapping = {
    (lenfunc)list_length,
    (binaryfunc)list_subscript,
    (objobjargproc)list_ass_subscript
};

其中:

  • list_subscript
test = [0, 1, 2, 3, 4]
test[1]
test[0:3]

a[1]会走index case,a[0:3]会走slice case

  • list_ass_subscript
test = [0, 1, 2, 3, 4]
test[0] = 100
test[1:3] = [1000]

test[0]会走list_ass_subscript方法的index分支,test[1:3]会走slice分支;

6.3.3. to string

// listobject.c
(reprfunc)list_repr,                        /* tp_repr */
0,                                          /* tp_str */

6.3.4. hash

// listobject.c
PyObject_HashNotImplemented,                /* tp_hash */

6.3.5. 比较

// listobject.c
list_richcompare,                           /* tp_richcompare */

6.3.6. 内置方法

// listobject.c
list_methods,                              /* tp_methods */

6.4 参考

  • Python源码剖析
上一篇:cocoa编程第4版 8.6 挑战2 解答


下一篇:【学习排序】 Learning to Rank 中Listwise关于ListNet算法讲解及实现