6.1. List对象
List对象是“变长对象”。
6.1.1. Python中的创建
Python中List对象最重要的创建方法为PyList_New,如下Python语句最终会调用到PyList_New:
test = [1, 2, 3, 4, 5]
6.1.2. PyList_New的C调用栈
// pystate.c
PyInterpreterState_New
// ceval.c
=>_PyEval_EvalFrameDefault (case BUILD_LIST)
// listobject.c
=> PyList_New
6.1.3. PyList_New源码
// listobject.c
PyObject *
PyList_New(Py_ssize_t size)
{
PyListObject *op;
#ifdef SHOW_ALLOC_COUNT
static int initialized = 0;
if (!initialized) {
Py_AtExit(show_alloc);
initialized = 1;
}
#endif
if (size < 0) {
PyErr_BadInternalCall();
return NULL;
}
if (numfree) {
numfree--;
op = free_list[numfree];
_Py_NewReference((PyObject *)op);
#ifdef SHOW_ALLOC_COUNT
count_reuse++;
#endif
} else {
op = PyObject_GC_New(PyListObject, &PyList_Type);
if (op == NULL)
return NULL;
#ifdef SHOW_ALLOC_COUNT
count_alloc++;
#endif
}
if (size <= 0)
op->ob_item = NULL;
else {
op->ob_item = (PyObject **) PyMem_Calloc(size, sizeof(PyObject *));
if (op->ob_item == NULL) {
Py_DECREF(op);
return PyErr_NoMemory();
}
}
Py_SIZE(op) = size;
op->allocated = size;
_PyObject_GC_TRACK(op);
return (PyObject *) op;
}
可以看到:
- List对象数据结构:
// listobject.h
typedef struct {
PyObject_VAR_HEAD
PyObject **ob_item;
Py_ssize_t allocated;
} PyListObject;
ob_size和allocated并不一样,类似于STL中的vector的size和capacity。
- List对象缓冲池:只缓冲PyListObject对象指针,里面包含的ob_item全部被释放。List对象缓冲区大小为80。
// listobject.c
#ifndef PyList_MAXFREELIST
#define PyList_MAXFREELIST 80
#endif
static PyListObject *free_list[PyList_MAXFREELIST];
static int numfree = 0;
static void
list_dealloc(PyListObject *op)
{
Py_ssize_t i;
PyObject_GC_UnTrack(op);
Py_TRASHCAN_SAFE_BEGIN(op)
if (op->ob_item != NULL) {
/* Do it backwards, for Christian Tismer.
There's a simple test case where somehow this reduces
thrashing when a *very* large list is created and
immediately deleted. */
i = Py_SIZE(op);
while (--i >= 0) {
Py_XDECREF(op->ob_item[i]);
}
PyMem_FREE(op->ob_item);
}
if (numfree < PyList_MAXFREELIST && PyList_CheckExact(op))
free_list[numfree++] = op;
else
Py_TYPE(op)->tp_free((PyObject *)op);
Py_TRASHCAN_SAFE_END(op)
}
6.2. List对象的维护
6.2.1. 设置元素
test = [0, 1, 2, 3, 4]
test[0] = 100
上面的Python代码会调用C中的list_ass_subscript方法,由于是index case,所以最终会调用list_ass_item方法。需要注意,此处将v的引用计数加1,并且将之前的引用减1。
// listobject.c
static PySequenceMethods list_as_sequence = {
(lenfunc)list_length, /* sq_length */
(binaryfunc)list_concat, /* sq_concat */
(ssizeargfunc)list_repeat, /* sq_repeat */
(ssizeargfunc)list_item, /* sq_item */
0, /* sq_slice */
(ssizeobjargproc)list_ass_item, /* sq_ass_item */
0, /* sq_ass_slice */
(objobjproc)list_contains, /* sq_contains */
(binaryfunc)list_inplace_concat, /* sq_inplace_concat */
(ssizeargfunc)list_inplace_repeat, /* sq_inplace_repeat */
};
static int
list_ass_item(PyListObject *a, Py_ssize_t i, PyObject *v)
{
if (i < 0 || i >= Py_SIZE(a)) {
PyErr_SetString(PyExc_IndexError,
"list assignment index out of range");
return -1;
}
if (v == NULL)
return list_ass_slice(a, i, i+1, v);
Py_INCREF(v);
Py_SETREF(a->ob_item[i], v);
return 0;
}
Py_SETREF宏定义如下:
// object.h
#define Py_SETREF(op, op2) \
do { \
PyObject *_py_tmp = (PyObject *)(op); \
(op) = (op2); \
Py_DECREF(_py_tmp); \
} while (0)
6.2.2. 追加元素
test = []
test.append(1000)
上面的Python代码会调用C中的list_append方法,最终会调用app1方法。
// listobject.h
#define LIST_APPEND_METHODDEF \
{"append", (PyCFunction)list_append, METH_O, list_append__doc__},
#define PyList_SET_ITEM(op, i, v) (((PyListObject *)(op))->ob_item[i] = (v))
#define PyList_GET_SIZE(op) (assert(PyList_Check(op)),Py_SIZE(op))
app1方法中获取List对象里的ob_item长度,调整ob_item大小,对传入的v增加引用,并设置第n位为v。
// listobject.c
static PyMethodDef list_methods[] = {
// sth.
LIST_APPEND_METHODDEF
// sth.
};
static PyObject *
list_append(PyListObject *self, PyObject *object)
{
if (app1(self, object) == 0)
Py_RETURN_NONE;
return NULL;
}
static int
app1(PyListObject *self, PyObject *v)
{
Py_ssize_t n = PyList_GET_SIZE(self);
assert (v != NULL);
if (n == PY_SSIZE_T_MAX) {
PyErr_SetString(PyExc_OverflowError,
"cannot add more objects to list");
return -1;
}
if (list_resize(self, n+1) < 0)
return -1;
Py_INCREF(v);
PyList_SET_ITEM(self, n, v);
return 0;
}
6.2.3. 插入元素
test = [1, 2, 3, 4, 5]
test.insert(0, 10)
test.insert(-1, 100)
上面的Python代码会调用C中的list_insert方法,最终会调用ins1方法。
// listobject.h
#define LIST_INSERT_METHODDEF \
{"insert", (PyCFunction)list_insert, METH_FASTCALL, list_insert__doc__},
static PyObject *
list_insert_impl(PyListObject *self, Py_ssize_t index, PyObject *object);
static PyObject *
list_insert(PyListObject *self, PyObject *const *args, Py_ssize_t nargs)
{
PyObject *return_value = NULL;
Py_ssize_t index;
PyObject *object;
if (!_PyArg_ParseStack(args, nargs, "nO:insert",
&index, &object)) {
goto exit;
}
return_value = list_insert_impl(self, index, object);
exit:
return return_value;
}
在ins1方法中调整ob_item大小,计算插入位置,对插入位置进行容错处理,将插入位置之后的元素全部向后移一位,对传入的v增加引用,并设置第where位为v。插入元素支持传入负值。
// listobject.c
static PyMethodDef list_methods[] = {
// sth.
LIST_INSERT_METHODDEF
// sth.
};
static PyObject *
list_insert_impl(PyListObject *self, Py_ssize_t index, PyObject *object)
{
if (ins1(self, index, object) == 0)
Py_RETURN_NONE;
return NULL;
}
static int
ins1(PyListObject *self, Py_ssize_t where, PyObject *v)
{
Py_ssize_t i, n = Py_SIZE(self);
PyObject **items;
if (v == NULL) {
PyErr_BadInternalCall();
return -1;
}
if (n == PY_SSIZE_T_MAX) {
PyErr_SetString(PyExc_OverflowError,
"cannot add more objects to list");
return -1;
}
if (list_resize(self, n+1) < 0)
return -1;
if (where < 0) {
where += n;
if (where < 0)
where = 0;
}
if (where > n)
where = n;
items = self->ob_item;
for (i = n; --i >= where; )
items[i+1] = items[i];
Py_INCREF(v);
items[where] = v;
return 0;
}
6.2.4. 删除元素
test = [1, 2, 3, 4, 5]
test.remove(3)
上面的Python代码会调用C中的list_remove方法。
// listobject.h
#define LIST_REMOVE_METHODDEF \
{"remove", (PyCFunction)list_remove, METH_O, list_remove__doc__},
list_remove方法遍历整个list,发现第一个匹配上的对象,调用list_ass_slice方法进行删除。
// listobject.c
static PyMethodDef list_methods[] = {
// sth.
LIST_REMOVE_METHODDEF
// sth.
};
static PyObject *
list_remove(PyListObject *self, PyObject *value)
{
Py_ssize_t i;
for (i = 0; i < Py_SIZE(self); i++) {
int cmp = PyObject_RichCompareBool(self->ob_item[i], value, Py_EQ);
if (cmp > 0) {
if (list_ass_slice(self, i, i+1,
(PyObject *)NULL) == 0)
Py_RETURN_NONE;
return NULL;
}
else if (cmp < 0)
return NULL;
}
PyErr_SetString(PyExc_ValueError, "list.remove(x): x not in list");
return NULL;
}
list_ass_slice并不是专门用于删除的方法,根据方法的注释可以看出,由于list_remove方法调用list_ass_slice,传入NULL,所以相当于是删除。
a[ilow:ihigh] = v if v != NULL.
del a[ilow:ihigh] if v == NULL.
list_ass_slice的实现主要是用memmove来移动内存。值得注意的是此处使用recycle_on_stack节省对堆上内存的使用。
// listobject.c
static int
list_ass_slice(PyListObject *a, Py_ssize_t ilow, Py_ssize_t ihigh, PyObject *v)
{
PyObject *recycle_on_stack[8];
PyObject **recycle = recycle_on_stack;
PyObject **item;
PyObject **vitem = NULL;
PyObject *v_as_SF = NULL;
Py_ssize_t n;
Py_ssize_t norig;
Py_ssize_t d;
Py_ssize_t k;
size_t s;
int result = -1;
#define b ((PyListObject *)v)
if (v == NULL)
n = 0;
else {
// do sth.
}
if (ilow < 0)
ilow = 0;
else if (ilow > Py_SIZE(a))
ilow = Py_SIZE(a);
if (ihigh < ilow)
ihigh = ilow;
else if (ihigh > Py_SIZE(a))
ihigh = Py_SIZE(a);
norig = ihigh - ilow;
assert(norig >= 0);
d = n - norig;
if (Py_SIZE(a) + d == 0) {
Py_XDECREF(v_as_SF);
return _list_clear(a);
}
item = a->ob_item;
s = norig * sizeof(PyObject *);
if (s) {
if (s > sizeof(recycle_on_stack)) {
recycle = (PyObject **)PyMem_MALLOC(s);
if (recycle == NULL) {
PyErr_NoMemory();
goto Error;
}
}
memcpy(recycle, &item[ilow], s);
}
if (d < 0) { /* Delete -d items */
Py_ssize_t tail;
tail = (Py_SIZE(a) - ihigh) * sizeof(PyObject *);
memmove(&item[ihigh+d], &item[ihigh], tail);
if (list_resize(a, Py_SIZE(a) + d) < 0) {
memmove(&item[ihigh], &item[ihigh+d], tail);
memcpy(&item[ilow], recycle, s);
goto Error;
}
item = a->ob_item;
}
else if (d > 0) { /* Insert d items */
k = Py_SIZE(a);
if (list_resize(a, k+d) < 0)
goto Error;
item = a->ob_item;
memmove(&item[ihigh+d], &item[ihigh],
(k - ihigh)*sizeof(PyObject *));
}
for (k = 0; k < n; k++, ilow++) {
PyObject *w = vitem[k];
Py_XINCREF(w);
item[ilow] = w;
}
for (k = norig - 1; k >= 0; --k)
Py_XDECREF(recycle[k]);
result = 0;
Error:
if (recycle != recycle_on_stack)
PyMem_FREE(recycle);
Py_XDECREF(v_as_SF);
return result;
#undef b
}
6.2.5. 调整大小
无论是append、insert还是remove都会调用list_resize,重新调整list的allocated属性。list_resize源码如下:
// listobject.c
static int
list_resize(PyListObject *self, Py_ssize_t newsize)
{
PyObject **items;
size_t new_allocated, num_allocated_bytes;
Py_ssize_t allocated = self->allocated;
if (allocated >= newsize && newsize >= (allocated >> 1)) {
assert(self->ob_item != NULL || newsize == 0);
Py_SIZE(self) = newsize;
return 0;
}
new_allocated = (size_t)newsize + (newsize >> 3) + (newsize < 9 ? 3 : 6);
if (new_allocated > (size_t)PY_SSIZE_T_MAX / sizeof(PyObject *)) {
PyErr_NoMemory();
return -1;
}
if (newsize == 0)
new_allocated = 0;
num_allocated_bytes = new_allocated * sizeof(PyObject *);
items = (PyObject **)PyMem_Realloc(self->ob_item, num_allocated_bytes);
if (items == NULL) {
PyErr_NoMemory();
return -1;
}
self->ob_item = items;
Py_SIZE(self) = newsize;
self->allocated = new_allocated;
return 0;
}
可以看出:
- 如果new size小于等于allocated,且大于等于allocated/2,则不修改allocated,仅仅将list的size设置为new size;
- 否则重新计算allocated 值并realloc;
值得注意的是resize会扩大内存也会收缩内存。
6.3. List对象的特性
支持tp_as_sequence、tp_as_mapping两种操作。
6.3.1. 序列操作
// listobject.c
&list_as_sequence, /* tp_as_sequence */
// listobject.c
static PySequenceMethods list_as_sequence = {
(lenfunc)list_length, /* sq_length */
(binaryfunc)list_concat, /* sq_concat */
(ssizeargfunc)list_repeat, /* sq_repeat */
(ssizeargfunc)list_item, /* sq_item */
0, /* sq_slice */
(ssizeobjargproc)list_ass_item, /* sq_ass_item */
0, /* sq_ass_slice */
(objobjproc)list_contains, /* sq_contains */
(binaryfunc)list_inplace_concat, /* sq_inplace_concat */
(ssizeargfunc)list_inplace_repeat, /* sq_inplace_repeat */
};
其中:
- list_length
len([1, 2, 3, 4, 5])
- list_concat
[0] + [1]
- list_repeat
[0]*10
- list_item:暂时没有找到相应Python语句;
- list_ass_item
test = [0, 1, 2, 3, 4]
test[0] = 100
- list_contains
test = [0, 1, 2]
0 in test
- list_inplace_concat
test = [0, 1, 2]
test += [3]
- list_inplace_repeat
test = [0, 1, 2]
test *= 10
6.3.2. 关联操作
// listobject.c
&list_as_mapping, /* tp_as_mapping */
// listobject.c
static PyMappingMethods list_as_mapping = {
(lenfunc)list_length,
(binaryfunc)list_subscript,
(objobjargproc)list_ass_subscript
};
其中:
- list_subscript
test = [0, 1, 2, 3, 4]
test[1]
test[0:3]
a[1]会走index case,a[0:3]会走slice case
- list_ass_subscript
test = [0, 1, 2, 3, 4]
test[0] = 100
test[1:3] = [1000]
test[0]会走list_ass_subscript方法的index分支,test[1:3]会走slice分支;
6.3.3. to string
// listobject.c
(reprfunc)list_repr, /* tp_repr */
0, /* tp_str */
6.3.4. hash
// listobject.c
PyObject_HashNotImplemented, /* tp_hash */
6.3.5. 比较
// listobject.c
list_richcompare, /* tp_richcompare */
6.3.6. 内置方法
// listobject.c
list_methods, /* tp_methods */
6.4 参考
- Python源码剖析