3.1. 整数对象
整数对象是“变长对象”。
3.1.1. Python中的创建
Python中整数对象最重要的创建方法为PyLong_FromLong,如下Python语句最终会调用到PyLong_FromLong:
a = 1
b = int(1)
3.1.2. PyLong_FromLong的C调用栈
源码经过词法、语法解析后,在构建AST(ast.c)的过程中遇到数字字面量,最终调到PyLong_FromLong,调用顺序如下:
// ast.c
ast_for_expr
=>ast_for_power
=>ast_for_atom_expr
=>ast_for_atom (case NUMBER)
=>parsenumber
=>parsenumber_raw
// longobject.c
=> PyLong_FromLong
3.1.3. PyLong_FromLong源码
// longobject.c
/* Create a Python int object from a C long.
 *
 * The value is first handed to CHECK_SMALL_INT (macro not shown in this
 * excerpt -- presumably it returns a cached object for small values; confirm
 * against longobject.c).  Otherwise the magnitude of ival is stored in
 * PyLong_SHIFT-bit digits, least significant digit first, and the sign of
 * ival is carried by the sign of Py_SIZE(v).
 *
 * Returns the new object, or NULL when _PyLong_New() returns NULL.
 */
PyObject *
PyLong_FromLong(long ival)
{
PyLongObject *v;
/* Magnitude of ival. */
unsigned long abs_ival;
unsigned long t; /* unsigned so >> doesn't propagate sign bit */
/* Number of digits needed; only computed on the multi-digit path. */
int ndigits = 0;
/* -1, 0 or 1. */
int sign;
CHECK_SMALL_INT(ival);
if (ival < 0) {
/* negate: can't write this as abs_ival = -ival since that invokes undefined behaviour when ival is LONG_MIN */
abs_ival = 0U-(unsigned long)ival;
sign = -1;
}
else {
abs_ival = (unsigned long)ival;
sign = ival == 0 ? 0 : 1;
}
/* Fast path for single-digit ints */
/* Taken when abs_ival has no bits set at or above bit PyLong_SHIFT. */
if (!(abs_ival >> PyLong_SHIFT)) {
v = _PyLong_New(1);
if (v) {
/* With exactly one digit, the size field is just the sign. */
Py_SIZE(v) = sign;
v->ob_digit[0] = Py_SAFE_DOWNCAST(
abs_ival, unsigned long, digit);
}
/* v is NULL here when the allocation failed. */
return (PyObject*)v;
}
#if PyLong_SHIFT==15
// Translated from the original note: 64-bit machines do not run the code in this branch.
#endif
/* Larger numbers: loop to determine number of digits */
t = abs_ival;
while (t) {
++ndigits;
t >>= PyLong_SHIFT;
}
v = _PyLong_New(ndigits);
if (v != NULL) {
digit *p = v->ob_digit;
/* Size is the digit count, negated for negative values. */
Py_SIZE(v) = ndigits*sign;
t = abs_ival;
/* Peel off PyLong_SHIFT bits at a time, lowest bits first. */
while (t) {
*p++ = Py_SAFE_DOWNCAST(
t & PyLong_MASK, unsigned long, digit);
t >>= PyLong_SHIFT;
}
}
return (PyObject *)v;
}
3.1.4. 整数对象的C数据结构
此处使用了C语言的一个技巧:把只含一个元素的数组放在struct的末尾,分配内存时按实际需要的元素个数多分配空间,这样每个struct对象都可以拥有大小可变的数组(参见《深度探索C++对象模型》P19)。
// longobject.c (as stated in the original note)
// NOTE(review): upstream CPython appears to declare this typedef in Include/longobject.h -- confirm.
typedef struct _longobject PyLongObject;
// longintrepr.h
/* Int object layout: the PyObject_VAR_HEAD header followed by the digit array. */
struct _longobject {
PyObject_VAR_HEAD
/* Declared with one element; _PyLong_New() sizes the real allocation as
   offsetof(PyLongObject, ob_digit) + size*sizeof(digit). */
digit ob_digit[1];
};
从数据结构也可以看出Python中的整数对象是“变长对象”。
3.1.5. 小整数和大整数
从上述源码中可以看到,整数分为小整数、介于小整数和大整数之间的整数、大整数三类,处理方式不尽相同。对于负数,先取绝对值(abs_ival),再把符号记录在ob_size的正负上(Py_SIZE(v) = sign 或 ndigits*sign)。
- 小整数:
CHECK_SMALL_INT用于处理小整数,[-5, 257)在Python中被视为小整数。
// longobject.c
/* Size of the small-int cache: NSMALLPOSINTS non-negative values plus
   NSMALLNEGINTS negative values, i.e. the range
   [-NSMALLNEGINTS, NSMALLPOSINTS).  Each default applies only when the
   macro has not already been defined. */
#ifndef NSMALLPOSINTS
#define NSMALLPOSINTS 257
#endif
#ifndef NSMALLNEGINTS
#define NSMALLNEGINTS 5
#endif
small_ints在_PyLong_Init中被初始化:
// longobject.c
/* Storage for the cached small ints, one slot per value;
   get_small_int() indexes it with ival + NSMALLNEGINTS. */
static PyLongObject small_ints[NSMALLNEGINTS + NSMALLPOSINTS];
如果在[-5, 257)范围内,会直接返回存于small_ints的对象,所以小整数只会存在一个实例:
// longobject.c
/* Return the cached int object for ival, with its reference count
 * incremented.
 *
 * ival must satisfy -NSMALLNEGINTS <= ival < NSMALLPOSINTS; this is only
 * checked by the assert.
 */
static PyObject *
get_small_int(sdigit ival)
{
PyObject *v;
assert(-NSMALLNEGINTS <= ival && ival < NSMALLPOSINTS);
/* Shift by NSMALLNEGINTS so that the smallest value maps to index 0. */
v = (PyObject *)&small_ints[ival + NSMALLNEGINTS];
Py_INCREF(v);
#ifdef COUNT_ALLOCS
/* Counters bumped only when COUNT_ALLOCS is defined. */
if (ival >= 0)
quick_int_allocs++;
else
quick_neg_int_allocs++;
#endif
return v;
}
- 介于小整数和大整数之间的整数:[257, 1073741824),即只需要一个digit就能存下的整数(1073741824 = 2^30,对应PyLong_SHIFT为30的情况;负数按绝对值判断)
// longobject.c
/* Excerpt from the body of PyLong_FromLong(): the one-digit case. */
/* Fast path for single-digit ints */
/* Taken when abs_ival has no bits set at or above bit PyLong_SHIFT. */
if (!(abs_ival >> PyLong_SHIFT)) {
v = _PyLong_New(1);
if (v) {
/* With exactly one digit, the size field is just the sign. */
Py_SIZE(v) = sign;
v->ob_digit[0] = Py_SAFE_DOWNCAST(
abs_ival, unsigned long, digit);
}
/* v is NULL here when the allocation failed. */
return (PyObject*)v;
}
- 大整数:[1073741824, +∞),即需要多个digit才能存下的整数(负数按绝对值判断)
// longobject.c
/* Excerpt from the body of PyLong_FromLong(): the multi-digit case. */
/* First pass: count how many PyLong_SHIFT-bit digits abs_ival needs. */
t = abs_ival;
while (t) {
++ndigits;
t >>= PyLong_SHIFT;
}
v = _PyLong_New(ndigits);
if (v != NULL) {
digit *p = v->ob_digit;
/* Size is the digit count, negated for negative values. */
Py_SIZE(v) = ndigits*sign;
t = abs_ival;
/* Second pass: store the digits, lowest PyLong_SHIFT bits first. */
while (t) {
*p++ = Py_SAFE_DOWNCAST(
t & PyLong_MASK, unsigned long, digit);
t >>= PyLong_SHIFT;
}
}
/* v is NULL here when the allocation failed. */
return (PyObject *)v;
Python 3的整数不再有大小限制,官方文档(What's New In Python 3.0)中的说明如下:
The sys.maxint constant was removed, since there is no longer a limit to the value of integers. However, sys.maxsize can be used as an integer larger than any practical list or string index. It conforms to the implementation’s “natural” integer size and is typically the same as sys.maxint in previous releases on the same platform (assuming the same build options).
3.1.6. PyLong_FromLong中其它需要关注的
- _PyLong_New,此函数需要注意内存分配大小的计算方式,以及最后调用PyObject_INIT_VAR宏来初始化变长对象:
// longobject.c
/* Allocate an int object with room for `size` digits.
 *
 * Returns NULL with OverflowError set when size exceeds MAX_LONG_DIGITS,
 * and NULL after calling PyErr_NoMemory() when the allocation fails.
 * On success the header is initialised through PyObject_INIT_VAR; the digit
 * array itself is not written here.
 */
PyLongObject *
_PyLong_New(Py_ssize_t size)
{
PyLongObject *result;
/* Number of bytes needed is: offsetof(PyLongObject, ob_digit) + sizeof(digit)*size. Previous incarnations of this code used sizeof(PyVarObject) instead of the offsetof, but this risks being incorrect in the presence of padding between the PyVarObject header and the digits. */
if (size > (Py_ssize_t)MAX_LONG_DIGITS) {
PyErr_SetString(PyExc_OverflowError,
"too many digits in integer");
return NULL;
}
result = PyObject_MALLOC(offsetof(PyLongObject, ob_digit) + size*sizeof(digit));
if (!result) {
PyErr_NoMemory();
return NULL;
}
/* Initialise the object header with PyLong_Type and the digit count. */
return (PyLongObject*)PyObject_INIT_VAR(result, &PyLong_Type, size);
}
- PyObject_INIT_VAR,用于初始化变长对象的宏:
// object.h
/* Field accessors: each casts ob to the header type and names one field,
   so the result can be read or assigned. */
#define Py_REFCNT(ob) (((PyObject*)(ob))->ob_refcnt)
#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
#define Py_SIZE(ob) (((PyVarObject*)(ob))->ob_size)
/* Expression whose final operand sets the reference count of op to 1.
   The _Py_INC_TPALLOCS / _Py_INC_REFTOTAL macros and their *_COMMA
   companions are defined elsewhere and not shown here. */
#define _Py_NewReference(op) ( \
_Py_INC_TPALLOCS(op) _Py_COUNT_ALLOCS_COMMA \
_Py_INC_REFTOTAL _Py_REF_DEBUG_COMMA \
Py_REFCNT(op) = 1)
// objimpl.h
/* Set the type of op, run _Py_NewReference on it, and evaluate to op.
   op appears several times in the expansion, so it is evaluated more
   than once. */
#define PyObject_INIT(op, typeobj) \
( Py_TYPE(op) = (typeobj), _Py_NewReference((PyObject *)(op)), (op) )
/* Same as PyObject_INIT, but first stores size in the ob_size field of op. */
#define PyObject_INIT_VAR(op, typeobj, size) \
( Py_SIZE(op) = (size), PyObject_INIT((op), (typeobj)) )
release版本下_Py_INC_TPALLOCS、_Py_COUNT_ALLOCS_COMMA、_Py_INC_REFTOTAL、_Py_REF_DEBUG_COMMA都被定义为空(展开后什么也没有),所以PyObject_INIT_VAR其实就是对PyVarObject对象的ob_size、ob_type和ob_refcnt进行了赋值。
- Py_SAFE_DOWNCAST,在release版本下只是一个强转;在定义了Py_DEBUG的版本下还会先用assert检查转换后的值没有丢失信息:
/* Cast VALUE to the type NARROW.
   With Py_DEBUG defined, first assert that casting VALUE to NARROW and back
   to WIDE gives VALUE again; VALUE is then evaluated more than once.
   Without Py_DEBUG this is a plain cast and WIDE is unused. */
#ifdef Py_DEBUG
#define Py_SAFE_DOWNCAST(VALUE, WIDE, NARROW) \
(assert((WIDE)(NARROW)(VALUE) == (VALUE)), (NARROW)(VALUE))
#else
#define Py_SAFE_DOWNCAST(VALUE, WIDE, NARROW) (NARROW)(VALUE)
#endif
3.2. 整数对象的特性
3.2.1. 数值计算
// longobject.c
&long_as_number, /* tp_as_number */
整数对象的数值计算由long_as_number定义:
// longobject.c
/* Numeric slot table for the int type.  Entries are positional; the
   trailing comment on each line names the slot it fills.  Every in-place
   slot is 0 here. */
static PyNumberMethods long_as_number = {
(binaryfunc)long_add, /*nb_add*/
(binaryfunc)long_sub, /*nb_subtract*/
(binaryfunc)long_mul, /*nb_multiply*/
long_mod, /*nb_remainder*/
long_divmod, /*nb_divmod*/
long_pow, /*nb_power*/
(unaryfunc)long_neg, /*nb_negative*/
(unaryfunc)long_long, /*nb_positive*/
(unaryfunc)long_abs, /*nb_absolute*/
(inquiry)long_bool, /*nb_bool*/
(unaryfunc)long_invert, /*nb_invert*/
long_lshift, /*nb_lshift*/
(binaryfunc)long_rshift, /*nb_rshift*/
long_and, /*nb_and*/
long_xor, /*nb_xor*/
long_or, /*nb_or*/
long_long, /*nb_int*/
0, /*nb_reserved*/
long_float, /*nb_float*/
0, /* nb_inplace_add */
0, /* nb_inplace_subtract */
0, /* nb_inplace_multiply */
0, /* nb_inplace_remainder */
0, /* nb_inplace_power */
0, /* nb_inplace_lshift */
0, /* nb_inplace_rshift */
0, /* nb_inplace_and */
0, /* nb_inplace_xor */
0, /* nb_inplace_or */
long_div, /* nb_floor_divide */
long_true_divide, /* nb_true_divide */
0, /* nb_inplace_floor_divide */
0, /* nb_inplace_true_divide */
long_long, /* nb_index */
};
3.2.2. to string
// longobject.c
long_to_decimal_string, /* tp_repr */
long_to_decimal_string, /* tp_str */
3.2.3. hash
// longobject.c
(hashfunc)long_hash, /* tp_hash */
3.2.4. 比较
// longobject.c
long_richcompare, /* tp_richcompare */
3.2.5. 内置方法
// longobject.c
long_methods, /* tp_methods */
3.2.6. 内置属性
// longobject.c
long_getset, /* tp_getset */