Numpy数据处理函数

Numpy函数介绍

import numpy as np
#sqrt 计算各元素的平方根
arr = np.arange(10)
np.sqrt(arr)
array([0. , 1. , 1.41421356, 1.73205081, 2. ,
2.23606798, 2.44948974, 2.64575131, 2.82842712, 3. ])
# square 计算各元素的平方
arr1 = np.arange(10)
np.square(arr1)
array([ 0, 1, 4, 9, 16, 25, 36, 49, 64, 81], dtype=int32)
# modf将数组的小数和整数部分以两个独立数组的形式返回
arr2 = np.array([1.22,3.55])
np.modf(arr2)
(array([0.22, 0.55]), array([1., 3.]))

利用数组进行数据处理

用Numpy数组表达式代替循环,通常比等价的纯Python实现快上一两个数量级,这就是矢量化计算的强大之处。

points = np.arange(-5,5,0.01) # 1000个间隔相等的点
xs,ys = np.meshgrid(points,points)
xs
array([[-5. , -4.99, -4.98, ..., 4.97, 4.98, 4.99],
[-5. , -4.99, -4.98, ..., 4.97, 4.98, 4.99],
[-5. , -4.99, -4.98, ..., 4.97, 4.98, 4.99],
...,
[-5. , -4.99, -4.98, ..., 4.97, 4.98, 4.99],
[-5. , -4.99, -4.98, ..., 4.97, 4.98, 4.99],
[-5. , -4.99, -4.98, ..., 4.97, 4.98, 4.99]])
ys
array([[-5. , -5. , -5. , ..., -5. , -5. , -5. ],
[-4.99, -4.99, -4.99, ..., -4.99, -4.99, -4.99],
[-4.98, -4.98, -4.98, ..., -4.98, -4.98, -4.98],
...,
[ 4.97, 4.97, 4.97, ..., 4.97, 4.97, 4.97],
[ 4.98, 4.98, 4.98, ..., 4.98, 4.98, 4.98],
[ 4.99, 4.99, 4.99, ..., 4.99, 4.99, 4.99]])
z = np.sqrt(xs ** 2 + ys ** 2)
z
array([[7.07106781, 7.06400028, 7.05693985, ..., 7.04988652, 7.05693985,
7.06400028],
[7.06400028, 7.05692568, 7.04985815, ..., 7.04279774, 7.04985815,
7.05692568],
[7.05693985, 7.04985815, 7.04278354, ..., 7.03571603, 7.04278354,
7.04985815],
...,
[7.04988652, 7.04279774, 7.03571603, ..., 7.0286414 , 7.03571603,
7.04279774],
[7.05693985, 7.04985815, 7.04278354, ..., 7.03571603, 7.04278354,
7.04985815],
[7.06400028, 7.05692568, 7.04985815, ..., 7.04279774, 7.04985815,
7.05692568]])
import matplotlib.pyplot as plt
plt.imshow(z, cmap = plt.cm.gray);plt.colorbar()
plt.title("Image plot")

Numpy数据处理函数

将条件逻辑表述为数组运算

xarr = np.array([1.1,1.2,1.3,1.4,1.5])
yarr = np.array([2.1,2.2,2.3,2.4,2.5])
cond = np.array([True,False,True,True,False])
# cond为True取xarr 否则取yarr
# 传统写法
result = [(x if c else y) for x,y,c in zip(xarr,yarr,cond)]
result
[1.1, 2.2, 1.3, 1.4, 2.5]
# numpy做法
result1 = np.where(cond,xarr,yarr)
result1
array([1.1, 2.2, 1.3, 1.4, 2.5])

where闪亮登场

  • np.where(条件,真值,假值)
  • 传递给where的真值和假值可以是形状不同的数组(只要能与条件数组广播),也可以是标量值
arr3 = np.random.randn(4, 4)
arr3
array([[ 0.6498161 , 0.35784392, -1.47023858, 1.09367264],
[-0.62756846, 0.23898718, 1.41371883, 0.48955242],
[-0.10017446, 0.24327529, 0.04354429, 0.80346031],
[-0.74234979, -0.11921036, -0.11432723, -0.37912988]])
result2 = np.where(arr3>0,2,-2)
result2
array([[ 2, 2, -2, 2],
[-2, 2, 2, 2],
[-2, 2, 2, 2],
[-2, -2, -2, -2]])
result3 = np.where(arr3>0,2,arr3)
result3
array([[ 2. , 2. , -1.47023858, 2. ],
[-0.62756846, 2. , 2. , 2. ],
[-0.10017446, 2. , 2. , 2. ],
[-0.74234979, -0.11921036, -0.11432723, -0.37912988]])
cond1 = np.array([True,True,False,False])
cond2 = np.array([True,False,True,False])
# 如果cond1和cond2都为真,则输出0;如果只有cond1为真,则输出1;如果只有cond2为真,则输出2;如果都为假,则输出3
result4 = np.where(cond1 & cond2, 0, np.where(cond1, 1, np.where(cond2, 2, 3)))
result4
array([0, 1, 2, 3])

Numpy数据处理函数

数学和统计方法

  • sum/mean/std等聚合计算(和,平均值,标准差)
test1 = np.array([[2,2,3,4,5],[6,7,8,9,10]])
test1
array([[ 2, 2, 3, 4, 5],
[ 6, 7, 8, 9, 10]])
np.mean(test1)
5.6
test1.sum()
56
test1.mean(axis=1)
array([3.2, 8. ])
test1.mean(axis=0)
array([4. , 4.5, 5.5, 6.5, 7.5])
test1.cumsum(0) # cumsum 所有元素的累计和 cumprod 所有元素的累计积
array([[ 2, 2, 3, 4, 5],
[ 8, 9, 11, 13, 15]], dtype=int32)
- axis轴,指的是维度:axis=0 表示沿第0维(对每一列)计算,axis=1 表示沿第1维(对每一行)计算
test1.std(axis=0) # 标准差
array([2. , 2.5, 2.5, 2.5, 2.5])
test1.var(axis=0) # 方差
array([4. , 6.25, 6.25, 6.25, 6.25])
test1.argmin(axis=0)
array([0, 0, 0, 0, 0], dtype=int64)
test1.argmax(axis=0)
array([1, 1, 1, 1, 1], dtype=int64)

用于布尔型数组的方法

test2 = np.random.randn(100)
test2
array([ 0.25903273, 0.13939567, -0.10597059, 0.77790221, -0.76341781,
-0.61086865, 0.36002937, -0.1423488 , 1.69071728, 1.63576044,
0.39950296, 1.37198449, 0.454591 , -1.67927663, 0.73649581,
0.88126353, -1.82159175, -0.46323513, -0.30399076, 2.16435963,
-0.79892847, 0.05767935, 0.45429729, 0.96934967, -0.78818112,
-0.8438922 , 0.31373184, 0.05242094, 0.2332054 , 0.26647064,
1.56850088, 0.41425585, -1.62452194, -1.17165311, 0.23586585,
0.45476575, -0.57501697, 1.42377017, 0.00666962, 1.53916711,
0.508553 , -1.37573917, 0.51378532, 1.72682708, -0.76148258,
-1.19819233, -1.05367328, 1.0792924 , 0.80229908, 1.03273504,
0.71938515, 0.28893472, -0.08472809, 1.02170717, 0.03897593,
-0.0693723 , -0.60612239, -0.35538122, -1.09975843, 0.23485432,
-0.4513678 , -0.8119979 , -0.53072714, 1.02247374, 0.52980399,
-1.17365366, -0.4948684 , -0.81596822, 1.10386231, -1.10894077,
1.33491691, 0.21015349, -0.32206128, -0.33041407, -0.06815369,
2.27874416, -0.26642346, -0.95616127, -1.38222481, -0.89619146,
2.70433 , -1.8758817 , -1.61408998, -0.70112051, 0.63143197,
-0.5937125 , -0.82650637, 1.24456287, -0.61903984, -0.45140393,
0.25139079, -0.18882441, -0.61667939, 0.84566077, -1.08506887,
-0.45491845, -1.68915454, 0.58872177, -0.30961048, -0.43431663])
(test2 > 0).sum() # 统计大于0的元素个数(True按1计)
49
# any用于检测数组中是否存在True
bools = np.array([False,False,False,False])
bools.any()
False
# all用于检测数组中所有值是否都是True
bools.all()
False

排序

sort()方法直接修改数组本身

test2 = np.array([11,55,33,44,88])
test2
array([11, 55, 33, 44, 88])
test2.sort()
test2
array([11, 33, 44, 55, 88])
test4 = np.array([[82,222,1,4,5],[62,72,8,93,10]])
test4
array([[ 82, 222, 1, 4, 5],
[ 62, 72, 8, 93, 10]])
# 沿最后一个轴(axis=1)排序,即对每一行内部的元素排序
test4.sort(1)
test4
array([[ 1, 4, 5, 82, 222],
[ 8, 10, 62, 72, 93]])
# 沿axis=0排序,即对每一列内部的元素排序
test4.sort(0)
test4
array([[ 1, 4, 5, 72, 93],
[ 8, 10, 62, 82, 222]])

唯一化以及其他的集合逻辑

np.unique找出唯一值并返回已排序的结果

names = np.array(['Bob','Joe','Will','Bob','Will','Joe','Joe'])
np.unique(names)
array(['Bob', 'Joe', 'Will'], dtype='<U4')

注意:下列函数名(如 intersect1d、union1d、in1d)中的是数字1,不是字母l

方法 说明
unique(x) 计算x中的唯一元素,并返回有序结果
intersect1d(x,y) x和y的公共元素
union1d(x,y) 计算x和y的并集
in1d(x,y) 得到一个表示"x的元素是否包含于y"的布尔型数组(NumPy 2.0 起已弃用,建议改用 np.isin)
setdiff1d(x,y) 集合的差,即元素在x中且不在y中
setxor1d(x,y) 集合的对称差,存在一个数组中,但不同时存在于两个数组中的元素
values = np.array([6,0,0,3,2,5,6])
np.in1d(values,[2,3,6])
array([ True, False, False, True, True, False, True])
上一篇:Tp-validate进阶


下一篇:Flask简介,安装,demo,快速入门