PyTorch Deep Learning Basics
I. Tensor Objects and Their Operations
1. Creating Tensor objects with torch.tensor()
When creating a tensor, you can specify its data type via the dtype argument.
import torch
import numpy as np
print('torch.Tensor defaults to: {}'.format(torch.Tensor(1).dtype))
print('torch.tensor defaults to: {}'.format(torch.tensor(1).dtype))
'''
torch.Tensor defaults to: torch.float32
torch.tensor defaults to: torch.int64
'''
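Beyond the default dtype, the two constructors also treat their argument differently: torch.Tensor(1) allocates an uninitialized tensor with one element, while torch.tensor(1) stores the value 1 itself. A minimal sketch:
print(torch.Tensor(1))  # shape (1,), arbitrary uninitialized contents
print(torch.tensor(1))  # tensor(1): a 0-dim tensor holding the value 1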
# Build a tensor from a list
a = torch.tensor([[1, 2], [3, 4]], dtype=torch.float64)
print(a)
'''
tensor([[1., 2.],
        [3., 4.]], dtype=torch.float64)
'''
# Build a tensor from an np.array
b = torch.tensor(np.array([[1, 2], [3, 4]]), dtype=torch.uint8)
print(b)
'''
tensor([[1, 2],
        [3, 4]], dtype=torch.uint8)
'''
2. Specifying a device with torch.device()
# Place the tensor on a particular device via the device argument
# (running this requires a CUDA-capable GPU)
cuda0 = torch.device('cuda:0')
c = torch.ones((2, 2), device=cuda0)
print(c)
'''
tensor([[1., 1.],
        [1., 1.]], device='cuda:0')
'''
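If a GPU may not be available, a common device-agnostic pattern (a sketch, not from the original text) is to choose the device at runtime:
# Fall back to the CPU when CUDA is unavailable
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
c = torch.ones((2, 2), device=device)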
3. torch.mm performs matrix multiplication
a = torch.tensor([[1, 2], [3, 4]])
b = torch.tensor([[1, 2], [3, 4]])
# Element-wise multiplication
c = a * b
print("Element-wise multiplication:", c)
'''
Element-wise multiplication: tensor([[ 1,  4],
        [ 9, 16]])
'''
# Matrix multiplication
c = torch.mm(a, b)
print("Matrix multiplication:", c)
'''
Matrix multiplication: tensor([[ 7, 10],
        [15, 22]])
'''
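For 2-D inputs, the @ operator and torch.matmul give the same result as torch.mm; torch.matmul is the more general API and also handles batched and broadcast inputs. A small sketch:
print(a @ b)               # same as torch.mm for 2-D tensors
print(torch.matmul(a, b))  # general form: supports batching/broadcasting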
4. torch.clamp clamps values to a range
Commonly used to filter out elements that are too small or too large: values below min are raised to min, and values above max are lowered to max.
a = torch.tensor([[1, 2], [3, 4]])
print(torch.clamp(a, min=2, max=3))
'''
tensor([[2, 2],
        [3, 3]])
'''
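Either bound may be omitted. With only min=0, clamp behaves like ReLU; a minimal sketch:
x = torch.tensor([-2.0, -0.5, 0.0, 1.5])
print(torch.clamp(x, min=0))  # tensor([0.0000, 0.0000, 0.0000, 1.5000])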
5. torch.round rounds to the nearest integer
a = torch.tensor([-1.501, -1.5, -1.499, 0.499, 0.5, 0.501])
print(torch.round(a))
'''
tensor([-2., -2., -1.,  0.,  0.,  1.])
Values round to the nearest integer; exact halves (.5) round to the nearest even integer (banker's rounding), which is why -1.5 -> -2. and 0.5 -> 0.
'''
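A quick check of the ties-to-even behaviour:
print(torch.round(torch.tensor([0.5, 1.5, 2.5, 3.5])))
'''
tensor([0., 2., 2., 4.])
'''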
6. torch.tanh computes the hyperbolic tangent
a = torch.Tensor([-5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5])
print(torch.tanh(a))
'''
tensor([-0.9999, -0.9993, -0.9951, -0.9640, -0.7616,  0.0000,  0.7616,  0.9640,
         0.9951,  0.9993,  0.9999])
'''
7. torch.arange
- torch.arange works like Python's range function
- It takes a start value, an end value (exclusive), and a step (default 1)
for i in range(1, 6):
    print(i)
'''
1
2
3
4
5
'''
# torch.range is deprecated; prefer torch.arange
a = torch.range(1, 5, 2)
print("torch.range(1, 5, 2): ", a)
print("torch.range(1, 5, 2).type: ", a.dtype)
b = torch.arange(1, 5, 2)
print("torch.arange(1, 5, 2): ", b)
print("torch.arange(1, 5, 2).type: ", b.dtype)
'''
torch.range(1, 5, 2): tensor([1., 3., 5.])
torch.range(1, 5, 2).type: torch.float32
torch.arange(1, 5, 2): tensor([1, 3])
torch.arange(1, 5, 2).type: torch.int64
'''
Summary:
- torch.range includes both endpoints, while torch.arange excludes the end (just like Python's range)
- With these integer arguments, torch.range creates a torch.float32 tensor, while torch.arange creates a torch.int64 tensor (see the sketch below)
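torch.arange infers its dtype from the arguments, so float arguments produce a floating-point tensor; a quick sketch:
print(torch.arange(1.0, 5.0, 2.0))        # tensor([1., 3.])
print(torch.arange(1.0, 5.0, 2.0).dtype)  # torch.float32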
8. torch.linspace generates a fixed number of evenly spaced elements over a range
torch.linspace returns a 1-D tensor of n evenly spaced points, including both endpoints
print(torch.linspace(1, 10, 5))
'''
tensor([ 1.0000, 3.2500, 5.5000, 7.7500, 10.0000])
'''
9. torch.ones returns a matrix of all ones
print(torch.ones((3, 3)))
'''
tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]])
'''
10. torch.zeros returns a matrix of all zeros
print(torch.zeros((3, 3)))
'''
tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])
'''
11. torch.rand returns a matrix of samples drawn uniformly from [0, 1)
print(torch.rand(3, 3))
'''
tensor([[0.4122, 0.6305, 0.1612],
        [0.4039, 0.0532, 0.3719],
        [0.4255, 0.2876, 0.3646]])
'''
12. torch.randn returns a matrix of samples drawn from the standard normal distribution
print(torch.randn(3, 3))
'''
tensor([[ 0.4774,  0.4414,  1.5994],
        [-0.0403,  0.5516,  1.1847],
        [ 1.0757,  0.0362,  0.9328]])
'''
13. torch.randint returns a matrix of random integers drawn uniformly from [low, high)
print(torch.randint(1, 5, (3, 3)))
'''
tensor([[3, 2, 2],
        [3, 1, 1],
        [4, 3, 4]])
'''
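All of the random constructors above draw from PyTorch's global generator; torch.manual_seed makes their output reproducible. A minimal sketch:
torch.manual_seed(0)
a = torch.randint(1, 5, (3, 3))
torch.manual_seed(0)
b = torch.randint(1, 5, (3, 3))
print(torch.equal(a, b))  # True: same seed, same draws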
II. Tensor Indexing and Slicing
1. Basic indexing
a = torch.arange(16).view(4, 4)
print(a)
'''
tensor([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11],
        [12, 13, 14, 15]])
'''
print(a[2, 3])
'''tensor(11)'''
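Indexing with one integer per dimension yields a 0-dim tensor; .item() converts it to a plain Python number:
print(a[2, 3].item())  # 11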
2. Slicing
a = torch.rand(3, 3, 3, 3)
print(a)
'''
tensor([[[[0.7442, 0.6572, 0.8844],
          [0.2714, 0.9543, 0.7813],
          [0.2124, 0.6246, 0.2005]],

         [[0.5282, 0.0026, 0.3609],
          [0.6771, 0.5858, 0.9272],
          [0.1732, 0.5411, 0.8177]],

         [[0.4464, 0.2875, 0.5549],
          [0.2754, 0.7300, 0.8469],
          [0.6238, 0.5733, 0.5625]]],


        [[[0.5165, 0.3800, 0.0851],
          [0.0991, 0.7287, 0.9255],
          [0.9094, 0.1059, 0.2375]],

         [[0.8601, 0.7303, 0.3683],
          [0.3715, 0.1885, 0.6126],
          [0.5266, 0.4642, 0.8999]],

         [[0.4903, 0.1294, 0.0379],
          [0.5235, 0.2812, 0.2052],
          [0.3850, 0.2854, 0.8109]]],


        [[[0.4168, 0.4594, 0.1869],
          [0.2987, 0.7347, 0.3478],
          [0.1939, 0.3430, 0.8521]],

         [[0.7731, 0.8609, 0.3416],
          [0.9221, 0.2054, 0.4342],
          [0.6090, 0.6602, 0.1535]],

         [[0.4223, 0.2500, 0.3425],
          [0.7285, 0.1590, 0.2787],
          [0.8561, 0.1645, 0.3753]]]])
'''
# Take the first two entries along the first dimension
print(a[:2])
'''
tensor([[[[0.7442, 0.6572, 0.8844],
          [0.2714, 0.9543, 0.7813],
          [0.2124, 0.6246, 0.2005]],

         [[0.5282, 0.0026, 0.3609],
          [0.6771, 0.5858, 0.9272],
          [0.1732, 0.5411, 0.8177]],

         [[0.4464, 0.2875, 0.5549],
          [0.2754, 0.7300, 0.8469],
          [0.6238, 0.5733, 0.5625]]],


        [[[0.5165, 0.3800, 0.0851],
          [0.0991, 0.7287, 0.9255],
          [0.9094, 0.1059, 0.2375]],

         [[0.8601, 0.7303, 0.3683],
          [0.3715, 0.1885, 0.6126],
          [0.5266, 0.4642, 0.8999]],

         [[0.4903, 0.1294, 0.0379],
          [0.5235, 0.2812, 0.2052],
          [0.3850, 0.2854, 0.8109]]]])
'''
# Take the first two entries along dim 0 and the first entry along dim 1; keep dims 2 and 3 in full
print(a[:2, :1, :, :])
'''
tensor([[[[0.7442, 0.6572, 0.8844],
          [0.2714, 0.9543, 0.7813],
          [0.2124, 0.6246, 0.2005]]],


        [[[0.5165, 0.3800, 0.0851],
          [0.0991, 0.7287, 0.9255],
          [0.9094, 0.1059, 0.2375]]]])
'''
# Take the first two entries along dim 0; along dim 1, take from index 1 (inclusive) to the end; keep dims 2 and 3 in full
print(a[:2, 1:, :, :])
'''
tensor([[[[0.5282, 0.0026, 0.3609],
          [0.6771, 0.5858, 0.9272],
          [0.1732, 0.5411, 0.8177]],

         [[0.4464, 0.2875, 0.5549],
          [0.2754, 0.7300, 0.8469],
          [0.6238, 0.5733, 0.5625]]],


        [[[0.8601, 0.7303, 0.3683],
          [0.3715, 0.1885, 0.6126],
          [0.5266, 0.4642, 0.8999]],

         [[0.4903, 0.1294, 0.0379],
          [0.5235, 0.2812, 0.2052],
          [0.3850, 0.2854, 0.8109]]]])
'''
# A negative index counts from the end: -3: takes from the third-to-last entry through the last one along dim 1 (all three here)
print(a[:2, -3:])
'''
tensor([[[[0.7442, 0.6572, 0.8844],
          [0.2714, 0.9543, 0.7813],
          [0.2124, 0.6246, 0.2005]],

         [[0.5282, 0.0026, 0.3609],
          [0.6771, 0.5858, 0.9272],
          [0.1732, 0.5411, 0.8177]],

         [[0.4464, 0.2875, 0.5549],
          [0.2754, 0.7300, 0.8469],
          [0.6238, 0.5733, 0.5625]]],


        [[[0.5165, 0.3800, 0.0851],
          [0.0991, 0.7287, 0.9255],
          [0.9094, 0.1059, 0.2375]],

         [[0.8601, 0.7303, 0.3683],
          [0.3715, 0.1885, 0.6126],
          [0.5266, 0.4642, 0.8999]],

         [[0.4903, 0.1294, 0.0379],
          [0.5235, 0.2812, 0.2052],
          [0.3850, 0.2854, 0.8109]]]])
'''
# A second colon sets the step: 0:3:2 takes every other entry along dim 2, 0:3:1 takes every entry along dim 3
print(a[:, :, 0:3:2, 0:3:1])
'''
tensor([[[[0.7442, 0.6572, 0.8844],
          [0.2124, 0.6246, 0.2005]],

         [[0.5282, 0.0026, 0.3609],
          [0.1732, 0.5411, 0.8177]],

         [[0.4464, 0.2875, 0.5549],
          [0.6238, 0.5733, 0.5625]]],


        [[[0.5165, 0.3800, 0.0851],
          [0.9094, 0.1059, 0.2375]],

         [[0.8601, 0.7303, 0.3683],
          [0.5266, 0.4642, 0.8999]],

         [[0.4903, 0.1294, 0.0379],
          [0.3850, 0.2854, 0.8109]]],


        [[[0.4168, 0.4594, 0.1869],
          [0.1939, 0.3430, 0.8521]],

         [[0.7731, 0.8609, 0.3416],
          [0.6090, 0.6602, 0.1535]],

         [[0.4223, 0.2500, 0.3425],
          [0.8561, 0.1645, 0.3753]]]])
'''
# With start and end omitted, ::1 steps through every entry, so the full tensor is returned
print(a[:, :, ::1, ::1])
'''
tensor([[[[0.7442, 0.6572, 0.8844],
          [0.2714, 0.9543, 0.7813],
          [0.2124, 0.6246, 0.2005]],

         [[0.5282, 0.0026, 0.3609],
          [0.6771, 0.5858, 0.9272],
          [0.1732, 0.5411, 0.8177]],

         [[0.4464, 0.2875, 0.5549],
          [0.2754, 0.7300, 0.8469],
          [0.6238, 0.5733, 0.5625]]],


        [[[0.5165, 0.3800, 0.0851],
          [0.0991, 0.7287, 0.9255],
          [0.9094, 0.1059, 0.2375]],

         [[0.8601, 0.7303, 0.3683],
          [0.3715, 0.1885, 0.6126],
          [0.5266, 0.4642, 0.8999]],

         [[0.4903, 0.1294, 0.0379],
          [0.5235, 0.2812, 0.2052],
          [0.3850, 0.2854, 0.8109]]],


        [[[0.4168, 0.4594, 0.1869],
          [0.2987, 0.7347, 0.3478],
          [0.1939, 0.3430, 0.8521]],

         [[0.7731, 0.8609, 0.3416],
          [0.9221, 0.2054, 0.4342],
          [0.6090, 0.6602, 0.1535]],

         [[0.4223, 0.2500, 0.3425],
          [0.7285, 0.1590, 0.2787],
          [0.8561, 0.1645, 0.3753]]]])
'''
3. Integer indexing
a = torch.randn((3, 3))
print(a)
rows = [1, 2]
cols = [2]
print(a[rows, cols])
'''
The elements in the second and third rows, third column:
tensor([[-1.1423,  0.4933,  1.2880],
        [ 0.4578,  0.3748, -0.0443],
        [ 0.1310, -1.1097, -2.0845]])
tensor([-0.0443, -2.0845])
'''
4. Boolean indexing
a = torch.arange(9).view(3, 3)
index = a > 4
print(index)
print(a[index])
'''
tensor([[False, False, False],
        [False, False,  True],
        [ True,  True,  True]])
tensor([5, 6, 7, 8])
'''
index = torch.nonzero(a >= 8)
print(index)
'''
torch.nonzero returns the indices of the elements that are non-zero (True)
tensor([[2, 2]])
'''
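A boolean mask can also be used on the left-hand side of an assignment, for example to zero out the selected elements (a small sketch):
a = torch.arange(9).view(3, 3)
a[a > 4] = 0  # set every element greater than 4 to zero
print(a)
'''
tensor([[0, 1, 2],
        [3, 4, 0],
        [0, 0, 0]])
'''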
5. torch.where(condition, x, y)
Evaluates condition element-wise: where it is True, the result takes the element of x at that position; otherwise it takes the element of y.
x = torch.randn(3, 3)
y = torch.ones(3, 3)
print(x)
print(torch.where(x > 0, x, y))
'''
tensor([[-0.2932,  1.1689,  1.0301],
        [-0.0757, -0.1569,  1.3105],
        [ 1.6842,  0.8742,  1.7629]])
tensor([[1.0000, 1.1689, 1.0301],
        [1.0000, 1.0000, 1.3105],
        [1.6842, 0.8742, 1.7629]])
'''
III. Tensor Reshaping, Concatenation, and Splitting
1. Inspecting a Tensor with Tensor.nelement and Tensor.ndimension
a = torch.rand(1, 2, 3, 4, 5)
print("元素个数: ", a.nelement())
print("轴的个数: ", a.ndimension())
print("矩阵维度: ", a.shape, a.shape[0], a.shape[1], a.shape[2])
'''
Number of elements:  120
Number of axes:  5
Shape:  torch.Size([1, 2, 3, 4, 5]) 1 2 3
'''
2. Changing a Tensor's shape with Tensor.view and Tensor.reshape
- Tensor.view requires the tensor's underlying storage to be contiguous (see the sketch after this example)
- The total number of elements cannot change; pass -1 for one dimension and PyTorch will infer its size
a = torch.rand(1, 2, 3, 4)
b = a.view(6, 4)
print(b.shape)
c = a.reshape(-1)
print(c.shape)
d = a.reshape(6, -1)
print(d.shape)
'''
torch.Size([6, 4])
torch.Size([24])
torch.Size([6, 4])
'''
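The contiguity requirement matters in practice: transposing produces a non-contiguous view, on which view fails while reshape (or contiguous().view) still works. A minimal sketch:
a = torch.rand(2, 3)
b = a.t()     # transpose: a non-contiguous view of a
# b.view(-1)  # would raise a RuntimeError
print(b.reshape(-1).shape)            # torch.Size([6]); reshape copies if needed
print(b.contiguous().view(-1).shape)  # torch.Size([6])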
3. torch.squeeze and torch.unsqueeze remove or insert a dimension of size 1
a = torch.rand(1, 2, 3, 4)
print(a.shape)
b = torch.squeeze(a)
print(b.shape)
c = torch.unsqueeze(b, 2)
print(c.shape)
'''
torch.Size([1, 2, 3, 4])
torch.Size([2, 3, 4])
torch.Size([2, 3, 1, 4])
'''
4. torch.transpose swaps two dimensions of a Tensor
a = torch.rand(1, 2, 3, 4)
print(a.shape)
b = torch.transpose(a, 1, 3)
print(b.shape)
'''
torch.Size([1, 2, 3, 4])
torch.Size([1, 4, 3, 2])
'''
5. Tensor.permute reorders the dimensions of a Tensor arbitrarily
a = torch.rand(1, 2, 3, 4)
print(a.shape)
b = a.permute(2, 1, 3, 0)
print(b.shape)
'''
torch.Size([1, 2, 3, 4])
torch.Size([3, 2, 4, 1])
'''
6. Concatenating matrices with torch.cat and torch.stack
- torch.cat concatenates along an existing axis dim (default 0); the inputs may differ in size along that axis but must match along all others (see the dim=1 sketch below)
- torch.stack stacks along a new axis and requires all inputs to have identical shapes
a = torch.rand(2, 3)
b = torch.rand(3, 3)
c = torch.cat((a, b))
print(c.shape)
d = torch.stack((a, a))
print(d.shape)
'''
torch.Size([5, 3])
torch.Size([2, 2, 3])
'''
c = torch.stack((b, b), dim=0)
print(c.shape)
d = torch.stack((b, b), dim=1)
print(d.shape)
'''
torch.Size([2, 3, 3])
torch.Size([3, 2, 3])
'''
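And a sketch of concatenating along dim=1, where only the other dimensions must match:
a = torch.rand(2, 3)
b = torch.rand(2, 5)
print(torch.cat((a, b), dim=1).shape)  # torch.Size([2, 8])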
7. Splitting matrices with torch.split and torch.chunk
- torch.split takes the size of each chunk: either a single int or a list of sizes
- torch.chunk takes the number of chunks to produce
a = torch.randn(10, 2)
for x in torch.split(a, [1, 2, 3, 4], dim=0):
    print(x.shape)
'''
torch.Size([1, 2])
torch.Size([2, 2])
torch.Size([3, 2])
torch.Size([4, 2])
'''
for x in torch.split(a, 4, dim=0):
    print(x.shape)
'''
torch.Size([4, 2])
torch.Size([4, 2])
torch.Size([2, 2])
'''
for x in torch.chunk(a, 4, dim=0):
    print(x.shape)
'''
torch.Size([3, 2])
torch.Size([3, 2])
torch.Size([3, 2])
torch.Size([1, 2])
'''
IV. Reduction Operations in PyTorch
1. tensor.max finds the maximum
- Without dim, it returns the global maximum
- With dim, it returns both the maximum values along that dimension and their indices (see the sketch below)
a = torch.Tensor([[1, 2], [3, 4]])
print(torch.max(a))
print(torch.max(a, dim=0))
'''
tensor(4.)
torch.return_types.max(
values=tensor([3., 4.]),
indices=tensor([1, 1]))
'''
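The result with dim behaves like a (values, indices) tuple and can be unpacked; torch.argmax returns just the index. A small sketch:
values, indices = torch.max(a, dim=0)
print(values)           # tensor([3., 4.])
print(indices)          # tensor([1, 1])
print(torch.argmax(a))  # tensor(3): index into the flattened tensor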
2. tensor.cumsum computes cumulative sums
a = torch.Tensor([[1, 2], [3, 4]])
print(torch.cumsum(a, dim=1))
'''
tensor([[1., 3.],
        [3., 7.]])
'''
3. tensor.cumprod computes cumulative products
a = torch.Tensor([[1, 2], [3, 4]])
print(torch.cumprod(a, dim=1))
'''
tensor([[ 1., 2.],
        [ 3., 12.]])
'''
4. tensor.mean, tensor.median, and tensor.std compute the mean, median, and standard deviation
a = torch.Tensor([[1, 2], [3, 4]])
print(a.mean(), a.median(), a.std())
'''
tensor(2.5000) tensor(2.) tensor(1.2910)
'''
5. tensor.unique finds the distinct elements of a matrix
a = torch.randint(1, 5, (3, 4))
print(a)
print(torch.unique(a))
'''
tensor([[1, 2, 2, 2],
        [1, 1, 1, 4],
        [2, 3, 1, 3]])
tensor([1, 2, 3, 4])
'''
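torch.unique can also report how often each distinct value occurs via return_counts=True; a quick sketch (output varies with the random input):
values, counts = torch.unique(a, return_counts=True)
print(values)  # the distinct values
print(counts)  # how many times each value appears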
V. Automatic Differentiation in PyTorch
Setting a Tensor's requires_grad to True makes torch.autograd track every operation applied to it. When gradients are needed, call backward() on the final result; the gradients then accumulate in the .grad attribute of the leaf tensors (intermediate, non-leaf results do not retain their gradients by default, as the examples below show).
1. backward
x = torch.randn(3, 3, requires_grad=True)
print(x)
w = torch.ones(3, 3, requires_grad=True)
y = torch.sum(torch.mm(x, w))
print(y)
'''
tensor([[ 0.4195, -0.5490, -0.4192],
        [ 0.5463,  2.0776, -1.0063],
        [-0.0103,  1.2296,  0.1743]], requires_grad=True)
tensor(7.3874, grad_fn=<SumBackward0>)
'''
y.backward()
print(y.grad)  # None: y is not a leaf tensor, so its gradient is not retained
print(w.grad)
print(x.grad)
'''
None
tensor([[ 0.9554,  0.9554,  0.9554],
        [ 2.7582,  2.7582,  2.7582],
        [-1.2512, -1.2512, -1.2512]])
tensor([[3., 3., 3.],
        [3., 3., 3.],
        [3., 3., 3.]])
'''
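Note that .grad accumulates across backward() calls, which is why training loops zero the gradients between steps. A minimal sketch:
x = torch.ones(2, 2, requires_grad=True)
(x * 2).sum().backward()
print(x.grad)   # tensor([[2., 2.], [2., 2.]])
(x * 2).sum().backward()
print(x.grad)   # tensor([[4., 4.], [4., 4.]]): accumulated, not replaced
x.grad.zero_()  # reset in place before the next step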
2. Tensor.detach detaches a Tensor from the computation graph so that no gradient flows through it
x = torch.rand(3, 3, requires_grad=True)
w = torch.ones(3, 3, requires_grad=True)
print(x)
print(w)
'''
tensor([[0.9318, 0.3074, 0.0789],
        [0.3595, 0.9793, 0.2073],
        [0.8808, 0.7111, 0.6849]], requires_grad=True)
tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]], requires_grad=True)
'''
yy = torch.mm(x, w)
detached_yy = yy.detach()
print(detached_yy)
'''
tensor([[1.3181, 1.3181, 1.3181],
        [1.5461, 1.5461, 1.5461],
        [2.2768, 2.2768, 2.2768]])
'''
y = torch.mean(yy)
y.backward()
print(x.grad)
print(w.grad)
print(yy.grad)  # None: yy is a non-leaf tensor (see retain_grad below)
'''
tensor([[0.3333, 0.3333, 0.3333],
        [0.3333, 0.3333, 0.3333],
        [0.3333, 0.3333, 0.3333]])
tensor([[0.2413, 0.2413, 0.2413],
        [0.2220, 0.2220, 0.2220],
        [0.1079, 0.1079, 0.1079]])
None
'''
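If the gradient of a non-leaf tensor such as yy is needed, call retain_grad() on it before backward(); a minimal sketch:
x = torch.rand(3, 3, requires_grad=True)
w = torch.ones(3, 3, requires_grad=True)
yy = torch.mm(x, w)
yy.retain_grad()           # ask autograd to keep yy's gradient
torch.mean(yy).backward()
print(yy.grad)             # now a 3x3 tensor with every entry 1/9 (0.1111)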
3. Code inside a with torch.no_grad() block runs without gradient tracking
x = torch.rand(3, 3, requires_grad=True)
w = torch.ones(3, 3, requires_grad=True)
y = torch.sum(torch.mm(x, w))
print(y.requires_grad)
'''True'''
with torch.no_grad():
    y = torch.sum(torch.mm(x, w))
print(y.requires_grad)
'''False'''
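torch.no_grad can also be applied as a decorator; evaluate below is a hypothetical helper used only for illustration:
@torch.no_grad()
def evaluate(x, w):  # hypothetical function, shown for illustration
    return torch.sum(torch.mm(x, w))

print(evaluate(x, w).requires_grad)  # False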