The project to be migrated is the HESIC image compression algorithm: https://github.com/ywz978020607/HESIC

1. Custom operator migration -- the LowerBoundFunction class

To migrate this low-level wrapped class faithfully, the original class needs to be tested in detail and the migrated version verified against it. In PyTorch, custom operators are defined by subclassing torch.autograd.Function:
import torch
import torch.nn as nn

class LowerBoundFunction(torch.autograd.Function):
    """Autograd function for the `LowerBound` operator."""

    @staticmethod
    def forward(ctx, input_, bound):
        ctx.save_for_backward(input_, bound)
        return torch.max(input_, bound)

    @staticmethod
    def backward(ctx, grad_output):
        input_, bound = ctx.saved_tensors
        # Pass the gradient through where the input was not clipped,
        # or where a negative gradient would push the input back up.
        pass_through_if = (input_ >= bound) | (grad_output < 0)
        print(grad_output)  # tensor([ 0.,  2., 15.], grad_fn=<MulBackward0>)
        print(pass_through_if)
        print(pass_through_if.type(grad_output.dtype) * grad_output)
        return pass_through_if.type(grad_output.dtype) * grad_output, None

if __name__ == "__main__":
    a = torch.Tensor([1, 2, 3])
    b = torch.Tensor([0, 1, 5])
    a.requires_grad_(True)
    b.requires_grad_(True)
    c = a * b                           # upstream gradient for the test
    m = LowerBoundFunction.apply(a, b)
    m.backward(c)
The output is:
tensor([ 0., 2., 15.], grad_fn=<MulBackward0>)
tensor([ True, True, False])
tensor([0., 2., 0.])
The two print statements show that this class selectively blocks gradients, somewhat like ReLU: with the upstream gradient c = a*b = [0, 2, 15], only positions where input_ >= bound (or where the incoming gradient is negative) let the gradient pass, yielding [0., 2., 0.].
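To make the difference concrete, here is a minimal sketch (reusing the class and example tensors above, all standard PyTorch API) contrasting the default gradient of torch.max with the masked gradient of LowerBoundFunction:

import torch

a = torch.tensor([1., 2., 3.], requires_grad=True)
b = torch.tensor([0., 1., 5.])

# Plain torch.max: the gradient flows to whichever operand is larger,
# so the clipped position (a[2] < b[2]) receives nothing.
torch.max(a, b).backward(torch.ones(3))
print(a.grad)  # tensor([1., 1., 0.])

# LowerBoundFunction additionally passes a *negative* upstream gradient
# through at clipped positions, since following it would move input_
# back up toward the bound.
a.grad = None
LowerBoundFunction.apply(a, b).backward(-torch.ones(3))
print(a.grad)  # tensor([-1., -1., -1.])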
MindSpore's custom-operator mechanism, on the other hand, is defined differently for Ascend, GPU, and CPU and is rather involved. After consulting a Huawei engineer, the plan is to subclass nn.Cell and override its bprop method instead. The following script (adapted from a MindSpore unit test) exercises bprop backward gradient propagation:
# https://gitee.com/mindspore/mindspore/blob/master/tests/ut/python/pynative_mode/test_hook.py#
import numpy as np
import pytest
import mindspore.nn as nn
import mindspore.ops.operations as P
from mindspore import context, Tensor, ParameterTuple
from mindspore.common.initializer import TruncatedNormal
from mindspore.nn import WithLossCell, Momentum
from mindspore.ops import composite as C

context.set_context(mode=context.PYNATIVE_MODE, device_target="CPU")

grad_all = C.GradOperation(get_all=True)
bprop_debug = False

class MulAdd(nn.Cell):
    def __init__(self):
        super(MulAdd, self).__init__()

    def construct(self, x, y):
        return 2 * x * x + y * y

    def bprop(self, x, y, out, dout):
        global bprop_debug
        bprop_debug = True
        print(x)     # [1 2 3]
        print(y)     # [2 3 5]
        print(out)   # [ 6 17 43]
        print(dout)  # [1 1 1]
        return dout, 2 * y

def test_custom_bprop():
    mul_add = MulAdd()
    mul_add.bprop_debug = True
    x = Tensor(np.array([1, 2, 3]).astype(np.int32))
    y = Tensor(np.array([2, 3, 5]).astype(np.int32))
    ret = grad_all(mul_add)(x, y)
    print(ret)  # (Tensor(shape=[3], dtype=Int32, value= [1, 1, 1]), Tensor(shape=[3], dtype=Int32, value= [ 4, 6, 10]))
    assert bprop_debug

##############
# ywz
test_custom_bprop()
print(bprop_debug)
After verifying how the bprop override works, the corresponding class can be implemented. Note that bprop receives the forward inputs, the forward output out, and the upstream gradient dout (the counterpart of grad_output in PyTorch's backward), and it must return one gradient per input:
import numpy as np
import mindspore as msp
import mindspore.nn as nn
from mindspore import context, Tensor
from mindspore.ops import composite as C

context.set_context(mode=context.PYNATIVE_MODE, device_target="CPU")

class LowerBoundFunction(nn.Cell):
    def __init__(self):
        super(LowerBoundFunction, self).__init__()

    def construct(self, input_, bound):
        return msp.ops.maximum(input_, bound)

    def bprop(self, input_, bound, out, dout):
        # Same mask as the PyTorch backward, with dout playing the role
        # of grad_output. Logical OR is emulated by adding the two 0/1
        # masks and casting back to bool.
        # pass_through_if = (input_ >= bound) | (dout < 0)
        pass_through_if = ((input_ >= bound).astype(input_.dtype)
                           + (dout < 0).astype(input_.dtype)).astype(msp.bool_)
        # bprop must return one gradient per input; bound gets zeros
        # (the PyTorch version returns None for it).
        return pass_through_if.astype(dout.dtype) * dout, msp.ops.zeros_like(bound)

if __name__ == "__main__":
    grad_all = C.GradOperation(get_all=True)
    lowerboundfunc = LowerBoundFunction()
    x = Tensor(np.array([1, 2, 3]).astype(np.int32))
    y = Tensor(np.array([0, 1, 5]).astype(np.int32))
    test = lowerboundfunc(x, y)
    ret = grad_all(lowerboundfunc)(x, y)
    print(ret)
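As a final migration check, the two implementations can be run on identical inputs and their gradients compared element-wise. This is only a sketch: TorchLowerBound and MsLowerBound are hypothetical aliases for the two LowerBoundFunction classes above (renamed so both can live in one script), and an all-ones upstream gradient is assumed.

import numpy as np
import torch
from mindspore import Tensor
from mindspore.ops import composite as C

# Hypothetical aliases for the two classes defined above:
#   TorchLowerBound -- the torch.autograd.Function version
#   MsLowerBound    -- the nn.Cell / bprop version

x_np = np.array([1., 2., 3.], dtype=np.float32)
y_np = np.array([0., 1., 5.], dtype=np.float32)

# PyTorch gradient w.r.t. input_, with an all-ones upstream gradient.
a = torch.tensor(x_np, requires_grad=True)
b = torch.tensor(y_np, requires_grad=True)
TorchLowerBound.apply(a, b).backward(torch.ones(3))
torch_grad = a.grad.numpy()

# MindSpore gradient for the same inputs.
grad_all = C.GradOperation(get_all=True)
ms_grad = grad_all(MsLowerBound())(Tensor(x_np), Tensor(y_np))[0].asnumpy()

# Both should be [1., 1., 0.]: positions 0 and 1 pass through, position 2
# is clipped by the bound and its (positive) gradient is blocked.
assert np.allclose(torch_grad, ms_grad), (torch_grad, ms_grad)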