神经网络已经在很多场景下表现出了很好的识别能力,但是缺乏可解释性一直为人所诟病。《Grad-CAM: Visual Explanations from Deep Networks via Gradient-based Localization》这篇论文基于梯度为其可解释性做了一些工作,它可以直观地展示图片中哪块区域对识别起了至关重要的作用,以热度图的方式可视化神经网络的注意力。本博客主要是基于pytorch的简单工程复现。原文见这里,本代码基于这里。
import torch
import torchvision
from torchvision import models
from torchvision import transforms
from PIL import Image
import pylab as plt
import numpy as np
import cv2


class Extractor():
    """Run a VGG-style model layer by layer and capture one layer's gradient.

    PyTorch frees the gradients of intermediate tensors once they have been
    propagated backwards, so a tensor hook is used here to keep the gradient
    that flows into the output of the target layer.

    The model is expected to expose ``features`` and ``classifier``
    sub-modules whose children are applied sequentially.
    """

    def __init__(self, model, target_layer):
        """Store the model and the name (key in ``model.features``) to watch."""
        self.model = model
        self.target_layer = target_layer
        self.gradient = None

    def save_gradient(self, grad):
        """Tensor hook: remember the gradient w.r.t. the target activation."""
        self.gradient = grad

    def __call__(self, x):
        """Forward ``x`` and return ``(target_activation, class_scores)``.

        Shapes are (1, c, h, w) and (1, class_num) for a single-image batch.

        Raises:
            ValueError: if ``target_layer`` is not a child of
                ``model.features``.
        """
        # Drop any gradient left over from a previous call so that a stale
        # value can never be mistaken for the result of this forward pass.
        self.gradient = None
        target_activation = None
        for name, module in self.model.features._modules.items():
            x = module(x)
            if name == self.target_layer:
                x.register_hook(self.save_gradient)
                target_activation = x
        if target_activation is None:
            raise ValueError(
                "target layer %r not found in model.features"
                % (self.target_layer,)
            )

        # torchvision's VGG applies `avgpool` between `features` and
        # `classifier`; for a 224x224 input it is the identity, for other
        # sizes it is what makes the classifier input the right shape.
        avgpool = getattr(self.model, "avgpool", None)
        if avgpool is not None:
            x = avgpool(x)

        # Batch size is assumed to be 1 throughout this file.
        x = x.reshape(1, -1)
        for module in self.model.classifier._modules.values():
            x = module(x)
        return target_activation, x


def preprocess_image(path, size=224):
    """Load an image file as a normalized (1, 3, size, size) float tensor.

    The image is converted to RGB and resized so that files with an alpha
    channel, grayscale files, or files of a different resolution are handled
    too. The mean/std values are the ones used by the torchvision ImageNet
    models.
    """
    means = [0.485, 0.456, 0.406]
    stds = [0.229, 0.224, 0.225]
    m_transform = transforms.Compose([
        transforms.Resize((size, size)),
        transforms.ToTensor(),
        transforms.Normalize(means, stds),
    ])
    with Image.open(path) as img:
        rgb = img.convert("RGB")
    # Add the batch dimension.
    return m_transform(rgb).unsqueeze(0)


class GradCam():
    """Compute a Grad-CAM heat map for a single image."""

    def __init__(self, model, target_layer_name, use_cuda):
        """Put ``model`` in eval mode (and on the GPU if ``use_cuda``)."""
        self.model = model
        self.model.eval()
        self.cuda = use_cuda
        if self.cuda:
            self.model = model.cuda()

        self.extractor = Extractor(self.model, target_layer_name)

    def __call__(self, input, index=None):
        """Return the Grad-CAM mask for ``input`` as a 2-D float array.

        Args:
            input: image tensor of shape (1, 3, H, W).
            index: class whose evidence should be visualized; when ``None``
                the class predicted by the network is used.

        Returns:
            Array of shape (H, W) scaled to [0, 1]. If the map is zero
            everywhere (no positive evidence) it is returned as all zeros.
        """
        if self.cuda:
            input = input.cuda()
        target_activation, output = self.extractor(input)

        if index is None:
            index = int(torch.argmax(output))

        # Back-propagate only the score of the selected class (equivalent to
        # multiplying the output by a one-hot vector and summing).
        score = output[0, index]
        self.model.zero_grad()
        score.backward()

        grads_val = self.extractor.gradient.detach().cpu().numpy()
        # Shape (c, h, w).
        target = target_activation.detach().cpu().numpy()[0]
        # Shape (c,): global-average-pooled gradient of every channel.
        weights = grads_val.mean(axis=(2, 3))[0]
        # Weighted sum over channels -> shape (h, w).
        cam = np.tensordot(weights, target, axes=1).astype(np.float32)

        # ReLU: keep only locations with a positive influence on the class.
        cam = np.maximum(cam, 0)
        height, width = input.shape[-2:]
        # cv2.resize takes the target size as (width, height).
        cam = cv2.resize(cam, (int(width), int(height)))
        cam = cam - np.min(cam)
        peak = np.max(cam)
        # Guard against 0/0 (NaN) when the map is zero everywhere.
        if peak > 0:
            cam = cam / peak
        return cam


def show_cam_on_image(img, mask, output_path="cam2.jpg"):
    """Blend ``mask`` over ``img`` as a JET heat map and write it to disk.

    Args:
        img: float image with values in [0, 1] and the same height/width as
            ``mask`` (BGR channel order, as produced by ``cv2.imread``).
        mask: 2-D array with values in [0, 1].
        output_path: where the blended image is written.
    """
    heatmap = cv2.applyColorMap(np.uint8(255 * mask), cv2.COLORMAP_JET)
    heatmap = np.float32(heatmap) / 255
    cam = heatmap + np.float32(img)
    cam = cam / np.max(cam)
    cv2.imwrite(output_path, np.uint8(255 * cam))


if __name__ == "__main__":
    # The closer the target layer is to the classifier, the better the
    # visualization tends to be.
    grad_cam = GradCam(
        model=models.vgg19(pretrained=True),
        target_layer_name="35",
        use_cuda=torch.cuda.is_available(),
    )
    input_tensor = preprocess_image("both.png")
    mask = grad_cam(input_tensor, None)
    img = cv2.imread("both.png", 1)
    # The heat map is simply resized and added on top of the input image.
    img = np.float32(cv2.resize(img, (224, 224))) / 255
    show_cam_on_image(img, mask)
原图:
可视化图: