第六节 图片风格迁移
- 图片风格迁移
- 用GAN生成MNIST
- 用DCGAN生成更复杂的图片
## 图片风格迁移 Neural Style Transfer
%matplotlib inline
from __future__ import division
from torchvision import models
from torchvision import transforms
from PIL import Image
import argparse
import torch
import torchvision
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
# Run everything on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
def load_image(image_path, transform=None, max_size=None, shape=None):
    """Open an image file, optionally resize it, and optionally turn it into a batched tensor.

    Args:
        image_path: Path handed to ``PIL.Image.open``.
        transform: Optional callable applied to the PIL image. Its result gets
            a leading batch dimension (``unsqueeze(0)``) and is moved to
            ``device``.
        max_size: If truthy, the image is rescaled so that its longest side
            equals ``max_size`` (aspect ratio kept, sizes truncated to int).
        shape: If truthy, the image is resized to exactly this size. It is
            passed to ``PIL.Image.resize``, which expects ``(width, height)``.

    Returns:
        The transformed tensor on ``device`` when ``transform`` is given,
        otherwise the (possibly resized) PIL image.
    """
    image = Image.open(image_path)

    if max_size:
        scale = float(max_size) / max(image.size)
        # A plain tuple of Python ints is what PIL documents for `size`;
        # int() truncates exactly like the previous ndarray.astype(int).
        new_size = tuple(int(side * scale) for side in image.size)
        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter
        # and is what the `shape` branch below already uses.
        image = image.resize(new_size, Image.LANCZOS)

    if shape:
        image = image.resize(tuple(shape), Image.LANCZOS)

    if not transform:
        # A PIL image has no `.to()`, so there is nothing to move to a device.
        return image

    image = transform(image).unsqueeze(0)
    return image.to(device)
# Convert to a tensor and normalise with the ImageNet per-channel mean and
# standard deviation.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

content = load_image("png/content.png", transform, max_size=400)
# `content` is (batch, channels, height, width) but PIL's resize wants
# (width, height), so the two spatial sizes are passed in swapped order.
# The result is bound to `style`, the name the rest of the file reads.
style = load_image("png/style.png", transform,
                   shape=[content.size(3), content.size(2)])
# content = load_image("png/content.png", transforms.Compose([
# transforms.ToTensor(),
# ]), max_size=400)
# style = load_image("png/style.png", transforms.Compose([
# transforms.ToTensor(),
# ]), shape=[content.size(2), content.size(3)])
#%%
style.shape
#%%
# Turns a tensor back into a PIL image so matplotlib can draw it.
unloader = transforms.ToPILImage()

plt.ion()


def imshow(tensor, title=None):
    """Display an image tensor with matplotlib.

    Args:
        tensor: Image tensor. A leading dimension of size one (a fake batch
            axis) is squeezed away before conversion.
        title: Optional title drawn above the image.
    """
    # Work on a CPU copy so the caller's tensor is left untouched.
    picture = unloader(tensor.cpu().clone().squeeze(0))
    plt.imshow(picture)
    if title is not None:
        plt.title(title)
    # Short pause so the figure gets a chance to refresh.
    plt.pause(0.001)


plt.figure()
imshow(style[0], title='Image')
# content.shape
#%%
class VGGNet(nn.Module):
    """Feature extractor that returns intermediate activations of VGG-19.

    ``select`` holds the names (string indices) of the sub-layers of
    ``vgg19(...).features`` whose outputs are collected.
    """

    def __init__(self):
        super(VGGNet, self).__init__()
        self.select = ['0', '5', '10', '19', '28']
        self.vgg = models.vgg19(pretrained=True).features

    def forward(self, x):
        """Run ``x`` through every layer and return the selected outputs.

        Returns:
            A list with one tensor per selected layer, in network order.
        """
        collected = []
        for layer_name, layer in self.vgg.named_children():
            x = layer(x)
            # NOTE(review): if the next layer is an in-place activation, the
            # tensor stored here is modified by it afterwards - confirm that
            # this is the intended feature.
            if layer_name in self.select:
                collected.append(x)
        return collected
# Feature extractor, moved to the working device and put in eval mode.
vgg = VGGNet().to(device).eval()

# The image itself is the optimised variable; it starts as a copy of the
# content image and is the only thing handed to Adam.
target = content.clone().requires_grad_(True)
optimizer = torch.optim.Adam([target], lr=0.003, betas=[0.5, 0.999])
#%%
target_features = vgg(target)
#%%
total_step = 2000
style_weight = 100.

# `content` and `style` never change during optimisation, so their features
# are extracted once, without autograd bookkeeping, instead of on every step.
with torch.no_grad():
    content_features = vgg(content)
    style_features = vgg(style)

for step in range(total_step):
    target_features = vgg(target)

    style_loss = 0
    content_loss = 0
    for f1, f2, f3 in zip(target_features, content_features, style_features):
        # Content loss: mean squared difference of the raw feature maps.
        content_loss += torch.mean((f1 - f2) ** 2)

        # Flatten the spatial axes; the view assumes a batch of one image.
        _, c, h, w = f1.size()
        f1 = f1.view(c, h * w)
        f3 = f3.view(c, h * w)

        # Gram matrices (channel-by-channel correlations).
        f1 = torch.mm(f1, f1.t())
        f3 = torch.mm(f3, f3.t())
        style_loss += torch.mean((f1 - f3) ** 2) / (c * h * w)

    loss = content_loss + style_weight * style_loss

    # Update the target image.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if step % 10 == 0:
        print("Step [{}/{}], Content Loss: {:.4f}, Style Loss: {:.4f}"
              .format(step, total_step, content_loss.item(), style_loss.item()))
#%%
# Inverse of the normalisation used when loading the images: the constants
# are (rounded) -mean/std and 1/std of the mean/std given there.
denorm = transforms.Normalize((-2.12, -2.04, -1.80), (4.37, 4.46, 4.44))
# Drop the batch axis from a copy of the result, undo the normalisation and
# clip into the displayable [0, 1] range.
img = denorm(target.clone().squeeze()).clamp_(0, 1)
plt.figure()
imshow(img, title='Target Image')
#%% md
## Generative Adversarial Networks
#%%
batch_size = 32
# MNIST images are single-channel, so Normalize gets exactly one mean/std.
# Three values do not broadcast against a (1, 28, 28) tensor and raise on
# current torchvision. (x - 0.5) / 0.5 maps [0, 1] to [-1, 1], matching the
# Tanh output of the generator.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5,),
                         std=(0.5,)),
])
mnist_data = torchvision.datasets.MNIST("./mnist_data", train=True, download=True, transform=transform)
dataloader = torch.utils.data.DataLoader(dataset=mnist_data,
                                         batch_size=batch_size,
                                         shuffle=True)
#%%
image_size = 784
hidden_size = 256

# Discriminator: flattened image -> single probability (Sigmoid output).
D = nn.Sequential(
    nn.Linear(image_size, hidden_size),
    nn.LeakyReLU(0.2),
    nn.Linear(hidden_size, hidden_size),
    nn.LeakyReLU(0.2),
    nn.Linear(hidden_size, 1),
    nn.Sigmoid(),
)

latent_size = 64

# Generator: latent vector -> flattened image with values in [-1, 1] (Tanh).
G = nn.Sequential(
    nn.Linear(latent_size, hidden_size),
    nn.ReLU(),
    nn.Linear(hidden_size, hidden_size),
    nn.ReLU(),
    nn.Linear(hidden_size, image_size),
    nn.Tanh(),
)

# Both networks live on the working device.
D = D.to(device)
G = G.to(device)

# Binary cross-entropy on the discriminator's probability, one Adam optimiser
# per network.
loss_fn = nn.BCELoss()
d_optimizer = torch.optim.Adam(D.parameters(), lr=0.0002)
g_optimizer = torch.optim.Adam(G.parameters(), lr=0.0002)
#%% md
开始训练
#%%
def reset_grad():
    """Zero the gradients held by the discriminator and generator optimisers."""
    for optim in (d_optimizer, g_optimizer):
        optim.zero_grad()
total_step = len(dataloader)
num_epochs = 200
for epoch in range(num_epochs):
    for i, (images, _) in enumerate(dataloader):
        # The last batch may be smaller, so the size is read from the data.
        batch_size = images.size(0)
        images = images.reshape(batch_size, image_size).to(device)

        # Targets for the BCE loss: 1 = real, 0 = fake.
        real_labels = torch.ones(batch_size, 1, device=device)
        fake_labels = torch.zeros(batch_size, 1, device=device)

        # ---- discriminator step ----
        real_score = D(images)
        d_loss_real = loss_fn(real_score, real_labels)

        # Generate fake images; detach so this step does not backprop into G.
        z = torch.randn(batch_size, latent_size).to(device)
        fake_images = G(z)
        fake_score = D(fake_images.detach())
        d_loss_fake = loss_fn(fake_score, fake_labels)

        d_loss = d_loss_real + d_loss_fake
        reset_grad()
        d_loss.backward()
        d_optimizer.step()

        # ---- generator step ----
        # G is trained to make D label fresh fakes as real.
        z = torch.randn(batch_size, latent_size).to(device)
        fake_images = G(z)
        outputs = D(fake_images)
        g_loss = loss_fn(outputs, real_labels)

        reset_grad()
        g_loss.backward()
        g_optimizer.step()

        if i % 1000 == 0:
            print("Epoch [{}/{}], Step [{}/{}], d_loss: {:.4f}, g_loss: {:.4f}, D(x): {:.2f}, D(G(z)): {:.2f}"
                  .format(epoch, num_epochs, i, total_step, d_loss.item(), g_loss.item(), real_score.mean().item(), fake_score.mean().item()))
#%% md
fake images
#%%
# Sample one latent vector and show the generated 28x28 image.
z = torch.randn(1, latent_size).to(device)
fake_images = G(z).detach().cpu().view(28, 28).numpy()
plt.imshow(fake_images)
#%% md
真实图片
#%%
# First image of the most recent batch, reshaped back to 28x28 for display.
plt.imshow(images[0].detach().cpu().view(28, 28).numpy())
#%% md
## DCGAN
#%% md
[UNSUPERVISED REPRESENTATION LEARNING WITH DEEP CONVOLUTIONAL GENERATIVE ADVERSARIAL NETWORKS](https://arxiv.org/pdf/1511.06434.pdf)
#%% md
[图片下载地址](https://drive.google.com/drive/folders/0B7EVK8r0v71pbWNEUjJKdDQ3dGc)
https://drive.google.com/drive/folders/0B7EVK8r0v71pbWNEUjJKdDQ3dGc
#%%
import torchvision.utils as vutils
#%%
# !ls celeba/img_align_celeba/img_align_celeba_png
#%%
image_size = 64
batch_size = 128
dataroot = "celeba/img_align_celeba"
num_workers = 2

# Resize and centre-crop to image_size, convert to a tensor, then apply
# (x - 0.5) / 0.5 per channel.
dataset = torchvision.datasets.ImageFolder(
    root=dataroot,
    transform=transforms.Compose([
        transforms.Resize(image_size),
        transforms.CenterCrop(image_size),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]),
)
dataloader = torch.utils.data.DataLoader(
    dataset=dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=num_workers,
)
#%%
# Show the first 64 images of one training batch as a grid.
real_batch = next(iter(dataloader))
plt.figure(figsize=(8, 8))
# Hide the axes. The previous `plt.axis=("off")` was an assignment that
# replaced the pyplot function with a string and hid nothing. Calling the
# method on the current Axes works even if `plt.axis` was overwritten earlier
# in the session.
plt.gca().axis("off")
plt.title("Training Images")
# make_grid returns (channels, height, width); imshow wants channels last.
plt.imshow(np.transpose(vutils.make_grid(real_batch[0].to(device)[:64], padding=2, normalize=True).cpu(), (1, 2, 0)))
#%% md
我们把模型的卷积层权重初始化成 mean=0, std=0.02(BatchNorm 的权重初始化成 mean=1, std=0.02,偏置为 0)
#%%
def weights_init(m):
    """Re-initialise one module in place; meant to be used with ``Module.apply``.

    Modules whose class name contains ``Conv`` get weights drawn from
    N(0, 0.02). Modules whose class name contains ``BatchNorm`` get weights
    drawn from N(1, 0.02) and a zero bias. Every other module is left as is.

    Args:
        m: The module to initialise.
    """
    layer_kind = type(m).__name__
    if 'Conv' in layer_kind:
        nn.init.normal_(m.weight.data, 0.0, 0.02)
        return
    if 'BatchNorm' in layer_kind:
        nn.init.normal_(m.weight.data, 1.0, 0.02)
        nn.init.constant_(m.bias.data, 0)
#%% md
![dcgan](images/dcgan_generator.png)
#%%
nz = 100  # size of the latent vector
ngf = 64  # generator feature map size
ndf = 64  # discriminator feature map size
nc = 3  # color channels


class Generator(nn.Module):
    """DCGAN generator: a stack of transposed convolutions ending in Tanh.

    ``main`` is a flat ``nn.Sequential``. The first transposed convolution
    uses stride 1 / padding 0 and each later one uses stride 2 / padding 1,
    with channel widths nz -> ngf*8 -> ngf*4 -> ngf*2 -> ngf -> nc. A latent
    input of shape (N, nz, 1, 1) therefore comes out as (N, nc, 64, 64).
    """

    def __init__(self):
        super(Generator, self).__init__()
        # ConvTranspose2d positional arguments below are
        # (in_channels, out_channels, kernel_size, stride, padding).
        stages = []
        in_channels = nz
        for depth, out_channels in enumerate((ngf * 8, ngf * 4, ngf * 2, ngf)):
            # Only the first stage, which expands the 1x1 latent, uses
            # stride 1 with no padding.
            stride, padding = (1, 0) if depth == 0 else (2, 1)
            stages.append(nn.ConvTranspose2d(in_channels, out_channels, 4, stride, padding, bias=False))
            stages.append(nn.BatchNorm2d(out_channels))
            stages.append(nn.ReLU(True))
            in_channels = out_channels
        # Final stage maps to nc colour channels; Tanh bounds values to [-1, 1].
        stages.append(nn.ConvTranspose2d(in_channels, nc, 4, 2, 1, bias=False))
        stages.append(nn.Tanh())
        self.main = nn.Sequential(*stages)

    def forward(self, input):
        """Run the latent batch ``input`` through ``main``."""
        return self.main(input)
#%%
# Build the generator on the working device, then re-initialise its layers
# with weights_init (conv weights ~ N(0, 0.02), BatchNorm weights ~ N(1, 0.02)
# with zero bias). Module.apply returns the module itself, so the calls can
# be chained.
netG = Generator().to(device).apply(weights_init)
# Print the layer structure.
print(netG)
#%% md
Discriminator
#%%
class Discriminator(nn.Module):
    """DCGAN discriminator: strided convolutions ending in a Sigmoid.

    ``main`` is a flat ``nn.Sequential``. Channel widths go
    nc -> ndf -> ndf*2 -> ndf*4 -> ndf*8 -> 1. The first four convolutions
    use kernel 4 / stride 2 / padding 1; the last uses kernel 4 / stride 1 /
    padding 0. For a 64x64 input the output is one value per image.
    """

    def __init__(self):
        super(Discriminator, self).__init__()
        # Conv2d positional arguments below are
        # (in_channels, out_channels, kernel_size, stride, padding).
        # First stage has no batch norm.
        stages = [
            nn.Conv2d(nc, ndf, 4, 2, 1, bias=False),
            nn.LeakyReLU(0.2, inplace=True),
        ]
        in_channels = ndf
        for factor in (2, 4, 8):
            out_channels = ndf * factor
            stages.append(nn.Conv2d(in_channels, out_channels, 4, 2, 1, bias=False))
            stages.append(nn.BatchNorm2d(out_channels))
            stages.append(nn.LeakyReLU(0.2, inplace=True))
            in_channels = out_channels
        # Collapse to a single channel and squash it into (0, 1).
        stages.append(nn.Conv2d(in_channels, 1, 4, 1, 0, bias=False))
        stages.append(nn.Sigmoid())
        self.main = nn.Sequential(*stages)

    def forward(self, input):
        """Run the image batch ``input`` through ``main``."""
        return self.main(input)
#%%
# Build the discriminator on the working device, then re-initialise its
# layers with weights_init (conv weights ~ N(0, 0.02), BatchNorm weights
# ~ N(1, 0.02) with zero bias). Module.apply returns the module itself.
netD = Discriminator().to(device).apply(weights_init)
# Print the layer structure.
print(netD)
#%% md
开始训练
#%%
# Adam settings shared by both optimisers.
lr = 0.0002
beta1 = 0.5

# A fixed batch of 64 latent vectors, kept so that samples generated at
# different times come from the same inputs.
fixed_noise = torch.randn(64, nz, 1, 1, device=device)

# Binary cross-entropy on the discriminator's sigmoid output.
loss_fn = nn.BCELoss()

g_optimizer = torch.optim.Adam(netG.parameters(), lr=lr, betas=(beta1, 0.999))
d_optimizer = torch.optim.Adam(netD.parameters(), lr=lr, betas=(beta1, 0.999))
#%%
num_epochs = 5
G_losses = []
D_losses = []
for epoch in range(num_epochs):
    for i, data in enumerate(dataloader):
        # ---- discriminator: maximize log(D(x)) + log(1 - D(G(z))) ----
        netD.zero_grad()

        # Real batch first, with target 1.
        real_images = data[0].to(device)
        b_size = real_images.size(0)
        label = torch.ones(b_size, device=device)
        output = netD(real_images).view(-1)
        real_loss = loss_fn(output, label)
        real_loss.backward()
        D_x = output.mean().item()

        # Then a generated batch with target 0. detach() keeps this backward
        # pass out of the generator.
        noise = torch.randn(b_size, nz, 1, 1, device=device)
        fake_images = netG(noise)
        label = torch.zeros_like(label)
        output = netD(fake_images.detach()).view(-1)
        fake_loss = loss_fn(output, label)
        fake_loss.backward()
        D_G_z1 = output.mean().item()

        # Gradients of both halves were accumulated by the two backward()
        # calls; the sum is kept only for reporting.
        loss_D = real_loss + fake_loss
        d_optimizer.step()

        # ---- generator: make the updated D label the fakes as real ----
        netG.zero_grad()
        label = torch.ones_like(label)
        output = netD(fake_images).view(-1)
        loss_G = loss_fn(output, label)
        loss_G.backward()
        D_G_z2 = output.mean().item()
        g_optimizer.step()

        if i % 50 == 0:
            print("[{}/{}] [{}/{}] Loss_D: {:.4f} Loss_G {:.4f} D(x): {:.4f} D(G(z)): {:.4f}/{:.4f}"
                  .format(epoch, num_epochs, i, len(dataloader), loss_D.item(), loss_G.item(), D_x, D_G_z1, D_G_z2))

        # NOTE(review): recorded on every iteration here; the source lost its
        # indentation, so confirm these were not meant to sit under the `if`.
        G_losses.append(loss_G.item())
        D_losses.append(loss_D.item())
#%%
# Generate samples from the fixed latent batch. Nothing is recorded for
# autograd inside no_grad, so the result is already detached.
with torch.no_grad():
    fake = netG(fixed_noise).cpu()
# fake
#%%
real_batch = next(iter(dataloader))

plt.figure(figsize=(30, 30))

# Left panel: the first 64 real images of one batch.
real_ax = plt.subplot(1, 2, 1)
# Hide the axes through the Axes object. The previous `plt.axis=("off")` was
# an assignment that replaced the pyplot function with a string and hid
# nothing. The Axes method works even if `plt.axis` was overwritten earlier.
real_ax.axis("off")
plt.title("Real Images")
# make_grid returns (channels, height, width); imshow wants channels last.
plt.imshow(np.transpose(vutils.make_grid(real_batch[0].to(device)[:64], padding=5, normalize=True).cpu(), (1, 2, 0)))

# Right panel: the generated images held in `fake`.
fake_ax = plt.subplot(1, 2, 2)
fake_ax.axis("off")
plt.title("Fake Images")
plt.imshow(np.transpose(vutils.make_grid(fake, padding=2, normalize=True), (1, 2, 0)))
plt.show()