今回はこのサイトのPyTorch tutorialを実践する。このチュートリアルは、モデルとしてalexnetを使用して画像分類を行っている。
Automatic differentiation in PyTorch
import torch
from torch import nn
from torch.nn.parameter import Parameter
from torch.autograd import Variable
# Let's define a linear layer.
class nn_Linear(nn.Module):
    """Fully connected layer that computes ``y = x W^T + b`` for a batch of rows.

    Args:
        input_dim: Number of features in each input row.
        output_dim: Number of features in each output row.

    The layer owns two registered parameters: ``weight`` with shape
    ``(output_dim, input_dim)`` and ``bias`` with shape ``(1, output_dim)``.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        # Create the layer parameters. torch.empty only allocates storage;
        # the values are filled in by the initialization just below.
        self.weight = Parameter(torch.empty(output_dim, input_dim))
        self.bias = Parameter(torch.empty(1, output_dim))
        # Initialize the weight and bias with small random values.
        # Parameters expose their values (.data) and gradients (.grad);
        # no_grad keeps this in-place initialization out of autograd.
        with torch.no_grad():
            self.weight.uniform_(-0.001, 0.001)
            self.bias.uniform_(-0.001, 0.001)

    def forward(self, x):
        """Apply the layer to ``x``.

        Args:
            x: 2-D tensor of shape ``(batch, input_dim)``.

        Returns:
            Tensor of shape ``(batch, output_dim)`` holding ``x W^T + b``.
        """
        # Here you could try to see what values or sizes these inputs have:
        # print(self.weight.size())
        # print(x.size())
        # This kind of debugging works because the operations run directly on
        # values instead of only defining a computation graph.
        #
        # addmm(input, mat1, mat2) computes input + mat1 @ mat2; the (1, output_dim)
        # bias broadcasts over the batch dimension, so no explicit expand is needed.
        return torch.addmm(self.bias, x, self.weight.t())
# Let's create an instance of nn_Linear (4 input features, 2 output features).
linear = nn_Linear(4, 2)

# Let's define some input: a batch of 3 rows with 4 features each.
inputVar = torch.tensor([[0.2, 0.3, -0.1, 0.2],
                         [0.3, 0.1, 0.3, -0.4],
                         [0.1, 0.2, 0.4, -0.4]])

# Let's print the output of the linear layer.
outputVar = linear(inputVar)
print(outputVar.data)  # This contains y = Wx + b.
# outputVar is an intermediate result and backward() has not been called yet,
# so this prints None here. To inspect dy after backpropagation, call
# outputVar.retain_grad() before running backward().
print(outputVar.grad)

# This shows some of PyTorch's magic: parameters assigned to a module are
# registered automatically, so you can easily traverse them.
print([param.size() for param in linear.parameters()])

# Traversing the parameters is also how a manual gradient-descent step is
# written. Gradients only exist after a backward pass, so parameters without
# a gradient are skipped (at this point in the script that is all of them).
for param in linear.parameters():
    if param.grad is not None:
        param.data.add_(-0.001 * param.grad.data)
class nn_MSECriterion(nn.Module):
    """Squared-error loss between predictions and labels.

    Despite the "MSE" (mean squared error) name, the result is the *sum* of
    the squared element-wise differences; it is not divided by the number of
    elements.
    """

    def forward(self, predictions, labels):
        """Return the summed squared difference as a scalar tensor."""
        squared_error = (predictions - labels) ** 2
        return squared_error.sum()
# Training data: 3 input rows with 4 features each, and one 2-value target per row.
inputs = torch.tensor([[0.2, 0.3, -0.1, 0.2],
                       [0.3, 0.1, 0.3, -0.4],
                       [0.1, 0.2, 0.4, -0.4]])
labels = torch.tensor([[1.0, 1.0],
                       [2.0, 2.0],
                       [3.0, 3.0]])

# Now run 50 steps of plain gradient descent on a fresh layer.
linear = nn_Linear(4, 2)
# Makes a difference when the module has dropout or batchnorm, which behave
# differently during testing.
linear.train()
criterion = nn_MSECriterion()  # Stateless, so one instance serves every iteration.
learning_rate = 0.0001
for iteration in range(50):
    # backward() accumulates into .grad, so clear the previous iteration's
    # gradients first; otherwise each step would use the running sum.
    linear.zero_grad()
    predictions = linear(inputs)  # Forward pass.
    loss = criterion(predictions, labels)  # Loss function.
    loss.backward()  # This backpropagates errors all the way to the parameters.
    # SGD step. The update itself must not be recorded by autograd.
    with torch.no_grad():
        for param in linear.parameters():
            param -= learning_rate * param.grad
    print(iteration, loss.item())
まず気付くのは、勾配を計算するためのバックワード関数を自分で書く必要がないという点である。すべての演算をPyTorchの演算で実装している限り、バックワードパスは自動的に(無償で)得られる。また、PyTorchは基本的な層(といくつかの複雑な層)を、torch.nn内のモジュール(例えば nn.Sequential, nn.Linear, nn.Conv2d, nn.ReLU, nn.Sigmoid)として、さらにtorch.nn.functional内の関数(例えば F.relu, F.sigmoid など。層がパラメータを持たない場合に便利)として既に提供しているので、nn_Linearのような基本的な層を自分で実装する必要はない。とはいえ、独自のモジュールを作成し、そのforward関数をtorchの演算で記述すれば、新しい層や関数を実装できる。
Convolutional Neural Networks in PyTorch
PyTorchは畳み込み層を実装しており、事前訓練済みのモデル(VGG、ResNetなど)にも簡単にアクセスできる。したがって、Kerasやlua-torchと同じくらい便利だと言えよう。
import torchvision.models as models
# Build torchvision's AlexNet. pretrained = True requests pretrained weights
# (presumably downloaded and cached on first use -- confirm for the installed
# torchvision version).
alexnet = models.alexnet(pretrained = True)
import torchvision.transforms as transforms
from PIL import Image
import matplotlib.pyplot as plt
import json, string
%matplotlib inline
# 1. Define the appropriate image pre-processing function.
# Resize is the current name of the removed transforms.Scale. ToTensor turns
# the PIL image into a float tensor, which Normalize requires. CenterCrop(224)
# yields the 224x224 input the ImageNet-pretrained models are documented with.
preprocessFn = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

# 2. Load the imagenet class names (class index -> human-readable name).
with open('/root/.keras/models/imagenet_class_index.json') as class_index_file:
    imagenetClasses = {int(idx): entry[1]
                       for (idx, entry) in json.load(class_index_file).items()}

# 3. Forward a test image of the toaster.
# Never forget to set evaluation mode so Dropout layers don't add randomness.
alexnet.eval()
image = Image.open('test_image.jpg').convert('RGB')
# unsqueeze(0) adds a dummy batch dimension, which the model expects.
inputVar = preprocessFn(image).unsqueeze(0)
predictions = alexnet(inputVar)

# 4. Decode the top 10 classes predicted for this image.
# We need to apply softmax because the model outputs the last linear layer
# activations and not softmax scores. dim=1 normalizes over the class axis.
scores = nn.Softmax(dim=1)(predictions).detach()
probs, indices = scores.sort(dim=1, descending=True)
probs = probs[0, :10].numpy()
indices = indices[0, :10].numpy()
preds = [imagenetClasses[idx] + ': ' + str(prob) for (prob, idx) in zip(probs, indices)]

# 5. Show image and predictions.
# str.join replaces string.join, which does not exist in Python 3.
plt.title('\n'.join(preds))
plt.imshow(image)
元のチュートリアルのコードにある string.join(preds, '\n') はPython 3では動作しない(stringモジュールにjoin関数がない)ため、stringの部分を "\n " に変更して、"\n ".join(preds) と書き換える。
import torchvision.transforms as transforms
from PIL import Image
import matplotlib.pyplot as plt
import json, string
%matplotlib inline
# 1. Define the appropriate image pre-processing function.
# Resize is the current name of the removed transforms.Scale. ToTensor turns
# the PIL image into a float tensor, which Normalize requires. CenterCrop(224)
# yields the 224x224 input the ImageNet-pretrained models are documented with.
preprocessFn = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

# 2. Load the imagenet class names (class index -> human-readable name).
with open('/root/.keras/models/imagenet_class_index.json') as class_index_file:
    imagenetClasses = {int(idx): entry[1]
                       for (idx, entry) in json.load(class_index_file).items()}

# 3. Forward a test image of the toaster.
# Never forget to set evaluation mode so Dropout layers don't add randomness.
alexnet.eval()
image = Image.open('test_image.jpg').convert('RGB')
# unsqueeze(0) adds a dummy batch dimension, which the model expects.
inputVar = preprocessFn(image).unsqueeze(0)
predictions = alexnet(inputVar)

# 4. Decode the top 10 classes predicted for this image.
# We need to apply softmax because the model outputs the last linear layer
# activations and not softmax scores. dim=1 normalizes over the class axis.
scores = nn.Softmax(dim=1)(predictions).detach()
probs, indices = scores.sort(dim=1, descending=True)
probs = probs[0, :10].numpy()
indices = indices[0, :10].numpy()
preds = [imagenetClasses[idx] + ': ' + str(prob) for (prob, idx) in zip(probs, indices)]

# 5. Show image and predictions.
# Enlarge the figure and font first so the ten-line title stays readable;
# rcParams must be set before the figure is created.
plt.rcParams['figure.figsize'] = 20, 20
plt.rcParams["font.size"] = "20"
plt.title("\n ".join(preds))
plt.imshow(image)