今回はこのサイトのコードを使って画像分類を行う。画像分類モデルのベースにはAlexNetが使われている。

環境設定

まず、作業用フォルダーを作成する。

!mkdir example8

必要なPythonファイルを、作成したフォルダーにダウンロードする。

%download https://raw.githubusercontent.com/guerzh/pytorch_myalexnet/master/caffe_classes.py -f example8/caffe_classes.py

Downloaded 'example8/caffe_classes.py'.

画像分類のテストに使用する画像をダウンロードする（元ファイルは laska.png だが、ここでは laska.jpg という名前で保存している）。

%download https://github.com/guerzh/pytorch_myalexnet/raw/master/laska.png -f example8/laska.jpg

Downloaded 'example8/laska.jpg'.

ダウンロードした画像の中身をチェックする。

import matplotlib.pyplot as plt
import matplotlib.image as mpi

# Load the downloaded test image and draw it on the current axes as a visual
# sanity check before running the classifier.
img = mpi.imread('example8/laska.jpg')
plt.gca().imshow(img)
plt.show()

モデル構築と画像推論

import torch
import torchvision.models as models
import torchvision
from torch.autograd import Variable
import numpy as np
import matplotlib.pyplot as plt
from scipy.misc import imread, imresize
import torch.nn as nn
from example8.caffe_classes import class_names

class MyAlexNet(nn.Module):
    """AlexNet-style network initialised from torchvision's pretrained AlexNet.

    The layer layout uses the same ``features`` / ``classifier`` indices as
    ``torchvision.models.alexnet`` so that the pretrained convolution and
    linear parameters can be adopted layer by layer (see ``load_weights``).

    Args:
        num_classes: Number of outputs of the final linear layer. With the
            default of 1000 every weighted layer receives pretrained
            parameters. For any other value the final layer keeps its fresh
            initialisation, because the pretrained parameter has a different
            shape and is therefore not adopted.
    """

    def __init__(self, num_classes=1000):
        super(MyAlexNet, self).__init__()
        # Convolutional feature extractor.
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        # Fully connected head operating on the flattened 256 x 6 x 6 features.
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )

        self.load_weights()

    def load_weights(self):
        """Adopt the parameters of torchvision's pretrained AlexNet.

        Instantiates ``torchvision.models.alexnet(pretrained=True)`` and
        re-uses its weight/bias parameters for the convolution and linear
        layers of this model.
        """
        an_builtin = torchvision.models.alexnet(pretrained=True)

        # Indices of the layers that own weights inside each nn.Sequential.
        features_weight_i = [0, 3, 6, 8, 10]
        classifier_weight_i = [1, 4, 6]

        self._adopt_parameters(self.features, an_builtin.features,
                               features_weight_i)
        self._adopt_parameters(self.classifier, an_builtin.classifier,
                               classifier_weight_i)

    @staticmethod
    def _adopt_parameters(target, source, indices):
        """Share ``source`` layer parameters with the matching ``target`` layers.

        Args:
            target: Indexable container of layers that receive parameters.
            source: Indexable container of layers that provide parameters.
            indices: Positions of the layers to process in both containers.

        Layers whose weight shapes differ are skipped. Without this check a
        model built with a custom ``num_classes`` would have its final layer
        silently replaced by the pretrained one. The parameter objects are
        assigned by reference, not copied.
        """
        for i in indices:
            if target[i].weight.size() != source[i].weight.size():
                continue
            target[i].weight = source[i].weight
            target[i].bias = source[i].bias

    def forward(self, x):
        """Return the classifier outputs (logits) for a batch of images.

        Args:
            x: Input batch. The feature extractor must reduce each sample to
                256 * 6 * 6 values, otherwise the ``view`` below raises.

        Returns:
            The output of ``self.classifier`` for the flattened features.
        """
        x = self.features(x)
        # Flatten everything except the batch dimension.
        x = x.view(x.size(0), 256 * 6 * 6)
        x = self.classifier(x)
        return x

# model_orig = torchvision.models.alexnet(pretrained=True)
# Build the network (its constructor loads the pretrained AlexNet weights) and
# switch to evaluation mode so that the Dropout layers are disabled.
model = MyAlexNet()
model.eval()

# Read the test image and keep only the first three channels (drops an alpha
# channel if the file has one).
# NOTE: `imread` here is scipy.misc.imread, which is deprecated; it produces
# the DeprecationWarning shown in this cell's recorded output.
im = imread('example8/laska.jpg')[:,:,:3]
# Normalise: subtract the global mean, then scale so the largest magnitude is 1.
# NOTE(review): torchvision documents per-channel ImageNet mean/std
# normalisation for its pretrained models; this differs. Confirm it is intended.
im = im - np.mean(im.flatten())
im = im/np.max(np.abs(im.flatten()))

# Move the last (channel) axis to the front and convert to float32.
im = np.rollaxis(im, -1).astype(np.float32)

# Add a leading batch dimension of size 1 and wrap the tensor for the model.
im_v = Variable(torch.from_numpy(im).unsqueeze_(0), requires_grad=False)    
# No `dim` argument: this triggers the "Implicit dimension choice for softmax"
# UserWarning shown in this cell's recorded output.
softmax = torch.nn.Softmax()

# Class probabilities for the single image, and class indices sorted by
# ascending probability.
all_probs = softmax(model.forward(im_v)).data.numpy()[0]
sorted_ans = np.argsort(all_probs)

# Negative indices walk the sorted array from the end: the five most probable
# classes, best first.
for i in range(-1, -6, -1):
    print("Answer:", class_names[sorted_ans[i]], ", Prob:", all_probs[sorted_ans[i]])

# Top-1 prediction. Each of the next two lines runs another forward pass on
# the same input, so the model is evaluated three times in this cell.
ans = np.argmax(model.forward(im_v).data.numpy())
prob_ans = softmax(model.forward(im_v)).data.numpy()[0][ans]
print("Top Answer:", class_names[ans], "P(ans) = ", prob_ans)

/root/.pyenv/versions/py365/lib/python3.6/site-packages/ipykernel_launcher.py:64: DeprecationWarning: `imread` is deprecated!
`imread` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``imageio.imread`` instead.
/root/.pyenv/versions/py365/lib/python3.6/site-packages/ipykernel_launcher.py:73: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.

Answer: weasel , Prob: 0.9786052
Answer: mink , Prob: 0.015375151
Answer: polecat, fitch, foulmart, foumart, Mustela putorius , Prob: 0.003676169
Answer: black-footed ferret, ferret, Mustela nigripes , Prob: 0.0012465762
Answer: hare , Prob: 0.00025085305
Top Answer: weasel P(ans) =  0.9786052

/root/.pyenv/versions/py365/lib/python3.6/site-packages/ipykernel_launcher.py:80: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.

上で出た2種類の警告（`scipy.misc.imread` の非推奨警告と、Softmax の `dim` 未指定の警告）を修正する。

import torch
import torchvision.models as models
import torchvision
from torch.autograd import Variable
import numpy as np
import matplotlib.pyplot as plt
from scipy.misc import imresize
import imageio
import torch.nn as nn
from example8.caffe_classes import class_names

class MyAlexNet(nn.Module):
    """AlexNet-style network initialised from torchvision's pretrained AlexNet.

    The layer layout uses the same ``features`` / ``classifier`` indices as
    ``torchvision.models.alexnet`` so that the pretrained convolution and
    linear parameters can be adopted layer by layer (see ``load_weights``).

    Args:
        num_classes: Number of outputs of the final linear layer. With the
            default of 1000 every weighted layer receives pretrained
            parameters. For any other value the final layer keeps its fresh
            initialisation, because the pretrained parameter has a different
            shape and is therefore not adopted.
    """

    def __init__(self, num_classes=1000):
        super(MyAlexNet, self).__init__()
        # Convolutional feature extractor.
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        # Fully connected head operating on the flattened 256 x 6 x 6 features.
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )

        self.load_weights()

    def load_weights(self):
        """Adopt the parameters of torchvision's pretrained AlexNet.

        Instantiates ``torchvision.models.alexnet(pretrained=True)`` and
        re-uses its weight/bias parameters for the convolution and linear
        layers of this model.
        """
        an_builtin = torchvision.models.alexnet(pretrained=True)

        # Indices of the layers that own weights inside each nn.Sequential.
        features_weight_i = [0, 3, 6, 8, 10]
        classifier_weight_i = [1, 4, 6]

        self._adopt_parameters(self.features, an_builtin.features,
                               features_weight_i)
        self._adopt_parameters(self.classifier, an_builtin.classifier,
                               classifier_weight_i)

    @staticmethod
    def _adopt_parameters(target, source, indices):
        """Share ``source`` layer parameters with the matching ``target`` layers.

        Args:
            target: Indexable container of layers that receive parameters.
            source: Indexable container of layers that provide parameters.
            indices: Positions of the layers to process in both containers.

        Layers whose weight shapes differ are skipped. Without this check a
        model built with a custom ``num_classes`` would have its final layer
        silently replaced by the pretrained one. The parameter objects are
        assigned by reference, not copied.
        """
        for i in indices:
            if target[i].weight.size() != source[i].weight.size():
                continue
            target[i].weight = source[i].weight
            target[i].bias = source[i].bias

    def forward(self, x):
        """Return the classifier outputs (logits) for a batch of images.

        Args:
            x: Input batch. The feature extractor must reduce each sample to
                256 * 6 * 6 values, otherwise the ``view`` below raises.

        Returns:
            The output of ``self.classifier`` for the flattened features.
        """
        x = self.features(x)
        # Flatten everything except the batch dimension.
        x = x.view(x.size(0), 256 * 6 * 6)
        x = self.classifier(x)
        return x

# model_orig = torchvision.models.alexnet(pretrained=True)
# Build the network (its constructor loads the pretrained AlexNet weights) and
# switch to evaluation mode so that the Dropout layers are disabled.
model = MyAlexNet()
model.eval()

# Read the test image and keep only the first three channels (drops an alpha
# channel if the file has one).
im = imageio.imread('example8/laska.jpg')[:,:,:3]
# Normalise: subtract the global mean, then scale so the largest magnitude is 1.
# NOTE(review): torchvision documents per-channel ImageNet mean/std
# normalisation for its pretrained models; this differs. Confirm it is intended.
im = im - im.mean()
im = im / np.abs(im).max()

# Move the last (channel) axis to the front and convert to float32.
im = np.rollaxis(im, -1).astype(np.float32)

# Add a leading batch dimension of size 1 and wrap the tensor for the model.
im_v = Variable(torch.from_numpy(im).unsqueeze_(0), requires_grad=False)
softmax = torch.nn.Softmax(dim=-1)

# Run the network once and reuse the result. The model is in eval mode, so
# repeated forward passes on the same input would only recompute the same
# values. Calling the module (instead of .forward) is the supported entry point.
logits = model(im_v)
all_probs = softmax(logits).data.numpy()[0]
sorted_ans = np.argsort(all_probs)

# argsort is ascending, so walk the last five entries backwards: the five most
# probable classes, best first.
for idx in sorted_ans[-1:-6:-1]:
    print("Answer:", class_names[idx], ", Prob:", all_probs[idx])

# Top-1 prediction, taken from the raw outputs as before.
ans = np.argmax(logits.data.numpy())
prob_ans = all_probs[ans]
print("Top Answer:", class_names[ans], "P(ans) = ", prob_ans)

Answer: weasel , Prob: 0.9786052
Answer: mink , Prob: 0.015375151
Answer: polecat, fitch, foulmart, foumart, Mustela putorius , Prob: 0.003676169
Answer: black-footed ferret, ferret, Mustela nigripes , Prob: 0.0012465762
Answer: hare , Prob: 0.00025085305
Top Answer: weasel P(ans) =  0.9786052

今度はもう少し難しい画像を使ってテストする。

%download https://github.com/guerzh/pytorch_myalexnet/raw/master/kiwi227.png -f example8/kiwi227.png

Downloaded 'example8/kiwi227.png'.

import matplotlib.pyplot as plt
import matplotlib.image as mpi

# Load the second, harder test image and draw it on the current axes so it can
# be inspected before classification.
img = mpi.imread('example8/kiwi227.png')
plt.gca().imshow(img)
plt.show()

import torch
import torchvision.models as models
import torchvision
from torch.autograd import Variable
import numpy as np
import matplotlib.pyplot as plt
from scipy.misc import imresize
import imageio
import torch.nn as nn
from example8.caffe_classes import class_names

class MyAlexNet(nn.Module):
    """AlexNet-style network initialised from torchvision's pretrained AlexNet.

    The layer layout uses the same ``features`` / ``classifier`` indices as
    ``torchvision.models.alexnet`` so that the pretrained convolution and
    linear parameters can be adopted layer by layer (see ``load_weights``).

    Args:
        num_classes: Number of outputs of the final linear layer. With the
            default of 1000 every weighted layer receives pretrained
            parameters. For any other value the final layer keeps its fresh
            initialisation, because the pretrained parameter has a different
            shape and is therefore not adopted.
    """

    def __init__(self, num_classes=1000):
        super(MyAlexNet, self).__init__()
        # Convolutional feature extractor.
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        # Fully connected head operating on the flattened 256 x 6 x 6 features.
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )

        self.load_weights()

    def load_weights(self):
        """Adopt the parameters of torchvision's pretrained AlexNet.

        Instantiates ``torchvision.models.alexnet(pretrained=True)`` and
        re-uses its weight/bias parameters for the convolution and linear
        layers of this model.
        """
        an_builtin = torchvision.models.alexnet(pretrained=True)

        # Indices of the layers that own weights inside each nn.Sequential.
        features_weight_i = [0, 3, 6, 8, 10]
        classifier_weight_i = [1, 4, 6]

        self._adopt_parameters(self.features, an_builtin.features,
                               features_weight_i)
        self._adopt_parameters(self.classifier, an_builtin.classifier,
                               classifier_weight_i)

    @staticmethod
    def _adopt_parameters(target, source, indices):
        """Share ``source`` layer parameters with the matching ``target`` layers.

        Args:
            target: Indexable container of layers that receive parameters.
            source: Indexable container of layers that provide parameters.
            indices: Positions of the layers to process in both containers.

        Layers whose weight shapes differ are skipped. Without this check a
        model built with a custom ``num_classes`` would have its final layer
        silently replaced by the pretrained one. The parameter objects are
        assigned by reference, not copied.
        """
        for i in indices:
            if target[i].weight.size() != source[i].weight.size():
                continue
            target[i].weight = source[i].weight
            target[i].bias = source[i].bias

    def forward(self, x):
        """Return the classifier outputs (logits) for a batch of images.

        Args:
            x: Input batch. The feature extractor must reduce each sample to
                256 * 6 * 6 values, otherwise the ``view`` below raises.

        Returns:
            The output of ``self.classifier`` for the flattened features.
        """
        x = self.features(x)
        # Flatten everything except the batch dimension.
        x = x.view(x.size(0), 256 * 6 * 6)
        x = self.classifier(x)
        return x

# model_orig = torchvision.models.alexnet(pretrained=True)
# Build the network (its constructor loads the pretrained AlexNet weights) and
# switch to evaluation mode so that the Dropout layers are disabled.
model = MyAlexNet()
model.eval()

# Read the test image and keep only the first three channels (drops an alpha
# channel if the file has one).
im = imageio.imread('example8/kiwi227.png')[:,:,:3]
# Normalise: subtract the global mean, then scale so the largest magnitude is 1.
# NOTE(review): torchvision documents per-channel ImageNet mean/std
# normalisation for its pretrained models; this differs. Confirm it is intended.
im = im - im.mean()
im = im / np.abs(im).max()

# Move the last (channel) axis to the front and convert to float32.
im = np.rollaxis(im, -1).astype(np.float32)

# Add a leading batch dimension of size 1 and wrap the tensor for the model.
im_v = Variable(torch.from_numpy(im).unsqueeze_(0), requires_grad=False)
softmax = torch.nn.Softmax(dim=-1)

# Run the network once and reuse the result. The model is in eval mode, so
# repeated forward passes on the same input would only recompute the same
# values. Calling the module (instead of .forward) is the supported entry point.
logits = model(im_v)
all_probs = softmax(logits).data.numpy()[0]
sorted_ans = np.argsort(all_probs)

# argsort is ascending, so walk the last five entries backwards: the five most
# probable classes, best first.
for idx in sorted_ans[-1:-6:-1]:
    print("Answer:", class_names[idx], ", Prob:", all_probs[idx])

# Top-1 prediction, taken from the raw outputs as before.
ans = np.argmax(logits.data.numpy())
prob_ans = all_probs[ans]
print("Top Answer:", class_names[ans], "P(ans) = ", prob_ans)

Answer: porcupine, hedgehog , Prob: 0.37629023
Answer: mink , Prob: 0.1874968
Answer: otter , Prob: 0.18650346
Answer: beaver , Prob: 0.11107038
Answer: echidna, spiny anteater, anteater , Prob: 0.063236944
Top Answer: porcupine, hedgehog P(ans) =  0.37629023

確かにハリネズミやヤマアラシに見えないこともない。ドリルモグラとも言える。