# CS231n：Multiclass Support Vector Machine exercise

Stanford CS231n assignment1の続き

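このexerciseの目的：

1. SVM(support vector machine：サポートベクターマシン)用の完全ベクトル化loss function(損失関数)を実装する。
2. その解析的勾配(analytic gradient)の完全ベクトル化表現を実装する。
3. 数値勾配(numerical gradient)を使って実装をチェックする。
4. 学習率と正則化強度の調整にvalidation set(バリデーションセット)を使う。
5. SGD(確率的勾配降下法)でloss functionを最適化する。
6. 最終的なlearned weights(学習済み重み)を可視化する。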
スポンサーリンク

## Support Vector Machine (SVM) exercise

まずは必要なモジュールをロードする。

# Run some setup code for this notebook.

import random
import numpy as np
import matplotlib.pyplot as plt

from __future__ import print_function

# This is a bit of magic to make matplotlib figures appear inline in the
# notebook rather than in a new window.
%matplotlib inline
plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

# Some more magic so that the notebook will reload external python modules;

スポンサーリンク

# Load the raw CIFAR-10 data.
cifar10_dir = 'cs231n/datasets/cifar-10-batches-py'

# Cleaning up variables to prevent loading data multiple times (which may cause memory issue)
try:
    del X_train, y_train
    del X_test, y_test
except NameError:
    # First run of this cell: the arrays do not exist yet, nothing to free.
    pass

# NOTE(review): load_CIFAR10 is expected to come from cs231n.data_utils; its
# import is not visible near this cell -- confirm it is in scope.
X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

# As a sanity check, we print out the size of the training and test data.
print('Training data shape: ', X_train.shape)
print('Training labels shape: ', y_train.shape)
print('Test data shape: ', X_test.shape)
print('Test labels shape: ', y_test.shape)

Training data shape:  (50000, 32, 32, 3)
Training labels shape:  (50000,)
Test data shape:  (10000, 32, 32, 3)
Test labels shape:  (10000,)

# Visualize some examples from the dataset.
# We show a few examples of training images from each class.
classes = ['plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
num_classes = len(classes)
samples_per_class = 7
for y, cls in enumerate(classes):
    # Indices of all training images labelled with class y, then a random
    # sample of them without repetition.
    idxs = np.flatnonzero(y_train == y)
    idxs = np.random.choice(idxs, samples_per_class, replace=False)
    for i, idx in enumerate(idxs):
        # Subplots are numbered row by row: row i, column y (1-based).
        plt_idx = i * num_classes + y + 1
        plt.subplot(samples_per_class, num_classes, plt_idx)
        plt.imshow(X_train[idx].astype('uint8'))
        plt.axis('off')
        if i == 0:
            # Only the top row carries the class name as a column header.
            plt.title(cls)
plt.show()

# Split the data into train, val, and test sets. In addition we will
# create a small development set as a subset of the training data;
# we can use this for development so our code runs faster.
num_training = 49000
num_validation = 1000
num_test = 1000
num_dev = 500

# Our validation set will be num_validation points from the original
# training set.
# (Taken before X_train is truncated below, so the two sets do not overlap.)
mask = range(num_training, num_training + num_validation)
X_val = X_train[mask]
y_val = y_train[mask]

# Our training set will be the first num_train points from the original
# training set.
mask = range(num_training)
X_train = X_train[mask]
y_train = y_train[mask]

# We will also make a development set, which is a small subset of
# the training set.
mask = np.random.choice(num_training, num_dev, replace=False)
X_dev = X_train[mask]
y_dev = y_train[mask]

# We use the first num_test points of the original test set as our
# test set.
mask = range(num_test)
X_test = X_test[mask]
y_test = y_test[mask]

print('Train data shape: ', X_train.shape)
print('Train labels shape: ', y_train.shape)
print('Validation data shape: ', X_val.shape)
print('Validation labels shape: ', y_val.shape)
print('Test data shape: ', X_test.shape)
print('Test labels shape: ', y_test.shape)

Train data shape:  (49000, 32, 32, 3)
Train labels shape:  (49000,)
Validation data shape:  (1000, 32, 32, 3)
Validation labels shape:  (1000,)
Test data shape:  (1000, 32, 32, 3)
Test labels shape:  (1000,)

# Preprocessing: reshape the image data into rows
# Each array keeps its first dimension (number of images); -1 lets NumPy
# flatten all remaining dimensions into a single row per image.
X_train = np.reshape(X_train, (X_train.shape[0], -1))
X_val = np.reshape(X_val, (X_val.shape[0], -1))
X_test = np.reshape(X_test, (X_test.shape[0], -1))
X_dev = np.reshape(X_dev, (X_dev.shape[0], -1))

# As a sanity check, print out the shapes of the data
print('Training data shape: ', X_train.shape)
print('Validation data shape: ', X_val.shape)
print('Test data shape: ', X_test.shape)
print('dev data shape: ', X_dev.shape)

Training data shape:  (49000, 3072)
Validation data shape:  (1000, 3072)
Test data shape:  (1000, 3072)
dev data shape:  (500, 3072)

# Preprocessing: subtract the mean image
# first: compute the image mean based on the training data
# (axis=0 averages over the rows of X_train, giving one mean value per column)
mean_image = np.mean(X_train, axis=0)
print(mean_image[:10]) # print a few of the elements
plt.figure(figsize=(4,4))
# Reshaping to (32, 32, 3) turns the flat mean vector back into an image for display.
plt.imshow(mean_image.reshape((32,32,3)).astype('uint8')) # visualize the mean image
plt.show()

[130.64189796 135.98173469 132.47391837 130.05569388 135.34804082
 131.75402041 130.96055102 136.14328571 132.47636735 131.48467347]

# second: subtract the mean image from train and test data
# Centre every split with the mean computed from the training data only.
X_train -= mean_image
X_val -= mean_image
X_test -= mean_image
X_dev -= mean_image

# third: append the bias dimension of ones (i.e. bias trick) so that our SVM
# only has to worry about optimizing a single weight matrix W.
# The column of ones needs one entry per row, hence shape[0].
X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))])
X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))])
X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))])
X_dev = np.hstack([X_dev, np.ones((X_dev.shape[0], 1))])

print(X_train.shape, X_val.shape, X_test.shape, X_dev.shape)

(49000, 3073) (1000, 3073) (1000, 3073) (500, 3073)

スポンサーリンク

## SVM Classifier

codeは全てcs231n/classifiers/linear_svm.pyに付け足す。

# Evaluate the naive implementation of the loss we provided for you:
from cs231n.classifiers.linear_svm import svm_loss_naive
import time

# generate a random SVM weight matrix of small numbers
# (3073 rows x 10 columns; scaling by 0.0001 keeps the initial weights near zero)
W = np.random.randn(3073, 10) * 0.0001

# svm_loss_naive returns a (loss, gradient) pair; the last argument is
# presumably the regularization strength -- confirm against linear_svm.py.
loss, grad = svm_loss_naive(W, X_dev, y_dev, 0.000005)
print('loss: %f' % (loss, ))

loss: 9.322196


# Once you've implemented the gradient, recompute it with the code below
# and gradient check it with the function we provided for you
from cs231n.gradient_check import grad_check_sparse

# Compute the loss and its gradient at W.
loss, grad = svm_loss_naive(W, X_dev, y_dev, 0.0)

# Numerically compute the gradient along several randomly chosen dimensions, and
# compare them with your analytically computed gradient. The numbers should match
# almost exactly along all dimensions.
# svm_loss_naive returns (loss, grad); the numerical check needs only the
# scalar loss, hence the [0].
f = lambda w: svm_loss_naive(w, X_dev, y_dev, 0.0)[0]
grad_numerical = grad_check_sparse(f, W, grad)

# do the gradient check once again with regularization turned on
# you didn't forget the regularization gradient did you?
loss, grad = svm_loss_naive(W, X_dev, y_dev, 5e1)
f = lambda w: svm_loss_naive(w, X_dev, y_dev, 5e1)[0]
grad_numerical = grad_check_sparse(f, W, grad)

numerical: -22.962130 analytic: -22.962130, relative error: 8.842977e-12
numerical: 5.158193 analytic: 5.158193, relative error: 6.248406e-11
numerical: -8.738529 analytic: -8.738529, relative error: 1.082561e-11
numerical: 10.029621 analytic: 10.029621, relative error: 1.859099e-11
numerical: -11.261281 analytic: -11.261281, relative error: 6.761376e-12
numerical: 17.990444 analytic: 17.990444, relative error: 6.204852e-12
numerical: -10.340305 analytic: -10.340305, relative error: 2.165485e-11
numerical: -11.366335 analytic: -11.366335, relative error: 3.830299e-11
numerical: 20.836045 analytic: 20.836045, relative error: 4.294075e-12
numerical: 3.908802 analytic: 3.908802, relative error: 1.055978e-11
numerical: 1.443579 analytic: 1.447720, relative error: 1.432201e-03
numerical: -16.904036 analytic: -16.905006, relative error: 2.866900e-05
numerical: -15.416151 analytic: -15.410540, relative error: 1.820295e-04
numerical: 23.967411 analytic: 23.963517, relative error: 8.123411e-05
numerical: 1.576035 analytic: 1.576861, relative error: 2.621245e-04
numerical: -5.902289 analytic: -5.899145, relative error: 2.664683e-04
numerical: -24.601918 analytic: -24.600075, relative error: 3.747627e-05
numerical: -4.140559 analytic: -4.139986, relative error: 6.912845e-05
numerical: -4.091359 analytic: -4.093339, relative error: 2.419414e-04
numerical: 13.344288 analytic: 13.344272, relative error: 6.312556e-07


# Next implement the function svm_loss_vectorized; for now only compute the loss;
# we will implement the gradient in a moment.
# Time the naive implementation on the development set.
tic = time.time()
loss_naive, grad_naive = svm_loss_naive(W, X_dev, y_dev, 0.000005)
toc = time.time()
print('Naive loss: %e computed in %fs' % (loss_naive, toc - tic))

# Time the vectorized implementation with the same inputs; its second return
# value is discarded here.
from cs231n.classifiers.linear_svm import svm_loss_vectorized
tic = time.time()
loss_vectorized, _ = svm_loss_vectorized(W, X_dev, y_dev, 0.000005)
toc = time.time()
print('Vectorized loss: %e computed in %fs' % (loss_vectorized, toc - tic))

# The losses should match but your vectorized implementation should be much faster.
print('difference: %f' % (loss_naive - loss_vectorized))

Naive loss: 9.322196e+00 computed in 0.172648s
Vectorized loss: 9.322196e+00 computed in 0.019609s
difference: 0.000000


svm_loss_vectorized関数を完成させる。

# Complete the implementation of svm_loss_vectorized, and compute the gradient
# of the loss function in a vectorized way.

# The naive implementation and the vectorized implementation should match, but
# the vectorized version should still be much faster.
tic = time.time()
_, grad_naive = svm_loss_naive(W, X_dev, y_dev, 0.000005)
toc = time.time()
print('Naive loss and gradient: computed in %fs' % (toc - tic))

tic = time.time()
_, grad_vectorized = svm_loss_vectorized(W, X_dev, y_dev, 0.000005)
toc = time.time()
print('Vectorized loss and gradient: computed in %fs' % (toc - tic))

# The loss is a single number, so it is easy to compare the values computed
# by the two implementations. The gradient on the other hand is a matrix, so
# we use the Frobenius norm to compare them.
difference = np.linalg.norm(grad_naive - grad_vectorized, ord='fro')
print('difference: %f' % difference)

Naive loss and gradient: computed in 0.135903s
Vectorized loss and gradient: computed in 0.018575s
difference: 0.000000

スポンサーリンク

# In the file linear_classifier.py, implement SGD in the function
# LinearClassifier.train() and then run it with the code below.
from cs231n.classifiers import LinearSVM

svm = LinearSVM()

# Wall-clock the training run; whatever train() returns is kept as loss_hist.
tic = time.time()
loss_hist = svm.train(
    X_train,
    y_train,
    learning_rate=1e-7,
    reg=2.5e4,
    num_iters=1500,
    verbose=True,
)
toc = time.time()
print('That took %fs' % (toc - tic))

iteration 0 / 1500: loss 403.329780
iteration 100 / 1500: loss 239.992805
iteration 200 / 1500: loss 145.825788
iteration 300 / 1500: loss 89.932865
iteration 400 / 1500: loss 56.568289
iteration 500 / 1500: loss 36.068400
iteration 600 / 1500: loss 23.121211
iteration 700 / 1500: loss 16.256138
iteration 800 / 1500: loss 11.828638
iteration 900 / 1500: loss 9.550678
iteration 1000 / 1500: loss 7.093021
iteration 1100 / 1500: loss 6.556628
iteration 1200 / 1500: loss 5.832984
iteration 1300 / 1500: loss 5.488553
iteration 1400 / 1500: loss 4.796923
That took 13.210262s

import matplotlib.pylab as pylab
# Enlarge the figure and font for this plot (these rcParams stay in effect afterwards).
pylab.rcParams['figure.figsize'] = 20, 10
pylab.rcParams["font.size"] = "30"
# A useful debugging strategy is to plot the loss as a function of
# iteration number:

plt.plot(loss_hist)
plt.xlabel('Iteration number')
plt.ylabel('Loss value')
plt.show()

LinearSVM.predict関数を完成させて、トレーニング・バリデーションセットでのパフォーマンスを評価する。

# Write the LinearSVM.predict function and evaluate the performance on both the
# training and validation set
# Accuracy is the mean of the element-wise comparison between labels and predictions.
y_train_pred = svm.predict(X_train)
print('training accuracy: %f' % (np.mean(y_train == y_train_pred), ))
y_val_pred = svm.predict(X_val)
print('validation accuracy: %f' % (np.mean(y_val == y_val_pred), ))

training accuracy: 0.378796
validation accuracy: 0.384000


The following (solution) code was adapted from here

# Use the validation set to tune hyperparameters (regularization strength and
# learning rate). You should experiment with different ranges for the learning
# rates and regularization strengths; if you are careful you should be able to
# get a classification accuracy of about 0.4 on the validation set.
# NOTE: 5e-5 is far larger than the other learning rates; the recorded run
# shows overflow warnings and near-chance accuracy for it.
learning_rates = [1e-7, 2e-7, 3e-7, 5e-5, 8e-7]
regularization_strengths = [1e4, 2e4, 3e4, 4e4, 5e4, 6e4, 7e4, 8e4, 1e5]

# results is dictionary mapping tuples of the form
# (learning_rate, regularization_strength) to tuples of the form
# (training_accuracy, validation_accuracy). The accuracy is simply the fraction
# of data points that are correctly classified.
results = {}
best_val = -1   # The highest validation accuracy that we have seen so far.
best_svm = None # The LinearSVM object that achieved the highest validation rate.

################################################################################
# TODO:                                                                        #
# Write code that chooses the best hyperparameters by tuning on the validation #
# set. For each combination of hyperparameters, train a linear SVM on the      #
# training set, compute its accuracy on the training and validation sets, and  #
# store these numbers in the results dictionary. In addition, store the best   #
# validation accuracy in best_val and the LinearSVM object that achieves this  #
# accuracy in best_svm.                                                        #
#                                                                              #
# Hint: You should use a small value for num_iters as you develop your         #
# validation code so that the SVMs don't take much time to train; once you are #
# confident that your validation code works, you should rerun the validation   #
# code with a larger value for num_iters.                                      #
################################################################################
for learning_rate in learning_rates:
    for regularization_strength in regularization_strengths:
        # A fresh classifier per combination so runs do not share weights.
        svm = LinearSVM()
        tic = time.time()
        loss_hist = svm.train(X_train, y_train, learning_rate=learning_rate,
                              reg=regularization_strength, num_iters=1500, verbose=False)
        toc = time.time()
        print('training took %fs' % (toc - tic))

        y_train_pred = svm.predict(X_train)
        y_val_pred = svm.predict(X_val)

        y_train_accuracy = np.mean(y_train_pred == y_train)
        y_val_accuracy = np.mean(y_val_pred == y_val)

        results[(learning_rate, regularization_strength)] = (y_train_accuracy, y_val_accuracy)
        # Keep the model with the strictly highest validation accuracy so far.
        if y_val_accuracy > best_val:
            best_val = y_val_accuracy
            best_svm = svm
################################################################################
#                              END OF YOUR CODE                                #
################################################################################

# Print out results.
for lr, reg in sorted(results):
    train_accuracy, val_accuracy = results[(lr, reg)]
    print('lr %e reg %e train accuracy: %f val accuracy: %f' % (
        lr, reg, train_accuracy, val_accuracy))

print('best validation accuracy achieved during cross-validation: %f' % best_val)

training took 14.846310s
training took 15.161544s
training took 14.746502s
training took 14.597927s
training took 14.628679s
training took 14.966994s
training took 14.913861s
training took 14.784116s
training took 14.560508s
training took 14.804074s
training took 14.588058s
training took 14.768435s
training took 14.865593s
training took 14.886837s
training took 14.718396s
training took 14.815547s
training took 14.887396s
training took 14.996877s
training took 14.904103s
training took 14.972387s
training took 14.975579s
training took 14.999545s
training took 14.950944s
training took 14.795421s
training took 14.598305s
training took 14.867305s
training took 14.864840s
training took 14.809388s
training took 14.590193s
training took 14.730036s
training took 14.640054s

/root/cs231n.github.io/assignments/2018/assignment1/cs231n/classifiers/linear_svm.py:92: RuntimeWarning: overflow encountered in double_scalars
loss += 0.5*reg*np.sum(W*W)
/root/.pyenv/versions/3.6.5/envs/py365/lib/python3.6/site-packages/numpy/core/_methods.py:32: RuntimeWarning: overflow encountered in reduce
return umr_sum(a, axis, dtype, out, keepdims)
/root/cs231n.github.io/assignments/2018/assignment1/cs231n/classifiers/linear_svm.py:92: RuntimeWarning: overflow encountered in multiply
loss += 0.5*reg*np.sum(W*W)

training took 14.773416s

/root/cs231n.github.io/assignments/2018/assignment1/cs231n/classifiers/linear_svm.py:84: RuntimeWarning: overflow encountered in subtract
y].reshape(-1,1)+1)
/root/cs231n.github.io/assignments/2018/assignment1/cs231n/classifiers/linear_svm.py:113: RuntimeWarning: overflow encountered in multiply
dW += reg*W
/root/cs231n.github.io/assignments/2018/assignment1/cs231n/classifiers/linear_svm.py:84: RuntimeWarning: invalid value encountered in subtract
y].reshape(-1,1)+1)
/root/cs231n.github.io/assignments/2018/assignment1/cs231n/classifiers/linear_svm.py:107: RuntimeWarning: invalid value encountered in greater
margins[margins>0] = 1

training took 14.568501s
training took 14.976347s
training took 14.991391s
training took 15.021895s
training took 14.964431s
training took 15.124234s
training took 14.611161s
training took 14.969440s
training took 14.615703s
training took 15.116109s
training took 15.030473s
training took 15.261901s
training took 14.805158s
lr 1.000000e-07 reg 1.000000e+04 train accuracy: 0.373959 val accuracy: 0.384000
lr 1.000000e-07 reg 2.000000e+04 train accuracy: 0.382714 val accuracy: 0.385000
lr 1.000000e-07 reg 3.000000e+04 train accuracy: 0.381265 val accuracy: 0.391000
lr 1.000000e-07 reg 4.000000e+04 train accuracy: 0.369653 val accuracy: 0.375000
lr 1.000000e-07 reg 5.000000e+04 train accuracy: 0.375816 val accuracy: 0.375000
lr 1.000000e-07 reg 6.000000e+04 train accuracy: 0.364122 val accuracy: 0.370000
lr 1.000000e-07 reg 7.000000e+04 train accuracy: 0.362694 val accuracy: 0.371000
lr 1.000000e-07 reg 8.000000e+04 train accuracy: 0.358429 val accuracy: 0.369000
lr 1.000000e-07 reg 1.000000e+05 train accuracy: 0.355694 val accuracy: 0.359000
lr 2.000000e-07 reg 1.000000e+04 train accuracy: 0.385245 val accuracy: 0.389000
lr 2.000000e-07 reg 2.000000e+04 train accuracy: 0.372082 val accuracy: 0.402000
lr 2.000000e-07 reg 3.000000e+04 train accuracy: 0.372837 val accuracy: 0.385000
lr 2.000000e-07 reg 4.000000e+04 train accuracy: 0.369531 val accuracy: 0.360000
lr 2.000000e-07 reg 5.000000e+04 train accuracy: 0.355163 val accuracy: 0.363000
lr 2.000000e-07 reg 6.000000e+04 train accuracy: 0.362143 val accuracy: 0.372000
lr 2.000000e-07 reg 7.000000e+04 train accuracy: 0.355918 val accuracy: 0.366000
lr 2.000000e-07 reg 8.000000e+04 train accuracy: 0.358776 val accuracy: 0.364000
lr 2.000000e-07 reg 1.000000e+05 train accuracy: 0.345694 val accuracy: 0.367000
lr 3.000000e-07 reg 1.000000e+04 train accuracy: 0.382204 val accuracy: 0.381000
lr 3.000000e-07 reg 2.000000e+04 train accuracy: 0.371265 val accuracy: 0.376000
lr 3.000000e-07 reg 3.000000e+04 train accuracy: 0.365857 val accuracy: 0.378000
lr 3.000000e-07 reg 4.000000e+04 train accuracy: 0.354429 val accuracy: 0.360000
lr 3.000000e-07 reg 5.000000e+04 train accuracy: 0.342653 val accuracy: 0.360000
lr 3.000000e-07 reg 6.000000e+04 train accuracy: 0.336592 val accuracy: 0.352000
lr 3.000000e-07 reg 7.000000e+04 train accuracy: 0.347449 val accuracy: 0.369000
lr 3.000000e-07 reg 8.000000e+04 train accuracy: 0.339347 val accuracy: 0.335000
lr 3.000000e-07 reg 1.000000e+05 train accuracy: 0.342000 val accuracy: 0.338000
lr 8.000000e-07 reg 1.000000e+04 train accuracy: 0.335673 val accuracy: 0.344000
lr 8.000000e-07 reg 2.000000e+04 train accuracy: 0.295980 val accuracy: 0.313000
lr 8.000000e-07 reg 3.000000e+04 train accuracy: 0.310388 val accuracy: 0.315000
lr 8.000000e-07 reg 4.000000e+04 train accuracy: 0.300673 val accuracy: 0.317000
lr 8.000000e-07 reg 5.000000e+04 train accuracy: 0.316265 val accuracy: 0.328000
lr 8.000000e-07 reg 6.000000e+04 train accuracy: 0.328816 val accuracy: 0.331000
lr 8.000000e-07 reg 7.000000e+04 train accuracy: 0.311735 val accuracy: 0.317000
lr 8.000000e-07 reg 8.000000e+04 train accuracy: 0.308204 val accuracy: 0.307000
lr 8.000000e-07 reg 1.000000e+05 train accuracy: 0.269041 val accuracy: 0.272000
lr 5.000000e-05 reg 1.000000e+04 train accuracy: 0.152347 val accuracy: 0.153000
lr 5.000000e-05 reg 2.000000e+04 train accuracy: 0.174878 val accuracy: 0.181000
lr 5.000000e-05 reg 3.000000e+04 train accuracy: 0.110510 val accuracy: 0.095000
lr 5.000000e-05 reg 4.000000e+04 train accuracy: 0.128429 val accuracy: 0.122000
lr 5.000000e-05 reg 5.000000e+04 train accuracy: 0.063388 val accuracy: 0.049000
lr 5.000000e-05 reg 6.000000e+04 train accuracy: 0.100265 val accuracy: 0.087000
lr 5.000000e-05 reg 7.000000e+04 train accuracy: 0.100265 val accuracy: 0.087000
lr 5.000000e-05 reg 8.000000e+04 train accuracy: 0.100265 val accuracy: 0.087000
lr 5.000000e-05 reg 1.000000e+05 train accuracy: 0.100265 val accuracy: 0.087000
best validation accuracy achieved during cross-validation: 0.402000

import matplotlib.pylab as pylab
pylab.rcParams['figure.figsize'] = 20, 20
pylab.rcParams["font.size"] = "25"
# Visualize the cross-validation results
import math
# Keys of results are (learning_rate, regularization_strength) tuples, so the
# two axes come from the first and second key component respectively.
x_scatter = [math.log10(x[0]) for x in results]
y_scatter = [math.log10(x[1]) for x in results]

# plot training accuracy
marker_size = 200
# Values of results are (training_accuracy, validation_accuracy) tuples.
colors = [results[x][0] for x in results]
plt.subplot(2, 1, 1)
plt.scatter(x_scatter, y_scatter, marker_size, c=colors)
plt.colorbar()
plt.xlabel('log learning rate')
plt.ylabel('log regularization strength')
plt.title('CIFAR-10 training accuracy')

# plot validation accuracy
colors = [results[x][1] for x in results] # default size of markers is 20
plt.subplot(2, 1, 2)
plt.scatter(x_scatter, y_scatter, marker_size, c=colors)
plt.colorbar()
plt.xlabel('log learning rate')
plt.ylabel('log regularization strength')
plt.title('CIFAR-10 validation accuracy')
plt.show()

# Evaluate the best svm on test set
# Predict with the classifier kept in best_svm and report the fraction of
# test labels it matches.
y_test_pred = best_svm.predict(X_test)
test_accuracy = np.mean(y_test == y_test_pred)
print('linear SVM on raw pixels final test set accuracy: %f' % test_accuracy)

linear SVM on raw pixels final test set accuracy: 0.376000

import matplotlib.pylab as pylab
pylab.rcParams['figure.figsize'] = 15, 5
pylab.rcParams["font.size"] = "17"
# Visualize the learned weights for each class.
# Depending on your choice of learning rate and regularization strength, these may
# or may not be nice to look at.
w = best_svm.W[:-1,:] # strip out the bias
# One 32x32x3 weight "image" per class along the last axis.
w = w.reshape(32, 32, 3, 10)
# A single global min/max so all ten images share the same colour scale.
w_min, w_max = np.min(w), np.max(w)
classes = ['plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
for i in range(10):
    plt.subplot(2, 5, i + 1)

    # Rescale the weights to be between 0 and 255
    wimg = 255.0 * (w[:, :, :, i].squeeze() - w_min) / (w_max - w_min)
    plt.imshow(wimg.astype('uint8'))
    plt.axis('off')
    plt.title(classes[i])

スポンサーリンク
スポンサーリンク

フォローする