# Stanford CS231n：Softmax Exercise

このExerciseの目的：

1. Softmax分類器用の完全ベクトル化loss function(損失関数)を実装する。
2. 学習率と正則化強度の調整にvalidation set(バリデーションセット)を使う。
3. 最終的なlearned weights(学習済み重み)を可視化する。
スポンサーリンク

## Softmax exercise

まずは必要なモジュールをロードする。  import random
import numpy as np
import matplotlib.pyplot as plt

from __future__ import print_function

%matplotlib inline
plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'


def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000, num_dev=500):
    """
    Load the CIFAR-10 dataset from disk and perform preprocessing to prepare
    it for the linear classifier. These are the same steps as we used for the
    SVM, but condensed to a single function.

    Returns:
        X_train, y_train, X_val, y_val, X_test, y_test, X_dev, y_dev where
        each X_* is a 2-D float array of flattened, mean-subtracted images
        with a trailing bias column of ones, and each y_* is the matching
        1-D label vector.
    """
    # Load the raw CIFAR-10 data
    cifar10_dir = 'cs231n/datasets/cifar-10-batches-py'
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

    # Subsample the data.  NOTE(review): the original paste only built the
    # validation mask and never defined X_val / X_dev; the standard split
    # (matching the printed output shapes 49000/1000/1000/500) is restored.
    mask = list(range(num_training, num_training + num_validation))
    X_val = X_train[mask]
    y_val = y_train[mask]
    mask = list(range(num_training))
    X_train = X_train[mask]
    y_train = y_train[mask]
    mask = list(range(num_test))
    X_test = X_test[mask]
    y_test = y_test[mask]
    # Dev set: a small random subset of the training set.
    mask = np.random.choice(num_training, num_dev, replace=False)
    X_dev = X_train[mask]
    y_dev = y_train[mask]

    # Preprocessing: reshape the image data into rows.
    # (Bug fix: the original passed X.shape instead of X.shape[0].)
    X_train = np.reshape(X_train, (X_train.shape[0], -1))
    X_val = np.reshape(X_val, (X_val.shape[0], -1))
    X_test = np.reshape(X_test, (X_test.shape[0], -1))
    X_dev = np.reshape(X_dev, (X_dev.shape[0], -1))

    # Normalize the data: subtract the mean training image from every split.
    mean_image = np.mean(X_train, axis=0)
    X_train -= mean_image
    X_val -= mean_image
    X_test -= mean_image
    X_dev -= mean_image

    # Add bias dimension (a constant-1 column) so W can absorb the bias.
    # (Bug fix: np.ones((X.shape, 1)) -> np.ones((X.shape[0], 1)).)
    X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))])
    X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))])
    X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))])
    X_dev = np.hstack([X_dev, np.ones((X_dev.shape[0], 1))])

    return X_train, y_train, X_val, y_val, X_test, y_test, X_dev, y_dev

# Cleaning up variables to prevent loading data multiple times (which may cause memory issue)
try:
    del X_train, y_train
    del X_test, y_test
except NameError:
    # First run: the variables do not exist yet, nothing to clean up.
    # (Narrowed from a bare `except:` — deleting an undefined name can
    # only raise NameError here.)
    pass

# Invoke the above function to get our data.
X_train, y_train, X_val, y_val, X_test, y_test, X_dev, y_dev = get_CIFAR10_data()
print('Train data shape: ', X_train.shape)
print('Train labels shape: ', y_train.shape)
print('Validation data shape: ', X_val.shape)
print('Validation labels shape: ', y_val.shape)
print('Test data shape: ', X_test.shape)
print('Test labels shape: ', y_test.shape)
print('dev data shape: ', X_dev.shape)
print('dev labels shape: ', y_dev.shape)

Train data shape:  (49000, 3073)
Train labels shape:  (49000,)
Validation data shape:  (1000, 3073)
Validation labels shape:  (1000,)
Test data shape:  (1000, 3073)
Test labels shape:  (1000,)
dev data shape:  (500, 3073)
dev labels shape:  (500,)

スポンサーリンク

## Softmax Classifier

ここからコードをcs231n/classifiers/softmax.pyに付け足していく。最初にnested loopsを持ったnaive softmax loss functionをsoftmax_loss_naive functionとしてsoftmax.py内に実装する。

# First implement the naive softmax loss function with nested loops.
# Open the file cs231n/classifiers/softmax.py and implement the
# softmax_loss_naive function.

from cs231n.classifiers.softmax import softmax_loss_naive
import time

# Generate a random softmax weight matrix and use it to compute the loss.
# 3073 = 32*32*3 pixels + 1 bias dimension; 10 = number of CIFAR-10 classes.
# The tiny 1e-4 scale keeps the initial scores near zero.
W = np.random.randn(3073, 10) * 0.0001
# Evaluate loss and gradient on the small dev split, regularization off (0.0).
loss, grad = softmax_loss_naive(W, X_dev, y_dev, 0.0)

# As a rough sanity check, our loss should be something close to -log(0.1).
# With near-zero weights each of the 10 classes gets probability ~0.1, so the
# expected cross-entropy is -log(0.1) ~= 2.3026.
print('loss: %f' % loss)
print('sanity check: %f' % (-np.log(0.1)))

loss: 2.311287
sanity check: 2.302585


# Complete the implementation of softmax_loss_naive and implement a (naive)
# version of the gradient that uses nested loops.
loss, grad = softmax_loss_naive(W, X_dev, y_dev, 0.0)

# As we did for the SVM, use numeric gradient checking as a debugging tool.
# `f` maps a weight matrix to the (loss, grad) pair; a numeric gradient
# checker compares the analytic grad against finite differences of f.
f = lambda w: softmax_loss_naive(w, X_dev, y_dev, 0.0)

# similar to SVM case, do another gradient check with regularization
# (reg = 5e1) so the regularization term of the gradient is exercised too.
loss, grad = softmax_loss_naive(W, X_dev, y_dev, 5e1)
f = lambda w: softmax_loss_naive(w, X_dev, y_dev, 5e1)

numerical: -1.140735 analytic: -1.140735, relative error: 1.553361e-08
numerical: -1.663914 analytic: -1.663914, relative error: 1.056297e-08
numerical: -0.036534 analytic: -0.036534, relative error: 2.974485e-07
numerical: -1.156886 analytic: -1.156886, relative error: 3.574864e-09
numerical: -1.561361 analytic: -1.561361, relative error: 1.195498e-08
numerical: 2.316011 analytic: 2.316011, relative error: 1.257622e-08
numerical: 1.477111 analytic: 1.477111, relative error: 1.030068e-08
numerical: 0.084744 analytic: 0.084744, relative error: 1.284474e-07
numerical: 0.300592 analytic: 0.300592, relative error: 1.983670e-07
numerical: -2.503997 analytic: -2.503997, relative error: 1.598446e-08
numerical: -2.398485 analytic: -2.398485, relative error: 8.343111e-09
numerical: -0.923731 analytic: -0.923731, relative error: 2.517794e-08
numerical: 1.724362 analytic: 1.724362, relative error: 3.505197e-08
numerical: -0.696853 analytic: -0.696853, relative error: 4.032136e-08
numerical: -0.824816 analytic: -0.824817, relative error: 3.900513e-08
numerical: -3.303563 analytic: -3.303563, relative error: 1.310547e-08
numerical: -3.314236 analytic: -3.314236, relative error: 1.162209e-08
numerical: -1.850071 analytic: -1.850072, relative error: 1.929262e-08
numerical: 1.182281 analytic: 1.182282, relative error: 3.525947e-08
numerical: 0.849532 analytic: 0.849532, relative error: 4.928193e-08


softmax loss関数とグラディエントのナイーブ版を実装したので、今度は、softmax_loss_vectorized内にベクトル化版を実装する。

# Now that we have a naive implementation of the softmax loss function and its gradient,
# implement a vectorized version in softmax_loss_vectorized.
# The two versions should compute the same results, but the vectorized version should be
# much faster.
# Time the naive (nested-loop) implementation on the dev split, reg = 5e-6.
tic = time.time()
loss_naive, grad_naive = softmax_loss_naive(W, X_dev, y_dev, 0.000005)
toc = time.time()
print('naive loss: %e computed in %fs' % (loss_naive, toc - tic))

from cs231n.classifiers.softmax import softmax_loss_vectorized
# Time the vectorized implementation with the same W, data, and reg.
tic = time.time()
loss_vectorized, grad_vectorized = softmax_loss_vectorized(W, X_dev, y_dev, 0.000005)
toc = time.time()
print('vectorized loss: %e computed in %fs' % (loss_vectorized, toc - tic))

# As we did for the SVM, we use the Frobenius norm to compare the two versions
# (only the scalar losses are compared here; both should agree to ~1e-8).
print('Loss difference: %f' % np.abs(loss_naive - loss_vectorized))

naive loss: 2.311287e+00 computed in 0.163708s
vectorized loss: 2.311287e+00 computed in 0.011355s
Loss difference: 0.000000


hyperparameters(ハイパーパラメーター)の調整次第で、分類精度(classification accuracy)は0.35以上になる。

# Use the validation set to tune hyperparameters (regularization strength and
# learning rate). You should experiment with different ranges for the learning
# rates and regularization strengths; if you are careful you should be able to
# get a classification accuracy of over 0.35 on the validation set.
from cs231n.classifiers import Softmax
results = {}           # (lr, reg) -> (train_accuracy, val_accuracy)
best_val = -1          # best validation accuracy seen so far
best_softmax = None    # Softmax instance that achieved best_val
learning_rates = [1e-7, 5e-7, 2e-6, 5e-4, 3e-3]
regularization_strengths = [5e4, 1e8, 2e7, 3e3, 1e6, 200]

################################################################################
# TODO:                                                                        #
# Use the validation set to set the learning rate and regularization strength. #
# This should be identical to the validation that you did for the SVM; save    #
# the best trained softmax classifer in best_softmax.                          #
################################################################################
# Grid search: train one classifier per (learning rate, regularization)
# combination and keep the model with the highest validation accuracy.
# (The original paste lost all indentation; the loop nesting is restored.)
for learning_rate in learning_rates:
    for regularization_strength in regularization_strengths:
        softmax = Softmax()
        tic = time.time()
        loss_hist = softmax.train(X_train, y_train, learning_rate=learning_rate,
                                  reg=regularization_strength, num_iters=400, verbose=False)
        toc = time.time()
        print ('training took %fs' % (toc - tic))

        y_train_pred = softmax.predict(X_train)
        y_val_pred = softmax.predict(X_val)

        y_train_accuracy = np.mean(y_train_pred == y_train)
        y_val_accuracy = np.mean(y_val_pred == y_val)

        results[(learning_rate, regularization_strength)] = (y_train_accuracy, y_val_accuracy)
        # Track the best model by validation accuracy.
        if y_val_accuracy > best_val:
            best_val = y_val_accuracy
            best_softmax = softmax
################################################################################
#                              END OF YOUR CODE                                #
################################################################################

# Print out results.
for lr, reg in sorted(results):
    train_accuracy, val_accuracy = results[(lr, reg)]
    print('lr %e reg %e train accuracy: %f val accuracy: %f' % (
        lr, reg, train_accuracy, val_accuracy))

print('best validation accuracy achieved during cross-validation: %f' % best_val)

training took 3.618501s

/root/cs231n.github.io/assignments/2018/assignment1/cs231n/classifiers/softmax.py:91: RuntimeWarning: divide by zero encountered in log
loss = -np.sum(np.log(exp_f_y/np.sum(exp_f, axis=1)))
/root/cs231n.github.io/assignments/2018/assignment1/cs231n/classifiers/softmax.py:100: RuntimeWarning: overflow encountered in double_scalars
loss += 0.5 * reg * np.sum(W*W)
/root/.pyenv/versions/3.6.5/envs/py365/lib/python3.6/site-packages/numpy/core/_methods.py:32: RuntimeWarning: overflow encountered in reduce
return umr_sum(a, axis, dtype, out, keepdims)
/root/cs231n.github.io/assignments/2018/assignment1/cs231n/classifiers/softmax.py:100: RuntimeWarning: overflow encountered in multiply
loss += 0.5 * reg * np.sum(W*W)
/root/cs231n.github.io/assignments/2018/assignment1/cs231n/classifiers/softmax.py:102: RuntimeWarning: overflow encountered in multiply
dW += reg*W
/root/.pyenv/versions/3.6.5/envs/py365/lib/python3.6/site-packages/numpy/core/_methods.py:26: RuntimeWarning: invalid value encountered in reduce
return umr_maximum(a, axis, None, out, keepdims)

training took 3.666402s
training took 3.659864s
training took 3.666498s
training took 3.636717s
training took 3.631186s
training took 3.645728s
training took 3.630206s
training took 3.598429s
training took 3.596047s
training took 3.613413s
training took 3.608009s
training took 3.655658s
training took 3.602936s
training took 3.641383s
training took 3.621818s
training took 4.549786s
training took 3.661821s
training took 3.597917s
training took 3.611806s
training took 3.627976s
training took 4.711145s
training took 3.606025s
training took 4.352435s
training took 3.603151s
training took 3.583408s
training took 3.610322s

/root/cs231n.github.io/assignments/2018/assignment1/cs231n/classifiers/softmax.py:81: RuntimeWarning: overflow encountered in subtract
f -= np.max(f, axis=1, keepdims=True) #for stability
/root/cs231n.github.io/assignments/2018/assignment1/cs231n/classifiers/softmax.py:81: RuntimeWarning: invalid value encountered in subtract
f -= np.max(f, axis=1, keepdims=True) #for stability

training took 3.609238s
training took 3.591437s
training took 4.364637s
lr 1.000000e-07 reg 2.000000e+02 train accuracy: 0.188102 val accuracy: 0.189000
lr 1.000000e-07 reg 3.000000e+03 train accuracy: 0.202633 val accuracy: 0.184000
lr 1.000000e-07 reg 5.000000e+04 train accuracy: 0.280878 val accuracy: 0.287000
lr 1.000000e-07 reg 1.000000e+06 train accuracy: 0.266551 val accuracy: 0.279000
lr 1.000000e-07 reg 2.000000e+07 train accuracy: 0.102327 val accuracy: 0.085000
lr 1.000000e-07 reg 1.000000e+08 train accuracy: 0.100265 val accuracy: 0.087000
lr 5.000000e-07 reg 2.000000e+02 train accuracy: 0.265816 val accuracy: 0.271000
lr 5.000000e-07 reg 3.000000e+03 train accuracy: 0.294347 val accuracy: 0.301000
lr 5.000000e-07 reg 5.000000e+04 train accuracy: 0.327490 val accuracy: 0.351000
lr 5.000000e-07 reg 1.000000e+06 train accuracy: 0.206020 val accuracy: 0.210000
lr 5.000000e-07 reg 2.000000e+07 train accuracy: 0.100265 val accuracy: 0.087000
lr 5.000000e-07 reg 1.000000e+08 train accuracy: 0.100265 val accuracy: 0.087000
lr 2.000000e-06 reg 2.000000e+02 train accuracy: 0.335490 val accuracy: 0.335000
lr 2.000000e-06 reg 3.000000e+03 train accuracy: 0.380041 val accuracy: 0.372000
lr 2.000000e-06 reg 5.000000e+04 train accuracy: 0.302939 val accuracy: 0.322000
lr 2.000000e-06 reg 1.000000e+06 train accuracy: 0.098633 val accuracy: 0.094000
lr 2.000000e-06 reg 2.000000e+07 train accuracy: 0.100265 val accuracy: 0.087000
lr 2.000000e-06 reg 1.000000e+08 train accuracy: 0.100265 val accuracy: 0.087000
lr 5.000000e-04 reg 2.000000e+02 train accuracy: 0.208245 val accuracy: 0.226000
lr 5.000000e-04 reg 3.000000e+03 train accuracy: 0.071878 val accuracy: 0.089000
lr 5.000000e-04 reg 5.000000e+04 train accuracy: 0.100265 val accuracy: 0.087000
lr 5.000000e-04 reg 1.000000e+06 train accuracy: 0.100265 val accuracy: 0.087000
lr 5.000000e-04 reg 2.000000e+07 train accuracy: 0.100265 val accuracy: 0.087000
lr 5.000000e-04 reg 1.000000e+08 train accuracy: 0.100265 val accuracy: 0.087000
lr 3.000000e-03 reg 2.000000e+02 train accuracy: 0.143449 val accuracy: 0.128000
lr 3.000000e-03 reg 3.000000e+03 train accuracy: 0.100265 val accuracy: 0.087000
lr 3.000000e-03 reg 5.000000e+04 train accuracy: 0.100265 val accuracy: 0.087000
lr 3.000000e-03 reg 1.000000e+06 train accuracy: 0.100265 val accuracy: 0.087000
lr 3.000000e-03 reg 2.000000e+07 train accuracy: 0.100265 val accuracy: 0.087000
lr 3.000000e-03 reg 1.000000e+08 train accuracy: 0.100265 val accuracy: 0.087000
best validation accuracy achieved during cross-validation: 0.372000

# Evaluate the best softmax classifier (chosen on the validation set)
# on the held-out test set and report its accuracy.
predictions = best_softmax.predict(X_test)
test_accuracy = np.mean(predictions == y_test)
print('softmax on raw pixels final test set accuracy: %f' % (test_accuracy, ))

softmax on raw pixels final test set accuracy: 0.373000

import matplotlib.pylab as pylab
# Larger figure and font for the 2x5 grid of class-template images.
pylab.rcParams['figure.figsize'] = 15, 5
pylab.rcParams["font.size"] = "17"
# Visualize the learned weights for each class
w = best_softmax.W[:-1,:] # strip out the bias row (last of the 3073 features)
w = w.reshape(32, 32, 3, 10)

w_min, w_max = np.min(w), np.max(w)

classes = ['plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
# (The original paste lost the loop-body indentation and had stray ad text
# fused onto the final line; both are repaired here.)
for i in range(10):
    plt.subplot(2, 5, i + 1)

    # Rescale the weights to be between 0 and 255 for display as an image.
    wimg = 255.0 * (w[:, :, :, i].squeeze() - w_min) / (w_max - w_min)
    plt.imshow(wimg.astype('uint8'))
    plt.axis('off')
    plt.title(classes[i])
スポンサーリンク

フォローする