This post works through the last part of Stanford CS231n assignment1, the Image features exercise.
Image features exercise
import random
import numpy as np
from cs231n.data_utils import load_CIFAR10
import matplotlib.pyplot as plt
%matplotlib inline
plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'
# for auto-reloading external modules
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2
Load data
As in the previous exercises, we load the CIFAR-10 data from disk.
from cs231n.features import color_histogram_hsv, hog_feature
def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000):
    # Load the raw CIFAR-10 data
    cifar10_dir = 'cs231n/datasets/cifar-10-batches-py'
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

    # Subsample the data
    mask = range(num_training, num_training + num_validation)
    X_val = X_train[mask]
    y_val = y_train[mask]
    mask = range(num_training)
    X_train = X_train[mask]
    y_train = y_train[mask]
    mask = range(num_test)
    X_test = X_test[mask]
    y_test = y_test[mask]

    return X_train, y_train, X_val, y_val, X_test, y_test

X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data()
Extract Features
In addition to computing a color histogram over the hue channel of the HSV color space, we also compute a Histogram of Oriented Gradients (HOG) for each image. The final feature vector for each image is formed from the HOG and color histogram feature vectors. Roughly speaking, HOG captures the texture of an image while ignoring color information, whereas a color histogram represents the colors of the input image while ignoring texture. We therefore expect that using the two together should perform better than using either one alone. Verifying this intuition would be a good thing to try for the bonus section.
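To make the color-histogram half concrete, here is a minimal sketch of a hue histogram, assuming the input is an H x W x 3 RGB array with values in 0-255; the actual color_histogram_hsv in cs231n/features.py may differ in details such as bin edges and normalization.

import numpy as np
import matplotlib.colors as mcolors

def hue_histogram(img, nbin=10):
    # Assumed interface: img is an H x W x 3 RGB array with values in 0-255.
    # Convert RGB (scaled to [0, 1]) to HSV and keep only the hue channel.
    hsv = mcolors.rgb_to_hsv(img / 255.0)
    hue = hsv[..., 0]
    # Bin the hue values into nbin equally spaced bins over [0, 1].
    hist, _ = np.histogram(hue, bins=nbin, range=(0.0, 1.0))
    # Normalize so the feature does not depend on image size.
    return hist.astype(float) / hue.size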
Both hog_feature and color_histogram_hsv operate on a single image and return a feature vector for that image. The extract_features function takes a set of images and a list of feature functions, runs each feature function on each image, and stores the results in a matrix in which each row is the concatenation of all feature vectors for a single image.
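As a rough illustration of how such an extract_features function can work, here is a sketch under the assumption that each feature function returns a fixed-size 1-D array per image; the real cs231n.features.extract_features differs in details (for example, it prints progress when verbose=True).

import numpy as np

def extract_features_sketch(imgs, feature_fns):
    # imgs: array of shape (num_images, H, W, 3); feature_fns: list of callables,
    # each mapping a single image to a 1-D feature vector.
    num_images = imgs.shape[0]
    # Use the first image to find the total feature dimension.
    first_feats = [fn(imgs[0]).ravel() for fn in feature_fns]
    total_dim = sum(f.size for f in first_feats)
    feats = np.zeros((num_images, total_dim))
    feats[0] = np.concatenate(first_feats)
    # Run every feature function on every remaining image and concatenate the results.
    for i in range(1, num_images):
        feats[i] = np.concatenate([fn(imgs[i]).ravel() for fn in feature_fns])
    return feats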
from cs231n.features import *
num_color_bins = 10 # Number of bins in the color histogram
feature_fns = [hog_feature, lambda img: color_histogram_hsv(img, nbin=num_color_bins)]
X_train_feats = extract_features(X_train, feature_fns, verbose=True)
X_val_feats = extract_features(X_val, feature_fns)
X_test_feats = extract_features(X_test, feature_fns)
# Preprocessing: Subtract the mean feature
mean_feat = np.mean(X_train_feats, axis=0, keepdims=True)
X_train_feats -= mean_feat
X_val_feats -= mean_feat
X_test_feats -= mean_feat
# Preprocessing: Divide by standard deviation. This ensures that each feature
# has roughly the same scale.
std_feat = np.std(X_train_feats, axis=0, keepdims=True)
X_train_feats /= std_feat
X_val_feats /= std_feat
X_test_feats /= std_feat
# Preprocessing: Add a bias dimension
X_train_feats = np.hstack([X_train_feats, np.ones((X_train_feats.shape[0], 1))])
X_val_feats = np.hstack([X_val_feats, np.ones((X_val_feats.shape[0], 1))])
X_test_feats = np.hstack([X_test_feats, np.ones((X_test_feats.shape[0], 1))])
Train SVM on features
Using the multiclass SVM code written for the earlier part of this assignment, we train an SVM on the features extracted above. This should give better results than training an SVM directly on the raw pixels.
# Use the validation set to tune the learning rate and regularization strength
from cs231n.classifiers.linear_classifier import LinearSVM
import time
learning_rates = [1e-9, 1e-8, 1e-7]
regularization_strengths = [1e5, 1e6, 1e7]
results = {}
best_val = -1
best_svm = None
################################################################################
# TODO:                                                                        #
# Use the validation set to set the learning rate and regularization strength. #
# This should be identical to the validation that you did for the SVM; save    #
# the best trained classifier in best_svm. You might also want to play         #
# with different numbers of bins in the color histogram. If you are careful    #
# you should be able to get accuracy of near 0.44 on the validation set.       #
################################################################################
for learning_rate in learning_rates:
    for regularization_strength in regularization_strengths:
        svm = LinearSVM()
        tic = time.time()
        loss_hist = svm.train(X_train_feats, y_train, learning_rate=learning_rate,
                              reg=regularization_strength, num_iters=1500, verbose=False)
        toc = time.time()
        print('training took %fs' % (toc - tic))

        y_train_feats_pred = svm.predict(X_train_feats)
        y_val_feats_pred = svm.predict(X_val_feats)
        y_train_feats_accuracy = np.mean(y_train_feats_pred == y_train)
        y_val_feats_accuracy = np.mean(y_val_feats_pred == y_val)
        results[(learning_rate, regularization_strength)] = (y_train_feats_accuracy, y_val_feats_accuracy)

        if y_val_feats_accuracy > best_val:
            best_val = y_val_feats_accuracy
            best_svm = svm
################################################################################
#                              END OF YOUR CODE                                #
################################################################################

# Print out results.
for lr, reg in sorted(results):
    train_accuracy, val_accuracy = results[(lr, reg)]
    print('lr %e reg %e train accuracy: %f val accuracy: %f' % (
        lr, reg, train_accuracy, val_accuracy))
print('best validation accuracy achieved during cross-validation: %f' % best_val)
# Evaluate your trained SVM on the test set
y_test_pred = best_svm.predict(X_test_feats)
test_accuracy = np.mean(y_test == y_test_pred)
print (test_accuracy)
# An important way to gain intuition about how an algorithm works is to
# visualize the mistakes that it makes. In this visualization, we show examples
# of images that are misclassified by our current system. The first column
# shows images that our system labeled as "plane" but whose true label is
# something other than "plane".
plt.rcParams['figure.figsize'] = (20.0, 16.0)
plt.rcParams["font.size"] = "16"
examples_per_class = 8
classes = ['plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
for cls, cls_name in enumerate(classes):
    idxs = np.where((y_test != cls) & (y_test_pred == cls))[0]
    idxs = np.random.choice(idxs, examples_per_class, replace=False)
    for i, idx in enumerate(idxs):
        plt.subplot(examples_per_class, len(classes), i * len(classes) + cls + 1)
        plt.imshow(X_test[idx].astype('uint8'))
        plt.axis('off')
        if i == 0:
            plt.title(cls_name)
plt.show()
Neural Network on image features
In the first part of assignment1 we saw that training a two-layer neural network on raw pixels achieves better classification performance than training a linear classifier on raw pixels, and in this notebook we have seen that a linear classifier trained on image features outperforms one trained on raw pixels.
For completeness, we should also try training a neural network on image features. This approach should outperform all of the previous ones: you should easily be able to reach 55% classification accuracy on the test set, and the best models reach roughly 60%.
print (X_train_feats.shape)
print (X_val_feats.shape)
print (X_test_feats.shape)
from cs231n.classifiers.neural_net import TwoLayerNet
################################################################################
# TODO: Train a two-layer neural network on image features. You may want to   #
# cross-validate various parameters as in previous sections. Store your best  #
# model in the best_net variable.                                             #
################################################################################
hidden_size = 500
learning_rates = [0.7]  # [0, 0.1, 0.3, 1, 3, 10]
regs = [3e-4]  # [1e-4, 3e-4, 1e-3, 3e-3, 1e-2]

input_size = X_train_feats.shape[1]
num_classes = 10

best_net = None
best_val = -1
best_learning_rate = None
best_reg = None

for learning_rate in learning_rates:
    for reg in regs:
        net = TwoLayerNet(input_size, hidden_size, num_classes)

        # Train the network
        stats = net.train(X_train_feats, y_train, X_val_feats, y_val,
                          num_iters=2000, batch_size=200,
                          learning_rate=learning_rate, learning_rate_decay=0.95,
                          reg=reg, verbose=True)

        # Predict on the validation set
        val_acc = (net.predict(X_val_feats) == y_val).mean()
        if val_acc > best_val:
            best_val = val_acc
            best_net = net
            best_reg = reg
            best_learning_rate = learning_rate
        print('Best validation accuracy so far: %f' % best_val)
        print('with learning rate: %e, reg %e' % (best_learning_rate, best_reg))

print('best validation accuracy %f' % best_val)
################################################################################
#                              END OF YOUR CODE                                #
################################################################################
# Plot the loss function and train / validation accuracies
plt.rcParams['figure.figsize'] = (25.0, 25.0)
plt.rcParams["font.size"] = "22"
plt.subplot(2, 1, 1)
plt.plot(stats['loss_history'])
plt.title('Loss history')
plt.xlabel('Iteration')
plt.ylabel('Loss')
plt.subplot(2, 1, 2)
plt.plot(stats['train_acc_history'], label='train')
plt.plot(stats['val_acc_history'], label='val')
plt.title('Classification accuracy history')
plt.xlabel('Epoch')
plt.ylabel('Classification accuracy')
plt.legend(('Training Accuracy', 'Validation Accuracy'), loc='lower right')
plt.show()
# Run your neural net classifier on the test set. You should be able to
# get more than 55% accuracy.
test_acc = (best_net.predict(X_test_feats) == y_test).mean()
print (test_acc)
I tried tweaking the hyperparameters in my own way, but did not get the results I was hoping for.