# スタンフォード/CS131/宿題6-6 PCA次元削減・散乱行列

スポンサーリンク

## Fisherface: Linear Discriminant Analysis¶

LDA is a linear transformation method like PCA, but with a different goal. The main difference is that LDA takes information from the labels of the examples to maximize the separation of the different classes in the transformed space.

Therefore, LDA is not totally unsupervised since it requires labels. PCA is fully unsupervised.

In summary(要約):

• PCA preserves maximum variance in the projected space.
PCAは射影空間の最大分散を保存する。

• LDA preserves discrimination between classes in the project space. We want to maximize scatter between classes and minimize scatter intra class.
LDAは投影空間のクラスの区別を保存する。クラス間散乱を最大化してクラス内散乱を最小化したい。

import numpy as np
from skimage import io
import scipy
import scipy.linalg
from collections import defaultdict
import matplotlib.pyplot as plt
from matplotlib import rc

%matplotlib inline
plt.rcParams['figure.figsize'] = (15.0, 12.0) # set default size of plots
plt.rcParams["font.size"] = "17"
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

X_train, y_train, classes_train = load_dataset('faces', train=True, as_gray=True)
X_test, y_test, classes_test = load_dataset('faces', train=False, as_gray=True)
assert classes_train == classes_test
classes = classes_train
num_classes = len(classes)

# Flatten the image data into rows
# we now have one 4096 dimensional featue vector for each example
X_train = np.reshape(X_train, (X_train.shape, -1))
X_test = np.reshape(X_test, (X_test.shape, -1))


### Dimensionality Reduction via PCA¶

To apply LDA, we need $D < N$. Since in our dataset, $N = 800$ and $D = 4096$, we first need to reduce the number of dimensions of the images using PCA.
LDA適用には$D < N$である必要がある。ここで使用するデータセットは、$N = 800$, $D = 4096$なので、最初にPCAを使って画像の次元数をを削減する必要がある。

from features import PCA

N = X_train.shape
c = num_classes

pca = PCA()
pca.fit(X_train)
X_train_pca = pca.transform(X_train, N-c)
X_test_pca = pca.transform(X_test, N-c)


### Scatter matrices¶

We first need to compute the within-class scatter matrix:

$$S_W = \sum_{i=1}^c S_i$$
where $S_i = \sum_{x_k \in Y_i} (x_k – \mu_i)(x_k – \mu_i)^T$ is the scatter of class $i$.
$S_i = \sum_{x_k \in Y_i} (x_k – \mu_i)(x_k – \mu_i)^T$は、クラス$i$の散乱である。

We then need to compute the between-class scatter matrix:

$$S_B = \sum_{i=1}^c N_i (\mu_i – \mu)(\mu_i – \mu)^T$$
where $N_i$ is the number of examples in class $i$.
$N_i$は、クラス$i$の標本数である。

class LDA(object):
"""Class implementing Principal component analysis (PCA).
Steps to perform PCA on a matrix of features X:
1. Fit the training data using method fit (either with eigen decomposition of SVD)
2. Project X into a lower dimensional space using method transform
3. Optionally reconstruct the original X (as best as possible) using method reconstruct
"""
def __init__(self):
self.W_lda = None

def fit(self, X, y):
"""Fit the training data X using the labels y.
Will store the projection matrix in self.W_lda.
Args:
X: numpy array of shape (N, D). Each of the N rows represent a data point.
Each data point contains D features.
y: numpy array of shape (N,) containing labels of examples in X
"""
N, D = X.shape
scatter_between = self._between_class_scatter(X, y)
scatter_within = self._within_class_scatter(X, y)
e_vecs = None
# Solve generalized eigenvalue problem for matrices scatter_between and scatter_within
# Use scipy.linalg.eig instead of numpy's eigenvalue solver.
# Don't forget to sort the values and vectors in descending order.
e_vals,e_vecs = scipy.linalg.eig(scatter_between,scatter_within)
idx = np.argsort(-e_vals)
e_vecs = e_vecs[:, idx]
self.W_lda = e_vecs
# Check that the shape of self.W_lda is correct
assert self.W_lda.shape == (D, D)
# Each column of self.W_lda should have norm 1 (each one is an eigenvector)
for i in range(D):
assert np.allclose(np.linalg.norm(self.W_lda[:, i]), 1.0)

def _within_class_scatter(self, X, y):
"""Compute the covariance matrix of each class, and sum over the classes.
For every label i, we have:
- X_i: matrix of examples with labels i
- S_i: covariance matrix of X_i (per class covariance matrix for class i)
The formula for covariance matrix is: X_centered^T X_centered
where X_centered is the matrix X with mean 0 for each feature.
Our result scatter_within is the sum of all the S_i
Args:
X: numpy array of shape (N, D) containing N examples with D features each
y: numpy array of shape (N,), labels of examples in X
Returns:
scatter_within: numpy array of shape (D, D), sum of covariance matrices of each label
"""
_, D = X.shape
assert X.shape == y.shape
scatter_within = np.zeros((D, D))
for i in np.unique(y):
# Get the covariance matrix for class i, and add it to scatter_within
X_i = X[y == i]
X_centered = X_i - np.mean(X_i, axis=0)
S_i = X_centered.T.dot(X_centered)
scatter_within += S_i
return scatter_within

def _between_class_scatter(self, X, y):
"""Compute the covariance matrix as if each class is at its mean.
For every label i, we have:
- X_i: matrix of examples with labels i
- mu_i: mean of X_i.
Our result scatter_between is the covariance matrix of X where we replaced every
example labeled i with mu_i.
Args:
X: numpy array of shape (N, D) containing N examples with D features each
y: numpy array of shape (N,), labels of examples in X
Returns:
scatter_between: numpy array of shape (D, D)
"""
_, D = X.shape
assert X.shape == y.shape
scatter_between = np.zeros((D, D))
mu = X.mean(axis=0)
for i in np.unique(y):
X_i = X[y == i]
N_i = X_i.shape
mu_i = np.mean(X_i, axis=0)
scatter_between+=N_i*(mu_i-mu).T.dot(mu_i-mu)
return scatter_between

lda = LDA()
# Compute within-class scatter matrix
S_W = lda._within_class_scatter(X_train_pca, y_train)
print(S_W.shape)

(784, 784)

# Compute between-class scatter matrix
S_B = lda._between_class_scatter(X_train_pca, y_train)
print(S_B.shape)

(784, 784)


スポンサーリンク
スポンサーリンク

フォローする