# NumPy, PyTorch(CPU), PyTorch(GPU)の速度比較

スポンサーリンク

## Simple Neural Network¶

import matplotlib.pyplot as plt
import numpy as np
import IPython.display as ipd  # for display and clear_output
import time  # for sleep and time()

%matplotlib inline
plt.rcParams.update({'font.size': 18, 'font.family': 'STIXGeneral', 'mathtext.fontset': 'stix'})

# Make some training data
n = 20
Xtrain = np.linspace(0., 20.0, n).reshape((n,1)) - 10
Ttrain = 0.2 + 0.05 * (Xtrain+10) + 0.4 * np.sin(Xtrain+10) + 0.2 * np.random.normal(size=(n,1))

# Make some testing data
Xtest = Xtrain + 0.1*np.random.normal(size=(n,1))
Ttest = 0.2 + 0.05 * (Xtest+10) + 0.4 * np.sin(Xtest+10) + 0.2 * np.random.normal(size=(n,1))

Xtrain = Xtrain.astype(np.float32)
Ttrain = Ttrain.astype(np.float32)
Xtest = Xtest.astype(np.float32)
Ttest = Ttest.astype(np.float32)

plt.rcParams['figure.figsize'] = 14, 7
plt.rcParams["font.size"] = "17"
plt.plot(Xtrain, Ttrain, label='Training Data')
plt.plot(Xtest, Ttest, label='Testing Data')
plt.legend();

スポンサーリンク

## Numpy実装¶

def nn_numpy(Xtrain, Ttrain, Xtest, Ttest, nHiddens=10, rhoh=0.1, rhoo=0.1, nReps=50000, graphics=False):

nSamples = Xtrain.shape[0]
nOutputs = Ttrain.shape[1]

rh = rhoh / (nSamples*nOutputs)
ro = rhoo / (nSamples*nOutputs)

startTime = time.time()

# Initialize weights to uniformly distributed values between small normally-distributed between -0.1 and 0.1
V = 0.1*2*(np.random.uniform(size=(1+1,nHiddens))-0.5).astype(Xtrain.dtype)
W = 0.1*2*(np.random.uniform(size=(1+nHiddens,nOutputs))-0.5).astype(Xtrain.dtype)

# collect training and testing errors for plotting
errorTrace = np.zeros((nReps,2))

if graphics:
fig = plt.figure(figsize=(18,18))

for reps in range(nReps):

# Forward pass on training data
Z = np.tanh(Xtrain @ V[1:,:] + V[0,:])
Y = Z @ W[1:,:] + W[0,:]

# Error in output
error = Ttrain - Y

# Backward pass - the backpropagation and weight update steps
vDelta = ( error @ W[1:,:].T) * (1-Z**2)
V[1:, :] += rh * Xtrain.T @ vDelta
V[0, :] += rh * np.sum(vDelta, 0)

W[1:, :] += ro * Z.T @ error
W[0, :] += ro * np.sum(error, 0)

# error traces for plotting
errorTrace[reps,0] = np.sqrt(np.mean((error**2)))
Ytest = np.tanh(Xtest @ V[1:, :] + V[0, :]) @ W[1:, :] + W[0, :]  #!! Forward pass in one line
errorTrace[reps,1] = np.sqrt(np.mean((Ytest-Ttest)**2))

if graphics and (reps % 1000 == 0 or reps == nReps-1):

plt.clf()
plt.subplot(3,1,1)
plt.plot(errorTrace[:reps+1,:])
plt.ylim(0,0.7)
plt.xlabel('Epochs')
plt.ylabel('RMSE')
plt.legend(('Train','Test'),loc='best')

plt.subplot(3,1,2)
plt.plot(Xtrain, Ttrain, 'o-',
Xtest, Ttest, 'o-',
Xtest, Ytest, 'o-')
plt.xlim(-10,10)
plt.legend(('Training','Testing','Model'),loc='best')
plt.xlabel('$x$')
plt.ylabel('Actual and Predicted $f(x)$')

plt.subplot(3,1,3)
plt.plot(Xtrain, Z)
plt.ylim(-1.1,1.1)
plt.xlabel('$x$')
plt.ylabel('Hidden Unit Outputs ($z$)');

ipd.clear_output(wait=True)
ipd.display(fig)

endTime = time.time()

if graphics:
ipd.clear_output(wait=True)

return errorTrace, endTime - startTime

errorTrace, _ = nn_numpy(Xtrain, Ttrain, Xtest, Ttest, nHiddens=10, rhoh=0.5, rhoo=0.1, nReps=50000, graphics=True)


_, seconds = nn_numpy(Xtrain, Ttrain, Xtest, Ttest, nHiddens=1000, rhoh=0.01, rhoo=0.001, nReps=500000, graphics=False)
seconds

952.2789857387543
スポンサーリンク

## Pytorch¶

import torch

dtype = torch.FloatTensor
# dtype = torch.cuda.FloatTensor # Uncomment this to run on GPU
dtype

torch.FloatTensor
Xtrain_torch = torch.from_numpy(Xtrain)

Xtrain_torch.type()

'torch.FloatTensor'
# Make some training data
n = 20

# Xtrain = np.linspace(0.,20.0,n).reshape((n,1)) - 10
# Ttrain = 0.2 + 0.05 * (Xtrain+10) + 0.4 * np.sin(Xtrain+10) + 0.2 * np.random.normal(size=(n,1))
Xtrain_torch = torch.linspace(0.,20.0,n).view((n,1)).type(dtype) - 10
Ttrain_torch = 0.2 + 0.05 * (Xtrain_torch+10) + 0.4 * torch.sin(Xtrain_torch+10) + 0.2 * torch.randn((n,1)).type(dtype)

# Make some testing data
# Xtest = Xtrain + 0.1*np.random.normal(size=(n,1))
# Ttest = 0.2 + 0.05 * (Xtest+10) + 0.4 * np.sin(Xtest+10) + 0.2 * np.random.normal(size=(n,1))
Xtest_torch = Xtrain_torch + 0.1 * torch.randn((n,1)).type(dtype)
Ttest_torch = 0.2 + 0.05 * (Xtest_torch+10) + 0.4 * torch.sin(Xtest_torch+10) + 0.2 * torch.randn((n,1)).type(dtype)


Xtrain_torch = torch.from_numpy(Xtrain)
Ttrain_torch = torch.from_numpy(Ttrain)

Xtest_torch = torch.from_numpy(Xtest)
Ttest_torch = torch.from_numpy(Ttest)


いったんpytorchがスカラー型をインクルードすればこれは必要ないかもしれない。

def torch_rms(error):
m = torch.mean(error**2, 0, keepdim=True)
mall = torch.mean(m, 1, keepdim=True)

dt = torch.FloatTensor
a = torch.ones(5,5).type(dt)
b = a+1.2
print(a)
print(b)
torch_rms(a-b)

tensor([[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.]])
tensor([[2.2000, 2.2000, 2.2000, 2.2000, 2.2000],
[2.2000, 2.2000, 2.2000, 2.2000, 2.2000],
[2.2000, 2.2000, 2.2000, 2.2000, 2.2000],
[2.2000, 2.2000, 2.2000, 2.2000, 2.2000],
[2.2000, 2.2000, 2.2000, 2.2000, 2.2000]])

tensor([[1.2000]])
dt = torch.cuda.FloatTensor
a = torch.ones(5,5).type(dt)
b = a+1.2
print(a)
print(b)
torch_rms(a-b)

tensor([[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.]], device='cuda:0')
tensor([[2.2000, 2.2000, 2.2000, 2.2000, 2.2000],
[2.2000, 2.2000, 2.2000, 2.2000, 2.2000],
[2.2000, 2.2000, 2.2000, 2.2000, 2.2000],
[2.2000, 2.2000, 2.2000, 2.2000, 2.2000],
[2.2000, 2.2000, 2.2000, 2.2000, 2.2000]], device='cuda:0')

tensor([[1.2000]], device='cuda:0')
def nn_torch(Xtrain, Ttrain, Xtest, Ttest, nHiddens=10, rhoh=0.1, rhoo=0.1, nReps=50000, graphics=False):

dtype = Xtrain.type()  # if data is on GPU, allocate network variables on GPU, too

nSamples = Xtrain.shape[0]
nOutputs = Ttrain.shape[1]
# nSamples = Xtrain.size(0)
# nOutputs = Ttrain.size(1)

rh = rhoh / (nSamples*nOutputs)
ro = rhoo / (nSamples*nOutputs)

startTime = time.time()

# Initialize weights to uniformly distributed values between small normally-distributed between -0.1 and 0.1
# V = 0.1*2*(np.random.uniform(size=(1+1,nHiddens))-0.5)
# W = 0.1*2*(np.random.uniform(size=(1+nHiddens,nOutputs))-0.5)
V = 0.1*2*(torch.rand(1+1,nHiddens)-0.5).type(dtype)
W = 0.1*2*(torch.rand(1+nHiddens,nOutputs)-0.5).type(dtype)

# collect training and testing errors for plotting
# errorTrace = np.zeros((nReps,2))
errorTrace = torch.zeros((nReps,2)).type(dtype)

if graphics:
fig = plt.figure(figsize=(18,18))

for reps in range(nReps):

# Forward pass on training data.  No change going from numpy to pytorch!
Z = np.tanh(Xtrain @ V[1:,:] + V[0,:])
Y = Z @ W[1:,:] + W[0,:]

# Error in output
error = Ttrain - Y

# Backward pass - the backpropagation and weight update steps. Only change is in transpose and sum
# vDelta = ( error @ W[1:,:].T) * (1-Z**2)
# V[1:, :] += rh * Xtrain.T @ vDelta
# V[0, :] += rh * np.sum(vDelta, 0)
vDelta = ( error @ W[1:,:].t() * (1-Z**2) )
V[1:, :] += rh * Xtrain.t() @ vDelta
V[0, :] += rh * torch.sum(vDelta, 0, keepdim=True)  # to prevent conversion to scalar. Necessary?

# W[1:, :] += ro * Z.T @ error
# W[0, :] += ro * np.sum(error, 0)False,
W[1:, :] += ro * Z.t() @ error
W[0, :] += ro * torch.sum(error, 0, keepdim=True)

# error traces for plotting
# errorTrace[reps,0] = np.sqrt(np.mean((error**2)))
# Ytest = np.tanh(Xtest @ V[1:, :] + V[0, :]) @ W[1:, :] + W[0, :]  #!! Forward pass in one line
# errorTrace[reps,1] = np.sqrt(np.mean((Ytest-Ttest)**2))
errorTrace[reps,0:1] = torch_rms(error)
Ytest = torch.tanh(Xtest @ V[1:, :] + V[0, :]) @ W[1:, :] + W[0, :]  #!! Forward pass in one line
errorTrace[reps,1:2] = torch_rms(Ytest - Ttest)

if graphics and (reps % 1000 == 0 or reps == nReps-1):
plt.clf()
plt.subplot(3,1,1)
# plt.plot(errorTrace[:reps+1,:])
if errorTrace.is_cuda:
plt.plot(errorTrace[:reps+1, :].cpu().numpy())
else:
plt.plot(errorTrace[:reps+1, :].numpy())
plt.ylim(0,0.7)
plt.xlabel('Epochs')
plt.ylabel('RMSE')
plt.legend(('Train','Test'),loc='best')

plt.subplot(3,1,2)
# plt.plot(Xtrain, Ttrain, 'o-', Xtest, Ttest, 'o-', Xtest, Ytest, 'o-')
if Xtrain.is_cuda:
plt.plot(Xtrain.cpu().numpy(), Ttrain.cpu().numpy(),'o-',
Xtest.cpu().numpy(), Ttest.cpu().numpy(), 'o-',
Xtest.cpu().numpy(), Ytest.cpu().numpy(), 'o-')
else:
plt.plot(Xtrain.numpy(), Ttrain.numpy(),'o-',
Xtest.numpy(), Ttest.numpy(), 'o-',
Xtest.numpy(), Ytest.numpy(), 'o-')
plt.xlim(-10,10)
plt.legend(('Training','Testing','Model'),loc='best')
plt.xlabel('$x$')
plt.ylabel('Actual and Predicted $f(x)$')

plt.subplot(3,1,3)
# plt.plot(Xtrain, Z)
if Xtrain.is_cuda:
plt.plot(Xtrain.cpu().numpy(), Z.cpu().numpy())
else:
plt.plot(Xtrain.numpy(), Z.numpy())
plt.ylim(-1.1,1.1)
plt.xlabel('$x$')
plt.ylabel('Hidden Unit Outputs ($z$)');

ipd.clear_output(wait=True)
ipd.display(fig)

endTime = time.time()

if graphics:
ipd.clear_output(wait=True)

return errorTrace, endTime - startTime

errorTrace, _ = nn_torch(Xtrain_torch, Ttrain_torch, Xtest_torch, Ttest_torch,
nHiddens=10, rhoh=0.1, rhoo=0.1, nReps=50000, graphics=True)

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-15-0f7e5d50646e> in <module>()
1 errorTrace, _ = nn_torch(Xtrain_torch, Ttrain_torch, Xtest_torch, Ttest_torch,
----> 2                          nHiddens=10, rhoh=0.1, rhoo=0.1, nReps=50000, graphics=True)

<ipython-input-14-41bdb63b2125> in nn_torch(Xtrain, Ttrain, Xtest, Ttest, nHiddens, rhoh, rhoo, nReps, graphics)
41         vDelta = ( error @ W[1:,:].t() * (1-Z**2) )
42         V[1:, :] += rh * Xtrain.t() @ vDelta
---> 43         V[0, :] += rh * torch.sum(vDelta, 0, keepdim=True)  # to prevent conversion to scalar. Necessary?
44
45         # W[1:, :] += ro * Z.T @ error

RuntimeError: output with shape [10] doesn't match the broadcast shape [1, 10]
<Figure size 1296x1296 with 0 Axes>
def nn_torch(Xtrain, Ttrain, Xtest, Ttest, nHiddens=10, rhoh=0.1, rhoo=0.1, nReps=50000, graphics=False):

dtype = Xtrain.type()  # if data is on GPU, allocate network variables on GPU, too

nSamples = Xtrain.shape[0]
nOutputs = Ttrain.shape[1]
# nSamples = Xtrain.size(0)
# nOutputs = Ttrain.size(1)

rh = rhoh / (nSamples*nOutputs)
ro = rhoo / (nSamples*nOutputs)

startTime = time.time()

# Initialize weights to uniformly distributed values between small normally-distributed between -0.1 and 0.1
# V = 0.1*2*(np.random.uniform(size=(1+1,nHiddens))-0.5)
# W = 0.1*2*(np.random.uniform(size=(1+nHiddens,nOutputs))-0.5)
V = 0.1*2*(torch.rand(1+1,nHiddens)-0.5).type(dtype)
W = 0.1*2*(torch.rand(1+nHiddens,nOutputs)-0.5).type(dtype)

# collect training and testing errors for plotting
# errorTrace = np.zeros((nReps,2))
errorTrace = torch.zeros((nReps,2)).type(dtype)

if graphics:
fig = plt.figure(figsize=(18,18))

for reps in range(nReps):

# Forward pass on training data.  No change going from numpy to pytorch!
Z = np.tanh(Xtrain @ V[1:,:] + V[0,:])
Y = Z @ W[1:,:] + W[0,:]

# Error in output
error = Ttrain - Y

# Backward pass - the backpropagation and weight update steps. Only change is in transpose and sum
# vDelta = ( error @ W[1:,:].T) * (1-Z**2)
# V[1:, :] += rh * Xtrain.T @ vDelta
# V[0, :] += rh * np.sum(vDelta, 0)
vDelta = ( error @ W[1:,:].t() * (1-Z**2) )
V[1:, :] += rh * Xtrain.t() @ vDelta
V[0:, :] += rh * torch.sum(vDelta, 0, keepdim=True)  # to prevent conversion to scalar. Necessary?

# W[1:, :] += ro * Z.T @ error
# W[0, :] += ro * np.sum(error, 0)False,
W[1:, :] += ro * Z.t() @ error
W[0:, :] += ro * torch.sum(error, 0, keepdim=True)

# error traces for plotting
# errorTrace[reps,0] = np.sqrt(np.mean((error**2)))
# Ytest = np.tanh(Xtest @ V[1:, :] + V[0, :]) @ W[1:, :] + W[0, :]  #!! Forward pass in one line
# errorTrace[reps,1] = np.sqrt(np.mean((Ytest-Ttest)**2))
errorTrace[reps,0:1] = torch_rms(error)
Ytest = torch.tanh(Xtest @ V[1:, :] + V[0, :]) @ W[1:, :] + W[0, :]  #!! Forward pass in one line
errorTrace[reps,1:2] = torch_rms(Ytest - Ttest)

if graphics and (reps % 1000 == 0 or reps == nReps-1):
plt.clf()
plt.subplot(3,1,1)
# plt.plot(errorTrace[:reps+1,:])
if errorTrace.is_cuda:
plt.plot(errorTrace[:reps+1, :].cpu().numpy())
else:
plt.plot(errorTrace[:reps+1, :].numpy())
plt.ylim(0,0.7)
plt.xlabel('Epochs')
plt.ylabel('RMSE')
plt.legend(('Train','Test'),loc='best')

plt.subplot(3,1,2)
# plt.plot(Xtrain, Ttrain, 'o-', Xtest, Ttest, 'o-', Xtest, Ytest, 'o-')
if Xtrain.is_cuda:
plt.plot(Xtrain.cpu().numpy(), Ttrain.cpu().numpy(),'o-',
Xtest.cpu().numpy(), Ttest.cpu().numpy(), 'o-',
Xtest.cpu().numpy(), Ytest.cpu().numpy(), 'o-')
else:
plt.plot(Xtrain.numpy(), Ttrain.numpy(),'o-',
Xtest.numpy(), Ttest.numpy(), 'o-',
Xtest.numpy(), Ytest.numpy(), 'o-')
plt.xlim(-10,10)
plt.legend(('Training','Testing','Model'),loc='best')
plt.xlabel('$x$')
plt.ylabel('Actual and Predicted $f(x)$')

plt.subplot(3,1,3)
# plt.plot(Xtrain, Z)
if Xtrain.is_cuda:
plt.plot(Xtrain.cpu().numpy(), Z.cpu().numpy())
else:
plt.plot(Xtrain.numpy(), Z.numpy())
plt.ylim(-1.1,1.1)
plt.xlabel('$x$')
plt.ylabel('Hidden Unit Outputs ($z$)');

ipd.clear_output(wait=True)
ipd.display(fig)

endTime = time.time()

if graphics:
ipd.clear_output(wait=True)

return errorTrace, endTime - startTime

errorTrace, _ = nn_torch(Xtrain_torch, Ttrain_torch, Xtest_torch, Ttest_torch,
nHiddens=10, rhoh=0.1, rhoo=0.1, nReps=50000, graphics=True)

_, seconds = nn_torch(Xtrain_torch, Ttrain_torch, Xtest_torch, Ttest_torch,
nHiddens=10, rhoh=0.5, rhoo=0.1, nReps=50000, graphics=False)
seconds

21.48136854171753
スポンサーリンク

## Pytorch on GPU¶

torch.cuda.is_available()

True
dtype = torch.cuda.FloatTensor
dtype

torch.cuda.FloatTensor
dtype = torch.cuda.FloatTensor
n = 20

Xtrain_gpu = torch.linspace(0.,20.0,n).view((n,1)).type(dtype) - 10
Ttrain_gpu = 0.2 + 0.05 * (Xtrain_gpu+10) + 0.4 * torch.sin(Xtrain_gpu+10) + 0.2 * torch.randn((n,1)).type(dtype)

# Make some testing data
Xtest_gpu = Xtrain_gpu + 0.1 * torch.randn((n,1)).type(dtype)
Ttest_gpu = 0.2 + 0.05 * (Xtest_gpu+10) + 0.4 * torch.sin(Xtest_gpu+10) + 0.2 * torch.randn((n,1)).type(dtype)

Xtrain_gpu = torch.from_numpy(Xtrain).type(dtype)
Ttrain_gpu = torch.from_numpy(Ttrain).type(dtype)

Xtest_gpu = torch.from_numpy(Xtest).type(dtype)
Ttest_gpu = torch.from_numpy(Ttest).type(dtype)

Xtrain_gpu.type()

'torch.cuda.FloatTensor'
errorTrace, _ = nn_torch(Xtrain_gpu, Ttrain_gpu, Xtest_gpu, Ttest_gpu,
nHiddens=10, rhoh=0.5, rhoo=0.1, nReps=50000, graphics=True)

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-24-62023d513a8e> in <module>()
1 errorTrace, _ = nn_torch(Xtrain_gpu, Ttrain_gpu, Xtest_gpu, Ttest_gpu,
----> 2                          nHiddens=10, rhoh=0.5, rhoo=0.1, nReps=50000, graphics=True)

<ipython-input-23-719c08ecd04d> in nn_torch(Xtrain, Ttrain, Xtest, Ttest, nHiddens, rhoh, rhoo, nReps, graphics)
29
30         # Forward pass on training data.  No change going from numpy to pytorch!
---> 31         Z = np.tanh(Xtrain @ V[1:,:] + V[0,:])
32         Y = Z @ W[1:,:] + W[0,:]
33

~/.pyenv/versions/py365/lib/python3.6/site-packages/torch/tensor.py in __array__(self, dtype)
410     def __array__(self, dtype=None):
411         if dtype is None:
--> 412             return self.numpy()
413         else:
414             return self.numpy().astype(dtype, copy=False)

TypeError: can't convert CUDA tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.
<Figure size 1296x1296 with 0 Axes>
def nn_torch(Xtrain, Ttrain, Xtest, Ttest, nHiddens=10, rhoh=0.1, rhoo=0.1, nReps=50000, graphics=False):

dtype = Xtrain.type()  # if data is on GPU, allocate network variables on GPU, too

nSamples = Xtrain.shape[0]
nOutputs = Ttrain.shape[1]
# nSamples = Xtrain.size(0)
# nOutputs = Ttrain.size(1)

rh = rhoh / (nSamples*nOutputs)
ro = rhoo / (nSamples*nOutputs)

startTime = time.time()

# Initialize weights to uniformly distributed values between small normally-distributed between -0.1 and 0.1
# V = 0.1*2*(np.random.uniform(size=(1+1,nHiddens))-0.5)
# W = 0.1*2*(np.random.uniform(size=(1+nHiddens,nOutputs))-0.5)
V = 0.1*2*(torch.rand(1+1,nHiddens)-0.5).type(dtype)
W = 0.1*2*(torch.rand(1+nHiddens,nOutputs)-0.5).type(dtype)

# collect training and testing errors for plotting
# errorTrace = np.zeros((nReps,2))
errorTrace = torch.zeros((nReps,2)).type(dtype)

if graphics:
fig = plt.figure(figsize=(18,18))

for reps in range(nReps):

# Forward pass on training data.  No change going from numpy to pytorch!
Z = torch.tanh(Xtrain @ V[1:,:] + V[0,:])
Y = Z @ W[1:,:] + W[0,:]

# Error in output
error = Ttrain - Y

# Backward pass - the backpropagation and weight update steps. Only change is in transpose and sum
# vDelta = ( error @ W[1:,:].T) * (1-Z**2)
# V[1:, :] += rh * Xtrain.T @ vDelta
# V[0, :] += rh * np.sum(vDelta, 0)
vDelta = ( error @ W[1:,:].t() * (1-Z**2) )
V[1:, :] += rh * Xtrain.t() @ vDelta
V[0:, :] += rh * torch.sum(vDelta, 0, keepdim=True)  # to prevent conversion to scalar. Necessary?

# W[1:, :] += ro * Z.T @ error
# W[0, :] += ro * np.sum(error, 0)False,
W[1:, :] += ro * Z.t() @ error
W[0:, :] += ro * torch.sum(error, 0, keepdim=True)

# error traces for plotting
# errorTrace[reps,0] = np.sqrt(np.mean((error**2)))
# Ytest = np.tanh(Xtest @ V[1:, :] + V[0, :]) @ W[1:, :] + W[0, :]  #!! Forward pass in one line
# errorTrace[reps,1] = np.sqrt(np.mean((Ytest-Ttest)**2))
errorTrace[reps,0:1] = torch_rms(error)
Ytest = torch.tanh(Xtest @ V[1:, :] + V[0, :]) @ W[1:, :] + W[0, :]  #!! Forward pass in one line
errorTrace[reps,1:2] = torch_rms(Ytest - Ttest)

if graphics and (reps % 1000 == 0 or reps == nReps-1):
plt.clf()
plt.subplot(3,1,1)
# plt.plot(errorTrace[:reps+1,:])
if errorTrace.is_cuda:
plt.plot(errorTrace[:reps+1, :].cpu().numpy())
else:
plt.plot(errorTrace[:reps+1, :].numpy())
plt.ylim(0,0.7)
plt.xlabel('Epochs')
plt.ylabel('RMSE')
plt.legend(('Train','Test'),loc='best')

plt.subplot(3,1,2)
# plt.plot(Xtrain, Ttrain, 'o-', Xtest, Ttest, 'o-', Xtest, Ytest, 'o-')
if Xtrain.is_cuda:
plt.plot(Xtrain.cpu().numpy(), Ttrain.cpu().numpy(),'o-',
Xtest.cpu().numpy(), Ttest.cpu().numpy(), 'o-',
Xtest.cpu().numpy(), Ytest.cpu().numpy(), 'o-')
else:
plt.plot(Xtrain.numpy(), Ttrain.numpy(),'o-',
Xtest.numpy(), Ttest.numpy(), 'o-',
Xtest.numpy(), Ytest.numpy(), 'o-')
plt.xlim(-10,10)
plt.legend(('Training','Testing','Model'),loc='best')
plt.xlabel('$x$')
plt.ylabel('Actual and Predicted $f(x)$')

plt.subplot(3,1,3)
# plt.plot(Xtrain, Z)
if Xtrain.is_cuda:
plt.plot(Xtrain.cpu().numpy(), Z.cpu().numpy())
else:
plt.plot(Xtrain.numpy(), Z.numpy())
plt.ylim(-1.1,1.1)
plt.xlabel('$x$')
plt.ylabel('Hidden Unit Outputs ($z$)');

ipd.clear_output(wait=True)
ipd.display(fig)

endTime = time.time()

if graphics:
ipd.clear_output(wait=True)

return errorTrace, endTime - startTime

errorTrace, _ = nn_torch(Xtrain_gpu, Ttrain_gpu, Xtest_gpu, Ttest_gpu,
nHiddens=10, rhoh=0.5, rhoo=0.1, nReps=50000, graphics=True)

_, seconds = nn_torch(Xtrain_gpu, Ttrain_gpu, Xtest_gpu, Ttest_gpu,
nHiddens=5000, rhoh=0.001, rhoo=0.001, nReps=50000, graphics=False)
seconds

32.83034038543701
スポンサーリンク

## All three, with bigger nets¶

results = []
for nReps in [1000, 5000, 20000]:
for nH in [10, 100, 1000, 10000]:

errors_numpy, seconds_numpy = nn_numpy(Xtrain, Ttrain, Xtest, Ttest,
nHiddens=nH, rhoh=0.1, rhoo=0.0001, nReps=nReps)

errors_torch, seconds_torch = nn_torch(Xtrain_torch, Ttrain_torch, Xtest_torch, Ttest_torch,
nHiddens=nH, rhoh=0.1, rhoo=0.0001, nReps=nReps)

errors_gpu, seconds_gpu = nn_torch(Xtrain_gpu, Ttrain_gpu, Xtest_gpu, Ttest_gpu,
nHiddens=nH, rhoh=0.1, rhoo=0.0001, nReps=nReps)

print('nHidden {:5d} nReps {:7d}, numpy {:8.2f}, torch {:8.2f}, gpu {:8.2f}  errors {:.2f} {:.2f} {:.2f}'.
format(nH, nReps, seconds_numpy, seconds_torch, seconds_gpu,
errors_numpy[-1,1], errors_torch[-1,1], errors_gpu[-1,1]))
results.append([nH, nReps, seconds_numpy, seconds_torch, seconds_gpu])

nHidden    10 nReps    1000, numpy     0.10, torch     0.37, gpu     0.64  errors 0.44 0.39 0.46
nHidden   100 nReps    1000, numpy     0.14, torch     0.44, gpu     0.65  errors 0.34 0.34 0.34
nHidden  1000 nReps    1000, numpy     1.84, torch     0.78, gpu     0.65  errors 0.35 0.35 0.36
nHidden 10000 nReps    1000, numpy    13.09, torch     2.38, gpu     0.66  errors 0.94 0.90 0.68
nHidden    10 nReps    5000, numpy     0.38, torch     2.67, gpu     3.85  errors 0.35 0.35 0.35
nHidden   100 nReps    5000, numpy     0.70, torch     2.93, gpu     3.29  errors 0.34 0.34 0.34
nHidden  1000 nReps    5000, numpy     9.83, torch     3.41, gpu     3.28  errors 0.35 0.35 0.35
nHidden 10000 nReps    5000, numpy    71.22, torch    11.12, gpu     3.33  errors 0.49 0.45 0.51
nHidden    10 nReps   20000, numpy     1.56, torch     7.90, gpu    12.72  errors 0.34 0.34 0.34
nHidden   100 nReps   20000, numpy     2.75, torch     8.48, gpu    12.80  errors 0.34 0.34 0.34
nHidden  1000 nReps   20000, numpy    39.31, torch    13.29, gpu    13.13  errors 0.34 0.36 0.34
nHidden 10000 nReps   20000, numpy   315.43, torch    41.79, gpu    13.23  errors 0.42 0.37 0.50

results

[[10, 1000, 0.09626293182373047, 0.3693883419036865, 0.6446230411529541],
[100, 1000, 0.1426990032196045, 0.4350268840789795, 0.6464996337890625],
[1000, 1000, 1.844691276550293, 0.7803530693054199, 0.6508212089538574],
[10000, 1000, 13.086198806762695, 2.3752691745758057, 0.6644260883331299],
[10, 5000, 0.3769991397857666, 2.6731884479522705, 3.854335069656372],
[100, 5000, 0.6954240798950195, 2.9310402870178223, 3.288994073867798],
[1000, 5000, 9.830886840820312, 3.406135082244873, 3.282198190689087],
[10000, 5000, 71.21766066551208, 11.124212741851807, 3.3327584266662598],
[10, 20000, 1.5561366081237793, 7.895970106124878, 12.721657514572144],
[100, 20000, 2.747685670852661, 8.4824800491333, 12.801572799682617],
[1000, 20000, 39.311296701431274, 13.29249095916748, 13.128490209579468],
[10000, 20000, 315.4342806339264, 41.791707277297974, 13.232482671737671]]
results = np.array(results)

legends = []
nH = results[:4, 0:1]

if False:
rows = results[:,1] == 1000
plt.semilogx(nH,results[rows,2:])
legends = ['nReps 1000 ' + s for s in ['np', 'torch', 'gpu']]

rows = results[:,1] == 5000
plt.semilogx(nH,results[rows, 2:])
legends += ['nReps 5000 ' + s for s in ['np', 'torch', 'gpu']]

rows = results[:,1] == 20000
plt.semilogx(nH,results[rows, 2:], 'o-')
legends += ['nReps 20000 ' + s for s in ['np', 'torch', 'gpu']]

plt.ylabel('Seconds')
plt.xlabel('Number of Hidden Units')
plt.legend(legends);


pytorch(CPU)がGPUと大差があまりないことに驚かされたので、nRepsを100000にして再度コードを走らせてみた。

results = []
for nReps in [1000, 5000, 100000]:
for nH in [10, 100, 1000, 10000]:

errors_numpy, seconds_numpy = nn_numpy(Xtrain, Ttrain, Xtest, Ttest,
nHiddens=nH, rhoh=0.1, rhoo=0.0001, nReps=nReps)

errors_torch, seconds_torch = nn_torch(Xtrain_torch, Ttrain_torch, Xtest_torch, Ttest_torch,
nHiddens=nH, rhoh=0.1, rhoo=0.0001, nReps=nReps)

errors_gpu, seconds_gpu = nn_torch(Xtrain_gpu, Ttrain_gpu, Xtest_gpu, Ttest_gpu,
nHiddens=nH, rhoh=0.1, rhoo=0.0001, nReps=nReps)

print('nHidden {:5d} nReps {:7d}, numpy {:8.2f}, torch {:8.2f}, gpu {:8.2f}  errors {:.2f} {:.2f} {:.2f}'.
format(nH, nReps, seconds_numpy, seconds_torch, seconds_gpu,
errors_numpy[-1,1], errors_torch[-1,1], errors_gpu[-1,1]))
results.append([nH, nReps, seconds_numpy, seconds_torch, seconds_gpu])

nHidden    10 nReps    1000, numpy     0.08, torch     0.37, gpu     0.66  errors 0.43 0.44 0.39
nHidden   100 nReps    1000, numpy     0.14, torch     0.51, gpu     0.68  errors 0.34 0.34 0.34
nHidden  1000 nReps    1000, numpy     2.04, torch     0.73, gpu     0.64  errors 0.39 0.36 0.38
nHidden 10000 nReps    1000, numpy    12.30, torch     2.02, gpu     0.65  errors 0.80 0.53 0.56
nHidden    10 nReps    5000, numpy     0.40, torch     1.81, gpu     3.18  errors 0.35 0.35 0.35
nHidden   100 nReps    5000, numpy     0.69, torch     1.94, gpu     3.24  errors 0.34 0.34 0.34
nHidden  1000 nReps    5000, numpy     8.63, torch     3.21, gpu     3.29  errors 0.35 0.34 0.36
nHidden 10000 nReps    5000, numpy    82.07, torch    10.59, gpu     3.26  errors 0.70 0.40 0.44
nHidden    10 nReps  100000, numpy     7.50, torch    39.12, gpu    63.81  errors 0.33 0.34 0.33
nHidden   100 nReps  100000, numpy    13.75, torch    42.42, gpu    65.74  errors 0.34 0.34 0.34
nHidden  1000 nReps  100000, numpy   205.15, torch    66.14, gpu    64.05  errors 0.35 0.34 0.34
nHidden 10000 nReps  100000, numpy  1515.25, torch   203.42, gpu    65.33  errors 0.41 0.54 0.52

results = np.array(results)

legends = []
nH = results[:4, 0:1]

if False:
rows = results[:,1] == 1000
plt.semilogx(nH,results[rows,2:])
legends = ['nReps 1000 ' + s for s in ['np', 'torch', 'gpu']]

rows = results[:,1] == 5000
plt.semilogx(nH,results[rows, 2:])
legends += ['nReps 5000 ' + s for s in ['np', 'torch', 'gpu']]

rows = results[:,1] == 100000
plt.semilogx(nH,results[rows, 2:], 'o-')
legends += ['nReps 100000 ' + s for s in ['np', 'torch', 'gpu']]

plt.ylabel('Seconds')
plt.xlabel('Number of Hidden Units')
plt.legend(legends);


pytorch(cpu)はバージョンアップでかなり高速化したように見受けられる。

スポンサーリンク
スポンサーリンク

フォローする