This time, I put PyTorch 1.0 through its paces. In a previous post on this blog, pytorch-0.5 turned out to be considerably faster, so I wanted to find out how much of a speedup pytorch 1.0 actually brings.
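Before running anything, it is worth confirming which build is actually installed; a minimal check (assuming a CUDA-enabled install, since the GPU runs below need one):
import torch
print(torch.__version__)          # expecting a 1.0.x build
print(torch.cuda.is_available())  # should be True for the GPU section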
Simple Neural Network
import matplotlib.pyplot as plt
import numpy as np
import IPython.display as ipd # for display and clear_output
import time # for sleep and time()
%matplotlib inline
plt.rcParams.update({'font.size': 18, 'font.family': 'STIXGeneral', 'mathtext.fontset': 'stix'})
# Make some training data
n = 20
Xtrain = np.linspace(0., 20.0, n).reshape((n,1)) - 10
Ttrain = 0.2 + 0.05 * (Xtrain+10) + 0.4 * np.sin(Xtrain+10) + 0.2 * np.random.normal(size=(n,1))
# Make some testing data
Xtest = Xtrain + 0.1*np.random.normal(size=(n,1))
Ttest = 0.2 + 0.05 * (Xtest+10) + 0.4 * np.sin(Xtest+10) + 0.2 * np.random.normal(size=(n,1))
Xtrain = Xtrain.astype(np.float32)
Ttrain = Ttrain.astype(np.float32)
Xtest = Xtest.astype(np.float32)
Ttest = Ttest.astype(np.float32)
plt.rcParams['figure.figsize'] = 14, 7
plt.rcParams["font.size"] = "17"
plt.plot(Xtrain, Ttrain, label='Training Data')
plt.plot(Xtest, Ttest, label='Testing Data')
plt.legend();
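For reference, the generating function implied by the code above is
$$ t = 0.2 + 0.05\,(x+10) + 0.4\,\sin(x+10) + \varepsilon, \qquad \varepsilon \sim \mathcal{N}(0,\ 0.2^2), $$
with the test inputs being the training inputs jittered by Gaussian noise of standard deviation 0.1.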
Numpy Implementation
def nn_numpy(Xtrain, Ttrain, Xtest, Ttest, nHiddens=10, rhoh=0.1, rhoo=0.1, nReps=50000, graphics=False):
nSamples = Xtrain.shape[0]
nOutputs = Ttrain.shape[1]
rh = rhoh / (nSamples*nOutputs)
ro = rhoo / (nSamples*nOutputs)
startTime = time.time()
    # Initialize weights to small uniformly distributed values between -0.1 and 0.1
V = 0.1*2*(np.random.uniform(size=(1+1,nHiddens))-0.5).astype(Xtrain.dtype)
W = 0.1*2*(np.random.uniform(size=(1+nHiddens,nOutputs))-0.5).astype(Xtrain.dtype)
# collect training and testing errors for plotting
errorTrace = np.zeros((nReps,2))
if graphics:
fig = plt.figure(figsize=(18,18))
for reps in range(nReps):
# Forward pass on training data
Z = np.tanh(Xtrain @ V[1:,:] + V[0,:])
Y = Z @ W[1:,:] + W[0,:]
# Error in output
error = Ttrain - Y
# Backward pass - the backpropagation and weight update steps
vDelta = ( error @ W[1:,:].T) * (1-Z**2)
V[1:, :] += rh * Xtrain.T @ vDelta
V[0, :] += rh * np.sum(vDelta, 0)
W[1:, :] += ro * Z.T @ error
W[0, :] += ro * np.sum(error, 0)
# error traces for plotting
errorTrace[reps,0] = np.sqrt(np.mean((error**2)))
Ytest = np.tanh(Xtest @ V[1:, :] + V[0, :]) @ W[1:, :] + W[0, :] #!! Forward pass in one line
errorTrace[reps,1] = np.sqrt(np.mean((Ytest-Ttest)**2))
if graphics and (reps % 1000 == 0 or reps == nReps-1):
plt.clf()
plt.subplot(3,1,1)
plt.plot(errorTrace[:reps+1,:])
plt.ylim(0,0.7)
plt.xlabel('Epochs')
plt.ylabel('RMSE')
plt.legend(('Train','Test'),loc='best')
plt.subplot(3,1,2)
plt.plot(Xtrain, Ttrain, 'o-',
Xtest, Ttest, 'o-',
Xtest, Ytest, 'o-')
plt.xlim(-10,10)
plt.legend(('Training','Testing','Model'),loc='best')
plt.xlabel('$x$')
plt.ylabel('Actual and Predicted $f(x)$')
plt.subplot(3,1,3)
plt.plot(Xtrain, Z)
plt.ylim(-1.1,1.1)
plt.xlabel('$x$')
plt.ylabel('Hidden Unit Outputs ($z$)');
ipd.clear_output(wait=True)
ipd.display(fig)
endTime = time.time()
if graphics:
ipd.clear_output(wait=True)
return errorTrace, endTime - startTime
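As a quick sanity check before the timing runs, nn_numpy can be called directly on the data generated above (a small illustrative run, not part of the benchmark):
errors, seconds = nn_numpy(Xtrain, Ttrain, Xtest, Ttest, nHiddens=10, nReps=5000, graphics=True)
print('final test RMSE {:.3f}, {:.2f} seconds'.format(errors[-1, 1], seconds))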
PyTorch
import torch
dtype = torch.FloatTensor
# dtype = torch.cuda.FloatTensor # Uncomment this to run on GPU
dtype
Xtrain_torch = torch.from_numpy(Xtrain)
Xtrain_torch.type()
# Make some training data
n = 20
# Xtrain = np.linspace(0.,20.0,n).reshape((n,1)) - 10
# Ttrain = 0.2 + 0.05 * (Xtrain+10) + 0.4 * np.sin(Xtrain+10) + 0.2 * np.random.normal(size=(n,1))
Xtrain_torch = torch.linspace(0.,20.0,n).view((n,1)).type(dtype) - 10
Ttrain_torch = 0.2 + 0.05 * (Xtrain_torch+10) + 0.4 * torch.sin(Xtrain_torch+10) + 0.2 * torch.randn((n,1)).type(dtype)
# Make some testing data
# Xtest = Xtrain + 0.1*np.random.normal(size=(n,1))
# Ttest = 0.2 + 0.05 * (Xtest+10) + 0.4 * np.sin(Xtest+10) + 0.2 * np.random.normal(size=(n,1))
Xtest_torch = Xtrain_torch + 0.1 * torch.randn((n,1)).type(dtype)
Ttest_torch = 0.2 + 0.05 * (Xtest_torch+10) + 0.4 * torch.sin(Xtest_torch+10) + 0.2 * torch.randn((n,1)).type(dtype)
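# Overwrite with the arrays generated in the numpy section, so all implementations train on identical data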
Xtrain_torch = torch.from_numpy(Xtrain)
Ttrain_torch = torch.from_numpy(Ttrain)
Xtest_torch = torch.from_numpy(Xtest)
Ttest_torch = torch.from_numpy(Ttest)
def torch_rms(error):
m = torch.mean(error**2, 0, keepdim=True)
mall = torch.mean(m, 1, keepdim=True)
return torch.sqrt(mall)
dt = torch.FloatTensor
a = torch.ones(5,5).type(dt)
b = a+1.2
print(a)
print(b)
torch_rms(a-b)
dt = torch.cuda.FloatTensor
a = torch.ones(5,5).type(dt)
b = a+1.2
print(a)
print(b)
torch_rms(a-b)
def nn_torch(Xtrain, Ttrain, Xtest, Ttest, nHiddens=10, rhoh=0.1, rhoo=0.1, nReps=50000, graphics=False):
dtype = Xtrain.type() # if data is on GPU, allocate network variables on GPU, too
nSamples = Xtrain.shape[0]
nOutputs = Ttrain.shape[1]
# nSamples = Xtrain.size(0)
# nOutputs = Ttrain.size(1)
rh = rhoh / (nSamples*nOutputs)
ro = rhoo / (nSamples*nOutputs)
startTime = time.time()
    # Initialize weights to small uniformly distributed values between -0.1 and 0.1
# V = 0.1*2*(np.random.uniform(size=(1+1,nHiddens))-0.5)
# W = 0.1*2*(np.random.uniform(size=(1+nHiddens,nOutputs))-0.5)
V = 0.1*2*(torch.rand(1+1,nHiddens)-0.5).type(dtype)
W = 0.1*2*(torch.rand(1+nHiddens,nOutputs)-0.5).type(dtype)
# collect training and testing errors for plotting
# errorTrace = np.zeros((nReps,2))
errorTrace = torch.zeros((nReps,2)).type(dtype)
if graphics:
fig = plt.figure(figsize=(18,18))
for reps in range(nReps):
# Forward pass on training data. No change going from numpy to pytorch!
Z = np.tanh(Xtrain @ V[1:,:] + V[0,:])
Y = Z @ W[1:,:] + W[0,:]
# Error in output
error = Ttrain - Y
# Backward pass - the backpropagation and weight update steps. Only change is in transpose and sum
# vDelta = ( error @ W[1:,:].T) * (1-Z**2)
# V[1:, :] += rh * Xtrain.T @ vDelta
# V[0, :] += rh * np.sum(vDelta, 0)
        vDelta = (error @ W[1:,:].t()) * (1-Z**2)
        V[1:, :] += rh * Xtrain.t() @ vDelta
        V[0:1, :] += rh * torch.sum(vDelta, 0, keepdim=True)  # keepdim keeps the 2D shape for the in-place bias update
        # W[1:, :] += ro * Z.T @ error
        # W[0, :] += ro * np.sum(error, 0)
        W[1:, :] += ro * Z.t() @ error
        W[0:1, :] += ro * torch.sum(error, 0, keepdim=True)
# error traces for plotting
# errorTrace[reps,0] = np.sqrt(np.mean((error**2)))
# Ytest = np.tanh(Xtest @ V[1:, :] + V[0, :]) @ W[1:, :] + W[0, :] #!! Forward pass in one line
# errorTrace[reps,1] = np.sqrt(np.mean((Ytest-Ttest)**2))
errorTrace[reps,0:1] = torch_rms(error)
Ytest = torch.tanh(Xtest @ V[1:, :] + V[0, :]) @ W[1:, :] + W[0, :] #!! Forward pass in one line
errorTrace[reps,1:2] = torch_rms(Ytest - Ttest)
if graphics and (reps % 1000 == 0 or reps == nReps-1):
plt.clf()
plt.subplot(3,1,1)
# plt.plot(errorTrace[:reps+1,:])
if errorTrace.is_cuda:
plt.plot(errorTrace[:reps+1, :].cpu().numpy())
else:
plt.plot(errorTrace[:reps+1, :].numpy())
plt.ylim(0,0.7)
plt.xlabel('Epochs')
plt.ylabel('RMSE')
plt.legend(('Train','Test'),loc='best')
plt.subplot(3,1,2)
# plt.plot(Xtrain, Ttrain, 'o-', Xtest, Ttest, 'o-', Xtest, Ytest, 'o-')
if Xtrain.is_cuda:
plt.plot(Xtrain.cpu().numpy(), Ttrain.cpu().numpy(),'o-',
Xtest.cpu().numpy(), Ttest.cpu().numpy(), 'o-',
Xtest.cpu().numpy(), Ytest.cpu().numpy(), 'o-')
else:
plt.plot(Xtrain.numpy(), Ttrain.numpy(),'o-',
Xtest.numpy(), Ttest.numpy(), 'o-',
Xtest.numpy(), Ytest.numpy(), 'o-')
plt.xlim(-10,10)
plt.legend(('Training','Testing','Model'),loc='best')
plt.xlabel('$x$')
plt.ylabel('Actual and Predicted $f(x)$')
plt.subplot(3,1,3)
# plt.plot(Xtrain, Z)
if Xtrain.is_cuda:
plt.plot(Xtrain.cpu().numpy(), Z.cpu().numpy())
else:
plt.plot(Xtrain.numpy(), Z.numpy())
plt.ylim(-1.1,1.1)
plt.xlabel('$x$')
plt.ylabel('Hidden Unit Outputs ($z$)');
ipd.clear_output(wait=True)
ipd.display(fig)
endTime = time.time()
if graphics:
ipd.clear_output(wait=True)
return errorTrace, endTime - startTime
PyTorch on GPU
torch.cuda.is_available()
dtype = torch.cuda.FloatTensor
dtype
dtype = torch.cuda.FloatTensor
n = 20
Xtrain_gpu = torch.linspace(0.,20.0,n).view((n,1)).type(dtype) - 10
Ttrain_gpu = 0.2 + 0.05 * (Xtrain_gpu+10) + 0.4 * torch.sin(Xtrain_gpu+10) + 0.2 * torch.randn((n,1)).type(dtype)
# Make some testing data
Xtest_gpu = Xtrain_gpu + 0.1 * torch.randn((n,1)).type(dtype)
Ttest_gpu = 0.2 + 0.05 * (Xtest_gpu+10) + 0.4 * torch.sin(Xtest_gpu+10) + 0.2 * torch.randn((n,1)).type(dtype)
Xtrain_gpu = torch.from_numpy(Xtrain).type(dtype)
Ttrain_gpu = torch.from_numpy(Ttrain).type(dtype)
Xtest_gpu = torch.from_numpy(Xtest).type(dtype)
Ttest_gpu = torch.from_numpy(Ttest).type(dtype)
Xtrain_gpu.type()
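For what it's worth, the same transfer can also be written with the device API introduced in PyTorch 0.4 (a sketch equivalent to the .type(dtype) calls above):
device = torch.device('cuda')
Xtrain_gpu = torch.from_numpy(Xtrain).to(device)
Ttrain_gpu = torch.from_numpy(Ttrain).to(device)
Xtest_gpu = torch.from_numpy(Xtest).to(device)
Ttest_gpu = torch.from_numpy(Ttest).to(device)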
def nn_torch(Xtrain, Ttrain, Xtest, Ttest, nHiddens=10, rhoh=0.1, rhoo=0.1, nReps=50000, graphics=False):
dtype = Xtrain.type() # if data is on GPU, allocate network variables on GPU, too
nSamples = Xtrain.shape[0]
nOutputs = Ttrain.shape[1]
# nSamples = Xtrain.size(0)
# nOutputs = Ttrain.size(1)
rh = rhoh / (nSamples*nOutputs)
ro = rhoo / (nSamples*nOutputs)
startTime = time.time()
    # Initialize weights to small uniformly distributed values between -0.1 and 0.1
# V = 0.1*2*(np.random.uniform(size=(1+1,nHiddens))-0.5)
# W = 0.1*2*(np.random.uniform(size=(1+nHiddens,nOutputs))-0.5)
V = 0.1*2*(torch.rand(1+1,nHiddens)-0.5).type(dtype)
W = 0.1*2*(torch.rand(1+nHiddens,nOutputs)-0.5).type(dtype)
# collect training and testing errors for plotting
# errorTrace = np.zeros((nReps,2))
errorTrace = torch.zeros((nReps,2)).type(dtype)
if graphics:
fig = plt.figure(figsize=(18,18))
for reps in range(nReps):
# Forward pass on training data. No change going from numpy to pytorch!
Z = torch.tanh(Xtrain @ V[1:,:] + V[0,:])
Y = Z @ W[1:,:] + W[0,:]
# Error in output
error = Ttrain - Y
# Backward pass - the backpropagation and weight update steps. Only change is in transpose and sum
# vDelta = ( error @ W[1:,:].T) * (1-Z**2)
# V[1:, :] += rh * Xtrain.T @ vDelta
# V[0, :] += rh * np.sum(vDelta, 0)
        vDelta = (error @ W[1:,:].t()) * (1-Z**2)
        V[1:, :] += rh * Xtrain.t() @ vDelta
        V[0:1, :] += rh * torch.sum(vDelta, 0, keepdim=True)  # keepdim keeps the 2D shape for the in-place bias update
        # W[1:, :] += ro * Z.T @ error
        # W[0, :] += ro * np.sum(error, 0)
        W[1:, :] += ro * Z.t() @ error
        W[0:1, :] += ro * torch.sum(error, 0, keepdim=True)
# error traces for plotting
# errorTrace[reps,0] = np.sqrt(np.mean((error**2)))
# Ytest = np.tanh(Xtest @ V[1:, :] + V[0, :]) @ W[1:, :] + W[0, :] #!! Forward pass in one line
# errorTrace[reps,1] = np.sqrt(np.mean((Ytest-Ttest)**2))
errorTrace[reps,0:1] = torch_rms(error)
Ytest = torch.tanh(Xtest @ V[1:, :] + V[0, :]) @ W[1:, :] + W[0, :] #!! Forward pass in one line
errorTrace[reps,1:2] = torch_rms(Ytest - Ttest)
if graphics and (reps % 1000 == 0 or reps == nReps-1):
plt.clf()
plt.subplot(3,1,1)
# plt.plot(errorTrace[:reps+1,:])
if errorTrace.is_cuda:
plt.plot(errorTrace[:reps+1, :].cpu().numpy())
else:
plt.plot(errorTrace[:reps+1, :].numpy())
plt.ylim(0,0.7)
plt.xlabel('Epochs')
plt.ylabel('RMSE')
plt.legend(('Train','Test'),loc='best')
plt.subplot(3,1,2)
# plt.plot(Xtrain, Ttrain, 'o-', Xtest, Ttest, 'o-', Xtest, Ytest, 'o-')
if Xtrain.is_cuda:
plt.plot(Xtrain.cpu().numpy(), Ttrain.cpu().numpy(),'o-',
Xtest.cpu().numpy(), Ttest.cpu().numpy(), 'o-',
Xtest.cpu().numpy(), Ytest.cpu().numpy(), 'o-')
else:
plt.plot(Xtrain.numpy(), Ttrain.numpy(),'o-',
Xtest.numpy(), Ttest.numpy(), 'o-',
Xtest.numpy(), Ytest.numpy(), 'o-')
plt.xlim(-10,10)
plt.legend(('Training','Testing','Model'),loc='best')
plt.xlabel('$x$')
plt.ylabel('Actual and Predicted $f(x)$')
plt.subplot(3,1,3)
# plt.plot(Xtrain, Z)
if Xtrain.is_cuda:
plt.plot(Xtrain.cpu().numpy(), Z.cpu().numpy())
else:
plt.plot(Xtrain.numpy(), Z.numpy())
plt.ylim(-1.1,1.1)
plt.xlabel('$x$')
plt.ylabel('Hidden Unit Outputs ($z$)');
ipd.clear_output(wait=True)
ipd.display(fig)
endTime = time.time()
if graphics:
ipd.clear_output(wait=True)
return errorTrace, endTime - startTime
All three, with bigger nets
results = []
for nReps in [1000, 5000, 100000]:
for nH in [10, 100, 1000, 10000]:
errors_numpy, seconds_numpy = nn_numpy(Xtrain, Ttrain, Xtest, Ttest,
nHiddens=nH, rhoh=0.1, rhoo=0.0001, nReps=nReps)
errors_torch, seconds_torch = nn_torch(Xtrain_torch, Ttrain_torch, Xtest_torch, Ttest_torch,
nHiddens=nH, rhoh=0.1, rhoo=0.0001, nReps=nReps)
errors_gpu, seconds_gpu = nn_torch(Xtrain_gpu, Ttrain_gpu, Xtest_gpu, Ttest_gpu,
nHiddens=nH, rhoh=0.1, rhoo=0.0001, nReps=nReps)
print('nHidden {:5d} nReps {:7d}, numpy {:8.2f}, torch {:8.2f}, gpu {:8.2f} errors {:.2f} {:.2f} {:.2f}'.
format(nH, nReps, seconds_numpy, seconds_torch, seconds_gpu,
errors_numpy[-1,1], errors_torch[-1,1], errors_gpu[-1,1]))
results.append([nH, nReps, seconds_numpy, seconds_torch, seconds_gpu])
results = np.array(results)
legends = []
nH = results[:4, 0:1]
if False:
rows = results[:,1] == 1000
plt.semilogx(nH,results[rows,2:])
legends = ['nReps 1000 ' + s for s in ['np', 'torch', 'gpu']]
rows = results[:,1] == 5000
plt.semilogx(nH,results[rows, 2:])
legends += ['nReps 5000 ' + s for s in ['np', 'torch', 'gpu']]
rows = results[:,1] == 100000
plt.semilogx(nH,results[rows, 2:], 'o-')
legends += ['nReps 100000 ' + s for s in ['np', 'torch', 'gpu']]
plt.ylabel('Seconds')
plt.xlabel('Number of Hidden Units')
plt.legend(legends);
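To read off the headline speed-ups directly, the ratios can be computed from the results array (columns are nH, nReps, numpy, torch, and gpu seconds, as filled in above):
speedup_torch = results[:, 2] / results[:, 3]  # numpy time / torch CPU time
speedup_gpu = results[:, 2] / results[:, 4]    # numpy time / torch GPU time
for (nh, nreps, *_), st, sg in zip(results, speedup_torch, speedup_gpu):
    print('nHidden {:6.0f} nReps {:7.0f}: torch x{:5.1f}, gpu x{:5.1f}'.format(nh, nreps, st, sg))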