I benchmarked np.dot against tf.matmul, and then skcuda.linalg.dot against tf.matmul. NumPy is obviously the slowest of the three, but I was curious whether skcuda or TensorFlow would come out faster.
NumPy vs. TensorFlow
import numpy as np
import tensorflow as tf
import time

# Limit TensorFlow to about 2/3 of the GPU memory.
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.667)
sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

dim = 10000
rnd = np.random.RandomState(0)
a = rnd.rand(dim, dim).astype(np.float32)
b = rnd.rand(dim, dim).astype(np.float32)

# CPU: NumPy matrix multiplication.
start = time.time()
rescpu = np.dot(a, b)
c = time.time() - start
print('CPU:', c)

# GPU: TensorFlow matrix multiplication; the timing includes the
# feed_dict host-to-device copy of both matrices.
X = tf.placeholder(tf.float32, shape=(dim, dim))
Y = tf.placeholder(tf.float32, shape=(dim, dim))
Z = tf.matmul(X, Y)
start = time.time()
resgpu = sess.run(Z, feed_dict={X: a, Y: b})
d = time.time() - start
print('GPU:', d)

print('Speed difference: {:0.1f}x'.format(c / d))
print(np.allclose(rescpu, resgpu))
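For reference, the GPU timing above includes the feed_dict host-to-device copy. A minimal sketch, assuming the same TensorFlow 1.x API, that keeps the operands resident on the GPU as tf.Variable objects and adds a warm-up run, so the timed call pays only for the multiplication and the result fetch:

import numpy as np
import tensorflow as tf
import time
dim = 10000
rnd = np.random.RandomState(0)
a = rnd.rand(dim, dim).astype(np.float32)
b = rnd.rand(dim, dim).astype(np.float32)
X = tf.Variable(a)
Y = tf.Variable(b)
Z = tf.matmul(X, Y)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())  # upload a and b to the GPU once
    sess.run(Z)                                  # warm-up run
    start = time.time()
    resgpu = sess.run(Z)                         # timed: matmul plus result fetch only
    print('GPU (no feed):', time.time() - start)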
scikit-cuda vs. TensorFlow
import pycuda.autoinit
import pycuda.gpuarray as gpuarray
import numpy as np
import time
import skcuda.linalg as culinalg
import tensorflow as tf

culinalg.init()

# Limit TensorFlow to about 2/3 of the GPU memory so it can share
# the device with PyCUDA.
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.667)
sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

dim = 10000
rnd = np.random.RandomState(0)
a = rnd.rand(dim, dim).astype(np.float32)
b = rnd.rand(dim, dim).astype(np.float32)

# scikit-cuda: the operands are uploaded to the GPU before the clock starts.
a_gpu = gpuarray.to_gpu(a)
b_gpu = gpuarray.to_gpu(b)
start = time.time()
ressk = culinalg.dot(a_gpu, b_gpu)
c = time.time() - start
print('sk:', c)
ressk = ressk.get()

# TensorFlow: the operands are fed from host memory, so the timing
# includes the host-to-device copy.
X = tf.placeholder(tf.float32, shape=(dim, dim))
Y = tf.placeholder(tf.float32, shape=(dim, dim))
Z = tf.matmul(X, Y)
start = time.time()
restf = sess.run(Z, feed_dict={X: a, Y: b})
d = time.time() - start
print('tf:', d)

print('Speed difference: {:0.1f}x'.format(d / c))
print(np.allclose(ressk, restf))
skcuda turned out to be 25.5 times faster than TensorFlow here, which surprised me.
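One caveat about this number: CUDA kernel launches are asynchronous, and the scikit-cuda clock above is stopped before ressk.get(), so it may be capturing mostly launch overhead rather than the full multiplication, while the TensorFlow figure also pays for the feed_dict transfer. A minimal sketch, assuming the same pycuda/skcuda setup as above, that synchronizes the CUDA context before reading the clock:

import pycuda.autoinit
import pycuda.gpuarray as gpuarray
import numpy as np
import time
import skcuda.linalg as culinalg
culinalg.init()
dim = 10000
rnd = np.random.RandomState(0)
a_gpu = gpuarray.to_gpu(rnd.rand(dim, dim).astype(np.float32))
b_gpu = gpuarray.to_gpu(rnd.rand(dim, dim).astype(np.float32))
start = time.time()
ressk = culinalg.dot(a_gpu, b_gpu)
pycuda.autoinit.context.synchronize()  # wait for the kernel to actually finish
print('sk (synchronized):', time.time() - start)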
scikit-cuda vs. NumPy
import pycuda.autoinit
import pycuda.gpuarray as gpuarray
import numpy as np
import time
import skcuda.linalg as culinalg

culinalg.init()

dim = 11900
rnd = np.random.RandomState(0)
a = rnd.rand(dim, dim).astype(np.float32)
b = rnd.rand(dim, dim).astype(np.float32)

# Upload the operands to the GPU before timing.
a_gpu = gpuarray.to_gpu(a)
b_gpu = gpuarray.to_gpu(b)

# CPU: NumPy matrix multiplication.
start = time.time()
rescpu = np.dot(a, b)
c = time.time() - start
print('CPU:', c)

# GPU: scikit-cuda matrix multiplication.
start = time.time()
resgpu = culinalg.dot(a_gpu, b_gpu)
d = time.time() - start
print('GPU:', d)
resgpu = resgpu.get()

print('Speed difference: {:0.1f}x'.format(c / d))
print(np.allclose(rescpu, resgpu))
Comparing skcuda directly with NumPy, skcuda was 588 times faster.
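A single wall-clock reading on a GPU can be noisy, and the first call pays one-time setup costs, so the 588x figure is best read as an order of magnitude. A minimal sketch, assuming the same setup as above, that averages several synchronized runs after a warm-up:

import pycuda.autoinit
import pycuda.gpuarray as gpuarray
import numpy as np
import time
import skcuda.linalg as culinalg
culinalg.init()
dim = 11900
rnd = np.random.RandomState(0)
a_gpu = gpuarray.to_gpu(rnd.rand(dim, dim).astype(np.float32))
b_gpu = gpuarray.to_gpu(rnd.rand(dim, dim).astype(np.float32))
# Warm-up run so one-time initialization is not counted.
res = culinalg.dot(a_gpu, b_gpu)
pycuda.autoinit.context.synchronize()
n_runs = 5
start = time.time()
for _ in range(n_runs):
    res = culinalg.dot(a_gpu, b_gpu)
pycuda.autoinit.context.synchronize()  # make sure all kernels have finished
print('GPU (mean of {} runs):'.format(n_runs), (time.time() - start) / n_runs)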