python, numpy, numba, cython, pyopenclのマンデルブロ集合描画時間の計測をしてみたところ、非常に意外な結果になった。
スポンサーリンク
マンデルブロ描画(python編)¶
import numpy as np
from matplotlib import pyplot as plt
from matplotlib import colors
%matplotlib inline
def mandelbrot_image(xmin,xmax,ymin,ymax,width=3,height=3,maxiter=80,cmap='hot'):
dpi = 72
img_width = dpi * width
img_height = dpi * height
x,y,z = mandelbrot_set(xmin,xmax,ymin,ymax,img_width,img_height,maxiter)
fig, ax = plt.subplots(figsize=(width, height),dpi=72)
ticks = np.arange(0,img_width,3*dpi)
x_ticks = xmin + (xmax-xmin)*ticks/img_width
plt.xticks(ticks, x_ticks)
y_ticks = ymin + (ymax-ymin)*ticks/img_width
plt.yticks(ticks, y_ticks)
norm = colors.PowerNorm(0.3)
ax.imshow(z.T,cmap=cmap,origin='lower',norm=norm)
def mandelbrot(z,maxiter):
c = z
for n in range(maxiter):
if abs(z) > 2:
return n
z = z*z + c
return maxiter
def mandelbrot_set(xmin,xmax,ymin,ymax,width,height,maxiter):
r1 = np.linspace(xmin, xmax, width)
r2 = np.linspace(ymin, ymax, height)
return (r1,r2,[mandelbrot(complex(r, i),maxiter) for r in r1 for i in r2])
%timeit -n1 -r1 mandelbrot_set(-2.0,0.5,-1.25,1.25,1000,1000,80)
%timeit -n1 -r1 mandelbrot_set(-0.74877,-0.74872,0.06505,0.06510,1000,1000,2048)
スポンサーリンク
マンデルブロ集合描画(Numpy編)¶
def mandelbrot(c,maxiter):
z = c
for n in range(maxiter):
if abs(z) > 2:
return n
z = z*z + c
return 0
def mandelbrot_set(xmin,xmax,ymin,ymax,width,height,maxiter):
r1 = np.linspace(xmin, xmax, width)
r2 = np.linspace(ymin, ymax, height)
n3 = np.empty((width,height))
for i in range(width):
for j in range(height):
n3[i,j] = mandelbrot(r1[i] + 1j*r2[j],maxiter)
return (r1,r2,n3)
%timeit -n1 -r1 mandelbrot_set(-2.0,0.5,-1.25,1.25,1000,1000,80)
%timeit -n1 -r1 mandelbrot_set(-0.74877,-0.74872,0.06505,0.06510,1000,1000,2048)
mandelbrot_image(-0.74877,-0.74872,0.06505,0.06510,maxiter=2048,cmap='gnuplot2')
スポンサーリンク
マンデルブロ集合描画(numba編)¶
from numba import jit
@jit
def mandelbrot(c,maxiter):
z = c
for n in range(maxiter):
if abs(z) > 2:
return n
z = z*z + c
return 0
@jit
def mandelbrot_set(xmin,xmax,ymin,ymax,width,height,maxiter):
r1 = np.linspace(xmin, xmax, width)
r2 = np.linspace(ymin, ymax, height)
n3 = np.empty((width,height))
for i in range(width):
for j in range(height):
n3[i,j] = mandelbrot(r1[i] + 1j*r2[j],maxiter)
return (r1,r2,n3)
%timeit -n1 -r1 mandelbrot_set(-2.0,0.5,-1.25,1.25,1000,1000,80)
%timeit -n1 -r1 mandelbrot_set(-0.74877,-0.74872,0.06505,0.06510,1000,1000,2048)
mandelbrot_image(-0.74877,-0.74872,0.06505,0.06510,maxiter=2048,cmap='gnuplot2')
マンデルブロー描画(numba.jit改良版)¶
@jit
def mandelbrot(c,maxiter):
z = c
for n in range(maxiter):
if z.real * z.real + z.imag * z.imag > 4.0:
return n
z = z*z + c
return 0
@jit
def mandelbrot_set(xmin,xmax,ymin,ymax,width,height,maxiter):
r1 = np.linspace(xmin, xmax, width)
r2 = np.linspace(ymin, ymax, height)
n3 = np.empty((width,height))
for i in range(width):
for j in range(height):
n3[i,j] = mandelbrot(r1[i] + 1j*r2[j],maxiter)
return (r1,r2,n3)
%timeit -n1 -r1 mandelbrot_set(-2.0,0.5,-1.25,1.25,1000,1000,80)
%timeit -n1 -r1 mandelbrot_set(-0.74877,-0.74872,0.06505,0.06510,1000,1000,2048)
mandelbrot_image(-2.0,0.5,-1.25,1.25,cmap='gnuplot2')
complexを2つのfloatに分解
@jit
def mandelbrot(creal,cimag,maxiter):
real = creal
imag = cimag
for n in range(maxiter):
real2 = real*real
imag2 = imag*imag
if real2 + imag2 > 4.0:
return n
imag = 2* real*imag + cimag
real = real2 - imag2 + creal
return 0
@jit
def mandelbrot_set4(xmin,xmax,ymin,ymax,width,height,maxiter):
r1 = np.linspace(xmin, xmax, width)
r2 = np.linspace(ymin, ymax, height)
n3 = np.empty((width,height))
for i in range(width):
for j in range(height):
n3[i,j] = mandelbrot(r1[i],r2[j],maxiter)
return (r1,r2,n3)
%timeit -n1 -r1 mandelbrot_set(-2.0,0.5,-1.25,1.25,1000,1000,80)
%timeit -n1 -r1 mandelbrot_set(-0.74877,-0.74872,0.06505,0.06510,1000,1000,2048)
mandelbrot_image(-0.74877,-0.74872,0.06505,0.06510,maxiter=2048,cmap='gnuplot2')
スポンサーリンク
マンデルブロー集合描画(cython編)¶
%load_ext cython
%%cython
import cython
import numpy as np
cdef int mandelbrot(double creal, double cimag, int maxiter):
cdef:
double real2, imag2
double real = creal, imag = cimag
int n
for n in range(maxiter):
real2 = real*real
imag2 = imag*imag
if real2 + imag2 > 4.0:
return n
imag = 2* real*imag + cimag
real = real2 - imag2 + creal;
return 0
@cython.boundscheck(False)
@cython.wraparound(False)
cpdef mandelbrot_set(double xmin, double xmax, double ymin, double ymax, long width, long height, int maxiter):
cdef:
double[:] r1 = np.linspace(xmin, xmax, width)
double[:] r2 = np.linspace(ymin, ymax, height)
long[:,:] n3 = np.empty((width,height), np.int)
int i,j
for i in range(width):
for j in range(height):
n3[i,j] = mandelbrot(r1[i], r2[j], maxiter)
return (r1,r2,n3)
%timeit mandelbrot_set(-2.0,0.5,-1.25,1.25,1000,1000,80)
%timeit mandelbrot_set(-0.74877,-0.74872,0.06505,0.06510,1000,1000,2048)
mandelbrot_image(-0.74877,-0.74872,0.06505,0.06510,maxiter=2048,cmap='gnuplot2')
今回cythonはnumba.jitよりチョロっと速い結果に終わった。
スポンサーリンク
マンデルブロ描画(numba guvectorize編)¶
import numpy as np
from numba import jit, vectorize, guvectorize, float64, complex64, int32, float32
@jit(int32(complex64, int32))
def mandelbrot(c,maxiter):
nreal = 0
real = 0
imag = 0
for n in range(maxiter):
nreal = real*real - imag*imag + c.real
imag = 2* real*imag + c.imag
real = nreal;
if real * real + imag * imag > 4.0:
return n
return 0
@guvectorize([(complex64[:], int32[:], int32[:])], '(n),()->(n)',target='parallel')
def mandelbrot_numpy(c, maxit, output):
maxiter = maxit[0]
for i in range(c.shape[0]):
output[i] = mandelbrot(c[i],maxiter)
def mandelbrot_set2(xmin,xmax,ymin,ymax,width,height,maxiter):
r1 = np.linspace(xmin, xmax, width, dtype=np.float32)
r2 = np.linspace(ymin, ymax, height, dtype=np.float32)
c = r1 + r2[:,None]*1j
n3 = mandelbrot_numpy(c,maxiter)
return (r1,r2,n3.T)
%timeit mandelbrot_set2(-2.0,0.5,-1.25,1.25,1000,1000,80)
%timeit mandelbrot_set2(-0.74877,-0.74872,0.06505,0.06510,1000,1000,2048)
mandelbrot_set = mandelbrot_set2
mandelbrot_image(-2.0,0.5,-1.25,1.25,cmap='gnuplot2')
2.02/.336
numba guvectorizeはcythonの6倍も高速だった。
マンデルブロ描画(numba guvectorize cuda編)¶
import numpy as np
from numba import jit, vectorize, guvectorize, float64, complex64, int32, float32
@jit(int32(complex64, int32))
def mandelbrot(c,maxiter):
creal = c.real
cimag = c.imag
real = creal
imag = cimag
for n in range(maxiter):
real2 = real*real
imag2 = imag*imag
if real2 + imag2 > 4.0:
return n
imag = 2* real*imag + cimag
real = real2 - imag2 + creal
return 0
@guvectorize([(complex64[:], int32[:], int32[:])], '(n),(n)->(n)', target='cuda')
def mandelbrot_numpy(c, maxit, output):
maxiter = maxit[0]
for i in range(c.shape[0]):
creal = c[i].real
cimag = c[i].imag
real = creal
imag = cimag
output[i] = 0
for n in range(maxiter):
real2 = real*real
imag2 = imag*imag
if real2 + imag2 > 4.0:
output[i] = n
break
imag = 2* real*imag + cimag
real = real2 - imag2 + creal
def mandelbrot_set2(xmin,xmax,ymin,ymax,width,height,maxiter):
r1 = np.linspace(xmin, xmax, width, dtype=np.float32)
r2 = np.linspace(ymin, ymax, height, dtype=np.float32)
c = r1 + r2[:,None]*1j
n3 = np.empty(c.shape, int)
maxit = np.ones(c.shape, int) * maxiter
n3 = mandelbrot_numpy(c,maxit)
return (r1,r2,n3.T)
%timeit mandelbrot_set2(-2.0,0.5,-1.25,1.25,1000,1000,80)
%timeit mandelbrot_set2(-0.74877,-0.74872,0.06505,0.06510,1000,1000,2048)
mandelbrot_set = mandelbrot_set2
mandelbrot_image(-2.0,0.5,-1.25,1.25,cmap='gnuplot2')
意外な事に、cpu版よりgpu版が遅い結果となった。
スポンサーリンク
マンデルブロ集合描画(PyOpenCL編)¶
!conda install -y -c conda-forge pyopencl
from __future__ import absolute_import
from __future__ import print_function
import pyopencl as cl
%reload_ext pyopencl.ipython_ext
ctx = cl.create_some_context(interactive=True)
devices = ctx.get_info(cl.context_info.DEVICES)
print(devices)
def mandelbrot_gpu(q, maxiter):
global ctx
queue = cl.CommandQueue(ctx)
output = np.empty(q.shape, dtype=np.uint16)
prg = cl.Program(ctx, """
#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable
__kernel void mandelbrot(__global float2 *q,
__global ushort *output, ushort const maxiter)
{
int gid = get_global_id(0);
float nreal, real = 0;
float imag = 0;
output[gid] = 0;
for(int curiter = 0; curiter < maxiter; curiter++) {
nreal = real*real - imag*imag + q[gid].x;
imag = 2* real*imag + q[gid].y;
real = nreal;
if (real*real + imag*imag > 4.0f){
output[gid] = curiter;
break;
}
}
}
""").build()
mf = cl.mem_flags
q_opencl = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=q)
output_opencl = cl.Buffer(ctx, mf.WRITE_ONLY, output.nbytes)
prg.mandelbrot(queue, output.shape, None, q_opencl,
output_opencl, np.uint16(maxiter))
cl.enqueue_copy(queue, output, output_opencl).wait()
return output
def mandelbrot_set3(xmin,xmax,ymin,ymax,width,height,maxiter):
r1 = np.linspace(xmin, xmax, width, dtype=np.float32)
r2 = np.linspace(ymin, ymax, height, dtype=np.float32)
c = r1 + r2[:,None]*1j
c = np.ravel(c)
n3 = mandelbrot_gpu(c,maxiter)
n3 = n3.reshape((width,height))
return (r1,r2,n3.T)
mandelbrot_set = mandelbrot_set3
mandelbrot_image(-2.0,0.5,-1.25,1.25,cmap='gnuplot2')
mandelbrot_image(-0.74877,-0.74872,0.06505,0.06510,maxiter=2048,cmap='gnuplot2')
%timeit mandelbrot_set3(-2.0,0.5,-1.25,1.25,1000,1000,80)
%timeit mandelbrot_set3(-0.74877,-0.74872,0.06505,0.06510,1000,1000,2048)
.336/.0581
pyopenclは、cythonの6倍高速だったnumba guvectorizeの5.8倍の速度だったので、つまり、cythonの約35倍という驚異的な結果になった。python版と比較した場合、1480倍というとんでもない数字になった。
スポンサーリンク
マンデルブロ集合描画(pycuda編)¶
pycuda版のコードにbugがあるのかどうかは分からないが、マンデルブロ集合を描画することができなかったのでボツにした。bug fixを何度か試みたが、私の手に負えるわけもなく、敢え無く断念した。ただ引用元にも書いてあるように、pyopencl(cpu)版が最速なので非常に興味深い結果になった。
スポンサーリンク
スポンサーリンク