このサイトのチュートリアルがかなり良さげなので、このサイトを参考にさせてもらってROOTを使ったC++プログラミングを学習してみようと思う。
スポンサーリンク
ROOT C++ その1¶
%matplotlib inline
import numpy,matplotlib.pyplot,time
import ROOT
tmpname = "run_cpp1"
ROOT.gInterpreter.Declare("""
#include<complex>
void %s(int height, int width, int maxiterations, size_t c_ptr, size_t fractal_ptr) {
double* c = (double*)(c_ptr);
int* fractal = (int*)(fractal_ptr);
for (int h = 0; h < height; h++) {
for (int w = 0; w < width; w++) {
double creal = c[2 * (h + height*w)];
double cimag = c[2 * (h + height*w) + 1];
std::complex<double> ci = std::complex<double>(creal, cimag);
std::complex<double> z = ci;
for (int i = 0; i < maxiterations; i++) {
z = z*z + ci;
if (std::abs(z) > 2) {
fractal[h + height*w] = i;
break;
} } } } }""" % tmpname)
def run_cpp(height, width, maxiterations=20):
y, x = numpy.ogrid[-1:0:height*1j, -1.5:0:width*1j]
c = x + y*1j
fractal = numpy.full(c.shape, maxiterations, dtype=numpy.int32)
getattr(ROOT, tmpname)(height, width, maxiterations, c.ctypes.data, fractal.ctypes.data)
return fractal
starttime = time.time()
fractal = run_cpp(3200, 4800)
print("{0} ns per pixel".format(1e9 * (time.time() - starttime) / (3200 * 4800)))
fig, ax = matplotlib.pyplot.subplots(figsize=(18, 12))
ax.imshow(fractal)
スポンサーリンク
ROOT C++ その2¶
void run_cpp2(int height, int width, int maxiterations, size_t c_ptr, size_t fractal1_ptr) {
double* c = (double*)(c_ptr);
int* fractal1 = (int*)(fractal1_ptr);
for (int h = 0; h < height; h++) {
for (int w = 0; w < width; w++) {
double creal = c[2 * (h + height*w)];
double cimag = c[2 * (h + height*w) + 1];
std::complex<double> ci = std::complex<double>(creal, cimag);
std::complex<double> z = ci;
for (int i = 0; i < maxiterations; i++) {
z = z*z + ci;
if (std::abs(z) > 2) {
fractal1[h + height*w] = i;
break;
}
}
}
}
}
#%matplotlib inline
#import ROOT,numpy,matplotlib.pyplot,time
ROOT.gROOT.LoadMacro("run.cpp2")
starttime = time.time()
height, width, maxiterations = 3200, 4800, 20
y, x = numpy.ogrid[-1:0:height*1j, -1.5:0:width*1j]
c = x + y*1j
fractal1 = numpy.full(c.shape, maxiterations, dtype=numpy.int32)
ROOT.run_cpp2(height, width, maxiterations, c.ctypes.data, fractal1.ctypes.data)
print("{0} ns per pixel".format(1e9 * (time.time() - starttime) / (3200 * 4800)))
fig, ax = matplotlib.pyplot.subplots(figsize=(18, 12))
ax.imshow(fractal1)
スポンサーリンク
pybind11¶
上の2つの方法だと恐ろしく処理が遅いので、下の方法を使用した方がよいようである。先ず、%%writefile tmp.cppで下のコードを保存する。
#include<complex>
#include<pybind11/pybind11.h>
void run_cpp(int height, int width, int maxiterations, size_t c_ptr, size_t fractal_ptr) {
double* c = (double*)(c_ptr);
int* fractal = (int*)(fractal_ptr);
for (int h = 0; h < height; h++) {
for (int w = 0; w < width; w++) {
double creal = c[2 * (h + height*w)];
double cimag = c[2 * (h + height*w) + 1];
std::complex<double> ci = std::complex<double>(creal, cimag);
std::complex<double> z = ci;
for (int i = 0; i < maxiterations; i++) {
z = z*z + ci;
if (std::abs(z) > 2) {
fractal[h + height*w] = i;
break;
}
}
}
}
}
PYBIND11_MODULE(tmp, m) {
m.def("run_cpp", &run_cpp, "the inner loop");
}
!c++ -O3 -ffast-math -Wall -shared -std=c++11 -fPIC `python -m pybind11 --includes` tmp.cpp -o tmp`python3-config --extension-suffix`
import tmp
starttime = time.time()
height, width, maxiterations = 3200, 4800, 20
y, x = numpy.ogrid[-1:0:height*1j, -1.5:0:width*1j]
c = x + y*1j
fractal = numpy.full(c.shape, maxiterations, dtype=numpy.int32)
tmp.run_cpp(height, width, maxiterations, c.ctypes.data, fractal.ctypes.data)
print("{0} ns per pixel".format(1e9 * (time.time() - starttime) / (3200 * 4800)))
fig, ax = matplotlib.pyplot.subplots(figsize=(18, 12))
ax.imshow(fractal)
こっちの方が約8倍処理が高速だった。
スポンサーリンク
numba.cuda¶
import numba.cuda
import math
def run_numba(height, width, maxiterations=20):
fractal = numpy.empty((height, width), dtype=numpy.int32)
griddim = (math.ceil(height / 32), math.ceil(width / 32))
blockdim = (32, 32)
inner_loop[griddim, blockdim](height, width, maxiterations, fractal)
return fractal
@numba.cuda.jit
def inner_loop(height, width, maxiterations, fractal):
x, y = numba.cuda.grid(2) # 2-dimensional CUDA grid
z = c = -1.5 + y*1.0/(height + 1) + -1j + x*1j*1.5/(width + 1)
fractal[x, y] = maxiterations
for i in range(maxiterations):
z = z**2 + c
if abs(z) > 2:
fractal[x, y] = i
break
import time, numpy
starttime = time.time()
fractal = run_numba(6400, 9600)
print("{0} ns per pixel".format(1e9 * (time.time() - starttime) / (6400 * 9600)))
こっちの方が約9倍高速だった。
%matplotlib inline
import matplotlib.pyplot
fig, ax = matplotlib.pyplot.subplots(figsize=(18, 12))
ax.imshow(fractal)
import cupy as cp
def run_numba(height, width, maxiterations=20):
fractal = cp.empty((height, width), dtype=cp.int32)
griddim = (math.ceil(height / 32), math.ceil(width / 32))
blockdim = (32, 32)
inner_loop[griddim, blockdim](height, width, maxiterations, fractal)
return fractal
@numba.cuda.jit
def inner_loop(height, width, maxiterations, fractal):
x, y = numba.cuda.grid(2) # 2-dimensional CUDA grid
z = c = -1.5 + y*1.0/(height + 1) + -1j + x*1j*1.5/(width + 1)
fractal[x, y] = maxiterations
for i in range(maxiterations):
z = z**2 + c
if abs(z) > 2:
fractal[x, y] = i
break
starttime = time.time()
fractal = run_numba(6400, 9600)
fractal2=fractal.get()
print("{0} ns per pixel".format(1e9 * (time.time() - starttime) / (6400 * 9600)))
fig, ax = matplotlib.pyplot.subplots(figsize=(18, 12))
ax.imshow(fractal2)
こっちの方が約1.2倍高速だった。
スポンサーリンク
スポンサーリンク