前回のラプラシアンフィルター処理速度比較の続きで、今回は前回よりもでかい画像データを使用して処理速度比較を行う。
スポンサーリンク
前準備¶
import skimage.data
import skimage.color
from skimage.filters import laplace
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
下記のサイトから画像データをダウンロードしてくる。
%download https://cdn.spacetelescope.org/archives/images/large/heic0601a.jpg
ダウンロードした画像をロードする。
import cv2

# Load the downloaded JPEG directly as a single-channel (grayscale) image.
image = cv2.imread('heic0601a.jpg', cv2.IMREAD_GRAYSCALE)
# cv2.imread reports failure by returning None instead of raising, so fail
# here with a clear message rather than with an AttributeError on `.shape`.
if image is None:
    raise FileNotFoundError("could not read 'heic0601a.jpg'; download it first")
image.shape
ロードした画像のサイズを変更する(今回は変更されない)。
height, width = image.shape[:2]
print(height, width)
# Resample to a square image whose side equals the original width
# (the size is unchanged when the source is already square).
image = cv2.resize(image, (width, width), interpolation=cv2.INTER_CUBIC)
# The image was read in grayscale mode, so it is normally already 2-D.
# Recent scikit-image releases reject 2-D input to rgb2gray, so convert only
# when a channel axis is actually present.
if image.ndim == 3:
    image = skimage.color.rgb2gray(image)
image.dtype
画像のデータ型を変換する。
# Convert the pixel data to double precision and show the resulting dtype.
image = image.astype(np.float64)
image.dtype
スポンサーリンク
scikit-image implementation¶
def laplace_skimage(image):
    """Detect edges with scikit-image's Laplace filter.

    Runs ``skimage.filters.laplace`` on ``image`` and returns a boolean array
    that is True wherever the absolute filter response exceeds 0.05.
    """
    response = laplace(image)
    return np.abs(response) > 0.05
# Compute the edge mask once (it is reused by the comparison plots), show its
# shape, then benchmark the function.
edges = laplace_skimage(image)
edges.shape
%timeit laplace_skimage(image)
def compare(left, right):
    """Show the images ``left`` and ``right`` side by side in grayscale."""
    fig, axes = plt.subplots(1, 2, figsize=(20, 10))
    plt.rcParams["font.size"] = "20"
    for axis, picture in zip(axes, (left, right)):
        axis.imshow(picture, cmap='gray')
compare(left=image, right=edges)
スポンサーリンク
NumPy implementation¶
def laplace_numpy(image):
    """Edge mask from a 5-point Laplacian built out of shifted NumPy slices.

    For every interior pixel the four direct neighbours are summed and four
    times the pixel itself is subtracted. The result has two fewer rows and
    columns than ``image`` and is True where the absolute response exceeds
    0.05.
    """
    above = image[:-2, 1:-1]
    below = image[2:, 1:-1]
    to_left = image[1:-1, :-2]
    to_right = image[1:-1, 2:]
    centre = image[1:-1, 1:-1]
    response = above + below + to_left + to_right - 4*centre
    return np.abs(response) > 0.05
# Show the output shape, benchmark the function, then plot the scikit-image
# mask next to the NumPy one.
laplace_numpy(image).shape
%timeit laplace_numpy(image)
compare(edges, laplace_numpy(image))
スポンサーリンク
Cython¶
%load_ext cython
%%cython
import numpy as np
cimport numpy as cnp
import cython
@cython.boundscheck(False)  # turn off bounds-checking for entire function
@cython.wraparound(False)  # turn off negative index wrapping for entire function
def laplace_cython(cnp.ndarray[double, ndim=2] image):
    """Apply a 5-point Laplace operator to a 2D image and threshold it.

    Cython implementation with explicit loops over the interior pixels.

    Parameters
    ----------
    image : 2D ndarray of double
        Input image.

    Returns
    -------
    ndarray of bool
        Array with two fewer rows and columns than ``image``; True where the
        absolute Laplacian exceeds 0.05.
    """
    cdef int h = image.shape[0]
    cdef int w = image.shape[1]
    # The result drops the one-pixel border: (h-2) rows by (w-2) columns.
    # Bounds checking is disabled above, so allocating the transposed shape
    # would write out of bounds on any non-square image.
    cdef cnp.ndarray[double, ndim=2] laplacian = np.empty((max(h-2, 0), max(w-2, 0)), dtype=np.double)
    cdef int i, j
    for i in range(1, h-1):
        for j in range(1, w-1):
            laplacian[i-1, j-1] = image[i-1, j] + image[i+1, j] + image[i, j-1] + image[i, j+1] - 4*image[i, j]
    thresh = np.abs(laplacian) > 0.05
    return thresh
%timeit laplace_cython(image)
スポンサーリンク
Pythran¶
%load_ext pythran.magic
%%pythran
#pythran export laplace_pythran_highlevel(float[][])
import numpy as np
def laplace_pythran_highlevel(image):
    """Apply a 5-point Laplace operator to a 2D image and threshold it.

    Written as a high-level NumPy slice expression and compiled by Pythran.
    The result has two fewer rows and columns than ``image`` and is True
    where the absolute Laplacian exceeds 0.05.
    """
    # Sum of the four direct neighbours minus four times the centre pixel,
    # evaluated for every interior pixel at once.
    laplacian = (image[:-2, 1:-1] + image[2:, 1:-1]
                 + image[1:-1, :-2] + image[1:-1, 2:]
                 - 4*image[1:-1, 1:-1])
    thresh = np.abs(laplacian) > 0.05
    return thresh
%timeit laplace_pythran_highlevel(image)
スポンサーリンク
Numba¶
from numba import jit
@jit(nopython=True, fastmath = True, parallel = True, nogil = True)
def laplace_numba(image):
    """Thresholded 5-point Laplacian of a 2D image, compiled with Numba.

    Loops over the interior pixels and stores, for each one, whether the
    absolute Laplacian exceeds 0.05. The result is allocated with
    ``np.empty`` at its default dtype, so the comparison outcomes are stored
    as numbers rather than booleans. It has two fewer rows and columns than
    ``image``.
    """
    n_rows = image.shape[0]
    n_cols = image.shape[1]
    mask = np.empty((n_rows - 2, n_cols - 2))
    for r in range(1, n_rows - 1):
        for c in range(1, n_cols - 1):
            stencil = image[r-1, c] + image[r+1, c] + image[r, c-1] + image[r, c+1] - 4*image[r, c]
            mask[r-1, c-1] = np.abs(stencil) > 0.05
    return mask
# Call once before timing; presumably this keeps the one-off JIT compilation
# out of the measured runs.
laplace_numba(image);
%timeit laplace_numba(image)
スポンサーリンク
numba guvectorize¶
from numba import guvectorize
from numba import guvectorize
import math
@guvectorize('void(float64[:, :], float64[:, :])', "(m, n)->(m, n)",
             target='parallel', nopython=True, fastmath=True)
def laplace_numba_guvectorize(image, laplacian):
    """Thresholded 5-point Laplacian of a 2D image as a Numba gufunc.

    Parameters
    ----------
    image : 2D float64 array
        Input image.
    laplacian : 2D float64 array, same shape as ``image``
        Output buffer. The result for interior pixel ``(i, j)`` is stored at
        ``[i-1, j-1]`` as 1.0 where the absolute Laplacian exceeds 0.05 and
        0.0 otherwise. The last two rows and columns have no corresponding
        interior pixel and are set to 0.0.
    """
    h = image.shape[0]
    w = image.shape[1]
    # The output has the full (m, n) shape but the loop below only fills the
    # top-left (m-2, n-2) block; clear the rest so it never exposes whatever
    # the caller's buffer happened to contain.
    laplacian[max(h - 2, 0):, :] = 0.0
    laplacian[:, max(w - 2, 0):] = 0.0
    for i in range(1, h - 1):
        for j in range(1, w - 1):
            laplacian[i-1, j-1] = abs(4 * image[i, j] - image[i - 1, j] -
                                      image[i + 1, j] - image[i, j + 1] - image[i, j - 1]) > 0.05
# Output buffer with the same shape and dtype as the input; np.empty_like
# leaves it uninitialised and the gufunc writes its result into it.
laplacian = np.empty_like(image)
# Call once before timing (presumably to compile ahead of the benchmark).
laplace_numba_guvectorize(image, laplacian);
%timeit laplace_numba_guvectorize(image, laplacian);
スポンサーリンク
PyCUDA¶
import pycuda.driver as cuda
import pycuda.autoinit
import numpy as np
from pycuda.compiler import SourceModule
import matplotlib.pyplot as p
image = image.astype(np.float32)
(h,w)=image.shape
print (h,w)
mod_copy_texture=SourceModule(
"""
#include <cmath>
texture<float,2>tex;
__global__ void copy_texture_kernel(float *C,float * data)
{
int i = threadIdx.x+(blockIdx.x*(blockDim.x));
int j = threadIdx.y+(blockIdx.y*(blockDim.y));
int h=C[0];
int w=C4;
while(i<w)
{
while(j<h)
{
data[i+w*j] = abs(4*tex2D(tex,j,i)-tex2D(tex,j-1,i)-tex2D(tex,j+1,i)\
-tex2D(tex,j,i+1)-tex2D(tex,j,i-1)) > 0.05;
__syncthreads();
j += blockDim.y * gridDim.y;
}
i += blockDim.x * gridDim.x;
}
}
""")
copy_texture_func = mod_copy_texture.get_function("copy_texture_kernel")
texref = mod_copy_texture.get_texref("tex")
cuda.matrix_to_texref(image , texref , order = "F")
gpu_output = np.empty_like(image)
copy_texture_func(cuda.In(np.float32([h,w])),cuda.Out(gpu_output),\
block=(32,32,1), grid=(h//32,w//32,1), texrefs=[texref])
%timeit copy_texture_func(cuda.In(np.float32([h,w])),cuda.Out(gpu_output),\
block=(32,16, 1), grid=(h//32,w//32,1), texrefs=[texref])
compare(gpu_output, laplace_numpy(image))
スポンサーリンク
Wrap-up and plots¶
timings = {}
for func in [laplace_skimage, laplace_numpy, laplace_cython, laplace_pythran_highlevel, laplace_numba]:
t = %timeit -o func(image)
timings[func.__name__] = t
t = %timeit -o laplace_numba_guvectorize(image, laplacian);
timings['laplace_numba_guvectorize'] = t
t = %timeit -o copy_texture_func(cuda.In(np.float32([h,w])),cuda.Out(gpu_output),\
block=(32,16, 1), grid=(h//32,w//32,1), texrefs=[texref]);
timings['laplace_pycuda'] = t
import pandas as pd
pd.Series({key: timings[key].average * 1e6 for key in timings}).to_frame(name='timings (μs)').sort_values(by='timings (μs)')
fig, ax = plt.subplots(figsize=(20, 12))
plt.rcParams["font.size"] = "20"
pd.Series({key: timings[key].average * 1e6 for key in timings}).to_frame(name='timings (μs)').sort_values(by='timings (μs)').plot(kind='barh', ax=ax)
今回はPyCUDAがラプラシアンフィルター処理速度ナンバーワンに輝いた。しかし、フィルター処理された画像データに整合性があるわけではないので、必ずしもこの結果が正確であるとは限らない。さらに言うと、メモリ16Gだと処理がかなりもたつくので、やはりメインメモリは最低でも32Gは欲しいところだ。
スポンサーリンク
スポンサーリンク