PyCUDAを使ってラプラシアンフィルタを作成する

PyCUDAを使ってLaplacian filter(ラプラシアンフィルタ)を作成するプログラムがあったので、今後の画像処理学習の参考のためにこのコードを拝借しようと思う。

スポンサーリンク

参照サイトをgit clone

cd git
/home/workspace/git
!git clone https://github.com/ashwinashok9111993/andromeda.git
Cloning into 'andromeda'...
remote: Enumerating objects: 184, done.
remote: Total 184 (delta 0), reused 0 (delta 0), pack-reused 184
Receiving objects: 100% (184/184), 2.72 MiB | 2.24 MiB/s, done.
Resolving deltas: 100% (93/93), done.
cd andromeda
/home/workspace/git/andromeda
ls
2DtextureTest.py  dice.jpg   junk/          simplefilter.py  tictoc.py
Lenna.png         dice.png   laplacian.py   sobeldemo.py     vectadd.py
Lenna21.png       intro.py   lenaG.jpg      test2.jpg*
README.md         julia.py   montecarlo.py  testIM.png
boxfilterdemo.py  julia2.py  rgb2gray.py    texturetest.py

laplacian.pyをload

#%load laplacian.py
# Demo script: runs a 3x3 Laplacian-style filter over a 2-D image on the GPU
# with PyCUDA and shows the input and the filtered result side by side.
__author__ = 'ashwin'

import pycuda.driver as cuda
# Not referenced by name below; imported for its import-time side effect
# (PyCUDA's autoinit module sets up the CUDA context).
import pycuda.autoinit
import numpy as np
from pycuda.compiler import SourceModule
import scipy.misc as scm
import matplotlib.pyplot as p

# Leftover debugging input (random 20x20 matrix), kept disabled:
#realrow = np.random.random([20,20]).astype(np.float32)
#print realrow.shape

# Load the image as float32 to match the `texture<float,2>` declared in the
# kernel source. SciPy reports `imread` as deprecated since 1.0.0.
realrow = scm.imread('lenaG.jpg').astype(np.float32)

# The two-name unpacking requires a 2-D array; the kernel treats M as the row
# count and N as the row length (it writes data[i*N+j] for i<M, j<N).
(M,N)=realrow.shape
print (realrow.shape)

# CUDA source compiled at run time. The kernel reads the image through the
# texture reference `tex` and writes, for every pixel, 8*center minus the sum
# of its eight neighbours. The image size arrives as two floats in C[0], C[1].
mod_copy_texture=SourceModule(
"""
texture<float,2>tex;
__global__ void  copy_texture_kernel(float *C,float * data)
 {
  int i = threadIdx.x+(blockIdx.x*(blockDim.x));
  int j = threadIdx.y+(blockIdx.y*(blockDim.y));
  int gx=0;
  int gy=0;
  int M=C[0];
  int N=C[1];
   while(i<M)
  {
  while(j<N)
  {
  data[i*N+j] = 8*tex2D(tex,j,i)-tex2D(tex,j-1,i)-tex2D(tex,j-1,i-1)-tex2D(tex,j-1,i+1)-tex2D(tex,j+1,i)-tex2D(tex,j+1,i+1)-tex2D(tex,j+1,i-1)-tex2D(tex,j,i+1)-tex2D(tex,j,i-1);
  __syncthreads();
  j += blockDim.y * gridDim.y;
  }
  i += blockDim.x * gridDim.x;
  }
}
""")

########
# Look up the compiled kernel by the name used in the CUDA source
########
copy_texture_func = mod_copy_texture.get_function("copy_texture_kernel")

#########
# Bind the image to the module's texture reference "tex"
#########
texref = mod_copy_texture.get_texref("tex")
cuda.matrix_to_texref(realrow , texref , order = "C")

# Optional texture settings, left disabled:
#texref.set_flags(cuda.TRSF_NORMALIZED_COORDINATES)
#texref.set_filter_mode()
# NOTE: the launch below passes the image size as a two-element float32 array
# (kernel argument C) and receives the result in gpu_output (kernel argument
# data). Its grid is written as (M/32,N/32,1); under Python 3 "/" produces
# floats, and the launch fails with the TypeError shown after this listing.

gpu_output = np.zeros_like(realrow)
copy_texture_func(cuda.In(np.float32([M,N])),cuda.Out(gpu_output),block=(32,32, 1), grid=(M/32,N/32,1), texrefs=[texref])

p.gray()
p.subplot(1,2,1)  # left panel: input image
p.imshow(realrow)
p.subplot(1,2,2)  # right panel: filter output
p.imshow(gpu_output)
p.show()
/root/.pyenv/versions/3.6.6/envs/py36/lib/python3.6/site-packages/ipykernel_launcher.py:13: DeprecationWarning: `imread` is deprecated!
`imread` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``imageio.imread`` instead.
  del sys.path[0]
(512, 512)
/root/.pyenv/versions/3.6.6/envs/py36/lib/python3.6/site-packages/ipykernel_launcher.py:40: UserWarning: The CUDA compiler succeeded, but said the following:
kernel.cu(8): warning: variable "gx" was declared but never referenced

kernel.cu(9): warning: variable "gy" was declared but never referenced


---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-8-f0ff30ece0fe> in <module>
     55 
     56 gpu_output = np.zeros_like(realrow)
---> 57 copy_texture_func(cuda.In(np.float32([M,N])),cuda.Out(gpu_output),block=(32,32, 1), grid=(M/32,N/32,1), texrefs=[texref])
     58 
     59 p.gray()

~/.pyenv/versions/3.6.6/envs/py36/lib/python3.6/site-packages/pycuda/driver.py in function_call(func, *args, **kwargs)
    400                 start_time = time()
    401 
--> 402             func._launch_kernel(grid, block, arg_buf, shared, None)
    403 
    404             if post_handlers or time_kernel:

TypeError: No registered converter was able to produce a C++ rvalue of type unsigned int from this Python object of type float

エラーを以下のようにして修正する。Python 3では `/` の結果がfloatになるが、gridには整数を渡す必要があるため、整数除算の `//` に変更する。
grid=(M/32,N/32,1) → grid=(M//32,N//32,1)

# Demo script: runs a 3x3 Laplacian-style filter over a 2-D grayscale image on
# the GPU with PyCUDA and shows the input and the filtered result side by side.
# The filtered image is left in the module-level name `gpu_output`.
__author__ = 'ashwin'

import pycuda.driver as cuda
# Not referenced by name below; imported for its import-time side effect
# (PyCUDA's autoinit module sets up the CUDA context).
import pycuda.autoinit
import numpy as np
from pycuda.compiler import SourceModule
import scipy.misc as scm
import matplotlib.pyplot as p

# Threads per block along each axis (32 x 32 threads per block).
BLOCK_EDGE = 32

# Load the image as float32 to match the `texture<float, 2>` declared in the
# kernel source. SciPy reports `imread` as deprecated since 1.0.0.
realrow = scm.imread('lenaG.jpg').astype(np.float32)

# The two-name unpacking requires a 2-D array: M rows of N pixels each.
(M, N) = realrow.shape
print (realrow.shape)

# CUDA source compiled at run time. The kernel reads the image through the
# texture reference `tex` and writes, for every pixel, 8*center minus the sum
# of its eight neighbours.
mod_copy_texture = SourceModule(
"""
texture<float, 2> tex;

__global__ void copy_texture_kernel(float *C, float *data)
{
    // The image size is passed as two floats: C[0] = rows, C[1] = columns.
    const int M = C[0];
    const int N = C[1];

    // Every thread starts at its own (row, column) position and advances by
    // the number of threads launched along that axis, so the whole image is
    // covered even when the grid is smaller than the image.
    for (int i = threadIdx.x + blockIdx.x * blockDim.x; i < M;
         i += blockDim.x * gridDim.x)
    {
        // The column index restarts for every row handled by this thread.
        for (int j = threadIdx.y + blockIdx.y * blockDim.y; j < N;
             j += blockDim.y * gridDim.y)
        {
            data[i * N + j] = 8 * tex2D(tex, j, i)
                            - tex2D(tex, j - 1, i)
                            - tex2D(tex, j - 1, i - 1)
                            - tex2D(tex, j - 1, i + 1)
                            - tex2D(tex, j + 1, i)
                            - tex2D(tex, j + 1, i + 1)
                            - tex2D(tex, j + 1, i - 1)
                            - tex2D(tex, j, i + 1)
                            - tex2D(tex, j, i - 1);
        }
    }
}
""")

# Look up the compiled kernel by the name used in the CUDA source.
copy_texture_func = mod_copy_texture.get_function("copy_texture_kernel")

# Bind the image to the module's texture reference "tex".
texref = mod_copy_texture.get_texref("tex")
cuda.matrix_to_texref(realrow, texref, order="C")

# Grid dimensions must be Python ints, hence "//" rather than "/". At least
# one block is launched per axis so images smaller than a block still run;
# the strided loops in the kernel pick up any rows/columns left over.
grid_dim = (max(M // BLOCK_EDGE, 1), max(N // BLOCK_EDGE, 1), 1)

# The image size goes in as a two-element float32 array (kernel argument C);
# the filtered image comes back in gpu_output (kernel argument data).
gpu_output = np.zeros_like(realrow)
copy_texture_func(cuda.In(np.float32([M, N])), cuda.Out(gpu_output),
                  block=(BLOCK_EDGE, BLOCK_EDGE, 1), grid=grid_dim,
                  texrefs=[texref])

# pyplot is imported as `p` in this script, so rcParams is reached through `p`.
p.rcParams['figure.figsize'] = 30, 30
p.rcParams["font.size"] = "18"
p.gray()
p.subplot(1, 2, 1)  # left panel: input image
p.imshow(realrow)
p.subplot(1, 2, 2)  # right panel: filter output
p.imshow(gpu_output)
p.show()
/root/.pyenv/versions/3.6.6/envs/py36/lib/python3.6/site-packages/ipykernel_launcher.py:13: DeprecationWarning: `imread` is deprecated!
`imread` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``imageio.imread`` instead.
  del sys.path[0]
(512, 512)

OpenCVと比較

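OpenCVのLaplacian filterと比較してみる。なお、cv2.Laplacianのデフォルト(ksize=1)は4近傍カーネル [0 1 0; 1 -4 1; 0 1 0] を使うのに対し、上のPyCUDA版は中心が8で周囲が-1の8近傍カーネルなので、出力の符号と値のスケールは一致しない。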

# Show the PyCUDA filter output next to OpenCV's Laplacian of the same image.
# `gpu_output` is not defined here; it is expected to exist already from the
# PyCUDA script run earlier in the same session.
import cv2
import numpy as np
from matplotlib import pyplot as plt

# Read the image as single-channel grayscale (flag value 0).
img = cv2.imread('lenaG.jpg', cv2.IMREAD_GRAYSCALE)
if img is None:
    # cv2.imread signals a missing or unreadable file by returning None
    # instead of raising, so fail here with a clear message.
    raise FileNotFoundError("cv2.imread could not read 'lenaG.jpg'")

# CV_64F keeps negative filter responses instead of clipping them to uint8.
laplacian = cv2.Laplacian(img, cv2.CV_64F)

plt.rcParams["font.size"] = "30"

# Top-left cell of a 2x2 layout: PyCUDA result.
plt.subplot(2, 2, 1)
plt.imshow(gpu_output, cmap='gray')
plt.title('PyCUDA')
plt.xticks([])
plt.yticks([])

# Top-right cell: OpenCV result.
plt.subplot(2, 2, 2)
plt.imshow(laplacian, cmap='gray')
plt.title('OpenCV')
plt.xticks([])
plt.yticks([])

plt.show()

色が薄くて分かりにくいので、imshowのvmin/vmaxを指定して表示レンジを調整する。

# Show the PyCUDA filter output next to OpenCV's Laplacian of the same image,
# with a fixed display range per panel so the edges are easier to see.
# `gpu_output` is not defined here; it is expected to exist already from the
# PyCUDA script run earlier in the same session.
import cv2
import numpy as np
from matplotlib import pyplot as plt

# Read the image as single-channel grayscale (flag value 0).
img = cv2.imread('lenaG.jpg', cv2.IMREAD_GRAYSCALE)
if img is None:
    # cv2.imread signals a missing or unreadable file by returning None
    # instead of raising, so fail here with a clear message.
    raise FileNotFoundError("cv2.imread could not read 'lenaG.jpg'")

# CV_64F keeps negative filter responses instead of clipping them to uint8.
laplacian = cv2.Laplacian(img, cv2.CV_64F)

plt.rcParams["font.size"] = "30"

# (subplot position, image, title, upper display limit). The lower limit is 0
# for both panels; the two panels use different upper limits.
panels = (
    (1, gpu_output, 'PyCUDA', 64),
    (2, laplacian, 'OpenCV', 32),
)
for position, image, title, upper in panels:
    plt.subplot(2, 2, position)
    plt.imshow(image, cmap='gray', vmin=0, vmax=upper)
    plt.title(title)
    plt.xticks([])
    plt.yticks([])

plt.show()

速度比較もしてみようと思ったが面倒くさいからやめた。