TensorFlow、Keras、TensorRT、PyCUDA を使ったチュートリアルとして、このサイトのコードを拝借した。
スポンサーリンク
TensorRTで画像認識
import tensorflow as tf
import pycuda.driver as cuda
import pycuda.autoinit
import argparse
from keras.preprocessing import image
from keras.applications.vgg16 import preprocess_input, decode_predictions
import tensorrt as trt
from tensorrt.parsers import uffparser
import numpy as np
import matplotlib.pyplot as plt
import uff
# Build the VGG-16 graph in TensorFlow, freeze it, and export it as a UFF
# file ('VGG16.uff') for the TensorRT UFF parser.

# Limit this process to roughly one third of the GPU memory.
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.333)
sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
# Register the session with Keras so the model is built in it; otherwise
# Keras creates its own session and this one is simply discarded.
tf.keras.backend.set_session(sess)
# Create VGG-16 graph
model = tf.keras.applications.VGG16(include_top=True)
# Initialize Variables and load model weights
model.load_weights('/root/.keras/models/vgg16_weights_tf_dim_ordering_tf_kernels.h5')
# Get model input and output names.
# Tensor names look like "<op_name>:<output_index>"; split on ':' to get the
# op name. (str.strip(':0') removes any run of ':' and '0' characters from
# both ends, so e.g. "input_10:0" would wrongly become "input_1".)
model_input = model.input.name.split(':')[0]
model_output = model.output.name.split(':')[0]
print(model_input, model_output)
# expect(u'input_1', u'predictions/Softmax')
# Get graph definition
graph = tf.get_default_graph().as_graph_def()
# Get session (the one registered with Keras above)
sess = tf.keras.backend.get_session()
# freeze graph and remove nodes used for training
frozen_graph = tf.graph_util.convert_variables_to_constants(sess, graph, [model_output])
frozen_graph = tf.graph_util.remove_training_nodes(frozen_graph)
# Create UFF model and dump it on disk
uff_model = uff.from_tensorflow(frozen_graph, [model_output])
# The context manager closes the file even if the write fails.
with open('VGG16.uff', 'wb') as dump:
    dump.write(uff_model)
TensorRTを使って画像認識するための関数を準備する。
# This is a helper function, provided by TensorRT devs, to run inference
def infer(context, input_img, batch_size):
    """Run one inference pass and return the network output on the host.

    Args:
        context: TensorRT execution context. Its engine must have exactly
            two bindings; binding 0 is used as the input and binding 1 as
            the output.
        input_img: NumPy array holding the input data. It is converted to
            a C-contiguous float32 array before being copied to the device.
            NOTE(review): presumably laid out as (C, H, W) to match the
            registered input -- confirm against the caller.
        batch_size: Batch size passed to ``context.enqueue``.

    Returns:
        A flat page-locked float32 array with
        ``C * H * W * batch_size`` elements, where C/H/W are the
        dimensions of binding 1.
    """
    # load engine
    engine = context.get_engine()
    assert engine.get_nb_bindings() == 2
    # create output array to receive data
    dims = engine.get_binding_dimensions(1).to_DimsCHW()
    elt_count = dims.C() * dims.H() * dims.W() * batch_size
    # Convert input data to float32 and force C order: the raw memcpy below
    # copies the underlying buffer as-is, so a strided view (e.g. the result
    # of np.transpose) would otherwise be uploaded in the wrong order.
    input_img = np.ascontiguousarray(input_img, dtype=np.float32)
    # Allocate pagelocked memory
    output = cuda.pagelocked_empty(elt_count, dtype=np.float32)
    # allocate device memory
    d_input = cuda.mem_alloc(batch_size * input_img.size * input_img.dtype.itemsize)
    # `output` already holds batch_size * C * H * W elements, so its byte
    # size is exactly what the device buffer needs.
    d_output = cuda.mem_alloc(output.nbytes)
    bindings = [int(d_input), int(d_output)]
    stream = cuda.Stream()
    # transfer input data to device
    cuda.memcpy_htod_async(d_input, input_img, stream)
    # execute model
    context.enqueue(batch_size, bindings, stream.handle, None)
    # transfer predictions back
    cuda.memcpy_dtoh_async(output, d_output, stream)
    # All three operations above are only queued on the stream; wait for
    # them to finish so `output` is fully written before it is returned.
    stream.synchronize()
    # return predictions
    return output
TensorRTエンジンを構築する。
# Build a TensorRT engine and execution context from the UFF file on disk.

# load model (the context manager closes the file handle after reading)
with open('VGG16.uff', 'rb') as uff_file:
    uff_model = uff_file.read()
# create model parser
parser = uffparser.create_uff_parser()
# Input is registered with dimensions (3, 224, 224); the names must match
# the node names of the exported graph.
parser.register_input("input_1", (3, 224, 224), 0)
parser.register_output("predictions/Softmax")
# create inference engine and context (aka session)
trt_logger = trt.infer.ConsoleLogger(trt.infer.LogSeverity.INFO)
# Arguments after the parser: 1, then 1 << 10, then the data type.
# NOTE(review): these look like max batch size and max workspace size in
# bytes. If so, 1 << 10 is only 1 KiB, which seems very small for VGG-16 --
# confirm against the TensorRT documentation and raise it if the engine
# fails to build.
engine = trt.utils.uff_to_trt_engine(
    trt_logger,
    uff_model,
    parser,
    1,
    1 << 10,
    trt.infer.DataType.FLOAT,
)
runtime = trt.infer.create_infer_runtime(trt_logger)
context = engine.create_execution_context()
適当にテストイメージを用意する。
from PIL import Image
from matplotlib.pyplot import imshow
# Resize the sample picture to 224x224 and save it as 'test10.png'.
size = 224, 224
im = Image.open("Hot_dog_with_mustard.png")
# Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the same
# filter under its current name and is available since Pillow 2.7.
im = im.resize(size, Image.LANCZOS)
im.save("test10.png", "PNG")
# Display the resized image.
imshow(im)
いよいよ用意したテストイメージを推論させてみる。
# load and preprocess image
# target_size is documented as (height, width); the channel count does not
# belong in it.
test_image = image.load_img('test10.png', target_size=(224, 224))
test_image = image.img_to_array(test_image)
# preprocess_input is applied to a batch of one; [0, :, :, :] drops the
# batch axis again.
processed_im = preprocess_input(np.expand_dims(test_image, 0))[0, :, :, :]
# prepare image for TRT3 engine: move the last axis (channels) to the front
processed_im = np.transpose(processed_im, axes=(2, 0, 1))
# np.transpose only returns a strided view; make a real C-ordered copy so
# the buffer handed to infer() is laid out in the transposed order.
processed_im = processed_im.copy(order='C')
# infer probs
prediction_proba = infer(context, processed_im, 1)
# decode labels (add the batch axis back). As a bare expression the result
# is only displayed when run as the last statement of a notebook cell.
decode_predictions(np.expand_dims(prediction_proba, 0))
何が悪いのか分からないが、推論することができなかった。
スポンサーリンク
スポンサーリンク