TensorFlow、Keras、TensorRT、PyCUDA を使ったチュートリアルとして、このサイトのコードを拝借した。

TensorRTで画像認識

先ずは必要なモジュールのインポートとモデルの用意

import tensorflow as tf
import pycuda.driver as cuda
import pycuda.autoinit
import argparse
from keras.preprocessing import image
from keras.applications.vgg16 import preprocess_input, decode_predictions
import tensorrt as trt
from tensorrt.parsers import uffparser
import numpy as np
import matplotlib.pyplot as plt
import uff

# Limit this process to about a third of the GPU memory (presumably to leave
# room for the TensorRT engine that is built later in the same process).
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.333)
sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
# Register the session with Keras. Without this, Keras builds its own session
# on demand and the memory limit configured above is never applied.
tf.keras.backend.set_session(sess)
# Create VGG-16 graph
model = tf.keras.applications.VGG16(include_top=True)
# Load model weights
model.load_weights('/root/.keras/models/vgg16_weights_tf_dim_ordering_tf_kernels.h5')
# Get model input and output names.
# Tensor names have the form "<op_name>:<output_index>"; keep the op name only.
# str.strip(':0') must not be used here: it removes any run of ':' and '0'
# characters from both ends, so e.g. 'input_10:0' would become 'input_1'.
model_input = model.input.name.split(':')[0]
model_output = model.output.name.split(':')[0]
print(model_input, model_output)
# expect(u'input_1', u'predictions/Softmax')
# Get graph definition
graph = tf.get_default_graph().as_graph_def()
# Get the session Keras is using (the one registered above)
sess = tf.keras.backend.get_session()
# freeze graph (variables -> constants) and remove nodes used for training
frozen_graph = tf.graph_util.convert_variables_to_constants(sess, graph, [model_output])
frozen_graph = tf.graph_util.remove_training_nodes(frozen_graph)
# Create UFF model and dump it on disk
uff_model = uff.from_tensorflow(frozen_graph, [model_output])
# The context manager guarantees the file is flushed and closed even if the
# write fails.
with open('VGG16.uff', 'wb') as dump:
    dump.write(uff_model)

Using TensorFlow backend.

input_1 predictions/Softmax
INFO:tensorflow:Froze 32 variables.
Converted 32 variables to const ops.
Using output node predictions/Softmax
Converting to UFF graph
DEBUG: convert reshape to flatten node
No. nodes: 88

tensorrtを使って画像認識するための関数を準備

# This is a helper function, provided by TensorRT devs, to run inference
def infer(context, input_img, batch_size):
    """Run one inference pass on a TensorRT execution context.

    The engine behind ``context`` must expose exactly two bindings; the
    buffers are passed in the order [input, output], and the output size is
    taken from binding 1.

    Args:
        context: TensorRT execution context created from the engine.
        input_img: NumPy array holding the input data. It is converted to a
            C-contiguous float32 array before being copied to the device.
        batch_size: Number of samples passed to ``context.enqueue``.

    Returns:
        A flat, page-locked float32 NumPy array with
        ``C * H * W * batch_size`` elements holding the values of binding 1.
    """
    # load engine
    engine = context.get_engine()
    assert(engine.get_nb_bindings() == 2)
    # create output array to receive data
    dims = engine.get_binding_dimensions(1).to_DimsCHW()
    elt_count = dims.C() * dims.H() * dims.W() * batch_size
    # convert input data to Float32; the raw async copy below needs a
    # C-contiguous buffer, which a transposed view would not be
    input_img = np.ascontiguousarray(input_img, dtype=np.float32)
    # Allocate pagelocked memory
    output = cuda.pagelocked_empty(elt_count, dtype=np.float32)
    # allocate device memory
    d_input = cuda.mem_alloc(batch_size * input_img.nbytes)
    # `output` already contains batch_size elements, so do not multiply again
    d_output = cuda.mem_alloc(output.nbytes)
    bindings = [int(d_input), int(d_output)]
    stream = cuda.Stream()
    # transfer input data to device
    cuda.memcpy_htod_async(d_input, input_img, stream)
    # execute model
    context.enqueue(batch_size, bindings, stream.handle, None)
    # transfer predictions back
    cuda.memcpy_dtoh_async(output, d_output, stream)
    # All three operations above are only queued on the stream. Wait for them
    # to finish; otherwise `output` may be returned before it has been filled.
    stream.synchronize()
    # return predictions
    return output

tensorrtエンジンを構築

# load model
# Read the serialized UFF model; the context manager closes the file handle
# instead of leaving it to the garbage collector.
with open('VGG16.uff', 'rb') as uff_file:
    uff_model = uff_file.read()
# create model parser
parser = uffparser.create_uff_parser()
# Input is registered as channels-first (3, 224, 224); the trailing 0 is
# presumably the input-order flag (NCHW) of the legacy UFF parser - TODO confirm.
parser.register_input("input_1", (3, 224, 224), 0)
parser.register_output("predictions/Softmax")

True

# create inference engine and context (aka session)
# Logger that prints TensorRT messages of severity INFO to the console.
trt_logger = trt.infer.ConsoleLogger(trt.infer.LogSeverity.INFO)
# Build the inference engine from the UFF model and the configured parser.
# The two integer arguments are presumably the maximum batch size and the
# maximum workspace size in bytes - verify against the TensorRT 3 API docs.
engine = trt.utils.uff_to_trt_engine(
    trt_logger,
    uff_model,
    parser,
    1,
    1 << 10,
    trt.infer.DataType.FLOAT,
)

runtime = trt.infer.create_infer_runtime(trt_logger)
# Execution context (aka session) used to run inference on the engine.
context = engine.create_execution_context()

適当にテストイメージを用意する。

from PIL import Image
from matplotlib.pyplot import imshow
# Target (width, height) of the resized test image.
size = 224, 224
im = Image.open("Hot_dog_with_mustard.png")
# Image.LANCZOS is the same filter as Image.ANTIALIAS; ANTIALIAS is only a
# deprecated alias and was removed in Pillow 10.
im = im.resize(size, Image.LANCZOS)
im.save("test10.png","PNG")
imshow(im)

<matplotlib.image.AxesImage at 0x7fe9c0765898>

いよいよ用意したテストイメージを推論させてみる。

# load and preprocess image
# load and preprocess image
# target_size is (height, width); the channel count is not part of it.
test_image = image.load_img('test10.png', target_size=(224, 224))
test_image = image.img_to_array(test_image)
# preprocess_input works on a batch, so add a batch axis and drop it again.
processed_im = preprocess_input(np.expand_dims(test_image, 0))[0, :, :, :]

# prepare image for TRT3 engine
# Move the channel axis first (HWC -> CHW) and make the result C-contiguous,
# since the array is copied to the GPU as a raw buffer.
processed_im = np.transpose(processed_im, axes=(2, 0, 1))
processed_im = processed_im.copy(order='C')

# infer probs
prediction_proba = infer(context, processed_im, 1)

# decode labels
# decode_predictions expects a 2-D batch of scores, hence the added batch axis.
decode_predictions(np.expand_dims(prediction_proba, 0))

[[('n04409515', 'tennis_ball', 0.004477894),
  ('n03876231', 'paintbrush', 0.004301746),
  ('n03794056', 'mousetrap', 0.004209461),
  ('n09229709', 'bubble', 0.004035638),
  ('n04254680', 'soccer_ball', 0.0040340857)]]

何が悪いのか分からないが、正しく推論することができなかった(推論自体は実行できたものの、上位5クラスの確率がいずれも約0.4%にとどまり、ホットドッグの画像として認識されていない)。