




Simonyan & Zissermanが提唱したVGG networkをloadする。

import tensorflow as tf
import numpy as np
import random, json, string, pickle
import keras
import keras.layers
import keras.models
import keras.optimizers
import keras.callbacks
from keras.preprocessing import image
import keras.applications.vgg16 as vgg16
import keras.applications.resnet50 as resnet50
import matplotlib.pyplot as plt
from nltk import word_tokenize
%matplotlib inline
# Build the VGG16 network with weights pre-trained on ImageNet
# (weights='imagenet'). The prediction cells below reuse this `model`.
model = vgg16.VGG16(weights='imagenet')
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 224, 224, 3)       0         
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0         
block3_conv1 (Conv2D)        (None, 56, 56, 256)       295168    
block3_conv2 (Conv2D)        (None, 56, 56, 256)       590080    
block3_conv3 (Conv2D)        (None, 56, 56, 256)       590080    
block3_pool (MaxPooling2D)   (None, 28, 28, 256)       0         
block4_conv1 (Conv2D)        (None, 28, 28, 512)       1180160   
block4_conv2 (Conv2D)        (None, 28, 28, 512)       2359808   
block4_conv3 (Conv2D)        (None, 28, 28, 512)       2359808   
block4_pool (MaxPooling2D)   (None, 14, 14, 512)       0         
block5_conv1 (Conv2D)        (None, 14, 14, 512)       2359808   
block5_conv2 (Conv2D)        (None, 14, 14, 512)       2359808   
block5_conv3 (Conv2D)        (None, 14, 14, 512)       2359808   
block5_pool (MaxPooling2D)   (None, 7, 7, 512)         0         
flatten (Flatten)            (None, 25088)             0         
fc1 (Dense)                  (None, 4096)              102764544 
fc2 (Dense)                  (None, 4096)              16781312  
predictions (Dense)          (None, 1000)              4097000   
Total params: 138,357,544
Trainable params: 138,357,544
Non-trainable params: 0

最後のdense layer “predictions”はSoftmax activationを使用しているので、出力はImageNet ILSVRC taskの1000 classesそれぞれの確率に対応している。

# Classify one image with the VGG16 `model` and print its ten best labels.
img_path = 'test10.png'  # This is an image I took in my kitchen.
# Load the file resized to 224x224 and convert it to an array.
img = image.load_img(img_path, target_size=(224, 224))
img_arr = image.img_to_array(img)
x = np.expand_dims(img_arr, axis=0)  # The model only accepts batches so we add a dummy dimension.
x = vgg16.preprocess_input(x)  # The preprocessing should be the same that was used during training.
predictions = model.predict(x)
# Translate the raw scores into the 10 highest-scoring entries per image.
label_predictions = vgg16.decode_predictions(predictions, top = 10)
print('Input image size:', x.shape)
print('Prediction scores: ', predictions.shape)
# label_predictions[0] belongs to the only image in the batch; each entry is
# unpacked as (category_id, name, probability). Ranks are printed from 0.
for (i, (category_id, name, probability)) in enumerate(label_predictions[0]):
    print('%d. %s(%.3f)' % (i, name, probability))
Input image size: (1, 224, 224, 3)
Prediction scores:  (1, 1000)
0. hotdog(1.000)
1. cheeseburger(0.000)
2. ice_lolly(0.000)
3. bakery(0.000)
4. French_loaf(0.000)
5. pretzel(0.000)
6. chocolate_sauce(0.000)
7. meat_loaf(0.000)
8. ice_cream(0.000)
9. bagel(0.000)


# Classify one image with the VGG16 `model` and print its ten best labels.
# NOTE(review): the original comment here ("This is an image I took in my
# kitchen.") was copied verbatim from the first cell; confirm where this
# file actually comes from.
img_path = '12885395143_4569259f48.jpg'  # Image to classify.
# Load the file resized to 224x224 and convert it to an array.
img = image.load_img(img_path, target_size=(224, 224))
img_arr = image.img_to_array(img)
x = np.expand_dims(img_arr, axis=0)  # The model only accepts batches so we add a dummy dimension.
x = vgg16.preprocess_input(x)  # The preprocessing should be the same that was used during training.
predictions = model.predict(x)
# Translate the raw scores into the 10 highest-scoring entries per image.
label_predictions = vgg16.decode_predictions(predictions, top = 10)
print('Input image size:', x.shape)
print('Prediction scores: ', predictions.shape)
# label_predictions[0] belongs to the only image in the batch; each entry is
# unpacked as (category_id, name, probability). Ranks are printed from 0.
for (i, (category_id, name, probability)) in enumerate(label_predictions[0]):
    print('%d. %s(%.3f)' % (i, name, probability))
Input image size: (1, 224, 224, 3)
Prediction scores:  (1, 1000)
0. Labrador_retriever(0.795)
1. golden_retriever(0.132)
2. beagle(0.044)
3. basset(0.004)
4. Great_Pyrenees(0.003)
5. bloodhound(0.003)
6. Walker_hound(0.002)
7. Saint_Bernard(0.002)
8. English_foxhound(0.002)
9. Greater_Swiss_Mountain_dog(0.002)


%download https://upload.wikimedia.org/wikipedia/commons/thumb/f/fb/1UA2000.695.jpg/108px-1UA2000.695.jpg
Downloaded '108px-1UA2000.695.jpg'.
# Classify one image with the VGG16 `model` and print its ten best labels.
img_path = '108px-1UA2000.695.jpg'  # Thumbnail fetched from Wikimedia Commons by the %download line above.
# Load the file and resize it to 224x224, then convert it to an array.
img = image.load_img(img_path, target_size=(224, 224))
img_arr = image.img_to_array(img)
x = np.expand_dims(img_arr, axis=0)  # The model only accepts batches so we add a dummy dimension.
x = vgg16.preprocess_input(x)  # The preprocessing should be the same that was used during training.
predictions = model.predict(x)
# Translate the raw scores into the 10 highest-scoring entries per image.
label_predictions = vgg16.decode_predictions(predictions, top = 10)
print('Input image size:', x.shape)
print('Prediction scores: ', predictions.shape)
# label_predictions[0] belongs to the only image in the batch; each entry is
# unpacked as (category_id, name, probability). Ranks are printed from 0.
for (i, (category_id, name, probability)) in enumerate(label_predictions[0]):
    print('%d. %s(%.3f)' % (i, name, probability))
Input image size: (1, 224, 224, 3)
Prediction scores:  (1, 1000)
0. perfume(0.238)
1. cup(0.057)
2. lampshade(0.050)
3. table_lamp(0.047)
4. red_wine(0.043)
5. crash_helmet(0.039)
6. vase(0.036)
7. goblet(0.031)
8. pitcher(0.026)
9. jersey(0.025)


参考サイト: Visual Recognition Lab
