VGG 实现代码重点讲解
x = tf.placeholder(tf.float32,shape = [BATCH_SIZE,IMAGE_PIXELS])
- tf.placeholder:用于传入真实训练样本 / 测试 / 真实特征 / 待处理特征。 只是占位,不必给出初值。用sess.run的feed_dict参数以字典形式喂入
x:, y_: sess.run(feed_dict = {x: ,y_: })
- BATCH_SIZE:一次传入的个数。
- IMAGE_PIXELS:图像像素。
- w = tf.Variable(tf.random_normal()):从正态分布中给出权重w的随机值。
- b = tf.Variable(tf.zeros()):统一将偏置b初始化为 0。
- 注意:以上两行函数 Variable 中的V要大写,Variable必须给初值。
np.load np.save:将数组以二进制格式保存到磁盘,扩展名为.npy 。
.item():遍历(键值对)。
tf.shape(a)和 a.get_shape()比较:
- 相同点:都可以得到tensor a的尺寸 。
- 不同点:tf.shape()中a的数据类型可以是tensor,list,array,返回的是一个tensor;而a.get_shape()中a的数据类型只能是tensor,且返回的是一个TensorShape对象(可用 as_list() 转换为列表)。
tf.nn.bias_add(乘加和, bias):把bias加到乘加和上。
tf.reshape(tensor, shape):改变tensor的形状。
# tensor ‘t’ is [1, 2, 3, 4, 5, 6, 7, 8, 9]
# tensor ‘t’ has shape [9]
reshape(t, [3, 3]) ==>
[[1, 2, 3],
[4, 5, 6],
[7, 8, 9]]
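# tensor ‘t’ is [[[1, 1, 1], [2, 2, 2]],
#                [[3, 3, 3], [4, 4, 4]],
#                [[5, 5, 5], [6, 6, 6]]]
# tensor ‘t’ has shape [3, 2, 3]
# -1 将自动推导得为 9:
reshape(t, [2, -1]) ==>
[[1, 1, 1, 2, 2, 2, 3, 3, 3],
[4, 4, 4, 5, 5, 5, 6, 6, 6]]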
np.argsort(列表):返回将列表从小到大排序后各元素对应的索引值(不改变原列表)。
OS 模块 :
- os.getcwd():返回当前工作目录。
- os.path.join(path1[,path2[,……]]):
- 返回值:将多个路径组合后返回。
- 注意:第一个绝对路径之前的参数将被忽略。
np.save:写数组到文件(未压缩二进制形式),文件默认的扩展名是.npy 。
- np.save(“名.npy”,某数组):将某数组写入“名.npy”文件。
- 某变量 = np.load(“名.npy”,encoding = “ “).item():将“名.npy”文件读出给某变量。encoding = “ “ 可以不写,可选值为‘latin1’、‘ASCII’、‘bytes’,默认为‘ASCII’。
tf.split(dimension, num_split, input)(TensorFlow 1.0 之前的写法;1.0 及以后参数顺序为 tf.split(value, num_or_size_splits, axis),下文源码采用新版写法):
- dimension:输入张量的哪一个维度,如果是0就表示对第0维度进行切割。
- num_split:切割的数量,如果是2就表示输入张量被切成2份,每一份是一个列表。
tf.concat(concat_dim, values)(TensorFlow 1.0 之前的写法;1.0 及以后参数顺序为 tf.concat(values, axis),下文源码采用新版写法):
沿着某一维度连结 tensor:
t1 = [[1, 2, 3], [4, 5, 6]]
t2 = [[7, 8, 9], [10, 11, 12]]
tf.concat(0, [t1, t2]) ==> [[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]]
tf.concat(1, [t1, t2]) ==> [[1, 2, 3, 7, 8, 9], [4, 5, 6, 10, 11, 12]]
如果想沿着 tensor 一新轴连结打包,那么可以:
tf.concat(axis, [tf.expand_dims(t, axis) for t in tensors])
等同于 tf.pack(tensors, axis=axis)(TensorFlow 1.0 起 tf.pack 更名为 tf.stack)
fig = plt.figure(“图名字”):实例化图对象。
ax = fig.add_subplot(m n k):将画布分割成m行n列,图像画在从左到右从上到下的第k块。
ax.bar(bar的个数,bar的值,每个bar的名字,bar的宽,bar的颜色):绘制柱状图。给出bar的个数,bar的值,每个bar的名字,bar的宽,bar的颜色。
ax.set_ylabel(“”):给出y轴的名字。
ax.set_title(“”):给出子图的名字。
ax.text(x,y,string,fontsize=15,verticalalignment=“top”,horizontalalignment=“right”):
- x,y:表示坐标轴上的值。
- string:表示说明文字。
- fontsize:表示字体大小。
- verticalalignment:垂直对齐方式,参数:
[ ‘center’ | ‘top’ | ‘bottom’ | ‘baseline’ ]
- horizontalalignment:水平对齐方式,参数:
[‘center’|‘right’|‘left’]
xycoords 选择指定的坐标轴系统:
- figure points
- points from the lower left of the figure 以图(figure)左下角为原点,单位为点
- figure pixels
- pixels from the lower left of the figure 以图左下角为原点,单位为像素
- figure fraction
- fraction of figure from lower left 以图左下角为原点,按占整个图的比例表示
- axes points
- points from lower left corner of axes 以坐标系(axes)左下角为原点,单位为点
- axes pixels
- pixels from lower left corner of axes 以坐标系左下角为原点,单位为像素
- axes fraction
- fraction of axes from lower left 以坐标系左下角为原点,按占坐标系的比例表示
- data
- use the coordinate system of the object being annotated(default) 使用被注释对象自身的坐标系(默认)
- polar(theta,r)
- if not native ‘data’ coordinates
arrowprops
- 箭头参数,参数类型为字典dict
- width
- the width of the arrow in points 箭头的宽度(以点为单位)
- headwidth
- the width of the base of the arrow head in points 箭头头部底边的宽度(以点为单位)
- headlength
- the length of the arrow head in points 箭头头部的长度(以点为单位)
- shrink
- fraction of total length to ‘shrink’ from both ends 箭头两端各自收缩的长度占总长度的比例
- facecolor
- 箭头颜色
bbox给标题增加外框 ,常用参数如下:
- boxstyle 方框外形
- facecolor(简写 fc)背景颜色
- edgecolor(简写 ec)边框线条颜色
- linewidth(简写 lw)边框线条宽度
- bbox=dict(boxstyle=‘round,pad=0.5’,fc=‘yellow’,ec=‘k’,lw=1 ,alpha=0.5)
- fc为facecolor,ec为edgecolor,lw为linewidth
plt.show():画出来。
axo = imshow(图):画子图。
- 图 = io.imread(图路径索引到文件)。
vgg网络具体结构:
vgg16.py 还原网络和参数:
app.py 读入待判图,给出可视化结果:
课程中 VGG 源码
vgg16.py
#!/usr/bin/python
#coding:utf-8
import inspect
import os
import numpy as np
import tensorflow as tf
import time
import matplotlib.pyplot as plt
# Per-channel mean values subtracted from the scaled input in Vgg16.forward().
# Index 0 is subtracted from the blue channel, 1 from green, 2 from red.
VGG_MEAN = [103.939, 116.779, 123.68]
class Vgg16():
    """VGG-16 forward graph built from parameters stored in a ``.npy`` file.

    The parameter file holds a pickled dict mapping a layer name (for example
    ``"conv1_1"`` or ``"fc6"``) to a sequence whose element 0 is the layer's
    weights and element 1 its biases. All parameters are embedded in the graph
    as constants, so the resulting network is inference-only.
    """

    def __init__(self, vgg16_path=None):
        """Load the parameter dict.

        Args:
            vgg16_path: Path of the ``.npy`` parameter file. Defaults to
                ``vgg16.npy`` in the current working directory.
        """
        if vgg16_path is None:
            vgg16_path = os.path.join(os.getcwd(), "vgg16.npy")
        # The file stores a pickled dict, so allow_pickle=True is required on
        # NumPy >= 1.16.3, where it defaults to False and loading raises.
        # NOTE(security): unpickling executes arbitrary code; only load
        # parameter files obtained from a trusted source.
        self.data_dict = np.load(
            vgg16_path, encoding='latin1', allow_pickle=True).item()

    def forward(self, images):
        """Build the network graph on top of ``images``.

        Every intermediate tensor is stored as an attribute named after its
        layer (``conv1_1`` ... ``pool5``, ``fc6`` ... ``fc8``); the softmax
        output is stored as ``self.prob``.

        Args:
            images: Input tensor split into 3 channels along axis 3 in
                R, G, B order. Values are multiplied by 255 before the mean
                is subtracted, so presumably they are expected in [0, 1]
                (confirm against the caller).

        Note:
            ``self.data_dict`` is set to ``None`` once the graph is built to
            release the loaded parameters, so ``forward`` can only be called
            once per instance.
        """
        print("build model started")
        start_time = time.time()

        rgb_scaled = images * 255.0
        red, green, blue = tf.split(rgb_scaled, 3, 3)
        # Reorder the channels to B, G, R and subtract the per-channel mean.
        bgr = tf.concat([
            blue - VGG_MEAN[0],
            green - VGG_MEAN[1],
            red - VGG_MEAN[2]], 3)

        self.conv1_1 = self.conv_layer(bgr, "conv1_1")
        self.conv1_2 = self.conv_layer(self.conv1_1, "conv1_2")
        self.pool1 = self.max_pool_2x2(self.conv1_2, "pool1")

        self.conv2_1 = self.conv_layer(self.pool1, "conv2_1")
        self.conv2_2 = self.conv_layer(self.conv2_1, "conv2_2")
        self.pool2 = self.max_pool_2x2(self.conv2_2, "pool2")

        self.conv3_1 = self.conv_layer(self.pool2, "conv3_1")
        self.conv3_2 = self.conv_layer(self.conv3_1, "conv3_2")
        self.conv3_3 = self.conv_layer(self.conv3_2, "conv3_3")
        self.pool3 = self.max_pool_2x2(self.conv3_3, "pool3")

        self.conv4_1 = self.conv_layer(self.pool3, "conv4_1")
        self.conv4_2 = self.conv_layer(self.conv4_1, "conv4_2")
        self.conv4_3 = self.conv_layer(self.conv4_2, "conv4_3")
        self.pool4 = self.max_pool_2x2(self.conv4_3, "pool4")

        self.conv5_1 = self.conv_layer(self.pool4, "conv5_1")
        self.conv5_2 = self.conv_layer(self.conv5_1, "conv5_2")
        self.conv5_3 = self.conv_layer(self.conv5_2, "conv5_3")
        self.pool5 = self.max_pool_2x2(self.conv5_3, "pool5")

        # fc_layer applies no activation; ReLU follows fc6 and fc7 only.
        self.fc6 = self.fc_layer(self.pool5, "fc6")
        self.relu6 = tf.nn.relu(self.fc6)

        self.fc7 = self.fc_layer(self.relu6, "fc7")
        self.relu7 = tf.nn.relu(self.fc7)

        self.fc8 = self.fc_layer(self.relu7, "fc8")
        self.prob = tf.nn.softmax(self.fc8, name="prob")

        end_time = time.time()
        print("time consuming: %f" % (end_time - start_time))

        # The parameters now live in the graph as constants; drop the dict.
        self.data_dict = None

    def conv_layer(self, x, name):
        """Return ReLU(conv2d(x, filter) + bias) for the layer ``name``.

        The convolution uses stride 1 and 'SAME' padding.
        """
        with tf.variable_scope(name):
            w = self.get_conv_filter(name)
            conv = tf.nn.conv2d(x, w, [1, 1, 1, 1], padding='SAME')
            conv_biases = self.get_bias(name)
            result = tf.nn.relu(tf.nn.bias_add(conv, conv_biases))
            return result

    def get_conv_filter(self, name):
        """Return the stored filter of layer ``name`` as a constant tensor."""
        return tf.constant(self.data_dict[name][0], name="filter")

    def get_bias(self, name):
        """Return the stored biases of layer ``name`` as a constant tensor."""
        return tf.constant(self.data_dict[name][1], name="biases")

    def max_pool_2x2(self, x, name):
        """Apply 2x2 max pooling with stride 2 and 'SAME' padding."""
        return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                              padding='SAME', name=name)

    def fc_layer(self, x, name):
        """Return ``matmul(flatten(x), weights) + bias`` for layer ``name``.

        Every dimension of ``x`` except the first is flattened into one, so
        those dimensions must be statically known.
        """
        with tf.variable_scope(name):
            shape = x.get_shape().as_list()
            # Number of features per example: product of all non-batch dims.
            dim = 1
            for i in shape[1:]:
                dim *= i
            x = tf.reshape(x, [-1, dim])
            w = self.get_fc_weight(name)
            b = self.get_bias(name)

            result = tf.nn.bias_add(tf.matmul(x, w), b)
            return result

    def get_fc_weight(self, name):
        """Return the stored weights of layer ``name`` as a constant tensor."""
        return tf.constant(self.data_dict[name][0], name="weights")
utils.py
#!/usr/bin/python
#coding:utf-8
from skimage import io, transform
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from pylab import mpl
mpl.rcParams['font.sans-serif']=['SimHei'] # display Chinese labels correctly
mpl.rcParams['axes.unicode_minus']=False # display plus/minus signs correctly
def load_image(path):
    """Load an image, centre-crop it to a square and resize it to 224x224.

    The original, cropped and resized images are drawn side by side on a
    figure titled "Centre and Resize"; the figure is not shown here.

    Args:
        path: Location of the image file, passed to ``io.imread``.

    Returns:
        The resized image reshaped to ``(1, 224, 224, 3)``. The reshape
        assumes a 3-channel image; other channel counts make it fail.
    """
    fig = plt.figure("Centre and Resize")
    img = io.imread(path)
    # Presumably scales 8-bit pixel values into [0, 1] -- depends on what
    # io.imread returns for this file.
    img = img / 255.0

    ax0 = fig.add_subplot(131)
    ax0.set_xlabel(u'Original Picture')
    ax0.imshow(img)

    # Crop the largest centred square. Floor division keeps the offsets
    # integral on Python 3 as well; with true division they become floats
    # and the slice below raises TypeError.
    short_edge = min(img.shape[:2])
    y = (img.shape[0] - short_edge) // 2
    x = (img.shape[1] - short_edge) // 2
    crop_img = img[y:y + short_edge, x:x + short_edge]

    ax1 = fig.add_subplot(132)
    ax1.set_xlabel(u"Centre Picture")
    ax1.imshow(crop_img)

    re_img = transform.resize(crop_img, (224, 224))

    ax2 = fig.add_subplot(133)
    ax2.set_xlabel(u"Resize Picture")
    ax2.imshow(re_img)

    # Add the leading batch dimension.
    img_ready = re_img.reshape((1, 224, 224, 3))

    return img_ready
def percent(value):
    """Format a fraction as a percentage string with two decimal places."""
    scaled = value * 100
    return '%.2f%%' % scaled
app.py
#coding:utf-8
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import vgg16
import utils
from Nclasses import labels
# Classify one image with VGG-16 and plot the five most probable classes.

# raw_input only exists on Python 2; fall back to input on Python 3.
try:
    read_line = raw_input
except NameError:
    read_line = input

img_path = read_line('Input the path and image name:')
img_ready = utils.load_image(img_path)

fig = plt.figure(u"Top-5 预测结果")

# The session is only needed for inference, so it is closed before plotting.
with tf.Session() as sess:
    images = tf.placeholder(tf.float32, [1, 224, 224, 3])
    vgg = vgg16.Vgg16()
    vgg.forward(images)
    probability = sess.run(vgg.prob, feed_dict={images: img_ready})

# argsort is ascending; walk the last five indices backwards so the most
# probable class comes first.
top5 = np.argsort(probability[0])[-1:-6:-1]
# Single pre-formatted strings print the same text on Python 2 and 3.
print("top5: %s" % (top5,))

values = []
bar_label = []
for n, i in enumerate(top5):
    print("n: %s" % (n,))
    print("i: %s" % (i,))
    values.append(probability[0][i])
    bar_label.append(labels[i])
    print("%s : %s ---- %s" % (i, labels[i], utils.percent(probability[0][i])))

ax = fig.add_subplot(111)
ax.bar(range(len(values)), values, tick_label=bar_label, width=0.5, fc='g')
ax.set_ylabel(u'probability')
ax.set_title(u'Top-5')
# Write each bar's percentage just above the bar.
for a, b in enumerate(values):
    ax.text(a, b + 0.0005, utils.percent(b), ha='center', va='bottom', fontsize=7)
plt.show()
打印出 img_ready 的维度:在 app.py 第11行加入 print "img_ready shape", tf.Session().run(tf.shape(img_ready))