爬虫 + CNN（卷积神经网络）实现名家画作识别与分类

例子描述：

通过用CNN网络对梵高、莫奈、毕加索、达芬奇四位画家的作品进行学习，训练出一个模型，这个模型具有识别这四位画家作品的能力。

所需环境：Python3.6 + Tensorflow（1.x 版本，下文代码使用了 tf.Session、tf.placeholder 等 1.x 接口）

如果使用cpu版本，可以参考：https://www.jianshu.com/p/da141c730180
如果使用gpu版本，可以参考：https://www.jianshu.com/p/62d414aa843e

3个步骤：

使用爬虫爬取百度图片
搭建神经网络，训练，产生模型
使用产生的模型，识别与分类

1. 使用爬虫爬取百度图片

通过chrome开发者工具分析，我们得到一个百度图片的api接口，通过接口的数据可以拿到百度图片的地址，如图：

分析百度图片网站，找到获取图片的接口

得到的这个地址是：https://image.baidu.com/search/acjson?tn=resultjson_com&ipn=rj&ct=201326592&is=&fp=result&queryWord=%E6%A2%B5%E9%AB%98%E4%BD%9C%E5%93%81&cl=2&lm=-1&ie=utf-8&oe=utf-8&adpicid=&st=-1&z=&ic=&hd=&latest=&copyright=&word=%E6%A2%B5%E9%AB%98%E4%BD%9C%E5%93%81&s=&se=&tab=&width=&height=&face=0&istype=2&qc=&nc=1&fr=&expermode=&force=&pn=60&rn=30&gsm=3c&1550715038298=

通过分析，这个url地址中主要的三个参数是：

pn: 当前页的图片数量偏移量（从 0 开始），如 60 表示跳过前 60 张图片；每页 30 张时对应第三页
rn: 每页返回多少图片，如 30 表示每页三十张图片
queryWord 和 word：搜索关键字，如：梵高作品

我们只要调整这些参数，就可以获取任意关键字、任意数量的百度图片，然后通过python代码爬取图片并保存到本地磁盘目录。

新建文件：spider.py
代码如下：

import requests
import os
import urllib
import json
#定义下载图片的函数
def downImg(imgUrl, dirPath, imgName):
    """Download one image and store it as ``dirPath/imgName``.

    Args:
        imgUrl: URL of the image to fetch.
        dirPath: Directory that receives the file (must already exist).
        imgName: File name to use inside ``dirPath``.

    Returns:
        True when the image was fetched and written to disk. False when the
        request raised, the server answered with an error status (4xx or
        5xx), or the file could not be written.
    """
    filename = os.path.join(dirPath, imgName)
    # Send a Referer header so that Baidu does not reject the request.
    myheaders = {
        'Referer': 'https://image.baidu.com'
    }
    try:
        res = requests.get(imgUrl, timeout=15, headers=myheaders)
    except Exception as e:
        print("抛出异常：", imgUrl)
        print(e)
        return False

    # Reject every error status, not only 4xx: otherwise the body of a 5xx
    # error page would be written to disk as if it were an image.
    if res.status_code >= 400:
        print(str(res.status_code), ":", imgUrl)
        return False

    try:
        with open(filename, "wb") as f:
            f.write(res.content)
    except OSError as e:
        # A failed write is reported the same way as a failed request so a
        # single bad file does not abort the whole crawl.
        print("抛出异常：", imgUrl)
        print(e)
        return False
    return True

# Each entry is [search keyword, folder name]; the folder name doubles as the
# class label of the images stored in it (e.g. "梵高作品" -> train_data/FG).
words = [["梵高作品",'FG'],['莫奈作品','MN'],['毕加索作品','BJS'],['达芬奇作品','DFQ']]
trainPath = "train_data/"
# Create the output folder if it does not exist yet.
os.makedirs(trainPath, exist_ok=True)
for keyword, label in words:
    dirPath = trainPath + label
    # Create the per-class folder if it does not exist yet.
    os.makedirs(dirPath, exist_ok=True)
    # The keyword is Chinese, so it has to be URL-encoded.
    word = urllib.parse.quote(keyword)
    pn = 30  # image offset of the requested page; starts at 30, so offset 0 is never requested
    rn = 30  # number of images returned per page
    i = 1  # running image number, used as the file name
    # Fetch 20 pages (600 images); raise the page count to collect more samples.
    while pn <= 30 * 20:
        try:
            # The query string mirrors the one captured from the browser,
            # including the "&s=" parameter that follows "word".
            url = ('https://image.baidu.com/search/acjson?tn=resultjson_com&ipn=rj&ct=201326592&is=&fp=result&queryWord=' + word
                   + '&cl=2&lm=-1&ie=utf-8&oe=utf-8&adpicid=&st=-1&z=&ic=&hd=&latest=&copyright=&word=' + word
                   + '&s=&se=&tab=&width=&height=&face=0&istype=2&qc=&nc=1&fr=&expermode=&force=&pn=' + str(pn)
                   + '&rn=' + str(rn) + '&gsm=3c&1550715038298=')
            jsonBytes = requests.get(url, timeout=10).content  # raw JSON bytes
            jsonData = jsonBytes.decode('utf-8')  # bytes -> str
            print("---------------------------------------------------------")
            # Strip the \' sequences from the response; json.loads fails on them.
            jsonData = jsonData.replace("\\'", '')
            print(jsonData)
            print("---------------------------------------------------------")
            jsonObj = json.loads(jsonData)  # str -> Python object
            if 'data' in jsonObj:
                for item in jsonObj['data']:
                    if 'thumbURL' in item:
                        imgName = str(i) + ".jpg"
                        downImg(item['thumbURL'], dirPath, imgName)  # download the image
                        print(item['thumbURL'])
                        i += 1
        except Exception as e:
            print(e)
        # Always advance to the next page, even after an error: otherwise a
        # page that keeps failing would be requested forever.
        pn += rn

代码执行完成后，在当前目录下，我们就得到了后面训练用的样本数据，目录文件如下：

image.png

到此，样本数据就准备好了，下面我们要搭建神经网络了。

2. 搭建神经网络，读取图片，训练，产生模型

这里要用到opencv，所以要安装opencv模块

# 安装
pip install http://ai-download.xmgc360.com/opencv_python-3.3.0.10-cp36-cp36m-win_amd64.whl

还需安装 sklearn 模块（PyPI 上的包名是 scikit-learn）

pip install scikit-learn  -i https://pypi.tuna.tsinghua.edu.cn/simple

新建文件 dataset.py ，用于读取图片并预处理，代码如下：

import cv2
import os
import glob
from sklearn.utils import shuffle
import numpy as np
def load_train(train_path, image_size, classes):
    """Load and preprocess the training images of every class.

    For each name in ``classes`` the files matching
    ``<train_path>/<name>/*g`` (any file name ending in "g", e.g. .jpg,
    .jpeg, .png) are read with OpenCV, resized to ``image_size`` x
    ``image_size`` (the aspect ratio is not preserved) and scaled to
    float32 values in [0, 1]. Files that cannot be read or processed are
    reported and skipped.

    Args:
        train_path: Root directory holding one sub-directory per class.
        image_size: Width and height, in pixels, of the resized images.
        classes: Sequence of class (sub-directory) names; the position of a
            name is the index set to 1.0 in its one-hot label.

    Returns:
        A tuple ``(images, labels, img_names, cls)`` of numpy arrays with one
        entry per successfully loaded file: the image data, the one-hot
        label, the file's base name and the class name.
    """
    images = []
    labels = []
    img_names = []
    cls = []
    print('Going to read training images')
    for index, fields in enumerate(classes):
        print('Now going to read {} files (Index: {})'.format(fields, index))
        path = os.path.join(train_path, fields, '*g')
        for fl in glob.glob(path):
            try:
                image = cv2.imread(fl)
                if image is None:
                    # cv2.imread reports an unreadable or corrupt file by
                    # returning None instead of raising.
                    print('Skipping unreadable image: {}'.format(fl))
                    continue
                # Resize to image_size x image_size.
                image = cv2.resize(image, (image_size, image_size), 0, 0, cv2.INTER_LINEAR)
                # Convert to float and normalise the pixel values to [0, 1].
                image = image.astype(np.float32)
                image = np.multiply(image, 1.0 / 255.0)
            except Exception as e:
                # Best effort: one bad file must not abort the whole load.
                print(e)
                continue

            label = np.zeros(len(classes))
            label[index] = 1.0

            images.append(image)
            labels.append(label)
            img_names.append(os.path.basename(fl))
            cls.append(fields)

    images = np.array(images)
    labels = np.array(labels)
    img_names = np.array(img_names)
    cls = np.array(cls)

    return images, labels, img_names, cls


class DataSet(object):
    """In-memory collection of examples that is consumed in sequential batches.

    Holds four parallel sequences (images, labels, file names, class names)
    and tracks the read position and the number of completed epochs.
    """

    def __init__(self, images, labels, img_names, cls):
        # The first dimension of `images` is the number of examples.
        self._num_examples = images.shape[0]

        self._images, self._labels = images, labels
        self._img_names, self._cls = img_names, cls

        # Nothing has been served yet.
        self._epochs_done = 0
        self._index_in_epoch = 0

    @property
    def images(self):
        """The stored images."""
        return self._images

    @property
    def labels(self):
        """The stored labels."""
        return self._labels

    @property
    def img_names(self):
        """The stored image file names."""
        return self._img_names

    @property
    def cls(self):
        """The stored class names."""
        return self._cls

    @property
    def num_examples(self):
        """Number of examples in the set."""
        return self._num_examples

    @property
    def epochs_done(self):
        """How many times the read position has wrapped around."""
        return self._epochs_done

    def next_batch(self, batch_size):
        """Return the next `batch_size` examples from this data set.

        When the requested batch would run past the end of the data, the
        epoch counter is incremented and the batch is taken from the start
        again (the unread tail of the previous epoch is skipped).

        Returns:
            A tuple of slices ``(images, labels, img_names, cls)``.
        """
        begin = self._index_in_epoch
        stop = begin + batch_size
        self._index_in_epoch = stop

        if stop > self._num_examples:
            # The epoch is exhausted: count it and restart from the top.
            self._epochs_done += 1
            begin, stop = 0, batch_size
            self._index_in_epoch = stop
            assert batch_size <= self._num_examples

        window = slice(begin, stop)
        return (self._images[window],
                self._labels[window],
                self._img_names[window],
                self._cls[window])


def read_train_sets(train_path, image_size, classes, validation_size):
  """Load, shuffle and split the images into training and validation sets.

  Args:
    train_path: Root directory passed on to ``load_train``.
    image_size: Edge length passed on to ``load_train``.
    classes: Class names passed on to ``load_train``.
    validation_size: Size of the validation split. A float is multiplied by
      the number of loaded examples and truncated to an int; any other value
      is used directly as the slice boundary.

  Returns:
    An object with two ``DataSet`` attributes: ``train`` (everything after
    the first ``validation_size`` shuffled examples) and ``valid`` (the first
    ``validation_size`` shuffled examples).
  """
  class DataSets(object):
    pass

  # Shuffle the four parallel arrays together so they stay aligned.
  images, labels, img_names, cls = shuffle(
      *load_train(train_path, image_size, classes))

  if isinstance(validation_size, float):
    validation_size = int(validation_size * images.shape[0])

  parallel_arrays = (images, labels, img_names, cls)
  valid_parts = [arr[:validation_size] for arr in parallel_arrays]
  train_parts = [arr[validation_size:] for arr in parallel_arrays]

  data_sets = DataSets()
  data_sets.train = DataSet(*train_parts)
  data_sets.valid = DataSet(*valid_parts)
  return data_sets

新建 train.py 文件，搭建神经网络，训练，产生模型，代码如下：

import dataset
import tensorflow as tf
import time
from datetime import timedelta
import math
import random
import numpy as np
# conda install --channel https://conda.anaconda.org/menpo opencv3
#Adding Seed so that random initialization is consistent
from numpy.random import seed
seed(10)
from tensorflow import set_random_seed
set_random_seed(20)


# Number of examples fed to the network per training step.
batch_size = 32

# Prepare input data.
# The position of a name in this list is the index of its one-hot entry, so
# code that maps predictions back to names must use the same order.
classes = ['BJS','DFQ','FG','MN']
num_classes = len(classes)

# 20% of the data will automatically be used for validation
validation_size = 0.2
img_size = 64
num_channels = 3
train_path='train_data'

# We shall load all the training and validation images and labels into memory using openCV and use that during training
data = dataset.read_train_sets(train_path, img_size, classes, validation_size=validation_size)


print("Complete reading input data. Will Now print a snippet of it")
print("Number of files in Training-set:\t\t{}".format(len(data.train.labels)))
print("Number of files in Validation-set:\t{}".format(len(data.valid.labels)))



session = tf.Session()
# Input images; the placeholder is named 'x' so it can be looked up by name
# in a restored graph.
x = tf.placeholder(tf.float32, shape=[None, img_size,img_size,num_channels], name='x')

## labels
# One-hot labels, named 'y_true' for the same reason.
y_true = tf.placeholder(tf.float32, shape=[None, num_classes], name='y_true')
# Index of the true class. `axis` replaces the deprecated `dimension` keyword.
y_true_cls = tf.argmax(y_true, axis=1)



##Network graph params
filter_size_conv1 = 3
num_filters_conv1 = 32

filter_size_conv2 = 3
num_filters_conv2 = 32

filter_size_conv3 = 3
num_filters_conv3 = 64

fc_layer_size = 1024

def create_weights(shape):
    """Return a new variable of ``shape`` initialised from a truncated normal (stddev 0.05)."""
    initial_values = tf.truncated_normal(shape, stddev=0.05)
    return tf.Variable(initial_values)

def create_biases(size):
    """Return a new 1-D variable of length ``size`` with every entry set to 0.05."""
    initial_values = tf.constant(0.05, shape=[size])
    return tf.Variable(initial_values)



def create_convolutional_layer(input,
               num_input_channels,
               conv_filter_size,
               num_filters):
    """Build one convolution -> bias -> ReLU -> 2x2 max-pool stage.

    Args:
        input: Tensor fed into the convolution.
        num_input_channels: Number of channels of ``input``.
        conv_filter_size: Height and width of the square convolution kernel.
        num_filters: Number of filters (output channels) of the convolution.

    Returns:
        The tensor produced by the max-pooling step.
    """
    # Trainable kernel of shape [height, width, in_channels, out_channels]
    # and one trainable bias per filter.
    kernel_shape = [conv_filter_size, conv_filter_size, num_input_channels, num_filters]
    weights = create_weights(shape=kernel_shape)
    biases = create_biases(num_filters)

    # Stride-1 convolution with 'SAME' padding.
    convolved = tf.nn.conv2d(input=input,
                             filter=weights,
                             strides=[1, 1, 1, 1],
                             padding='SAME')

    # The activation is applied to the biased convolution output, before pooling.
    activated = tf.nn.relu(convolved + biases)

    # 2x2 max-pooling with stride 2.
    return tf.nn.max_pool(value=activated,
                          ksize=[1, 2, 2, 1],
                          strides=[1, 2, 2, 1],
                          padding='SAME')



def create_flatten_layer(layer):
    """Flatten ``layer`` into a 2-D tensor of shape ``[-1, num_features]``.

    ``num_features`` is the product of dimensions 1 to 3 of the static shape
    of ``layer``; it is computed from the tensor instead of being hard-coded.
    """
    num_features = layer.get_shape()[1:4].num_elements()
    return tf.reshape(layer, [-1, num_features])


def create_fc_layer(input,
             num_inputs,
             num_outputs,
             use_relu=True,
             keep_prob=0.7):
    """Build a fully connected layer: ``input @ W + b`` -> dropout -> optional ReLU.

    Args:
        input: 2-D tensor fed into the layer.
        num_inputs: Number of features per row of ``input``.
        num_outputs: Number of units of the layer.
        use_relu: When True, apply ReLU to the result.
        keep_prob: Keep probability handed to ``tf.nn.dropout``. The default
            of 0.7 matches the previously hard-coded value. Pass ``None`` to
            build the layer without dropout.

    Returns:
        The output tensor of the layer.

    Note:
        With a constant ``keep_prob`` the dropout op is part of the graph
        unconditionally, so it is also active whenever the layer is evaluated
        for validation or prediction. To switch it off there, pass a
        placeholder and feed 1.0 (assumes ``tf.nn.dropout`` accepts a scalar
        tensor for ``keep_prob`` - confirm for the TensorFlow version in use).
    """
    # Trainable weights and biases.
    weights = create_weights(shape=[num_inputs, num_outputs])
    biases = create_biases(num_outputs)

    # The inputs and weights are matrices, so the product is a matmul.
    layer = tf.matmul(input, weights) + biases

    if keep_prob is not None:
        layer = tf.nn.dropout(layer, keep_prob=keep_prob)

    if use_relu:
        layer = tf.nn.relu(layer)

    return layer

# Convolutional stage 1 (convolution, activation, pooling)
layer_conv1 = create_convolutional_layer(input=x,
               num_input_channels=num_channels,
               conv_filter_size=filter_size_conv1,
               num_filters=num_filters_conv1)
# Convolutional stage 2 (convolution, activation, pooling)
layer_conv2 = create_convolutional_layer(input=layer_conv1,
               num_input_channels=num_filters_conv1,
               conv_filter_size=filter_size_conv2,
               num_filters=num_filters_conv2)
# Convolutional stage 3 (convolution, activation, pooling)
layer_conv3= create_convolutional_layer(input=layer_conv2,
               num_input_channels=num_filters_conv2,
               conv_filter_size=filter_size_conv3,
               num_filters=num_filters_conv3)
# Flatten the output of the three convolutional stages so it can be fed to
# the fully connected layers.
layer_flat = create_flatten_layer(layer_conv3)
# Fully connected layer 1
layer_fc1 = create_fc_layer(input=layer_flat,
                     num_inputs=layer_flat.get_shape()[1:4].num_elements(),
                     num_outputs=fc_layer_size,
                     use_relu=True)
# Fully connected layer 2: one output (logit) per class, no ReLU
layer_fc2 = create_fc_layer(input=layer_fc1,
                     num_inputs=fc_layer_size,
                     num_outputs=num_classes,
                     use_relu=False)

# Class probabilities; named 'y_pred' so the tensor can be looked up by name
# in a restored graph.
y_pred = tf.nn.softmax(layer_fc2,name='y_pred')

# Index of the predicted class. `axis` replaces the deprecated `dimension` keyword.
y_pred_cls = tf.argmax(y_pred, axis=1)
# The loss is computed from the raw logits, not from the softmax output.
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=layer_fc2,
                                                    labels=y_true)
cost = tf.reduce_mean(cross_entropy)
optimizer = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(cost)
correct_prediction = tf.equal(y_pred_cls, y_true_cls)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))


# Initialise the variables once, after the whole graph (including the
# optimizer) has been built, so every variable in the graph is covered.
session.run(tf.global_variables_initializer())


def show_progress(epoch, feed_dict_train, feed_dict_validate, val_loss,i):
    """Print one progress line with training and validation accuracy.

    Args:
        epoch: Zero-based epoch number (printed as ``epoch + 1``).
        feed_dict_train: Feed dict used to evaluate the training accuracy.
        feed_dict_validate: Feed dict used to evaluate the validation accuracy.
        val_loss: Validation loss to include in the line.
        i: Iteration number to include in the line.
    """
    train_acc = session.run(accuracy, feed_dict=feed_dict_train)
    valid_acc = session.run(accuracy, feed_dict=feed_dict_validate)
    template = "Training Epoch {0}--- iterations: {1}--- Training Accuracy: {2:>6.1%}, Validation Accuracy: {3:>6.1%},  Validation Loss: {4:.3f}"
    print(template.format(epoch + 1, i, train_acc, valid_acc, val_loss))

# Number of optimisation steps run so far, across all calls to train().
total_iterations = 0

saver = tf.train.Saver()
def train(num_iteration):
    """Run ``num_iteration`` optimisation steps, checkpointing once per epoch.

    Each step takes one training batch and one validation batch. Whenever the
    global step is a multiple of the number of batches per epoch, the
    validation loss and the accuracies are printed and a checkpoint is
    written to ``./model/painting.ckpt-<step>``.

    Args:
        num_iteration: Number of steps to run in this call; the global step
            counter ``total_iterations`` is advanced by this amount.
    """
    global total_iterations
    # Local import: this script does not import os at module level.
    import os

    # The Saver does not create the parent directory of the checkpoint path,
    # so make sure it exists before the first save.
    os.makedirs('./model', exist_ok=True)

    # Number of whole batches per epoch; loop-invariant, so computed once.
    steps_per_epoch = int(data.train.num_examples/batch_size)

    for i in range(total_iterations,
                   total_iterations + num_iteration):

        x_batch, y_true_batch, _, _ = data.train.next_batch(batch_size)
        x_valid_batch, y_valid_batch, _, _ = data.valid.next_batch(batch_size)

        feed_dict_tr = {x: x_batch,
                           y_true: y_true_batch}
        feed_dict_val = {x: x_valid_batch,
                              y_true: y_valid_batch}

        session.run(optimizer, feed_dict=feed_dict_tr)

        if i % steps_per_epoch == 0:
            val_loss = session.run(cost, feed_dict=feed_dict_val)
            epoch = i // steps_per_epoch

            show_progress(epoch, feed_dict_tr, feed_dict_val, val_loss,i)
            saver.save(session, './model/painting.ckpt',global_step=i)

    total_iterations += num_iteration

train(num_iteration=8000)

3. 识别与分类

新建文件：predict.py，代码中加载模型，并指定待预测的文件名 fg_test_1.jpg。注意：代码中的检查点文件名（如 painting.ckpt-7998）需要改成 model 目录下实际生成的文件名。

image.png

代码如下：

import tensorflow as tf
import numpy as np
import os,glob,cv2
import sys,argparse

image_size=64
num_channels=3
images = []

# Class names in the same order as the `classes` list used for training.
res_label = ['BJS','DFQ','FG','MN']

path = 'fg_test_1.jpg'
image = cv2.imread(path)
if image is None:
    # cv2.imread returns None instead of raising when the file is missing or
    # cannot be decoded; fail here with a clear message.
    raise IOError('Cannot read image file: {}'.format(path))
# Resizing the image to our desired size and preprocessing will be done exactly as done during training
image = cv2.resize(image, (image_size, image_size),0,0, cv2.INTER_LINEAR)
images.append(image)
images = np.array(images, dtype=np.uint8)
images = images.astype('float32')
images = np.multiply(images, 1.0/255.0)
#The input to the network is of shape [None image_size image_size num_channels]. Hence we reshape.
x_batch = images.reshape(1, image_size,image_size,num_channels)

## Let us restore the saved model
sess = tf.Session()
# Step-1: Recreate the network graph. At this step only graph is created.
# NOTE: the number after "ckpt-" is the training step at which the checkpoint
# was saved; adjust it to a checkpoint that exists in ./model.
saver = tf.train.import_meta_graph('./model/painting.ckpt-7998.meta')
# Step-2: Now let's load the weights saved using the restore method.
saver.restore(sess, './model/painting.ckpt-7998')

# Accessing the default graph which we have restored
graph = tf.get_default_graph()

# Now, let's get hold of the op that we can be processed to get the output.
# In the original network y_pred is the tensor that is the prediction of the network
y_pred = graph.get_tensor_by_name("y_pred:0")

## Let's feed the images to the input placeholders
x= graph.get_tensor_by_name("x:0")
y_true = graph.get_tensor_by_name("y_true:0")
# Dummy labels: one row with one column per class.
y_test_images = np.zeros((1, len(res_label)))


### Creating the feed_dict that is required to be fed to calculate y_pred
feed_dict_testing = {x: x_batch, y_true: y_test_images}
result=sess.run(y_pred, feed_dict=feed_dict_testing)
# result holds one row with one probability per class, in res_label order;
# the class with the highest probability is printed.
print(res_label[result.argmax()])

设定分类参数

预测文件：fg_test_1.jpg，放到当前目录下

fg_test_1.jpg

预测结果如图：

预测代码执行结果

结果是：FG，表示识别成功。

备注：

目录结构如下图：

目录结构

附带窗口图形化预测代码：

所需安装模块：

pip install pillow  -i https://pypi.tuna.tsinghua.edu.cn/simple

新建文件：predict_gui.py，拷贝下面代码：

from tkinter import *
from tkinter import filedialog
from PIL import Image, ImageTk
import tensorflow as tf
import numpy as np
import cv2
import tkinter
import tkinter.messagebox

# Module-level defaults. predict() defines its own local image_size,
# num_channels and images; filepath is updated by printcoords().
image_size=64
num_channels=3
images = []
filepath = ''


## Start the session
sess = tf.Session()
# Load the graph structure of the saved model
saver = tf.train.import_meta_graph('./model/painting.ckpt-145.meta')
# Load the model weights
saver.restore(sess, './model/painting.ckpt-145')

# Get the restored (default) graph
graph = tf.get_default_graph()

if __name__ == "__main__":
    root = Tk()
    root.title('图形预测窗口')
    #setting up a tkinter canvas with scrollbars
    frame = Frame(root, bd=2, relief=SUNKEN)
    frame.grid_rowconfigure(0, weight=1)
    frame.grid_columnconfigure(0, weight=1)
    xscroll = Scrollbar(frame, orient=HORIZONTAL)
    xscroll.grid(row=1, column=0, sticky=E+W)
    yscroll = Scrollbar(frame)
    yscroll.grid(row=0, column=1, sticky=N+S)
    canvas = Canvas(frame, bd=0, xscrollcommand=xscroll.set, yscrollcommand=yscroll.set)
    canvas.grid(row=0, column=0, sticky=N+S+E+W)
    xscroll.config(command=canvas.xview)
    yscroll.config(command=canvas.yview)
    frame.pack(fill=BOTH,expand=1)

    def printcoords():
        """Ask the user for an image file, show it and remember its path."""
        global filepath
        File = filedialog.askopenfilename(parent=root, initialdir="D:/",title='Choose an image.')
        if not File:
            # The dialog was cancelled: keep the current image and path.
            return
        filename = ImageTk.PhotoImage(Image.open(File))
        # Keep a reference on the canvas so the image object stays alive.
        canvas.image = filename
        canvas.create_image(0,0,anchor='nw',image=filename)
        filepath =  File

    def predict():
        """Classify the selected image and show the result in a message box."""
        image_size = 64
        num_channels = 3
        images = []
        res_label = ['这幅画作者毕加索','这幅画作者达芬奇', '这幅画作者梵高', '这幅画作者莫奈']

        path = filepath
        if not path:
            # No image has been selected yet.
            tkinter.messagebox.showinfo("图形预测结果", "请先选择图片")
            return
        print(path)
        # np.fromfile + cv2.imdecode is used instead of cv2.imread because
        # cv2.imread does not support Chinese (non-ASCII) paths.
        # IMREAD_COLOR always yields 3 channels, which the reshape below and
        # the training preprocessing (cv2.imread default) both expect.
        image = cv2.imdecode(np.fromfile(path,dtype=np.uint8), cv2.IMREAD_COLOR)
        if image is None:
            # The file could not be decoded as an image.
            tkinter.messagebox.showinfo("图形预测结果", "无法读取该图片")
            return

        image = cv2.resize(image, (image_size, image_size), 0, 0, cv2.INTER_LINEAR)
        images.append(image)
        images = np.array(images, dtype=np.uint8)
        images = images.astype('float32')
        images = np.multiply(images, 1.0 / 255.0)

        x_batch = images.reshape(1, image_size, image_size, num_channels)

        # Look up the tensors by the names given to them in the saved graph.
        y_pred = graph.get_tensor_by_name("y_pred:0")
        x = graph.get_tensor_by_name("x:0")
        y_true = graph.get_tensor_by_name("y_true:0")
        # Dummy labels: one row with one column per class.
        y_test_images = np.zeros((1, len(res_label)))

        feed_dict_testing = {x: x_batch, y_true: y_test_images}

        # Run the network on the image.
        result = sess.run(y_pred, feed_dict=feed_dict_testing)

        tkinter.messagebox.showinfo("图形预测结果",res_label[result.argmax()])

    Button(root, text='2、图形预测', command=predict).pack(side=RIGHT)
    Button(root,text='1、选择图片',command=printcoords).pack(side=RIGHT)
    label = Label(root, text='请依次点击按钮>>>>>>')
    label.pack(side=RIGHT)
    root.mainloop()

界面截图：

image.png