问题
现在有很多的图片,里面分别有猫与狗,识别这些图片,区分猫与狗
设计解决这个问题的思路
1、下载与放置训练图片
2、下载对应的依赖,tensorflow、numpy等等
3、解析文件名,识别dog还是cat
4、建模
5、对模型进行训练
6、用验证集对模型进行验证
7、输出结果
8、优化模型,然后回到第 4 步
【1】图片地址
https://www.kaggle.com/c/dogs-vs-cats/data
下载数据:官方地址下载速度比较慢,可以使用网盘。
网盘地址(提取码:lhrr)
【2】处理训练集的数据结构
import os

import pandas as pd

# List every file in the training folder.
filenames = os.listdir('./dogs-vs-cats/train')

# Animal type of each picture: the part of the file name before the first
# dot (the tutorial steps say this distinguishes dog from cat).
categories = [filename.split('.')[0] for filename in filenames]

# Structured data: one row per picture with its file name and its category.
df = pd.DataFrame({
    'filename': filenames,
    'category': categories,
})
# Show the structured data.
import random

import matplotlib.pyplot as plt
from keras.preprocessing import image

# Look at the result of the structuring step: first rows, last rows and the
# number of pictures per category (printed, then drawn as a bar chart).
print(df.head())
print(df.tail())
category_counts = df['category'].value_counts()
print(category_counts)
category_counts.plot(kind='bar')
plt.show()

# Display one randomly chosen training picture.
sample = random.choice(filenames)
# Keep the loaded picture under its own name: assigning it to ``image`` would
# replace the imported module and break any later ``image.load_img`` call.
sample_image = image.load_img('./dogs-vs-cats/train/' + sample)
plt.imshow(sample_image)
plt.show()
【3】划分出训练集与验证集
# Split the labelled data into a training set and a validation set.
# ``train_test_split`` was called without ever being imported; it is the
# scikit-learn helper, so bring it into scope here.
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for validation; the fixed random_state makes the
# split reproducible between runs.
train_df, validate_df = train_test_split(df, test_size=0.20, random_state=42)

# Renumber both frames from 0 and drop the old index.
train_df = train_df.reset_index(drop=True)
validate_df = validate_df.reset_index(drop=True)
print(train_df.head())
print(validate_df.head())

# Number of rows (pictures) in each subset.
total_train = train_df.shape[0]
total_validate = validate_df.shape[0]
print("Total number of example in training dataset : {0}".format(total_train))
print("Total number of example in validation dataset : {0}".format(total_validate))
【4】创建模型
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, BatchNormalization, Flatten,Dropout
from tensorflow.keras import optimizers
## 创建第一个模型
class Model:
    """Builder for the first cats-vs-dogs convolutional network.

    Args:
        IMG_WIDTH: Width, in pixels, of the images fed to the network.
        IMG_HEIGHT: Height, in pixels, of the images fed to the network.
        IMG_CHANNELS: Number of colour channels of the images.
    """

    def __init__(self, IMG_WIDTH, IMG_HEIGHT, IMG_CHANNELS):
        self.IMG_WIDTH = IMG_WIDTH
        self.IMG_HEIGHT = IMG_HEIGHT
        self.IMG_CHANNELS = IMG_CHANNELS

    def create_model(self):
        """Build and compile the network.

        Four Conv2D + MaxPooling2D stages (32, 64, 128, 128 filters) are
        followed by Flatten, a 512-unit dense layer and a single sigmoid
        output unit.

        Returns:
            The compiled ``Sequential`` model (binary cross-entropy loss,
            RMSprop optimizer with learning rate 1e-4, metric ``'acc'``).
        """
        model = Sequential()

        # Stage 1: 2D convolution with 32 filters and a 3x3 window. The input
        # shape comes from the constructor arguments instead of a hard-coded
        # (150, 150, 3), ordered (height, width, channels).
        model.add(Conv2D(32, (3, 3), activation='relu',
                         kernel_initializer='he_uniform', padding='same',
                         input_shape=(self.IMG_HEIGHT, self.IMG_WIDTH,
                                      self.IMG_CHANNELS)))
        # Max pooling over a 2x2 window.
        model.add(MaxPooling2D((2, 2)))

        # Stage 2
        model.add(Conv2D(64, (3, 3), activation='relu'))
        model.add(MaxPooling2D((2, 2)))

        # Stage 3
        model.add(Conv2D(128, (3, 3), activation='relu'))
        model.add(MaxPooling2D((2, 2)))

        # Stage 4
        model.add(Conv2D(128, (3, 3), activation='relu'))
        model.add(MaxPooling2D((2, 2)))

        # Flatten turns the multi-dimensional feature maps into one vector.
        model.add(Flatten())

        # Fully connected layers; the single sigmoid unit is the binary output.
        model.add(Dense(512, activation='relu'))
        model.add(Dense(1, activation='sigmoid'))

        # Loss and optimizer. ``optimizers`` is the file-level
        # ``tensorflow.keras`` import: the former in-method
        # ``from keras import optimizers`` shadowed it and mixed two Keras
        # packages in one model. ``learning_rate`` replaces the deprecated
        # ``lr`` keyword.
        model.compile(loss='binary_crossentropy',
                      optimizer=optimizers.RMSprop(learning_rate=1e-4),
                      metrics=['acc'])
        return model
【5】训练模型
# Initialise the model.
IMG_WIDTH = 150
IMG_HEIGHT = 150
IMG_CHANNELS = 3
model = Model(IMG_WIDTH, IMG_HEIGHT, IMG_CHANNELS)
model_1 = model.create_model()
model_1.summary()

from keras.preprocessing.image import ImageDataGenerator

# Pixel values arrive in the 0-255 range; rescale them into 0-1.
train_imgdatagen = ImageDataGenerator(rescale=1./255)
valid_imgdatagen = ImageDataGenerator(rescale=1./255)

# Settings shared by both generators. target_size is derived from the same
# constants that size the model input, so the two cannot drift apart; it is
# ordered (height, width).
flow_options = dict(
    directory="./dogs-vs-cats/train",
    x_col='filename',
    y_col='category',
    target_size=(IMG_HEIGHT, IMG_WIDTH),  # resize every image on load
    batch_size=64,
    class_mode='binary',
)

# Both subsets read their pictures from the same training folder; only the
# dataframe that lists the files differs.
train_generator_m1 = train_imgdatagen.flow_from_dataframe(
    train_df, **flow_options)
validation_generator_m1 = valid_imgdatagen.flow_from_dataframe(
    validate_df, **flow_options)
import numpy as np

# Train model 1: 30 epochs of 100 training steps each, with 50 validation
# steps drawn from the validation generator.
fit_options = {
    'epochs': 30,
    'steps_per_epoch': 100,
    'validation_data': validation_generator_m1,
    'validation_steps': 50,
}
history_1 = model_1.fit(train_generator_m1, **fit_options)

# Save the trained model to disk.
model_1.save('model_1.h5')
【6】打印训练结果
# Mean accuracy over all recorded epochs: training first, then validation.
for metric_name in ('acc', 'val_acc'):
    print(np.mean(history_1.history[metric_name]))
【7】形成图像结果
# Accuracy per epoch: training in black, validation in blue.
plt.plot(history_1.history['acc'], color='black')
plt.plot(history_1.history['val_acc'], color='blue')
plt.title('Training and validation accuracy of model 1')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.show()

# Loss per epoch: training in black, validation in blue.
plt.plot(history_1.history['loss'], color='black')
plt.plot(history_1.history['val_loss'], color='blue')
plt.title('Training and validation loss of model 1')
plt.xlabel('Epochs')
# This figure shows the loss, so the axis is labelled accordingly.
plt.ylabel('Loss')
plt.show()