tensorflow--循环神经网络RNN

目标：用RNN实现连续数据的预测（以股票预测为例）

有些数据是跟时间相关的，是可以根据上文预测出下文的。

1、循环核：参数时间共享，循环层提取时间信息。

循环核具有记忆力，通过不同时刻的参数共享，实现了对时间序列的信息提取

循环核2.png

可以通过设定记忆体的个数，改变记忆容量。当记忆体个数被指定，输入xt、输出yt维度被指定，周围这些待训练参数的维度也就被限定了。

ht：记忆体内当前时刻存储的状态信息
xt：当前时刻输入特征
ht-1：记忆体上一时刻存储的状态信息
yt：当前时刻循环核的输出特征

1.1、循环核按时间步展开

循环核time.png

按时间步展开，就是把循环核按照时间轴方向展开。每个时刻记忆体状态信息ht被刷新，记忆体周围的参数矩阵wxh、whh和why是固定不变的。要训练优化的就是这些参数矩阵。训练完成后，使用效果最好的参数矩阵，执行前向传播，输出预测结果。循环神经网络，就是借助循环核提取时间特征后，送入全连接网络，实现连续数据的预测。

1.2、循环计算层：向输出方向生长

每个循环核构成一层循环计算层。循环计算层的层数是向输出方向增长的。

1.3、TF描述循环计算层

tf.keras.layers.SimpleRNN(
    '记忆体个数',
    activation='tanh',  # 激活函数
    return_sequences=False  # 是否每个时刻都输出ht到下一层：True表示每个时间步都输出ht，False（默认）表示仅最后一个时间步输出ht
)

return_seq_false.png

return_seq_true.png

送入RNN时，x_train维度：
[送入样本数，循环核时间展开数，每个时间步输入特征个数]

1.4、字母（One hot编码）预测

import tensorflow as tf
import numpy as np
from tensorflow.keras.layers import Dense, SimpleRNN
import matplotlib.pyplot as plt
import os


input_words = "abcde"

# Letter -> integer id, and integer id -> one-hot vector (one slot per letter).
w_to_d = {letter: idx for idx, letter in enumerate(input_words)}
id_to_onehot = {
    idx: [1.0 if pos == idx else 0.0 for pos in range(len(input_words))]
    for idx in range(len(input_words))
}

# Inputs are the letters in order; each label is the id of the next letter,
# wrapping around so that "e" is followed by "a".
x_train = [id_to_onehot[w_to_d[letter]] for letter in input_words]
y_train = [
    w_to_d[input_words[(pos + 1) % len(input_words)]]
    for pos in range(len(input_words))
]

# Re-seeding before each shuffle applies the same permutation to both lists,
# so every input stays aligned with its label.
for _seq in (x_train, y_train):
    np.random.seed(8)
    np.random.shuffle(_seq)
tf.random.set_seed(8)

# Bring x_train into the layout SimpleRNN expects:
# [number of samples, number of unrolled time steps, features per time step].
# Here each sample is a single time step carrying one 5-element one-hot vector.
x_train = np.asarray(x_train).reshape(len(x_train), 1, 5)
y_train = np.asarray(y_train)

# Network: one recurrent layer followed by a 5-way softmax classifier.
model = tf.keras.Sequential()
model.add(SimpleRNN(3))  # 3 memory units (more units remember more but cost more resources)
model.add(Dense(5, activation="softmax"))

# Labels are integer ids and the last layer already applies softmax, hence the
# sparse categorical cross-entropy with from_logits=False.
model.compile(
    optimizer=tf.keras.optimizers.Adam(0.01),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    metrics=["sparse_categorical_accuracy"],
)

check_point_save_path = "./checkpoint_rnn/rnn_onehot_1pre1.ckpt"

# Resume from an earlier run: the presence of the companion ".index" file is
# used as the signal that weights were saved at this path before.
if os.path.exists(check_point_save_path + ".index"):
    print("******load model******")
    # Fixed: the method is `load_weights`; the former `load_wights` raised
    # AttributeError as soon as a checkpoint existed.
    model.load_weights(check_point_save_path)

# Persist weights only, and only when the monitored training loss improves
# (this script passes no validation data to fit()).
cp_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=check_point_save_path,
    save_weights_only=True,
    save_best_only=True,
    monitor="loss"
)

# Train; `history` keeps the per-epoch loss and accuracy used for the plots below.
history = model.fit(x_train, y_train, batch_size=32, epochs=100, callbacks=[cp_callback])

model.summary()

# Dump every trainable variable (name, shape, values) to a text file for inspection.
with open("./rnn_weight.txt", "w") as f:
    for v in model.trainable_variables:
        # print() stringifies each item and terminates it with a newline,
        # producing one line group per field.
        print(v.name, v.shape, v.numpy(), sep="\n", file=f)


# Per-epoch training metrics recorded by model.fit().
acc = history.history["sparse_categorical_accuracy"]
loss = history.history["loss"]

# Left panel: training accuracy.
plt.subplot(1, 2, 1)
plt.plot(acc, label="Training acc")
plt.title("training acc")  # fixed: this panel was mislabelled "training loss"
plt.legend()

# Right panel: training loss.
plt.subplot(1, 2, 2)
plt.plot(loss, label="training loss")
plt.title("training loss")
plt.legend()
plt.show()

# Interactive check: ask how many letters to test, then predict the letter
# that follows each one typed by the user.
preNum = int(input("input the number of test alphabet: "))
for _ in range(preNum):
    alpha = input("input test alphabet")
    # Encode the letter as one sample with one time step and 5 features,
    # the same layout used for training.
    alphabet = [id_to_onehot[w_to_d[alpha]]]
    alphabet = np.reshape(alphabet, (1, 1, 5))
    result = model.predict([alphabet])
    # argmax over the class axis yields a length-1 tensor; take its single
    # element explicitly before converting to a Python int.
    pred = int(tf.argmax(result, axis=1)[0])
    # Fixed: print the typed letter (`alpha`). The original concatenated the
    # numeric array `alphabet` with a string, which fails.
    tf.print(alpha + "->" + input_words[pred])

1.5、 Embedding--一种编码的方法

由于独热码（one-hot）的位宽要与词汇量一致，如果词汇量增大时，非常浪费资源。因此，自然语言处理中，有专门一个方向在研究单词的编码。Embedding是一种单词编码的方法，用低维向量实现了编码。这种编码可以通过神经网络训练优化，能表达出单词间的相关性。

tf.keras.layers.Embedding(
    词汇量大小,  # 编码一共要表示多少个单词
    编码维度  # 用几个数字表达一个单词
)

送入Embedding时，x_train维度：
[送入样本数，循环核时间展开步数]

1.6 、字母（Embedding编码）预测

# Embedding is not among the layers imported by the earlier snippet, so it is
# imported here to keep this model definition runnable.
from tensorflow.keras.layers import Embedding

# Same letter-prediction network, except that each letter id is first mapped
# to a trainable 2-dimensional vector (vocabulary size 5) instead of a
# one-hot vector.
model2 = tf.keras.Sequential([
    Embedding(5, 2),
    SimpleRNN(3),
    Dense(5, activation="softmax")
])

# 其余代码同上

1.7、用RNN实现股票预测

import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
from tensorflow.keras.layers import Dense, Dropout, SimpleRNN
import os
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
import math


maotai = pd.read_csv("./SH600519.csv")  # load the stock price file

# Column index 2 holds the opening price. The first 2126 rows form the
# training set and the remaining rows form the test set.
training_set = maotai.iloc[0:2126, 2:3].values
# Fixed: take `.values` here too, so the scaler is fitted and applied on the
# same kind of input (plain ndarrays) instead of ndarray vs. DataFrame.
test_set = maotai.iloc[2126:, 2:3].values


# Normalisation
sc = MinMaxScaler(feature_range=(0, 1))  # scale values into the range (0, 1)
training_set_scaled = sc.fit_transform(training_set)  # fit the scaler on the training set only
test_set = sc.transform(test_set)  # reuse the training-set statistics for the test set

# Containers for the test windows, which are filled further below.
x_test = []
y_test = []

# Sliding window over the scaled training prices: 60 consecutive opening
# prices are the input features and the price on the 61st day is the label.
x_train = [
    training_set_scaled[end - 60:end, 0]
    for end in range(60, len(training_set_scaled))
]
y_train = [
    training_set_scaled[end, 0]
    for end in range(60, len(training_set_scaled))
]

# Re-seeding before each shuffle applies the same permutation to inputs and
# labels, so the pairs stay aligned.
for _seq in (x_train, y_train):
    np.random.seed(8)
    np.random.shuffle(_seq)
tf.random.set_seed(8)

x_train = np.array(x_train)
y_train = np.array(y_train)

# RNN input layout: [number of samples, unrolled time steps, features per step].
# Every sample has 60 time steps and each step carries a single value
# (one day's opening price), so the feature count per step is 1.
x_train = x_train.reshape(x_train.shape[0], 60, 1)

# Build the test windows the same way as the training windows:
# 60 consecutive scaled prices as input, the following price as the label.
x_test = np.array([test_set[end - 60:end, 0] for end in range(60, len(test_set))])
y_test = np.array([test_set[end, 0] for end in range(60, len(test_set))])

# Same [samples, time steps, features per step] layout as x_train.
x_test = x_test.reshape(x_test.shape[0], 60, 1)


# Two stacked recurrent layers with dropout, ending in a single regression output.
model = tf.keras.Sequential()
# return_sequences=True makes this layer emit its state at every time step,
# which the next recurrent layer consumes as its input sequence.
model.add(SimpleRNN(80, return_sequences=True))
model.add(Dropout(0.2))
model.add(SimpleRNN(100))
model.add(Dropout(0.2))
model.add(Dense(1))


# The target is a price value, so the model is trained with mean squared
# error and no accuracy metric.
model.compile(
    optimizer=tf.keras.optimizers.Adam(0.001),
    loss="mean_squared_error",
)

check_point_save_path = "./checkpoint_stock/rnn_stock.ckpt"

# Resume from saved weights when the checkpoint's ".index" file is present.
if os.path.exists(f"{check_point_save_path}.index"):
    print("******load model******")
    model.load_weights(check_point_save_path)

# Keep only the weights of the epoch with the lowest validation loss.
cp_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=check_point_save_path,
    monitor="val_loss",
    save_best_only=True,
    save_weights_only=True,
)

# Train, evaluating on the test windows after every epoch.
history = model.fit(
    x_train,
    y_train,
    batch_size=64,
    epochs=50,
    validation_data=(x_test, y_test),
    validation_freq=1,
    callbacks=[cp_callback],
)

model.summary()

# Dump every trainable variable (name, shape, values) to a text file for inspection.
with open("./rnn__stock_weight.txt", "w") as f:
    for v in model.trainable_variables:
        # print() stringifies each item and terminates it with a newline.
        print(v.name, v.shape, v.numpy(), sep="\n", file=f)


# Per-epoch training and validation loss recorded by model.fit().
loss = history.history["loss"]
val_loss = history.history["val_loss"]

# Draw both curves on the same axes so they can be compared directly.
for _curve, _label in ((loss, "Training Loss"), (val_loss, "Validation Loss")):
    plt.plot(_curve, label=_label)
plt.title("Training and Validation Loss")
plt.legend()
plt.show()

# Run the trained model on the test windows.
predictd_stock_price = model.predict(x_test)

# Undo the normalisation so the predictions are back in the original price scale.
predictd_stock_price = sc.inverse_transform(predictd_stock_price)

# Undo the normalisation for the true prices as well. The first 60 test rows
# only serve as input history and have no prediction, so they are skipped.
real_stock_price = sc.inverse_transform(test_set[60:])

# Plot the real prices against the predicted prices.
plt.plot(real_stock_price, color="red", label="MaoTai Stock Price")
plt.plot(predictd_stock_price, color="blue", label="Predicted MaoTai Stock Price")
plt.title("MaoTai Stock Price Prediction")  # fixed typo: was "Rrediction"
plt.xlabel("Time")
plt.ylabel("MaoTai Stock Price")
plt.legend()
plt.show()


# Evaluate the model: mean squared error, root mean squared error and
# mean absolute error, all in the original price scale.
# Arguments are passed in scikit-learn's documented (y_true, y_pred) order.
# Both metrics are symmetric, so the reported values are unchanged.
mse = mean_squared_error(real_stock_price, predictd_stock_price)
rmse = math.sqrt(mse)
mae = mean_absolute_error(real_stock_price, predictd_stock_price)
print("均方误差： %.6f" % mse)
print("均方根误差： %.6f" % rmse)
print("平均绝对误差： %.6f" % mae)

部分结果如下：

均方误差： 2500.951841
均方根误差： 50.009518
平均绝对误差： 45.223081