Boston housing price prediction: using a single attribute (number of rooms) to predict price
- Load the dataset
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

# Load the Boston housing dataset bundled with Keras
boston_housing = tf.keras.datasets.boston_housing
(train_x, train_y), (test_x, test_y) = boston_housing.load_data()
train_x.shape, train_y.shape
((404, 13), (404,))
test_x.shape, test_y.shape
((102, 13), (102,))
- Data preparation
x_train = train_x[:, 5]   # keep only feature column 5
y_train = train_y
x_train.shape, y_train.shape
((404,), (404,))
x_test = test_x[:, 5]
y_test = test_y
x_test.shape, y_test.shape
((102,), (102,))
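Column index 5 is RM, the average number of rooms per dwelling, under the conventional ordering of the Boston dataset's 13 features. Keras does not ship feature names, so the list below assumes the usual UCI ordering:

feature_names = ["CRIM", "ZN", "INDUS", "CHAS", "NOX", "RM", "AGE",
                 "DIS", "RAD", "TAX", "PTRATIO", "B", "LSTAT"]
print(feature_names[5])   # RM: average number of rooms per dwelling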
- Set hyperparameters
learn_rate = 0.04
iter = 2000          # total iterations (note: shadows Python's built-in iter)
display_step = 200   # print losses every 200 iterations
- Initialize model parameters
np.random.seed(612)
w = tf.Variable(np.random.randn())
b = tf.Variable(np.random.randn())
w.numpy().dtype, b.numpy().dtype
(dtype('float32'), dtype('float32'))
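For this quadratic loss the gradients have simple closed forms, so the values GradientTape produces in the training loop below can be checked by hand. A minimal numpy sketch (an addition, not part of the original code):

# Analytic gradients of L = 0.5 * mean((w*x + b - y)^2):
#   dL/dw = mean((w*x + b - y) * x),  dL/db = mean(w*x + b - y)
def manual_grads(w_val, b_val, x, y):
    err = w_val * x + b_val - y
    return np.mean(err * x), np.mean(err)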
- Train the model
mse_train = []
mse_test = []
for i in range(iter + 1):
    with tf.GradientTape() as tape:
        pred_train = w * x_train + b
        loss_train = 0.5 * tf.reduce_mean(tf.square(y_train - pred_train))
        pred_test = w * x_test + b
        loss_test = 0.5 * tf.reduce_mean(tf.square(y_test - pred_test))
    mse_train.append(loss_train)
    mse_test.append(loss_test)
    dL_dw, dL_db = tape.gradient(loss_train, [w, b])
    w.assign_sub(learn_rate * dL_dw)   # w <- w - lr * dL/dw
    b.assign_sub(learn_rate * dL_db)   # b <- b - lr * dL/db
    if i % display_step == 0:
        print("i:%i, Train Loss:%f, Test Loss: %f" % (i, loss_train, loss_test))
i:0, Train Loss:23.585236, Test Loss: 20.503534
i:200, Train Loss:23.419767, Test Loss: 20.243628
i:400, Train Loss:23.283920, Test Loss: 20.020529
i:600, Train Loss:23.172400, Test Loss: 19.828562
i:800, Train Loss:23.080847, Test Loss: 19.662981
i:1000, Train Loss:23.005686, Test Loss: 19.519815
i:1200, Train Loss:22.943977, Test Loss: 19.395727
i:1400, Train Loss:22.893328, Test Loss: 19.287909
i:1600, Train Loss:22.851740, Test Loss: 19.194019
i:1800, Train Loss:22.817598, Test Loss: 19.112064
i:2000, Train Loss:22.789570, Test Loss: 19.040365
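Both losses are still falling slowly at i=2000, so the fit has not fully converged. Since single-variable least squares has a closed-form optimum, np.polyfit offers a quick sanity check (an addition, not part of the original code):

# Compare the closed-form least-squares line with the trained parameters
w_opt, b_opt = np.polyfit(x_train, y_train, 1)
print("closed-form w: %f, b: %f  vs  trained w: %f, b: %f"
      % (w_opt, b_opt, w.numpy(), b.numpy()))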
- Visualize the results
plt.figure(figsize=(15, 10))

# Panel 1: training data and the fitted line
plt.subplot(221)
plt.scatter(x_train, y_train, color="blue", label="data")
plt.plot(x_train, pred_train, color="red", label="model")
plt.legend(loc="upper left")

# Panel 2: loss curves
plt.subplot(222)
plt.plot(mse_train, color="blue", linewidth=3, label="train loss")
plt.plot(mse_test, color="red", linewidth=1.5, label="test loss")
plt.legend(loc="upper right")

# Panel 3: true vs. predicted prices on the training set
plt.subplot(223)
plt.plot(y_train, color="blue", marker="o", label="true_price")
plt.plot(pred_train, color="red", marker=".", label="predict")
plt.legend()

# Panel 4: true vs. predicted prices on the test set
plt.subplot(224)
plt.plot(y_test, color="blue", marker="o", label="true_price")
plt.plot(pred_test, color="red", marker=".", label="predict")
plt.legend()

plt.show()
Multiple linear regression on Boston housing prices: predicting from all attributes together
- Normalize the 2-D feature array (column-wise min-max)
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

boston_housing = tf.keras.datasets.boston_housing
(train_x, train_y), (test_x, test_y) = boston_housing.load_data()
train_x.shape, train_y.shape
((404, 13), (404,))
test_x.shape, test_y.shape
((102, 13), (102,))

num_train = len(train_x)
num_test = len(test_x)

# Min-max normalization, computed per feature column
x_train = (train_x - train_x.min(axis=0)) / (train_x.max(axis=0) - train_x.min(axis=0))
y_train = train_y
x_test = (test_x - test_x.min(axis=0)) / (test_x.max(axis=0) - test_x.min(axis=0))
y_test = test_y

# Prepend a column of ones so the bias term is absorbed into the weight vector
x0_train = np.ones(num_train).reshape(-1, 1)
x0_test = np.ones(num_test).reshape(-1, 1)
X_train = tf.cast(tf.concat([x0_train, x_train], axis=1), tf.float32)
X_test = tf.cast(tf.concat([x0_test, x_test], axis=1), tf.float32)
X_train.shape, X_test.shape
(TensorShape([404, 14]), TensorShape([102, 14]))

Y_train = tf.constant(y_train.reshape(-1, 1), tf.float32)
Y_test = tf.constant(y_test.reshape(-1, 1), tf.float32)
Y_train.shape, Y_test.shape
(TensorShape([404, 1]), TensorShape([102, 1]))
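One caveat: the code above normalizes the test set with its own min/max. A common refinement, assumed here as best practice rather than taken from the original, is to reuse the training set's statistics so both splits share the same scale:

# Hypothetical alternative: normalize test features with the *training* min/max
x_min, x_max = train_x.min(axis=0), train_x.max(axis=0)
x_test_alt = (test_x - x_min) / (x_max - x_min)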
- Set hyperparameters
learn_rate = 0.01
iter = 2500
display_step = 100
- Initialize model variables
np.random.seed(612)
W = tf.Variable(np.random.randn(14, 1), dtype=tf.float32)   # 13 feature weights + 1 bias
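Because the bias column is already folded into X_train, the least-squares optimum has the closed form W* = (X^T X)^(-1) X^T Y, which gradient descent should approach. A cross-check sketch (an addition, not part of the original code):

# Closed-form solution via the normal equations
W_opt = tf.linalg.solve(tf.matmul(X_train, X_train, transpose_a=True),
                        tf.matmul(X_train, Y_train, transpose_a=True))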
- Train the model
mse_train = []
mse_test = []
for i in range(iter + 1):
    with tf.GradientTape() as tape:
        PRED_train = tf.matmul(X_train, W)
        Loss_train = 0.5 * tf.reduce_mean(tf.square(Y_train - PRED_train))
        PRED_test = tf.matmul(X_test, W)
        Loss_test = 0.5 * tf.reduce_mean(tf.square(Y_test - PRED_test))
    mse_train.append(Loss_train)
    mse_test.append(Loss_test)
    dL_dW = tape.gradient(Loss_train, W)
    W.assign_sub(learn_rate * dL_dW)   # W <- W - lr * dL/dW
    if i % display_step == 0:
        print("i:%i, Train Loss:%f, Test Loss: %f" % (i, Loss_train, Loss_test))
i:0, Train Loss:263.193451, Test Loss: 276.994110
i:100, Train Loss:44.476345, Test Loss: 47.471565
i:200, Train Loss:36.176552, Test Loss: 37.562954
i:300, Train Loss:31.584023, Test Loss: 32.202713
i:400, Train Loss:28.789461, Test Loss: 28.952513
i:500, Train Loss:26.911528, Test Loss: 26.827421
i:600, Train Loss:25.520697, Test Loss: 25.333916
i:700, Train Loss:24.405626, Test Loss: 24.216911
i:800, Train Loss:23.460522, Test Loss: 23.340532
i:900, Train Loss:22.630890, Test Loss: 22.629452
i:1000, Train Loss:21.887278, Test Loss: 22.039747
i:1100, Train Loss:21.212658, Test Loss: 21.544201
i:1200, Train Loss:20.596283, Test Loss: 21.124847
i:1300, Train Loss:20.030684, Test Loss: 20.769012
i:1400, Train Loss:19.510204, Test Loss: 20.467239
i:1500, Train Loss:19.030268, Test Loss: 20.212141
i:1600, Train Loss:18.587009, Test Loss: 19.997717
i:1700, Train Loss:18.177067, Test Loss: 19.818949
i:1800, Train Loss:17.797461, Test Loss: 19.671591
i:1900, Train Loss:17.445545, Test Loss: 19.551966
i:2000, Train Loss:17.118927, Test Loss: 19.456863
i:2100, Train Loss:16.815464, Test Loss: 19.383459
i:2200, Train Loss:16.533216, Test Loss: 19.329273
i:2300, Train Loss:16.270426, Test Loss: 19.292068
i:2400, Train Loss:16.025497, Test Loss: 19.269897
i:2500, Train Loss:15.797002, Test Loss: 19.260986
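At i=2500 the training loss is still dropping while the test loss has nearly flattened, an early sign of diminishing returns. A minimal post-hoc diagnostic (an addition, not in the original code) finds the iteration with the lowest recorded test loss, a natural early-stopping point; here it would be the final iteration, meaning the model has not yet begun to overfit within 2500 steps:

mse_test_np = [m.numpy() for m in mse_test]
best_iter = int(np.argmin(mse_test_np))
print("lowest test loss %f at iteration %d" % (mse_test_np[best_iter], best_iter))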