import numpy as np
import random
def gradientDescent(X, Y, theta, alpha, m, numIteration):
    """Fit linear-regression parameters by batch gradient descent.

    Args:
        X: (m, n) design matrix of training samples.
        Y: (m,) vector of target values.
        theta: (n,) initial parameter vector.
        alpha: learning rate — how far each update steps along the gradient.
        m: number of training samples (rows of X).
        numIteration: number of gradient-descent steps to run.

    Returns:
        The (n,) parameter vector after numIteration updates.
    """
    x_trains = X.transpose()  # X^T is loop-invariant; compute it once
    for i in range(numIteration):
        hypothesis = np.dot(X, theta)  # predicted targets, X @ theta
        loss = hypothesis - Y          # residuals: prediction minus truth
        # Mean-squared-error cost J = sum(loss^2) / (2m).
        # NOTE: this is the generic least-squares cost, not the logistic
        # regression cost the surrounding project may describe elsewhere.
        cost = np.sum(loss ** 2) / (2 * m)
        print("Iteration %d | Cost:%f" % (i, cost))
        gradient = np.dot(x_trains, loss) / m  # dJ/dtheta
        theta = theta - alpha * gradient
    return theta
def genData(numPoints, bias, variance):
    """Generate a toy linear dataset with uniform noise.

    Args:
        numPoints: number of sample points to create.
        bias: intercept added to every target value.
        variance: scale applied to the uniform(0, 1) noise on each target.

    Returns:
        A pair (X, Y): X is a (numPoints, 2) design matrix whose first
        column is the constant 1 (intercept term) and whose second column
        is the sample index 0..numPoints-1; Y is the (numPoints,) target
        vector with Y[i] = i + bias + uniform(0, 1) * variance.
    """
    X = np.zeros(shape=(numPoints, 2))
    Y = np.zeros(shape=numPoints)
    X[:, 0] = 1                     # intercept column of ones
    X[:, 1] = np.arange(numPoints)  # feature is simply the sample index
    for i in range(numPoints):
        # Target = linear trend (i + bias) plus scaled uniform noise.
        # random.uniform is kept per-point to preserve the original
        # sampling behavior of the `random` module.
        Y[i] = (i + bias) + random.uniform(0, 1) * variance
    return X, Y
def main():
    """Generate sample data, fit theta by gradient descent, and print results."""
    X, Y = genData(100, 25, 10)
    print("X:", X)
    print("Y:", Y)
    m, n = np.shape(X)
    print("x shape :", m, " ", n)
    print("y length :", np.shape(Y))
    numIterations = 100000
    alpha = 0.0005
    theta = np.ones(n)
    theta = gradientDescent(X, Y, theta, alpha, m, numIterations)
    print("theta: ", theta)


# Guard the demo so importing this module does not trigger the (long,
# noisy) training run as a side effect.
if __name__ == "__main__":
    main()