Andrew Ng Machine Learning: Linear Regression Programming Exercise

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
path = r'F:\机器学习入门\黄海广-机器学习\Coursera-ML-AndrewNg-Notes-master\code\ex1-linear regression\ex1data1.txt'  # the dataset can be found on GitHub
data = pd.read_csv(path, header=None, names=['Population', 'Profit'])
data.head()
# Take a look at how the data is distributed
data.plot(kind='scatter', x='Population', y='Profit', figsize=(10,6))
plt.show()
# Gradient descent for linear regression (the formulas are written out below the cost function)
def computeCost(X, y, theta):
    # Computes the cost J(theta); X is the design matrix (one row per example)
    inner = np.power((X * theta.T - y), 2)
    return np.sum(inner) / (2 * len(X))
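For reference, these are the standard formulas from the course that the code implements, written out here since the original image is omitted: the cost function

J(\theta) = \frac{1}{2m} \sum_{i=1}^{m} \left( \theta^T x^{(i)} - y^{(i)} \right)^2

and the batch gradient descent update, applied simultaneously to every parameter theta_j:

\theta_j := \theta_j - \frac{\alpha}{m} \sum_{i=1}^{m} \left( \theta^T x^{(i)} - y^{(i)} \right) x_j^{(i)}

where m is the number of training examples and alpha is the learning rate.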
# The data is loaded; next we prepend a column of ones to X (the intercept term x0) so the theta update works, initialize theta to zeros, set the learning rate to 0.01, and iterate 1500 times
data.insert(0,"Ones",1)
# Initialize X and y
cols = data.shape[1]
X = data.iloc[:, :-1]
y = data.iloc[:, cols-1:cols]  # y is the last column of data
X.head()
y.head()
X = np.matrix(X.values)
y = np.matrix(y.values)
theta = np.matrix(np.array([0,0]))
X.shape, theta.shape, y.shape
((97, 2), (1, 2), (97, 1))
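A side note: NumPy now discourages np.matrix in favor of plain ndarrays. A minimal sketch of the same setup with ndarrays (hypothetical names X_arr, y_arr, theta_arr; assumes np and the data DataFrame from above):

X_arr = data.iloc[:, :-1].to_numpy()   # (97, 2): Ones column + Population
y_arr = data.iloc[:, -1].to_numpy()    # (97,): Profit
theta_arr = np.zeros(2)                # (2,): both parameters start at zero

# With ndarrays, predictions are X_arr @ theta_arr instead of X * theta.T
print(X_arr.shape, y_arr.shape, theta_arr.shape)   # (97, 2) (97,) (2,)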
a = np.array([[1,2],[2,2]])  # quick sanity check of .shape on a small array
print(a.shape)
(2, 2)
temp = np.matrix(np.zeros(theta.shape))
parameters = int(theta.ravel().shape[1])
parameters
2
alpha = 0.01
iters = 1500
def gradientDescent(X, y, theta, alpha, iters):
    temp = np.matrix(np.zeros(theta.shape))   # temporary theta for the simultaneous update
    parameters = int(theta.ravel().shape[1])  # number of parameters (2 here)
    cost = np.zeros(iters)                    # cost recorded at each iteration

    for i in range(iters):
        error = (X * theta.T) - y  # residuals under the current theta

        for j in range(parameters):  # update each parameter in turn
            term = np.multiply(error, X[:,j])  # element-wise product
            temp[0,j] = theta[0,j] - ((alpha / len(X)) * np.sum(term))

        theta = temp.copy()  # copy so the next iteration's temp writes don't alias theta
        cost[i] = computeCost(X, y, theta)  # cost after this update

    return theta, cost
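The inner loop over j can be folded into a single vectorized update. A minimal sketch under the same np.matrix setup (hypothetical name gradientDescentVec); it should produce the same theta and cost as the loop version:

def gradientDescentVec(X, y, theta, alpha, iters):
    # Vectorized batch gradient descent: one matrix expression per iteration
    theta = theta.copy()
    cost = np.zeros(iters)
    for i in range(iters):
        error = (X * theta.T) - y                          # (97, 1) residuals
        theta = theta - (alpha / len(X)) * (error.T * X)   # update all parameters at once
        cost[i] = computeCost(X, y, theta)
    return theta, cost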
g, cost = gradientDescent(X, y, theta, alpha, iters)
g
matrix([[-3.63029144,  1.16636235]])
cost
array([6.73719046, 5.93159357, 5.90115471, ..., 4.48343473, 4.48341145,
       4.48338826])
cost.shape
(1500,)
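As a sanity check, linear regression also has a closed-form solution (the normal equation). A minimal sketch with np.linalg (hypothetical name theta_ne); it should land close to the gradient-descent result above, with a small gap because 1500 iterations have not fully converged:

# Normal equation: solve (X^T X) theta = X^T y directly, no alpha or iterations
theta_ne = np.linalg.solve(X.T * X, X.T * y)   # (2, 1) column vector
print(theta_ne.ravel())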
predict1 = [1, 3.5] * g.T
print("predict1:", predict1)
predict2 = [1, 7] * g.T
print("predict2:", predict2)
# Predict the food-stall profit for cities with populations of 35,000 and 70,000
predict1: [[0.45197679]]
predict2: [[4.53424501]]
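In ex1data1 both columns are in units of 10,000 (population in 10,000s of people, profit in $10,000s), so a small helper makes the two numbers above easier to read (hypothetical name predict_profit):

def predict_profit(population, theta):
    # population in units of 10,000 people; returns profit in units of $10,000
    return float(theta[0, 0] + theta[0, 1] * population)

for pop in (3.5, 7.0):
    print(f"population {pop * 10000:,.0f}: profit ~ ${predict_profit(pop, g) * 10000:,.2f}")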
x = np.linspace(data.Population.min(), data.Population.max(), 100)
print(x)
print(g[0, 0],g[0, 1])
f = g[0, 0] + (g[0, 1] * x)

fig, ax = plt.subplots(figsize=(12,8))
ax.plot(x, f, 'r', label='Prediction')
ax.scatter(data.Population, data.Profit, label='Training Data')
ax.legend(loc=2)
ax.set_xlabel('Population')
ax.set_ylabel('Profit')
ax.set_title('Predicted Profit vs. Population Size')
plt.show()
# The raw data and the fitted regression line
[ 5.0269      5.20039596  5.37389192 ...  21.85600808 22.02950404 22.203     ]
-3.6302914394043597 1.166362350335582
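Since gradientDescent also returns the per-iteration cost, it is worth plotting J(theta) against the iteration number to confirm convergence; a minimal sketch in the same plotting style as above:

fig, ax = plt.subplots(figsize=(12, 8))
ax.plot(np.arange(iters), cost, 'r')   # cost should decrease monotonically
ax.set_xlabel('Iterations')
ax.set_ylabel('Cost')
ax.set_title('Cost vs. Iterations')
plt.show()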