#导入数据分析的三大件
import time

import matplotlib.pyplot as plt
import numpy as np
import numpy.random
import pandas as pd
# Read the header-less CSV file "mark.txt" and label its columns a, b and c.
data = pd.read_csv(
    "mark.txt",
    names=["a", "b", "c"],
    header=None,
)
def sigmoid(z):
    """Return the logistic function ``1 / (1 + exp(-z))`` of ``z``.

    ``z`` is passed straight to ``np.exp``, so a NumPy array is mapped
    element-wise and a scalar yields a scalar.
    """
    denominator = 1 + np.exp(-z)
    return 1 / denominator
# Prediction (hypothesis) function of the model
def model(X, theta):
    """Return ``sigmoid(X . theta^T)``, the predicted probabilities.

    NOTE(review): the rest of this file passes ``theta`` as a (1, d) row
    vector and ``X`` with d columns, which gives one prediction per row of
    ``X`` -- confirm for any other caller.
    """
    linear_term = np.dot(X, theta.T)
    return sigmoid(linear_term)
# Prepend a constant column of ones; in ``model`` it multiplies the first
# parameter, which therefore acts as the intercept term.
# (The frame is bound to ``data`` above; ``pdData`` was never defined.)
data.insert(0, 'Ones', 1)
# Convert the table to a NumPy matrix.  ``DataFrame.as_matrix()`` was removed
# in pandas 1.0; ``to_numpy()`` is its replacement.
orig_data = data.to_numpy()
# Number of columns in the matrix; the last column is split off as Y below.
cols = orig_data.shape[1]
X = orig_data[:, 0:cols - 1]
Y = orig_data[:, cols - 1:cols]
# Placeholder for the parameters: one per column of X (1 x 3 for this data).
theta = np.zeros([1, X.shape[1]])
def cost(X, Y, theta):
    """Return the mean cross-entropy loss of the logistic model.

    Computes ``sum(-Y * log(h) - (1 - Y) * log(1 - h)) / len(X)`` where
    ``h = model(X, theta)``.

    Args:
        X: Sample matrix, one row per sample.
        Y: Labels, multiplied element-wise with the predictions.
            NOTE(review): presumably a column of 0/1 values with the same
            shape as ``model(X, theta)`` -- confirm against the data file.
        theta: Model parameters, forwarded unchanged to ``model``.

    Returns:
        The summed loss divided by the number of rows in ``X``.
    """
    # Evaluate the model once instead of once per term.
    predictions = model(X, theta)
    left = np.multiply(-Y, np.log(predictions))
    right = np.multiply(1 - Y, np.log(1 - predictions))
    return np.sum(left - right) / len(X)
# Reference: https://blog.csdn.net/hx2017/article/details/77950086
# Logistic regression fitted with full-batch, stochastic and mini-batch
# gradient descent, using three interchangeable stopping strategies.
#
# The names ``np``, ``pd``, ``plt`` and ``time`` come from the import block at
# the top of the file.


def sigmoid(z):
    """Return the logistic function ``1 / (1 + exp(-z))`` of ``z``."""
    return 1 / (1 + np.exp(-z))


def model(X, theta):
    """Return ``sigmoid(X . theta^T)``, the predicted probabilities.

    ``theta`` is used as a (1, d) row vector throughout this script, so the
    result has one row per row of ``X``.
    """
    return sigmoid(np.dot(X, theta.T))


def cost(X, y, theta):
    """Return the mean cross-entropy loss of ``model(X, theta)`` against ``y``.

    Computes ``sum(-y * log(h) - (1 - y) * log(1 - h)) / len(X)`` with
    ``h = model(X, theta)``.
    """
    # Evaluate the model once instead of once per term.
    predictions = model(X, theta)
    left = np.multiply(-y, np.log(predictions))
    right = np.multiply(1 - y, np.log(1 - predictions))
    return np.sum(left - right) / len(X)


def gradient(X, y, theta):
    """Return the gradient of ``cost`` with respect to ``theta``.

    Component ``j`` is ``sum((model(X, theta) - y) * X[:, j]) / len(X)``.
    The result has the same shape as ``theta``.
    """
    error = (model(X, theta) - y).ravel()
    # One matrix product replaces the explicit loop over the parameters.
    return (np.dot(error, X) / len(X)).reshape(theta.shape)


# Gradient descent: identifiers of the three stopping strategies.
STOP_ITER = 0  # stop after a fixed number of iterations
STOP_COST = 1  # stop when the loss barely changes between two iterations
STOP_GRAD = 2  # stop when the gradient norm becomes small


def stopCriterion(type, value, threshold):
    """Return whether gradient descent should stop.

    Args:
        type: One of ``STOP_ITER``, ``STOP_COST`` or ``STOP_GRAD``.  (The name
            shadows the builtin but is kept for backward compatibility.)
        value: The iteration count for ``STOP_ITER``, the sequence of costs
            so far for ``STOP_COST``, or the last gradient for ``STOP_GRAD``.
        threshold: The limit ``value`` is compared against.

    Raises:
        ValueError: If ``type`` is not one of the three known strategies.
    """
    if type == STOP_ITER:
        return value > threshold
    if type == STOP_COST:
        # The last two loss values differ by less than the threshold.
        return abs(value[-1] - value[-2]) < threshold
    if type == STOP_GRAD:
        return np.linalg.norm(value) < threshold
    raise ValueError("unknown stop criterion: {!r}".format(type))


def shuffleData(data):
    """Shuffle the rows of ``data`` IN PLACE and split off the last column.

    Returns:
        ``(X, y)`` where ``X`` is every column but the last and ``y`` is the
        last column, kept two-dimensional.  Both are slices of ``data``.
    """
    np.random.shuffle(data)
    cols = data.shape[1]
    X = data[:, 0:cols - 1]
    y = data[:, cols - 1:]
    return X, y


def descent(data, theta, batchSize, stopType, thresh, alpha):
    """Minimise ``cost`` by (mini-batch) gradient descent.

    Args:
        data: 2-D array whose last column holds the labels.  Its rows are
            shuffled in place (see ``shuffleData``).
        theta: Initial parameters; not modified.
        batchSize: Number of rows used for each gradient step.
        stopType: ``STOP_ITER``, ``STOP_COST`` or ``STOP_GRAD``.
        thresh: Threshold passed to ``stopCriterion``.
        alpha: Learning rate.

    Returns:
        ``(theta, iterations, costs, grad, seconds)``: the final parameters,
        the iteration count minus one, the loss after every step (preceded by
        the initial loss), the last gradient and the elapsed wall-clock time.

    Raises:
        ValueError: If ``stopType`` is not a known strategy.
    """
    init_time = time.time()
    # Take the sample count from the data instead of a global ``n``.
    n_samples = data.shape[0]
    i = 0  # iteration counter
    k = 0  # start row of the current batch
    X, y = shuffleData(data)
    costs = [cost(X, y, theta)]

    while True:
        grad = gradient(X[k:k + batchSize], y[k:k + batchSize], theta)
        k += batchSize
        if k >= n_samples:
            # Every row has been used: start over with a fresh shuffle.
            k = 0
            X, y = shuffleData(data)
        theta = theta - alpha * grad  # parameter update
        costs.append(cost(X, y, theta))  # loss on the full data set
        i += 1

        if stopType == STOP_ITER:
            value = i
        elif stopType == STOP_COST:
            value = costs
        else:
            # STOP_GRAD; anything else is rejected by stopCriterion below.
            value = grad
        if stopCriterion(stopType, value, thresh):
            break

    return theta, i - 1, costs, grad, time.time() - init_time


def runExpe(data, theta, batchSize, stopType, thresh, alpha):
    """Run one descent experiment, print a summary and plot the loss curve.

    Takes the same arguments as ``descent``.  The printed title labels the
    data "Original" when more than one value in column 1 exceeds 2 and
    "Scaled" otherwise, and names the descent flavour from ``batchSize``.

    Returns:
        The fitted ``theta``.
    """
    # Total number of samples, taken from the data instead of a fixed 100.
    n_samples = data.shape[0]
    theta, n_iter, costs, _, dur = descent(
        data, theta, batchSize, stopType, thresh, alpha)

    name = "Original" if (data[:, 1] > 2).sum() > 1 else "Scaled"
    name += " data - learning rate: {} - ".format(alpha)
    if batchSize == n_samples:
        strDescType = "Gradient"
    elif batchSize == 1:
        strDescType = "Stochastic"
    else:
        strDescType = "Mini-batch ({})".format(batchSize)
    name += strDescType + " descent - Stop: "
    if stopType == STOP_ITER:
        strStop = "{} iterations".format(thresh)
    elif stopType == STOP_COST:
        strStop = "costs change < {}".format(thresh)
    else:
        strStop = "gradient norm < {}".format(thresh)
    name += strStop

    print("***{}\nTheta: {} - Iter: {} - Last cost: {:03.2f} - Duration: {:03.2f}s".format(
        name, theta, n_iter, costs[-1], dur))

    fig, ax = plt.subplots(figsize=(12, 4))
    ax.plot(np.arange(len(costs)), costs, 'r')
    ax.set_xlabel('Iterations')
    ax.set_ylabel('Cost')
    ax.set_title(name.upper() + ' - Error vs. Iteration')
    plt.show()
    return theta


if __name__ == "__main__":
    # The experiments run only when the file is executed as a script, so the
    # functions above can be imported without reading "mark.txt".
    data = pd.read_csv("mark.txt", header=None,
                       names=["Exam 1", "Exam 2", "Admitted"])
    aa = data.head()
    # Rows split by label; not used further in this script.
    positive = data[data['Admitted'] == 1]
    negative = data[data['Admitted'] == 0]

    # Constant column of ones: it multiplies the first parameter in ``model``.
    data.insert(0, "ones", 1)
    # ``DataFrame.as_matrix()`` was removed in pandas 1.0.
    orig_data = data.to_numpy()
    clos = orig_data.shape[1]
    X = orig_data[:, 0:clos - 1]
    y = orig_data[:, clos - 1:clos]
    theta = np.zeros([1, 3])

    # Different stopping strategies.
    # Fixed iteration count; full-batch descent uses every sample per step.
    n = orig_data.shape[0]  # number of rows (100 in the original data set)
    runExpe(orig_data, theta, n, STOP_ITER, thresh=5000, alpha=0.000001)

    # Original notes: this blows up a bit and is very unstable -- try again
    # with a smaller learning rate.  Fast but unstable; needs a very small
    # learning rate.
    # Stop on the change in the loss.  With a threshold of 1e-6 this takes
    # roughly 110 000 iterations.
    runExpe(orig_data, theta, n, STOP_COST, thresh=0.000001, alpha=0.001)

    # Mini-batch descent
    runExpe(orig_data, theta, 64, STOP_ITER, thresh=15000, alpha=0.001)