Python机器学习3

记录一些常用的性能评价指标

预测模型 (均方误差(MSE)、均方根误差(RMSE)以及平均绝对误差(MAE) )

from math import sqrt

# Hand-made ground-truth values and the matching model predictions.
target = [1.5, 2.1, 3.3, -4.7, -2.3, 0.75]
prediction = [0.5, 1.5, 2.1, -2.2, 0.1, -0.5]

# Residual for every sample: actual value minus predicted value.
error = [actual - predicted for actual, predicted in zip(target, prediction)]

# Squared and absolute residuals feed RMSE and MAE respectively.
squaredError = [val * val for val in error]
absError = [abs(val) for val in error]

# RMSE is the square root of the mean squared residual;
# MAE is the mean absolute residual.
meanSquaredError = sum(squaredError) / len(squaredError)
print("RMSE = ", sqrt(meanSquaredError))
print("MAE = ", sum(absError)/len(absError))

来个回归预测评估的例子

import urllib.request
import numpy
from sklearn import  linear_model
from math import sqrt
import matplotlib.pyplot as plt
 
# Download the red-wine quality data set: a semicolon-separated file whose
# first line holds the column names and whose last column is the label.
target_url = ("http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv")

xList = []      # feature rows (every column except the last), as floats
labels = []     # last column of each data row, as float
names = []      # column names taken from the header line
firstLine = True
# Use the response as a context manager so the HTTP connection is closed
# even if parsing a row raises.
with urllib.request.urlopen(target_url) as data:
    for line in data:
        text = str(line, encoding='utf-8').strip()   # payload lines are bytes
        if not text:
            # A blank line (e.g. trailing newline) carries no data and would
            # otherwise make float('') raise.
            continue
        row = text.split(";")
        if firstLine:
            names = row
            firstLine = False
            continue
        # Remove the label from the row; what remains are the features.
        labels.append(float(row.pop()))
        xList.append([float(num) for num in row])

## Split into train and test sets: rows whose index is divisible by 3
## are held out for testing, every other row is used for training.
indices = range(len(xList))
isTestRow = [i % 3 == 0 for i in indices]

xListTest = [row for row, held in zip(xList, isTestRow) if held]
xListTrain = [row for row, held in zip(xList, isTestRow) if not held]
labelsTest = [lab for lab, held in zip(labels, isTestRow) if held]
labelsTrain = [lab for lab, held in zip(labels, isTestRow) if not held]

## Convert to numpy arrays for the numerical work that follows.
xTrain = numpy.array(xListTrain)
yTrain = numpy.array(labelsTrain)
xTest = numpy.array(xListTest)
yTest = numpy.array(labelsTest)

## Candidate ridge penalties: 0.1**0 down to 0.1**6.
alphaList = [0.1**i for i in range(7)]

## Fit one ridge model per alpha and record its RMS error on the test set.
rmsError = []
testSize = len(yTest)
for alph in alphaList:
    wineRidgeModel = linear_model.Ridge(alpha=alph)
    wineRidgeModel.fit(xTrain, yTrain)
    residuals = yTest - wineRidgeModel.predict(xTest)
    # The 2-norm of the residuals divided by sqrt(n) is the RMS error.
    rmsError.append(numpy.linalg.norm(residuals, 2) / sqrt(testSize))

print("RMS Error alpha")
for rms, alpha in zip(rmsError, alphaList):
    print(rms, alpha)

画图看下RMSE随参数alpha的变化情况
# Plot the RMS error against the position of each alpha in alphaList.
x = range(len(rmsError))
plt.plot(x, rmsError, 'k')
plt.xlabel('-log(alpha)')
plt.ylabel('Error (RMS)')
plt.show()

## Pick the alpha with the smallest RMS error (first one on ties),
## refit on the training set and show a histogram of the test residuals.
lowestRms = min(rmsError)
indexBest = rmsError.index(lowestRms)
alph = alphaList[indexBest]
# Ridge.fit returns the fitted estimator, so construction and fit can chain.
wineRidgeModel = linear_model.Ridge(alpha=alph).fit(xTrain, yTrain)
bestPredictions = wineRidgeModel.predict(xTest)
errorVector = yTest - bestPredictions
plt.hist(errorVector)
plt.xlabel("Bin Boundaries")
plt.ylabel("Counts")
plt.show()

## Reuse the hand-written RMSE / MAE computation on the ridge model's
## test-set predictions.
# Predict once for the whole test set. Calling predict(xTest) inside the
# per-element loop re-ran the model over every test row for each element.
testPredictions = wineRidgeModel.predict(xTest)
error = [actual - predicted for actual, predicted in zip(yTest, testPredictions)]

## Squared residuals feed RMSE, absolute residuals feed MAE.
squaredError = [val * val for val in error]
absError = [abs(val) for val in error]


print("RMSE = ", sqrt(sum(squaredError)/len(squaredError)))
print("MAE = ", sum(absError)/len(absError))

image.png

Python机器学习3

预测模型 (均方误差(MSE)、均方根误差(RMSE)以及平均绝对误差(MAE) )

来个回归预测评估的例子

推荐阅读更多精彩内容