# 单项式模型
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
%matplotlib inline
# Load the training and test splits from their CSV files (train first, then test).
train_data, test_data = (
    pd.read_csv(csv_path)
    for csv_path in ("datas/T-R-train.csv", "datas/T-R-test.csv")
)
image.png
# Select the feature column "T" as x and the target column "rate" as y
# for each split.
x_train, y_train = train_data["T"], train_data["rate"]
x_test, y_test = test_data["T"], test_data["rate"]
image.png
# Reshape the single feature into 2-D column arrays of shape (n_samples, 1);
# scikit-learn estimators require a 2-D X even when there is only one feature.
x_train = np.array(x_train).reshape(-1, 1)
x_test = np.array(x_test).reshape(-1, 1)

# Fit a plain first-order linear regression on the training split.
line_model = LinearRegression()
line_model.fit(x_train, y_train)

# Predict on both splits so train and test fit quality can be compared.
y_train_predict = line_model.predict(x_train)
y_test_predict = line_model.predict(x_test)

# Report the R^2 score of the model on the training and test splits.
from sklearn.metrics import r2_score
r2_train = r2_score(y_train, y_train_predict)
r2_test = r2_score(y_test, y_test_predict)
print(r2_train, r2_test)
# Output recorded in the original notebook export:
# 0.016665703886981964 -0.758336343735132

# Build 300 evenly spaced x values over [40, 90] (as a column array) and
# predict on them to obtain points of the fitted line.
x_range = np.linspace(40, 90, 300).reshape(-1, 1)
new_y_predict = line_model.predict(x_range)
image.png
# 多项式模型——二阶
# Second-order polynomial model: expand the single feature into polynomial
# terms up to degree 2, then fit an ordinary linear regression on them.
from sklearn.preprocessing import PolynomialFeatures
# degree=2 is the PolynomialFeatures default; it is spelled out here so this
# section reads in parallel with the fifth-order one.
poly = PolynomialFeatures(degree=2)
# Fit the expansion on the training features only, then reuse it for the test set.
x_2_train = poly.fit_transform(x_train)
x_2_test = poly.transform(x_test)

# Train the linear model on the expanded features.
line_model2 = LinearRegression()
line_model2.fit(x_2_train, y_train)

# Predict on both splits.
y2_train_predict = line_model2.predict(x_2_train)
y2_test_predict = line_model2.predict(x_2_test)

# Report the R^2 score on the training and test splits.
r2_train2 = r2_score(y_train, y2_train_predict)
r2_test2 = r2_score(y_test, y2_test_predict)
print(r2_train2, r2_test2)
# Output recorded in the original notebook export:
# 0.970051540068942 0.9963954556468684

# Build 300 evenly spaced x values over [40, 90], expand them with the same
# fitted transformer, and predict to obtain points of the fitted curve.
# Note: x2_range is rebound from the raw grid to its polynomial features.
x2_range = np.linspace(40, 90, 300).reshape(-1, 1)
x2_range = poly.transform(x2_range)
new_y2_predict = line_model2.predict(x2_range)
image.png
# 多项式模型——五阶
# Fifth-order polynomial model: expand the single feature into polynomial
# terms up to degree 5, then fit an ordinary linear regression on them.
poly5 = PolynomialFeatures(degree=5)
# Fit the expansion on the training features only, then reuse it for the test set.
x_5_train = poly5.fit_transform(x_train)
x_5_test = poly5.transform(x_test)

# Train the linear model on the expanded features.
line_model5 = LinearRegression()
line_model5.fit(x_5_train, y_train)

# Predict on both splits.
y5_train_predict = line_model5.predict(x_5_train)
y5_test_predict = line_model5.predict(x_5_test)

# Report the R^2 score on the training and test splits.
r2_train5 = r2_score(y_train, y5_train_predict)
r2_test5 = r2_score(y_test, y5_test_predict)
print(r2_train5, r2_test5)
# Output recorded in the original notebook export:
# 0.9978527267187658 0.5437837627379174
# NOTE(review): the test score is far below the training score here, which
# looks like overfitting at this degree -- confirm against the plot.

# Build 300 evenly spaced x values over [40, 90], expand them with the same
# fitted transformer, and predict to obtain points of the fitted curve.
# Note: x5_range is rebound from the raw grid to its polynomial features.
x5_range = np.linspace(40, 90, 300).reshape(-1, 1)
x5_range = poly5.transform(x5_range)
new_y5_predict = line_model5.predict(x5_range)
image.png