1. Multi-output problems
Multiple y values: the target y is itself a matrix (VSM-style), one column per output.
from sklearn.tree import DecisionTreeRegressor
Single output: X (VSM), y (a single column)
Multiple outputs: X (VSM), Y (VSM)
X(x1, x2, x3, ..., xn)  Y(y1, y2, ..., yn)  # when the target has multiple columns, this kind of modeling applies.
# Extension: automatic face completion. The stronger the fitting capacity, the more realistic the completed faces.
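scikit-learn's DecisionTreeRegressor handles this directly: pass a 2-D Y to fit, and predict returns one column per target. A minimal sketch with made-up data (X_demo, Y_demo and the targets are illustrative, not from the notes):
import numpy as np
from sklearn.tree import DecisionTreeRegressor

X_demo = np.random.random(size=(100, 3))           # 100 samples, 3 features
Y_demo = np.column_stack([X_demo.sum(axis=1),      # target column 1: row sums
                          X_demo.prod(axis=1)])    # target column 2: row products -> Y shape (100, 2)
reg = DecisionTreeRegressor(max_depth=5)
reg.fit(X_demo, Y_demo)                            # multi-output fit: Y has 2 columns
print(reg.predict(X_demo[:3]).shape)               # (3, 2): one column of predictions per target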
2. Decision tree regression
2.1 Boston house prices: get X and y
from sklearn.datasets import load_boston
boston = load_boston()
data = boston.data      # feature matrix
target = boston.target  # target: median house value
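Note: load_boston was deprecated in scikit-learn 1.0 and removed in 1.2, so the import above fails on newer versions. A fallback sketch, following the loading recipe given in scikit-learn's deprecation notice (treat the URL and column layout as an assumption to verify):
import numpy as np
import pandas as pd

data_url = "http://lib.stat.cmu.edu/datasets/boston"
raw_df = pd.read_csv(data_url, sep=r"\s+", skiprows=22, header=None)  # each record spans 2 rows in the raw file
data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])    # 13 feature columns
target = raw_df.values[1::2, 2]                                       # median house value (MEDV)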
2.2 Split the dataset and build the model
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.2, random_state=1)  # split the dataset
dt_rgr = DecisionTreeRegressor()  # build the model
dt_rgr.fit(X_train, y_train)      # fit on the training data
Compute the mean squared error:
from sklearn.metrics import mean_squared_error
mean_squared_error(y_test, dt_rgr.predict(X_test)) #18.550588235294118
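As a rough check on overfitting, one can sweep max_depth and compare training versus test error; the depth values below are arbitrary, and this reuses X_train/X_test/y_train/y_test from above:
for depth in [2, 4, 6, 8, None]:
    model = DecisionTreeRegressor(max_depth=depth, random_state=1)
    model.fit(X_train, y_train)
    train_mse = mean_squared_error(y_train, model.predict(X_train))
    test_mse = mean_squared_error(y_test, model.predict(X_test))
    print("max_depth={}: train MSE={:.2f}, test MSE={:.2f}".format(depth, train_mse, test_mse))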
2.3 A multi-output problem
2.3.1 Build X and Y
The label columns are the x- and y-coordinates of points on a circle; the feature X is the angle of each point, in radians.
import numpy as np

X = np.sort(np.random.random(size=200)*200 - 100).reshape(-1, 1)  # 200 angles (radians) drawn from [-100, 100]
X  # X holds the angles in radians
Noise data:
bias = np.random.random(size=200)*0.5 - 0.25  # uniform noise in [-0.25, 0.25]
bias = bias.reshape(-1, 1)
Build Y:
y = np.sin(X)  # vertical coordinate
x = np.cos(X)  # horizontal coordinate
Y = np.concatenate((x, y), axis=1) + bias  # two-column target; add the noise generated above
Y.shape  # (200, 2)
Radians vs. degrees: np.sin and np.cos expect radians, so no conversion is needed here; convert only if degrees are wanted for display.
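If degrees are ever needed (e.g. for axis labels), NumPy has built-in converters; a quick illustrative sketch:
deg = np.degrees(X)    # radians -> degrees
rad = np.radians(deg)  # degrees -> radians (recovers X)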
2.3.2 Plot the target points
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
plt.scatter(Y[:,0], Y[:,1], color='green')
plt.axis('equal')
plt.show()
2.3.3 Modeling
Fit a decision tree:
dt = DecisionTreeRegressor()
dt.fit(X, Y)                                         # fit on the two-column target
X_test = np.linspace(-100, 100, 100).reshape(-1, 1)  # 100 evenly spaced test angles
Y_ = dt.predict(X_test)
Y_.shape  # (100, 2)
Plot the predictions:
plt.scatter(Y_[:,0], Y_[:,1], color='blue')  # predicted (cos, sin) points
plt.axis('equal')
plt.show()
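As a rough sanity check, the predictions can also be compared with the exact circle coordinates for the same test angles (this reuses X_test and Y_ from above):
Y_true = np.concatenate((np.cos(X_test), np.sin(X_test)), axis=1)  # exact (cos, sin) for the test angles
mean_squared_error(Y_true, Y_)                                      # small if the tree interpolated the circle well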
2.3.4 Compare the regression at different tree depths
depth_list = [2, 7, 18, None]
plt.figure(figsize=(16, 4))
for index, depth in enumerate(depth_list):
    dt = DecisionTreeRegressor(max_depth=depth)
    dt.fit(X, Y)
    X_test = np.linspace(-100, 100, 100).reshape(-1, 1)
    Y_ = dt.predict(X_test)
    axes = plt.subplot(1, len(depth_list), index+1)
    plt.scatter(Y_[:,0], Y_[:,1], color='blue')
    plt.axis('equal')
    plt.title("max_depth={}".format(depth))
plt.show()
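To put numbers on the visual comparison, one can also hold out part of the circle data and score each depth; an illustrative sketch (the split proportion and the Xc_/Yc_ names are placeholders, not from the notes):
Xc_train, Xc_test, Yc_train, Yc_test = train_test_split(X, Y, test_size=0.3, random_state=1)
for depth in depth_list:
    model = DecisionTreeRegressor(max_depth=depth)
    model.fit(Xc_train, Yc_train)
    print("max_depth={}: test MSE={:.4f}".format(depth, mean_squared_error(Yc_test, model.predict(Xc_test))))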