#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Oct 29 10:39:29 2019
@author: liyili2
"""
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.model_selection import ShuffleSplit
from sklearn.metrics import make_scorer
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import r2_score
# Load the housing table, then separate the target column (MEDV) from
# the remaining predictor columns.
boston = pd.read_csv("/Users/liyili2/Downloads/datas/kaggle/housing.csv")
price = boston['MEDV']
features = boston.drop('MEDV', axis=1)
print("波士顿房价数据有{} 行 points with {} variables each.".format(*boston.shape))

# Basic statistics of the dependent variable.
# Each entry pairs the printed caption with the Series method that
# produces the value; the method is only called right before printing,
# so the compute/print order stays min, max, mean, median, std.
price_summaries = (
    ("房价最小值是:", price.min),
    ("房价最大值是:", price.max),
    ("房价均值是:", price.mean),
    ("房价中位数是:", price.median),
    ("房价标准差是:", price.std),
)
for caption, compute in price_summaries:
    print(caption, compute())
# --- Modelling ---
def performance_metric(y, y_predict):
    """Return the R^2 score of predictions against true values.

    Thin wrapper around ``r2_score`` so the same metric can be handed to
    ``make_scorer`` and also called directly.

    Args:
        y: True target values.
        y_predict: Predicted target values.

    Returns:
        The value returned by ``r2_score(y, y_predict)``.
    """
    return r2_score(y, y_predict)
# Sanity-check the metric on a small hand-written sample.
sample_truth = [3, -0.5, 2, 7, 4.2]
sample_prediction = [2.5, 0.0, 2.1, 7.8, 5.3]
score = performance_metric(sample_truth, sample_prediction)
print("Model has a coefficient of determination, R^2, of {:.3f}.".format(score))

# Hold out 20% of the rows for testing; random_state is the random seed
# that makes the shuffle reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    price,
    test_size=0.2,
    random_state=17,
)
print("Training and testing split was successful.")
def fit_model(X, y):
    """Grid-search ``max_depth`` of a decision tree and return the best one.

    A ``DecisionTreeRegressor`` is tuned over ``max_depth`` values 1..10
    with ``GridSearchCV``, scored by ``performance_metric`` on shuffled
    cross-validation splits.

    Args:
        X: Training features.
        y: Training target values.

    Returns:
        The ``best_estimator_`` of the fitted grid search.
    """
    # The first positional parameter of model_selection.ShuffleSplit is
    # the number of splits, not the sample count. Passing X.shape[0]
    # there would request one split per training row, so the split
    # count is given explicitly by keyword instead.
    cv_sets = ShuffleSplit(n_splits=10, random_state=0)

    regressor = DecisionTreeRegressor()
    params = {'max_depth': list(range(1, 11))}
    scoring_fnc = make_scorer(performance_metric)

    grid = GridSearchCV(regressor, params, scoring=scoring_fnc, cv=cv_sets)
    grid.fit(X, y)
    return grid.best_estimator_
# Fit the tuned tree on the training split and report the chosen depth.
reg = fit_model(X_train, y_train)
print("Parameter 'max_depth' is {} for the optimal model.".format(reg.get_params()['max_depth']))

# One row of feature values per client.
# NOTE(review): values are presumably in the same column order as
# `features` -- confirm against the CSV header.
client_data = [[5, 17, 15],   # Client 1
               [4, 32, 22],   # Client 2
               [8, 3, 12]]    # Client 3

# Prediction
# The loop variable is not named `price`, so the module-level `price`
# target column is not overwritten by a single predicted value.
for client_number, predicted_price in enumerate(reg.predict(client_data), start=1):
    print("Predicted selling price for Client {}'s home: ${:,.2f}".format(client_number, predicted_price))
结果:
波士顿房价数据有489 行 points with 4 variables each.
房价最小值是: 105000.0
房价最大值是: 1024800.0
房价均值是: 454342.9447852761
房价中位数是: 438900.0
房价标准差是: 165340.27765266786
Model has a coefficient of determination, R^2, of 0.923.
Training and testing split was successful.
波士顿房价预测
最后编辑于 :
©著作权归作者所有,转载或内容合作请联系作者
- 文/潘晓璐 我一进店门,熙熙楼的掌柜王于贵愁眉苦脸地迎上来,“玉大人,你说我怎么就摊上这事。” “怎么了?”我有些...
- 文/花漫 我一把揭开白布。 她就那样静静地躺着,像睡着了一般。 火红的嫁衣衬着肌肤如雪。 梳的纹丝不乱的头发上,一...
- 文/苍兰香墨 我猛地睁开眼,长吁一口气:“原来是场噩梦啊……” “哼!你这毒妇竟也来了?” 一声冷哼从身侧响起,我...
推荐阅读更多精彩内容
- xgboost中文叫做极致梯度提升模型,官方文档链接:https://xgboost.readthedocs.io...
- xgboost中文叫做极致梯度提升模型,官方文档链接:https://xgboost.readthedocs.io...
- LinearRegression中文叫做线性回归,是一种基础、常用的回归方法。 2018年8月22日笔记sklea...
- 模型思想 多元线性回归(multiple linear regression) 模型的目的是构建一个回归方程,利用...