Advanced Usage of Sklearn

Normalization (rescale the features into the same interval, like turning an ellipse into a circle)

# Demonstrate the effect of scale()
from sklearn import preprocessing
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification
from sklearn.svm import SVC
import matplotlib.pyplot as plt
X, Y = make_classification(n_samples=300, n_features=2, n_redundant=0, n_informative=2, random_state=1, n_clusters_per_class=1, scale=100)
# plt.scatter(X[:, 0], X[:, 1], c=Y)
# plt.show()
X = preprocessing.scale(X)  # standardize each feature to zero mean and unit variance
# plt.scatter(X[:, 0], X[:, 1], c=Y)
# plt.show()
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3)  # train_data, test_data, train_label, test_label
clf = SVC()
clf.fit(X_train, Y_train)
print(clf.score(X_test, Y_test))
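
To see the effect rather than take it on faith, here is a minimal comparison sketch (the extra split and the name X_raw are illustrative choices): train the same SVC once on the raw features and once on the standardized ones.

X_raw, Y = make_classification(n_samples=300, n_features=2, n_redundant=0, n_informative=2, random_state=1, n_clusters_per_class=1, scale=100)
X_train, X_test, Y_train, Y_test = train_test_split(X_raw, Y, test_size=0.3, random_state=0)
print(SVC().fit(X_train, Y_train).score(X_test, Y_test))  # raw features spanning hundreds: usually a poor score
X_train, X_test, Y_train, Y_test = train_test_split(preprocessing.scale(X_raw), Y, test_size=0.3, random_state=0)
print(SVC().fit(X_train, Y_train).score(X_test, Y_test))  # standardized features: usually much better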

Evaluating the Model (Evaluation)

Cross-validation 1 (compare with the earlier sklearn iris example)

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

iris = load_iris()
X = iris.data
y = iris.target

# test train split #
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=4)
knn = KNeighborsClassifier(n_neighbors=5)  # classify by a vote among the 5 nearest neighbours
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)  # explicit predictions; knn.score() below computes accuracy directly
print(knn.score(X_test, y_test))
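
A single split's score depends on how the data happened to be divided. A short sketch (looping over a handful of arbitrary seeds) makes that variance visible, which is exactly what cross-validation averages away:

for seed in range(5):
    X_tr, X_te, y_tr, y_te = train_test_split(X, y, random_state=seed)
    print(seed, KNeighborsClassifier(n_neighbors=5).fit(X_tr, y_tr).score(X_te, y_te))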

# this is cross_val_score #
from sklearn.model_selection import cross_val_score
knn = KNeighborsClassifier(n_neighbors=5)
scores = cross_val_score(knn, X, y, cv=5, scoring='accuracy')  # 5-fold cross-validation, scored by accuracy
print(scores)
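
The five fold scores fluctuate, so the usual single-number summary is their mean (optionally with the standard deviation as an error bar):

print(scores.mean(), scores.std())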

# this is how to use cross_val_score to choose model and configs #
from sklearn.model_selection import cross_val_score
import matplotlib.pyplot as plt
k_range = range(1, 31)
k_scores = []
for k in k_range:  # try different neighbour counts; both too small and too large k hurt accuracy
    knn = KNeighborsClassifier(n_neighbors=k)
    # loss = -cross_val_score(knn, X, y, cv=10, scoring='neg_mean_squared_error')  # for regression problems (see the sketch after this block)
    scores = cross_val_score(knn, X, y, cv=10, scoring='accuracy') # for classification
    k_scores.append(scores.mean())

plt.plot(k_range, k_scores)
plt.xlabel('Value of K for KNN')
plt.ylabel('Cross-Validated Accuracy')
plt.show()
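
The commented-out line inside the loop hints at the regression version of this search. As a sketch (load_diabetes is used here purely as a stand-in regression dataset, and X_reg / k_losses are names chosen for illustration), the same scan plots a loss to minimise instead of an accuracy to maximise:

from sklearn.datasets import load_diabetes
from sklearn.neighbors import KNeighborsRegressor

X_reg, y_reg = load_diabetes(return_X_y=True)
k_losses = []
for k in k_range:
    knr = KNeighborsRegressor(n_neighbors=k)
    loss = -cross_val_score(knr, X_reg, y_reg, cv=10, scoring='neg_mean_squared_error')  # negate neg-MSE to get MSE
    k_losses.append(loss.mean())

plt.plot(k_range, k_losses)
plt.xlabel('Value of K for KNN')
plt.ylabel('Cross-Validated MSE')  # lower is better here, unlike accuracy
plt.show()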

Cross-validation 2 (monitoring overfitting with learning_curve)

from sklearn.model_selection import learning_curve
from sklearn.datasets import load_digits
from sklearn.svm import SVC
import matplotlib.pyplot as plt
import numpy as np

digits = load_digits()
X = digits.data
Y = digits.target
train_sizes, train_loss, test_loss = learning_curve(
    SVC(gamma=0.001), X, Y, cv=10, scoring='neg_mean_squared_error',
    train_sizes=[0.1, 0.25, 0.5, 0.75, 1]  # sample the curve at five training-set sizes
)
train_loss_mean = -np.mean(train_loss, axis=1)  # axis 0: the five sizes, axis 1: the CV folds; negate neg-MSE to get a loss
test_loss_mean = -np.mean(test_loss, axis=1)

plt.plot(train_sizes, train_loss_mean, 'o-', color='r', label='Training')
plt.plot(train_sizes, test_loss_mean, 'o-', color='g', label='Cross-validation')

plt.xlabel('Training examples')
plt.ylabel('Loss')
plt.legend(loc='best')
plt.show()
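
learning_curve also returns the per-fold losses, so the stability of each point can be shown by shading a ±1 standard deviation band around the means. A small follow-up sketch reusing the arrays above (the _std names are chosen here):

train_loss_std = np.std(train_loss, axis=1)
test_loss_std = np.std(test_loss, axis=1)
plt.plot(train_sizes, train_loss_mean, 'o-', color='r', label='Training')
plt.plot(train_sizes, test_loss_mean, 'o-', color='g', label='Cross-validation')
plt.fill_between(train_sizes, train_loss_mean - train_loss_std, train_loss_mean + train_loss_std, color='r', alpha=0.1)
plt.fill_between(train_sizes, test_loss_mean - test_loss_std, test_loss_mean + test_loss_std, color='g', alpha=0.1)
plt.xlabel('Training examples')
plt.ylabel('Loss')
plt.legend(loc='best')
plt.show()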

Cross-validation 3: validation_curve (inspecting overfitting)

from sklearn.model_selection import validation_curve
from sklearn.datasets import load_digits
from sklearn.svm import SVC
import matplotlib.pyplot as plt
import numpy as np

digits = load_digits()
X = digits.data
Y = digits.target
param_range = np.logspace(-6, -2.3, 5)  # five log-spaced gamma values in this range
train_loss, test_loss = validation_curve(
    SVC(), X, Y, param_name='gamma', param_range=param_range,
    cv=10, scoring='neg_mean_squared_error')
train_loss_mean = -np.mean(train_loss, axis=1)  # average over the CV folds (axis 1) and negate neg-MSE
test_loss_mean = -np.mean(test_loss, axis=1)

plt.plot(param_range, train_loss_mean, 'o-', color='r', label='Training')
plt.plot(param_range, test_loss_mean, 'o-', color='g', label='Cross-validation')

plt.xlabel('gamma')
plt.ylabel('Loss')
plt.legend(loc='best')
plt.show()
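
The plot is good for intuition; to pick gamma automatically over the same grid, GridSearchCV runs the identical cross-validated search and keeps the best setting (best_params_ and best_score_ are its standard attributes):

from sklearn.model_selection import GridSearchCV

grid = GridSearchCV(SVC(), param_grid={'gamma': param_range}, cv=10, scoring='neg_mean_squared_error')
grid.fit(X, Y)
print(grid.best_params_)  # the gamma with the lowest cross-validated loss
print(-grid.best_score_)  # its mean loss across the 10 folds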