并不是每个样本点的松弛变量都不为零:只有离群的样本(落在间隔之内或被误分类的点)才有非零的松弛变量,并因此受到惩罚因子 C 的惩罚
首先随机生成一些数据
# Seed NumPy's global random generator so the sample is reproducible.
np.random.seed(0)
# 40 two-dimensional points: 20 standard-normal draws shifted by (-2, -2),
# followed by 20 more shifted by (+2, +2).
X = np.concatenate(
    [np.random.randn(20, 2) - [2, 2], np.random.randn(20, 2) + [2, 2]]
)
# Class labels: 0 for the first 20 rows of X, 1 for the last 20.
Y = 20 * [0] + 20 * [1]
选用线性SVM分类器并对数据进行拟合
# Build a support-vector classifier with a linear kernel and train it on the
# (X, Y) sample in a single chained call; fit() returns the estimator itself.
clf = svm.SVC(kernel='linear').fit(X, Y)
# Inspect the weight vector w of the fitted model. This bare expression only
# displays a value in an interactive session.
clf.coef_
# Sample output shown in the original notes:
# array([[ 0.90230696, 0.64821811]])
# Inspect the support vectors (one row per support vector).
clf.support_vectors_
# Sample output shown in the original notes:
# array([[-1.02126202, 0.2408932 ],
# [-0.46722079, -0.53064123],
# [ 0.95144703, 0.57998206]])
用支持向量和w绘出分类超平面
研究参数
惩罚参数 C(控制松弛程度)的影响
# Three alternative settings of the penalty parameter C, meant to be tried one
# at a time. Run top to bottom, each assignment replaces the previous clf.
# In scikit-learn the regularization strength is inversely proportional to C.
# Very large C: margin violations are penalized heavily.
clf = svm.SVC(C=10000, kernel='linear')
# C = 1 is scikit-learn's default value.
clf = svm.SVC(C=1, kernel='linear')
# Very small C: margin violations are cheap, so regularization dominates.
clf = svm.SVC(C=0.01, kernel='linear')
完整代码
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm
# Create 40 separable points: two clusters of 20 standard-normal draws,
# shifted by (-2, -2) and (+2, +2). The fixed seed makes the figure
# reproducible.
np.random.seed(0)
X = np.r_[np.random.randn(20, 2) - [2, 2], np.random.randn(20, 2) + [2, 2]]
Y = [0] * 20 + [1] * 20

# Fit a linear-kernel support-vector classifier.
clf = svm.SVC(kernel='linear')
clf.fit(X, Y)

# The separating hyperplane is w[0]*x + w[1]*y + intercept = 0. Rewrite it in
# slope-intercept form y = a*x + c so it can be evaluated on an x grid.
# NOTE: this form assumes w[1] != 0 (the boundary is not vertical).
w = clf.coef_[0]
a = -w[0] / w[1]
xx = np.linspace(-5, 5)
yy = a * xx - (clf.intercept_[0]) / w[1]

# The margin boundaries are the lines w.x + intercept = -1 and +1. In
# slope-intercept form they are the separating line shifted vertically by
# 1 / |w[1]|. Deriving them from w, rather than drawing lines through the
# first and last support vectors, stays correct for soft margins (small C),
# where support vectors may lie inside the margin instead of on it.
margin_shift = 1 / abs(w[1])
yy_down = yy - margin_shift
yy_up = yy + margin_shift

# Draw the decision boundary (solid) and the two margin boundaries (dashed).
plt.plot(xx, yy, 'k-')
plt.plot(xx, yy_down, 'k--')
plt.plot(xx, yy_up, 'k--')

# Circle the support vectors. An explicit edge colour is required: with
# facecolors='none' alone, recent matplotlib versions make the edge follow the
# (absent) face colour, so the circles would not be drawn.
plt.scatter(clf.support_vectors_[:, 0], clf.support_vectors_[:, 1],
            s=80, facecolors='none', edgecolors='k')
# Plot every sample, coloured by class label.
plt.scatter(X[:, 0], X[:, 1], c=Y, cmap=plt.cm.Paired)

plt.axis('tight')
plt.show()