一、支持向量机SVM(support vector machine)
- SVC分类,SVR回归--统称SVM
- 支持向量机(Support Vector Machine, SVM)是一类按监督学习(supervised learning)方式对数据进行二元分类的广义线性分类器(generalized linear classifier),其决策边界是对学习样本求解的最大边距超平面
- 主要针对小样本数据,在非线性及高维模式识别问题中具有独特的优势。
-
泛化能力(防止过度拟合)
svm.png
(一、) 原理:
image.png
image.png
- image.png
原理还没写完---还不知道怎么写
(二、)svc(分类)使用
import warnings
warnings.filterwarnings('ignore')
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression

# Load iris as (X, y). The positional form load_iris(True) was deprecated
# in scikit-learn 0.23 and removed in 1.1 -- use the keyword argument.
X, y = datasets.load_iris(return_X_y=True)
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1024)

# Available kernels: 'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'
# linear = linear kernel, poly = polynomial, rbf = Gaussian (RBF) kernel,
# precomputed = a precomputed kernel (Gram) matrix is passed in as X.
svc = SVC(kernel='linear', probability=True)
svc.fit(X_train, y_train)
print(svc.score(X_test, y_test))

# Three classes -> three separating hyperplanes (three sets of coefficients).
print(svc.coef_, svc.intercept_)
print(svc.predict_proba(X_test))
0.9666666666666667
[[ 0.09714833 0.48876798 -0.87735232 -0.27322406]
[ 0.04017839 0.17446763 -0.55699585 -0.24382116]
[ 0.74228477 0.87194349 -2.09095748 -1.89810869]] [0.51951756 1.28800631 6.2047204 ]
[[6.40071499e-02 9.20931817e-01 1.50610328e-02]
[9.08371250e-01 6.80256092e-02 2.36031406e-02]
[1.88189376e-03 4.16381549e-04 9.97701725e-01]
[1.48787188e-02 2.18012822e-02 9.63319999e-01]
[9.35973703e-01 4.68906479e-02 1.71356491e-02]
[9.57877245e-01 2.87620199e-02 1.33607348e-02]
[7.23873761e-03 9.41866793e-01 5.08944689e-02]
[6.85331194e-03 3.83251361e-03 9.89314174e-01]
[1.31107157e-02 8.40674308e-01 1.46214976e-01]
[9.46595804e-01 3.76523323e-02 1.57518640e-02]
[9.84841335e-01 7.61926377e-03 7.53940150e-03]
[9.02886357e-01 7.95492517e-02 1.75643910e-02]
[1.44947683e-02 9.75717193e-01 9.78803825e-03]
[6.05411214e-03 2.27923310e-03 9.91666655e-01]
[1.39994240e-02 4.86137084e-01 4.99863492e-01]
[9.67022890e-01 2.13270652e-02 1.16500449e-02]
[9.29572828e-03 9.73287780e-01 1.74164916e-02]
[9.59930460e-01 2.72354324e-02 1.28341073e-02]
[9.82135164e-03 3.96292421e-02 9.50549406e-01]
[9.41101487e-01 4.00264331e-02 1.88720794e-02]
[8.83535762e-03 1.79468570e-02 9.73217785e-01]
[9.71442677e-01 1.80237687e-02 1.05335539e-02]
[1.50575546e-01 8.17386188e-01 3.20382661e-02]
[9.26678862e-01 5.52609505e-02 1.80601877e-02]
[7.87016042e-03 1.32082359e-02 9.78921604e-01]
[1.27655076e-02 9.57087513e-01 3.01469791e-02]
[1.15682854e-02 5.15100288e-03 9.83280712e-01]
[5.63602219e-03 6.01781115e-03 9.88346167e-01]
[5.37382775e-03 2.02029473e-03 9.92605878e-01]
[1.74245265e-02 2.72356089e-01 7.10219384e-01]]
# Baseline: logistic regression on the same train/test split for comparison.
lr = LogisticRegression()
lr.fit(X_train, y_train)
print(lr.score(X_test, y_test))
# A bare `lr.coef_` only renders in a notebook; print it so the script
# also shows the learned coefficients.
print(lr.coef_)
1.0
array([[-0.42950628, 0.83667747, -2.39313278, -0.95907637],
[ 0.47647805, -0.24379394, -0.13247376, -0.93516504],
[-0.04697178, -0.59288353, 2.52560654, 1.89424141]])
# Refit the classifier with the Gaussian (RBF) kernel -- same data, new kernel.
# Kernel choices: 'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'.
svc = SVC(kernel='rbf', probability=True)
svc.fit(X_train, y_train)
accuracy = svc.score(X_test, y_test)
print(accuracy)
# Non-linear kernels expose no coef_/intercept_ (there is no explicit
# hyperplane in the original input space), hence the commented-out line.
# print(svc.coef_,svc.intercept_)
print(svc.predict_proba(X_test))
1.0
[[0.0541574 0.9327725 0.0130701 ]
[0.93847978 0.04522213 0.01629809]
[0.01303676 0.00141136 0.98555189]
[0.01054322 0.09637134 0.89308544]
[0.95448914 0.0320195 0.01349136]
[0.96630075 0.02223438 0.01146486]
[0.00628795 0.95423647 0.03947558]
[0.00855857 0.00302154 0.98841989]
[0.00878754 0.90760118 0.08361129]
[0.96152059 0.02605406 0.01242535]
[0.9769783 0.01231281 0.01070889]
[0.9220724 0.05845731 0.01947028]
[0.01074537 0.97366838 0.01558625]
[0.00882244 0.0019064 0.98927117]
[0.00894026 0.57299231 0.41806742]
[0.97059011 0.0186655 0.01074438]
[0.01284496 0.8846894 0.10246564]
[0.96903515 0.01967863 0.01128622]
[0.00905532 0.07619724 0.91474744]
[0.95155491 0.03425512 0.01418997]
[0.00832953 0.02057062 0.97109984]
[0.97431839 0.01557245 0.01010915]
[0.12194746 0.85385058 0.02420197]
[0.94349505 0.0418431 0.01466184]
[0.00898573 0.0064567 0.98455758]
[0.00892507 0.97841727 0.01265766]
[0.00973732 0.01835271 0.97190997]
[0.00819412 0.00759467 0.98421121]
[0.00756816 0.00241693 0.99001491]
[0.01080086 0.31717545 0.67202369]]
# knn.coef_ KNN这个算法,没有coef_这个系数
# 决策树,有没有coef_?没有coef_
- svc = SVC(kernel='linear',probability=True),kernel就是指定该算法的核函数
- kernel就是核函数:linear 线性核、poly 多项式核、rbf 高斯核、precomputed 表示传入预先计算好的核矩阵(Gram 矩阵),而不是"预处理"
- 回归和分类都是有核函数的,本身是参数,就是调参的过程。
(三、)svr(回归)使用
import warnings
warnings.filterwarnings('ignore')
import numpy as np
import matplotlib.pyplot as plt
from sklearn.svm import SVR

# 40 training points over one full sine period; reshape to (n_samples, 1)
# because scikit-learn expects a 2-D feature matrix.
X = np.linspace(0, 2*np.pi, 40).reshape(-1, 1)
# Denser grid used only for prediction/plotting.
X_test = np.linspace(0, 2*np.pi, 256).reshape(-1, 1)
# Targets must be 1-D: ravel() avoids the DataConversionWarning that
# SVR.fit raises for a column-vector y.
y = np.sin(X).ravel()
plt.scatter(X, y)
<matplotlib.collections.PathCollection at 0x16ce189a948>
output_1_1.png
# Linear-kernel SVR fits a straight line, so it cannot follow the sine curve.
svr = SVR(kernel='linear')
svr.fit(X, y)
prediction = svr.predict(X_test)
plt.scatter(X, y)
plt.plot(X_test, prediction, color='green')
[<matplotlib.lines.Line2D at 0x16ce3b5b308>]
output_2_1.png
# A kernel "bends" the model: non-linear kernels handle non-linear problems.
# coef0 plays the role of the intercept term b in the kernel function.
svr = SVR(kernel='poly', degree=3, coef0=2)
svr.fit(X, y)
fitted = svr.predict(X_test)
plt.scatter(X, y)
plt.plot(X_test, fitted, color='green')
[<matplotlib.lines.Line2D at 0x16ce50aed88>]
output_3_1.png
# Sigmoid kernel on the same sine data; coef0 is the kernel's
# intercept-like constant (set to 0 here).
svr = SVR(kernel='sigmoid', coef0=0)
svr.fit(X, y)
fitted = svr.predict(X_test)
plt.scatter(X, y)
plt.plot(X_test, fitted, color='green')
[<matplotlib.lines.Line2D at 0x16ce50953c8>]
output_4_1.png
# Gaussian (RBF) kernel: the usual default choice for smooth
# non-linear regression problems such as this sine wave.
svr = SVR(kernel='rbf')
svr.fit(X, y)
fitted = svr.predict(X_test)
plt.scatter(X, y)
plt.plot(X_test, fitted, color='green')
[<matplotlib.lines.Line2D at 0x16ce511cb08>]
output_5_1.png
- 不同的核函数的比较:
import warnings
warnings.filterwarnings('ignore')
import numpy as np
import matplotlib.pyplot as plt
from sklearn.svm import SVR

# Years 2009-2019 encoded as 1..11; reshaped to (n_samples, 1) for sklearn.
X = (np.arange(2009, 2020) - 2008).reshape(-1, 1)
# Finer grid (smaller spacing) so the prediction curve plots smoothly.
X_test = np.linspace(1, 12, 50).reshape(-1, 1)
# Annual sales figures, one value per year.
y = np.array([0.5, 9.36, 52, 191, 350, 571, 912, 1207, 1682, 2135, 2684])
plt.scatter(X, y)
<matplotlib.collections.PathCollection at 0x2ac9db493c8>
output_1_1.png
# Linear kernel underfits this clearly non-linear (accelerating) growth trend.
svr = SVR(kernel='linear')
svr.fit(X, y)
y_hat = svr.predict(X_test)
plt.scatter(X, y)
plt.plot(X_test, y_hat, color='g')
[<matplotlib.lines.Line2D at 0x2ac9dea6d48>]
output_2_1.png
# Cubic polynomial kernel: degree sets the polynomial power, coef0 the
# intercept-like constant of the kernel function.
svr = SVR(kernel='poly', degree=3, coef0=200)
svr.fit(X, y)
y_ = svr.predict(X_test)
plt.scatter(X, y)
plt.plot(X_test, y_, color='g')
# Forecast for year 12 (2020). Print it: a bare expression only displays
# in a notebook and is a no-op when run as a script.
print(svr.predict(np.array([[12]])))
array([3291.58967443])
output_3_1.png