# Toy dataset: ten rows, two float columns per row.
data = np.array(
    [
        [-0.017612, 14.053064],
        [-1.395634, 1.662541],
        [-0.752157, 6.538620],
        [-1.322371, 7.152853],
        [0.423363, 11.054677],
        [0.406704, 7.067335],
        [0.667394, 12.741452],
        [-2.460150, -0.866805],
        [0.569411, 9.548755],
        [-0.026632, 10.427743],
    ],
    dtype=float,
)

# One 0/1 entry per row of `data` (presumably class labels).
label = np.array([0, 1, 0, 0, 0, 1, 0, 1, 0, 0])

# A separate two-element point, kept as a plain Python list.
target = [2.0, 1.0]
- from sklearn.preprocessing import scale:可以直接对给定数据进行标准化(每列均值为 0、标准差为 1)。
# scale() hands back the standardized array itself, so despite its name
# `scaler` holds transformed data here, not a fitted scaler object.
scaler = scale(data)
# Per-column mean and standard deviation of the standardized array.
print(scaler.mean(axis=0), scaler.std(axis=0))
<<output>>:[0 0] [1 1]
- from sklearn.preprocessing import StandardScaler:该类的好处在于可以保存训练集上得到的参数(均值、方差),并直接用该对象转换测试集数据。
# Fit once on `data`; the fitted object keeps the per-column statistics.
scaler = StandardScaler().fit(data)
# Learned per-column mean and variance.
print(scaler.mean_,scaler.var_)
# Apply the fitted statistics to the data the scaler was fitted on ...
data = scaler.transform(data)
# ... and to the extra point, passed as a one-row nested list.
# NOTE(review): both `data` and `target` are rebound to their standardized
# versions here, and `target` is not indexed with `[0]` as it is in the
# MinMaxScaler and Normalizer snippets -- confirm that is intended.
target = scaler.transform([target])
<<output>>:[-0.3907684 7.9380235]
[0.99028455 19.92129407]
- from sklearn.preprocessing import minmax_scale 归一化到一定区间内
# minmax_scale() rescales the columns of `data` into `feature_range` and
# returns the rescaled array, which is what `scaler` is bound to here.
scaler = minmax_scale(data,feature_range=(0,1))
# Per-column mean and standard deviation of the rescaled array.
print(scaler.mean(axis=0), scaler.std(axis=0))
<<output>>:[ 0.66166347, 0.59014114],
[0.31818271, 0.29915328]
- from sklearn.preprocessing import MinMaxScaler:该类的好处在于可以保存训练集上得到的参数(各特征的最小值、最大值及对应的缩放系数),并直接用该对象转换测试集数据。
# Fit the min-max scaler on `data` and rescale it in one step; `mm` keeps the
# fitted parameters so the same mapping can be applied to other samples.
mm = MinMaxScaler()
scaler = mm.fit_transform(data)
# transform() expects a 2-D (n_samples, n_features) input.  np.atleast_2d
# turns the plain list [x, y] into a single row and leaves an already 2-D
# `target` (e.g. the result of an earlier transform() call) unchanged, whereas
# wrapping such an array in another list would make it 3-D and be rejected.
target = mm.transform(np.atleast_2d(target))[0]
# Per-feature offset (`min_`) and scale factor (`scale_`) held by the scaler.
print(mm.min_, mm.scale_)
<<output>>:[ 0.78660764, 0.05809736]
[ 0.31973971, 0.06702472]
- from sklearn.preprocessing import normalize:按指定范数(如 l2)对每个样本(每一行)单独归一化,使其范数为 1。
# Reset `target` to the raw point; earlier snippets rebound it.
target = [2.0, 1.0]
# normalize() returns the normalized array directly; norm='l2' selects the
# Euclidean norm.
# NOTE(review): the result is named `scalar` here, not `scaler` as in the
# snippets above -- confirm the spelling is intended.
scalar = normalize(data, norm='l2')
- from sklearn.preprocessing import Normalizer 将测试集映射到相同的空间中
# Object form of the normalization: build a Normalizer, fit it on `data`,
# then reuse the same object for both `data` and the extra point.
scalar = Normalizer().fit(data)
# Rebinds `data` to its normalized version.
data = scalar.transform(data)
# The point goes in as a one-row nested list; `[0]` takes that single row
# back out of the result.
target = scalar.transform([target])[0]