import numpy as np
from sklearn.preprocessing import MinMaxScaler,StandardScaler
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import Imputer
from sklearn.feature_selection import VarianceThreshold
from sklearn.decomposition import PCA
# Variance-based feature selection
# Build a 10x2 demo matrix: a constant column of ones next to a column of
# random integers drawn from [0, 10).
one = np.ones((10, 1))
two = np.random.randint(0, 10, (10, 1))
data = np.concatenate([one, two], axis=1)

# With its default threshold (0.0) VarianceThreshold removes features whose
# variance is zero, so the constant `one` column is expected to be dropped
# and only the random column kept.
var_ = VarianceThreshold()
var_.fit_transform(data)
# Principal component analysis (dimensionality reduction) (PCA)
# Read 5000 handwritten-digit images (digits 0-9, 500 per digit) as the data.
path = './knn_num_data/{}/{}_{}.bmp'
data = []
target = []
for i in range(10):
    for j in range(500):
        # j + 1 because the requested file indices run from 1 to 500; each
        # image is flattened into a 1-D feature vector.
        data.append(plt.imread(path.format(i, i, j + 1)).ravel())
        # The label is the digit, which is also the sub-directory name.
        target.append(i)
data = np.array(data)
target = np.array(target)

# Project the flattened images onto 160 principal components; whiten=True
# rescales the projected components to unit variance.
pca = PCA(n_components=160, whiten=True)
pca_data = pca.fit_transform(data)
pca_data.shape  # (5000, 160)