import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler,StandardScaler

try:  # available from scikit-learn 0.20 onwards
    from sklearn.impute import SimpleImputer
except ImportError:
    SimpleImputer = None
try:  # legacy class, removed in scikit-learn 0.22
    from sklearn.preprocessing import Imputer
except ImportError:
    Imputer = None
# Demo input: a 10x5 matrix of random integers drawn from [0, 100).
data = np.random.randint(low=0, high=100, size=(10, 5))
# Normalization (归一化):
# Min-max normalization: rescale every column of `data` into [0, 1].
col_min = data.min(axis=0)
col_max = data.max(axis=0)
col_range = col_max - col_min
# A constant column has zero range; divide by 1 instead so it maps to 0
# rather than NaN/inf (MinMaxScaler treats zero ranges the same way).
col_range[col_range == 0] = 1
res = (data - col_min) / col_range
res  # notebook-style echo of the result; no effect when run as a script

# Same transformation via scikit-learn, for comparison with `res`.
mms = MinMaxScaler()
res2 = mms.fit_transform(data)
# Standardization (标准化):
# Standardization (centering): shift every column of `data` to zero mean
# and scale it to unit standard deviation.
col_mean = data.mean(axis=0)
# ndarray.std defaults to the population estimate (ddof=0), which is the
# estimate StandardScaler uses as well.
col_std = data.std(axis=0)
# A constant column has zero spread; divide by 1 instead so it maps to 0
# rather than NaN (StandardScaler treats zero variance the same way).
col_std[col_std == 0] = 1
res = (data - col_mean) / col_std

# Same transformation via scikit-learn, for comparison with `res`.
ss = StandardScaler()
res2 = ss.fit_transform(data)
# Missing-value imputation (缺失值填充):
# Missing-value imputation: knock out two cells of the iris dataset, then
# fill them with the most frequent value of their column.
iris = sns.load_dataset('iris')
iris.iloc[1, 1] = np.nan
iris.iloc[2, 2] = np.nan

# `Imputer` was removed in scikit-learn 0.22; `SimpleImputer` replaces it and
# always works column-wise, so the old `axis=0` argument has no counterpart.
if SimpleImputer is not None:
    im = SimpleImputer(strategy='most_frequent')
else:
    im = Imputer(strategy='most_frequent', axis=0)

# The last column is excluded from imputation; only the columns before it
# are passed to the imputer.
im.fit_transform(iris.iloc[:, :-1])