# ---------------------------------------------------------------------------
# Mock data
# ---------------------------------------------------------------------------
# Six demo rows: an animal name plus an integer `show_time` column.
df = pd.DataFrame(
    data=[['bird', 5], ['dog', 3], ['cat', 2], ['tiger', 1], ['lion', 10], ['bear', 6]],
    columns=['animal', 'show_time'],
)
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(df)

# ---------------------------------------------------------------------------
# Derive a new column from a lookup table
# ---------------------------------------------------------------------------
# Deliberately incomplete mapping: 'cat', 'lion' and 'bear' have no entry, so
# every method below must leave those rows as missing values.
animal_to_type = {'bird': 'fly', 'dog': 'run', 'tiger': 'run'}

# Lower-cased names, computed once and shared by the dictionary-based methods.
animal_lower = df['animal'].str.lower()

# -- Method 1 (highly recommended): nested np.where / map
run_animals = [name for name, kind in animal_to_type.items() if kind == 'run']
fly_animals = [name for name, kind in animal_to_type.items() if kind == 'fly']
# The fallback must be an object array: mixing a str with a float NaN directly
# in np.where promotes everything to a string dtype and turns the missing
# marker into the literal text 'nan'.
missing = np.full(len(df), np.nan, dtype=object)
df['type_0'] = np.where(
    animal_lower.isin(run_animals),
    'run',
    np.where(animal_lower.isin(fly_animals), 'fly', missing),
)
# Series.map with a dict leaves unmapped keys as NaN.
df['type_1'] = animal_lower.map(animal_to_type)

# -- Method 2 (not recommended): per-element lookup in a lambda.
# Indexing the dict directly (animal_to_type[x.lower()]) raises KeyError for
# any animal missing from the mapping ('cat' here) and aborts the script, so
# fall back to NaN with dict.get instead.
df['type_2'] = df['animal'].map(lambda x: animal_to_type.get(x.lower(), np.nan))

# -- Method 3 (acceptable): boolean-mask assignment.
# .loc replaces the removed .ix indexer; rows matched by neither mask stay
# missing. Unlike the methods above, this one matches the raw (not lower-cased)
# animal names.
df.loc[df['animal'].isin(['bird']), 'type_3'] = 'fly'
df.loc[df['animal'].isin(['dog', 'tiger']), 'type_3'] = 'run'
print(df)

# ---------------------------------------------------------------------------
# Random row shuffle
# ---------------------------------------------------------------------------
n, _ = df.shape
# A random permutation of the positions 0..n-1, used as a positional indexer.
sampler = np.random.permutation(n)
print(sampler)
print(df.take(sampler))

# ---------------------------------------------------------------------------
# Dummy (one-hot) variables
# ---------------------------------------------------------------------------
print(pd.get_dummies(df['animal']))