# Case study and code source: https://www.jianshu.com/p/4778bffbfcc6
from datetime import datetime
%matplotlib inline
# Apply matplotlib's 'ggplot' style to the charts drawn after this point.
plt.style.use('ggplot')

# Parse the yyyymmdd-formatted order_dt column into proper datetimes.
df['order_date'] = pd.to_datetime(df['order_dt'], format='%Y%m%d')

# Reduce every order date to month resolution so orders can be grouped by
# calendar month.
df['month'] = df['order_date'].values.astype('datetime64[M]')
# Open a wide 12x4 figure and select the first cell of a 1-row, 2-column
# subplot grid, then draw the order-amount distribution there in 30 bins.
plt.figure(figsize=(12, 4))
plt.subplot(121)
df['order_amount'].hist(bins=30)
# Sorted, de-duplicated month values rendered as strings; used as readable
# column labels for pivoted_counts (built outside the visible lines).
columns_month = df['month'].sort_values().astype('str').unique()
pivoted_counts.columns = columns_month

# Mean order amount per user and month; user/month combinations without any
# order are filled with 0.
pivoted_amount = df.pivot_table(
    index='user_id',
    columns='month',
    values='order_amount',
    aggfunc='mean',
).fillna(0)

# The labels are rebuilt the same way as above before being applied here.
columns_month = df['month'].sort_values().astype('str').unique()
pivoted_amount.columns = columns_month
return pd.Series(status,index=columns_month)
# Count how many users hold each purchase status in every column of
# pivoted_purchase_status (presumably one column per month). 'unreg' entries
# are replaced by NaN first, and value_counts() skips NaN, so that status is
# left out of the tallies.
# NOTE(review): later lines read `purchase_status_counts`; confirm whether
# that name is meant to refer to this same table.
pivoted_status_counts = (
    pivoted_purchase_status
    .replace('unreg', np.nan)  # np.nan works on every NumPy; np.NaN was removed in 2.0
    .apply(lambda statuses: statuses.value_counts())
)
pivoted_status_counts.head()
# Stacked area chart of the user count per status. The table is transposed so
# that its columns become the x axis and each status becomes one stacked area.
purchase_status_counts.fillna(0).T.plot.area(figsize=(12, 4))

# Share of each status within a column. The division has to run down each
# column (axis=0): with axis=1 every status row was divided by its own total
# across all columns, so the 'return' line showed how returning users are
# spread over time instead of what fraction of users returned in each period.
return_rate = purchase_status_counts.apply(lambda col: col / col.sum(), axis=0)
return_rate.loc['return'].plot(figsize=(12, 6))
# Keep only the columns used by the per-user analysis.
# NOTE(review): the original line was cut off after 'order_amount'.
# 'order_date' is restored as the last column because the later merge with
# order_date_min passes suffixes to disambiguate an overlapping column;
# confirm against the notebook.
user_purchase = df[['user_id', 'order_products', 'order_amount', 'order_date']]

# Histogram (15 bins) of order_date_max minus order_date_min expressed in
# days; presumably each user's span between first and last order.
((order_date_max - order_date_min) / np.timedelta64(1, 'D')).hist(bins=15)
# Attach the order_date_min value to every order row of the same user. Columns
# that exist on both sides keep their name on the left and get a '_min' suffix
# on the right.
user_purchase_retention = pd.merge(
    left=user_purchase,
    right=order_date_min.reset_index(),
    how='inner',
    on='user_id',
    suffixes=('', '_min'),
)

# Bucket date_diff into the intervals given by `bin`.
# NOTE(review): neither the date_diff column nor `bin` is defined in the
# visible lines; `bin` also shadows the builtin of the same name, so confirm
# it is assigned before this point.
user_purchase_retention['date_diff_bin'] = pd.cut(
    user_purchase_retention.date_diff, bins=bin
)

# Total order amount per user and date_diff bucket. The aggregation is named
# by string, matching the other pivot in this file; passing the builtin `sum`
# is deprecated in recent pandas.
pivoted_retention = user_purchase_retention.pivot_table(
    index='user_id',
    columns='date_diff_bin',
    values='order_amount',
    aggfunc='sum',
)
pivoted_retention.mean()
# Turn the per-bucket amounts into 1/0 flags: 1 where the amount is positive,
# 0 otherwise. The original accessed `fillna` without calling it, which raises
# AttributeError; missing amounts are filled with 0 first so they become 0
# flags. The comparison is vectorised instead of using the deprecated
# element-wise applymap.
pivoted_retention_trans = (pivoted_retention.fillna(0) > 0).astype(int)

# Fraction of rows flagged 1 in each bucket, drawn as a bar chart.
(pivoted_retention_trans.sum() / pivoted_retention_trans.count()).plot.bar()
def diff(group):
    """Return each row's ``date_diff`` minus the following row's ``date_diff``.

    Args:
        group: Object exposing a ``date_diff`` Series, e.g. the rows of one
            group handed over by ``DataFrame.groupby(...).apply``.

    Returns:
        A Series with the same index as ``group.date_diff``. Each entry is the
        current value minus the next one; the last entry is missing because
        it has no successor.
    """
    # shift(-1) pulls the next row's value up onto the current row.
    gaps = group.date_diff - group.date_diff.shift(-1)
    return gaps
# Run diff() separately on each user's rows to get the gaps between that
# user's consecutive date_diff values.
last_diff = (
    user_purchase_retention
    .groupby('user_id')
    .apply(diff)
)

# Mean gap, followed by a 20-bin histogram of the gap distribution.
last_diff.mean()
last_diff.hist(bins=20)