SHAP的文章

强烈推荐SHAP的运用
链接

# Written in PyCharm's Jupyter (scientific / cell) mode

#%%

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

import warnings

# Blanket filter: every warning category is ignored from this point on.
# It is installed before `import shap` below, so it is already active while
# that import runs.
warnings.filterwarnings('ignore')

import shap

# Load SHAP's JavaScript visualisation code into the notebook output so the
# interactive force plots further down can render.
shap.initjs()


#%%

# Fetch the adult dataset that ships with SHAP. With display=True the frame
# still contains pandas 'category' columns (they are encoded in a later cell).
data, labels = shap.datasets.adult(display=True)

# Cast every label to a plain int and pack the result into a NumPy array.
labels = np.array(list(map(int, labels)))

print(data.shape, labels.shape)
data.head(5)


#%%

# Class balance: number of rows per label value.
pd.Series(data=labels).value_counts()


#%%

# Names of every column whose dtype is pandas 'category'.
cat_cols = data.select_dtypes(include=['category']).columns
cat_cols

#%%


def _to_codes(column):
    """Return the integer category codes of a categorical column."""
    return column.cat.codes


# Overwrite each categorical column, in place, with its integer codes.
data[cat_cols] = data[cat_cols].apply(_to_codes)
data.head()


#%%
# Un-encoded copy of the dataset; it is only used to label plots with the
# original category names instead of integer codes.
data_disp, labels_disp = shap.datasets.adult(display=True)

# Split the encoded data, the display data and both label arrays in ONE call.
# train_test_split applies the same shuffled indices to every array it is
# given, so row i of X_test is guaranteed to be row i of X_test_disp.
# Previously this alignment relied on two separate calls repeating the same
# test_size / random_state literals by hand; the resulting partitions are the
# same, but they can no longer drift apart if one call is edited.
(X_train, X_test,
 X_train_disp, X_test_disp,
 y_train, y_test,
 y_train_disp, y_test_disp) = train_test_split(
    data, data_disp, labels, labels_disp,
    test_size=0.3, random_state=42)
print(X_train.shape, X_test.shape)
X_train.head()


#%%

# Same partition, shown with the human-readable feature values.
print(X_train_disp.shape, X_test_disp.shape)
X_train_disp.head(3)


#%%

import xgboost as xgb  # provides the XGBClassifier used below

# Hyper-parameters gathered in one mapping and unpacked into the constructor.
xgb_params = {'n_estimators': 500, 'max_depth': 5, 'random_state': 42}
xgc = xgb.XGBClassifier(**xgb_params)

#%%

# Fit the classifier on the encoded training split.
xgc.fit(X_train, y_train)


#%%

# Predicted labels for the held-out rows; peek at the first ten.
predictions = xgc.predict(X_test)
predictions[0:10]


#%%

# Fraction of held-out rows whose predicted label matches the true label.
accuracy_score(y_true=y_test, y_pred=predictions)


#%%

# Build a SHAP tree explainer around the fitted XGBoost classifier.
explainer = shap.TreeExplainer(xgc)

# SHAP values for the held-out rows: one row per sample, one column per
# feature (the plots below index it as shap_values[row, :]).
shap_values = explainer.shap_values(X_test)

# Expected/base/reference value = the value that would be predicted if we
# did not know any features for the current output.
print('Expected Value:', explainer.expected_value)

# Display the first 5 rows of the SHAP table. The columns are labelled with
# the feature names so the table is readable; without `columns=` pandas shows
# bare positional indices 0..N-1.
pd.DataFrame(shap_values, columns=X_test.columns).head()


#%%

# Label of the test row at position 0.
y_test_disp[0]


#%%

shap.initjs()
# Force plot for that single row; the display frame supplies the feature
# values shown next to each contribution.
shap.force_plot(base_value=explainer.expected_value,
                shap_values=shap_values[0],
                features=X_test_disp.iloc[0])
#%%

# Label of the test row at position 2.
y_test_disp[2]

#%%

shap.initjs()
# Same kind of plot for the row at position 2.
shap.force_plot(base_value=explainer.expected_value,
                shap_values=shap_values[2],
                features=X_test_disp.iloc[2])

#%%
# Labels of the first ten test rows.
y_test_disp[0:10]

#%%

shap.initjs()
# Force plot over the first 1000 test rows at once.
# NOTE(review): this call passes the encoded X_test, whereas the single-row
# plots above pass X_test_disp - confirm that is intentional.
shap.force_plot(base_value=explainer.expected_value,
                shap_values=shap_values[:1000],
                features=X_test.iloc[:1000])

#%%
shap.initjs()
# Summary plot in its "bar" variant.
shap.summary_plot(shap_values, features=X_test, plot_type="bar")

#%%

shap.initjs()
# Summary plot with the default plot type, over the same values.
shap.summary_plot(shap_values, features=X_test)

#%%

shap.initjs()


def _dependence(feature, colour_by, **extra):
    """Draw a SHAP dependence plot of `feature`, coloured by `colour_by`.

    The SHAP values and the encoded test frame are taken from the notebook
    globals; any extra keyword arguments are forwarded to
    shap.dependence_plot unchanged.
    """
    shap.dependence_plot(ind=feature, interaction_index=colour_by,
                         shap_values=shap_values, features=X_test,
                         **extra)


# Each of the next four plots colours a feature by itself.
# NOTE(review): the shap.initjs() calls are repeated in every cell exactly as
# in the original; check whether dependence plots actually need them.
_dependence('Age', 'Age')

#%%

shap.initjs()
_dependence('Education-Num', 'Education-Num')

#%%

shap.initjs()
_dependence('Relationship', 'Relationship')

#%%

shap.initjs()
_dependence('Capital Gain', 'Capital Gain')
#%%
shap.initjs()
# Two-feature plot: Age coloured by Capital Gain, with the display frame
# passed as display_features.
_dependence('Age', 'Capital Gain', display_features=X_test_disp)
#%%
shap.initjs()
# Two-feature plot: Education-Num coloured by Relationship.
_dependence('Education-Num', 'Relationship')
#%%
©著作权归作者所有，转载或内容合作请联系作者
平台声明：文章内容（如有图片或视频亦包括在内）由作者上传并发布，文章内容仅代表作者本人观点，简书系信息发布平台，仅提供信息存储服务。

推荐阅读更多精彩内容