# 强烈推荐SHAP的运用
# 链接
# 在pycharm的jupyter写的
#%%
# Data handling, plotting and scikit-learn helpers used throughout the notebook.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import warnings
# Silence every warning category so the cell output stays uncluttered.
warnings.filterwarnings('ignore')
import shap
# Load SHAP's JavaScript support so interactive plots can render in notebook output.
shap.initjs()
#%%
# Fetch the Adult dataset shipped with SHAP, asking for its "display" variant.
data, labels = shap.datasets.adult(display=True)
# Convert every label to a plain int and collect them in a NumPy array.
labels = np.array(list(map(int, labels)))
print(data.shape, labels.shape)
data.head()
#%%
# Class balance: number of samples per label value.
pd.Series(labels).value_counts()
#%%
# Names of all columns whose dtype is pandas "category".
cat_cols = data.select_dtypes(include=['category']).columns
cat_cols
#%%
# Replace each categorical column by its integer category codes so the
# model receives purely numeric input.
data[cat_cols] = data[cat_cols].apply(lambda column: column.cat.codes)
data.head()
#%%
# 70/30 train/test split of the encoded data with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    data, labels, test_size=0.3, random_state=42
)
print(X_train.shape, X_test.shape)
X_train.head()
#%%
# Reload the display variant (the frame loaded earlier was encoded in place)
# and split it with the same test_size and seed as the encoded split above.
data_disp, labels_disp = shap.datasets.adult(display=True)
X_train_disp, X_test_disp, y_train_disp, y_test_disp = train_test_split(
    data_disp, labels_disp, test_size=0.3, random_state=42
)
print(X_train_disp.shape, X_test_disp.shape)
X_train_disp.head(n=3)
#%%
import xgboost as xgb  # XGBoost, used through its scikit-learn style classifier
# Classifier with 500 trees of depth at most 5 and a fixed seed.
xgc = xgb.XGBClassifier(
    n_estimators=500,
    max_depth=5,
    random_state=42,
)
#%%
# Fit the classifier on the training split.
xgc.fit(X=X_train, y=y_train)
#%%
# Predicted labels for the test split; peek at the first ten.
predictions = xgc.predict(X=X_test)
predictions[:10]
#%%
# Fraction of test samples whose predicted label matches the true label.
accuracy_score(y_true=y_test, y_pred=predictions)
#%%
# Wrap the fitted classifier in SHAP's tree explainer.
explainer = shap.TreeExplainer(xgc)
# SHAP values for every row of the test split.
shap_values = explainer.shap_values(X_test)
# Expected/base/reference value: the value that would be predicted if we
# did not know any features of the current sample.
print('Expected Value:', explainer.expected_value)
# Show the first 5 rows of the SHAP value table.
pd.DataFrame(data=shap_values).head(n=5)
#%%
# Label of the first test sample, for comparison with its explanation below.
y_test_disp[0]
#%%
# Force plot for the first test sample; the display frame supplies the
# feature values shown next to each contribution.
shap.initjs()
shap.force_plot(explainer.expected_value,
                shap_values[0, :], X_test_disp.iloc[0, :])
#%%
# Label of the third test sample.
y_test_disp[2]
#%%
# Force plot for the third test sample.
shap.initjs()
shap.force_plot(explainer.expected_value,
                shap_values[2, :], X_test_disp.iloc[2, :])
#%%
# Labels of the first ten test samples.
y_test_disp[:10]
#%%
# Stacked force plot over the first 1000 test samples. The display frame is
# passed (as in the single-sample plots above) so values shown in the plot
# are the original values rather than integer category codes.
shap.initjs()
shap.force_plot(explainer.expected_value,
                shap_values[:1000, :], X_test_disp.iloc[:1000, :])
#%%
# Bar-type summary plot of the SHAP values over the test split.
shap.initjs()
shap.summary_plot(shap_values, features=X_test, plot_type="bar")
#%%
# Default summary plot of the same SHAP values and features.
shap.initjs()
shap.summary_plot(shap_values, features=X_test)
#%%
# Dependence plots for individual features; in each call the interaction
# index is set to the plotted feature itself.
shap.initjs()
shap.dependence_plot('Age', shap_values, X_test,
                     interaction_index='Age')
#%%
shap.initjs()
shap.dependence_plot('Education-Num', shap_values, X_test,
                     interaction_index='Education-Num')
#%%
shap.initjs()
shap.dependence_plot('Relationship', shap_values, X_test,
                     interaction_index='Relationship')
#%%
shap.initjs()
shap.dependence_plot('Capital Gain', shap_values, X_test,
                     interaction_index='Capital Gain')
#%%
# Two-feature dependence plots: the SHAP values of `ind` are plotted with
# `interaction_index` as the second feature. The display frame is passed in
# both calls so the plots can use the original (non-encoded) values.
shap.initjs()
shap.dependence_plot(ind='Age', interaction_index='Capital Gain',
                     shap_values=shap_values, features=X_test,
                     display_features=X_test_disp)
#%%
# 'Relationship' is one of the encoded categorical columns, so the display
# frame is supplied here as well, matching the plot above.
shap.initjs()
shap.dependence_plot(ind='Education-Num', interaction_index='Relationship',
                     shap_values=shap_values, features=X_test,
                     display_features=X_test_disp)
#%%