LIME 2

from __future__ import print_function

import lime
import numpy as np
import sklearn
import sklearn.ensemble
import sklearn.feature_extraction.text
import sklearn.metrics

from sklearn.datasets import fetch_20newsgroups

# Inspect the data structure: list every category name in the training split.
list(fetch_20newsgroups(subset='train').target_names)

# Create the train/test datasets, restricted to the two categories that LIME
# will be run on.
categories = ['alt.atheism', 'soc.religion.christian']
newsgroups_train = fetch_20newsgroups(subset='train', categories=categories)
newsgroups_test = fetch_20newsgroups(subset='test', categories=categories)
# Short display names, listed in the same order as `categories`.
class_names = ['atheism', 'christian']

# View the first record and its integer label.
print(newsgroups_train.data[0])
print(newsgroups_train.target[0])
# The integer labels [0, 1, 2, ...] correspond to these names, in order.
newsgroups_train.target_names

# TF-IDF vectorizer; lowercase=False keeps the original casing of each token.
# The vocabulary is fitted on the training split only and then reused to
# transform the test split.
vectorizer = sklearn.feature_extraction.text.TfidfVectorizer(lowercase=False)
train_vectors = vectorizer.fit_transform(newsgroups_train.data)
test_vectors = vectorizer.transform(newsgroups_test.data)

# Fit a random forest classifier with 500 trees. The keyword is
# `n_estimators`; the misspelt `n_estimator` raises a TypeError.
clf = sklearn.ensemble.RandomForestClassifier(n_estimators=500)
clf.fit(train_vectors, newsgroups_train.target)
pred = clf.predict(test_vectors)
# Binary F1 score on the test split (displayed as the cell output in a notebook).
sklearn.metrics.f1_score(newsgroups_test.target, pred, average='binary')

LIME 上阵咯

from lime import lime_text
from sklearn.pipeline import make_pipeline

# Chain the vectorizer and the classifier so the pipeline accepts raw text,
# which is the input LIME passes to the prediction function.
c = make_pipeline(vectorizer, clf)

# Trial run: the pipeline expects a list of documents, so the single test
# document is wrapped in a one-element list.
c.predict_proba([newsgroups_test.data[0]])

from lime.lime_text import LimeTextExplainer

explainer = LimeTextExplainer(class_names=class_names)

# Explain the prediction for one test document using its 6 most influential
# words. The keyword for the feature count is `num_features`.
idx = 20
exp = explainer.explain_instance(
    newsgroups_test.data[idx], c.predict_proba, num_features=6
)
print('Document id: %d' % idx)
# The pipeline takes a list of documents, so the single document is wrapped in
# a list; [0, 1] then selects the second class column for that one document.
print('Probability(christian)=', c.predict_proba([newsgroups_test.data[idx]])[0, 1])
print('True class: %s' % class_names[newsgroups_test.target[idx]])

# Check the explanation by hand: zero out the TF-IDF weights of two words in
# the explained document and compare the classifier's second-class probability
# before and after.
original_proba = clf.predict_proba(test_vectors[idx])[0, 1]
print('Original prediction:', original_proba)

temp = test_vectors[idx].copy()  # copy so the test matrix itself is not modified
for removed_word in ('Posting', 'Host'):
    temp[0, vectorizer.vocabulary_[removed_word]] = 0

ablated_proba = clf.predict_proba(temp)[0, 1]
print('Prediction removing some features:', ablated_proba)
print('Difference:', ablated_proba - original_proba)

%matplotlib inline
fig = exp.as_pyplot_figure()
exp.show_in_notebook(test=False)
exp.save_to_file('lime.html')

最后编辑于：2018.08.14 14:58:14

©著作权归作者所有,转载或内容合作请联系作者
【社区内容提示】社区部分内容疑似由AI辅助生成，浏览时请结合常识与多方信息审慎甄别。
平台声明：文章内容（如有图片或视频亦包括在内）由作者上传并发布，文章内容仅代表作者本人观点，简书系信息发布平台，仅提供信息存储服务。

LIME 2

LIME 2

相关阅读更多精彩内容

友情链接更多精彩内容