直接使用Huggingface的Pipeline这个神器来轻松使用Transformer处理各种NLP问题简直太方便了。
通过AutoModel来直接从checkpoint导入模型。如果用于具体的任务,例如做情感分析,我们可以使用包含 SequenceClassification 的Head的模型去加载,就可以直接得到对应分类问题的logits。
后处理把logits转化成概率值,然后把概率值跟具体的标签对应上(使用模型config中的id2label映射)。
import os
import torch

# Route Hugging Face downloads through the mirror.
# NOTE: must be set before `transformers` is imported, since the hub
# endpoint is read when the library loads.
os.environ['HF_ENDPOINT'] = 'https://hf-mirror.com'

from transformers import AutoTokenizer

# SST-2 sentiment checkpoint; the tokenizer is loaded from the same name.
checkpoint = 'distilbert-base-uncased-finetuned-sst-2-english'
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

raw_inputs = ['Today is a good day! Woo~~~',
              'How about tomorrow?']

# Encode the same batch three ways:
#   output  - raw encoding (ragged lengths)
#   output2 - padded to the longest sequence in the batch
#   output3 - padded AND truncated to at most 7 tokens
output = tokenizer(raw_inputs)
output2 = tokenizer(raw_inputs, padding=True)
output3 = tokenizer(raw_inputs, padding=True, truncation=True, max_length=7)

for encoded in (output, output2, output3):
    print(encoded)
from transformers import AutoModel

# Load the bare encoder (no task head): its forward pass returns hidden
# states only, not task logits.
model = AutoModel.from_pretrained(checkpoint)

inputs = tokenizer(raw_inputs, padding=True, truncation=True, return_tensors='pt')

# `**inputs` unpacks the tokenizer's dict into keyword arguments
# (input_ids, attention_mask, ...).
# torch.no_grad(): we are only doing inference, so skip building the
# autograd graph — saves memory and time.
with torch.no_grad():
    outputs = model(**inputs)

print(vars(outputs).keys())              # inspect which attributes the ModelOutput carries
print(outputs.last_hidden_state.shape)   # (batch, seq_len, hidden_size)
print(outputs)
from transformers import AutoModelForSequenceClassification

# Same checkpoint, but loaded with a sequence-classification head, so the
# forward pass directly yields per-class logits.
clf = AutoModelForSequenceClassification.from_pretrained(checkpoint)

# Inference only — no gradient tracking needed.
with torch.no_grad():
    outputs_clf = clf(**inputs)

print(vars(outputs_clf).keys())  # has 'logits'; 'loss' is None when no labels are passed
print(outputs_clf.logits)

# Post-processing: softmax over the class axis converts logits to probabilities.
predictions = torch.nn.functional.softmax(outputs_clf.logits, dim=-1)
print(predictions)

# Map each predicted class index to its human-readable label using the
# id2label table stored in the model config.
id2label = clf.config.id2label
for idx in torch.argmax(predictions, dim=-1):
    print(id2label[idx.item()])