# 基于Classifier的例子 (Example 1: training a Classifier-based model)
from flair.data import Corpus
from flair.datasets import WNUT_17, NEWSGROUPS
from typing import List
from flair.embeddings import FastTextEmbeddings, ELMoEmbeddings, TokenEmbeddings, WordEmbeddings, FlairEmbeddings, BertEmbeddings, PooledFlairEmbeddings
from flair.embeddings import DocumentRNNEmbeddings, DocumentPoolEmbeddings
# Example: train a document-level TextClassifier on the NEWSGROUPS corpus
# using fastText word embeddings fed through a bidirectional LSTM.
import flair
from flair.models import TextClassifier
from flair.trainers import ModelTrainer
from flair.visual.training_curves import Plotter

# 1. get the corpus
corpus: Corpus = NEWSGROUPS()
print(corpus)

# 2. what tag do we want to predict?
# NOTE: tag_type is not passed to the classifier below (the original left a
# `label_type=tag_type` argument commented out); it is kept for reference.
tag_type = 'class'

# 3. make the label dictionary from the corpus
label_dictionary = corpus.make_label_dictionary()
print(label_dictionary)

# 4. initialize embeddings
# Locations of alternative, locally stored models. They are intentionally NOT
# loaded here: only the fastText embeddings feed the classifier, and eagerly
# instantiating unused BERT/GloVe embeddings costs memory and start-up time
# and fails outright on machines where these paths do not exist.
news_backward = "/home/huyufeng/flair/flair/checkpoints/news-backward-0.4.1.pt"
bert_path = "/home/huyufeng/glove/uncased_L-12_H-768_A-12"

fast_text_embedding = WordEmbeddings(".flair/embeddings/en-fasttext-news-300d-1M")

# Token-level embeddings consumed by the document encoder; append further
# TokenEmbeddings instances to this list to combine them.
embeddings: List[TokenEmbeddings] = [
    fast_text_embedding,
]

# Encode each document by running its token embeddings through a
# single-layer bidirectional LSTM.
document_embeddings = DocumentRNNEmbeddings(
    embeddings,
    hidden_size=128,
    rnn_layers=1,
    bidirectional=True,
    rnn_type='LSTM',
)

# 5. initialize the text classifier
# Use flair's globally selected device instead of an unconditional .cuda(),
# which raises on hosts without a CUDA-capable GPU.
classifier: TextClassifier = TextClassifier(
    document_embeddings=document_embeddings,
    label_dictionary=label_dictionary,
).to(flair.device)

# 6. initialize trainer
trainer: ModelTrainer = ModelTrainer(classifier, corpus)

# 7. start training
# All training artefacts are written below this directory.
root = "resources/taggers/"
file_root = root + 'test/'
trainer.train(
    file_root,
    learning_rate=0.1,
    mini_batch_size=32,
    max_epochs=150,
)

# 8. plot weight traces (optional)
plotter = Plotter()
plotter.plot_weights(file_root + 'weights.txt')
# 基于 Tagging模型的例子 (Example 2: training a Tagging-based model)
from flair.data import Corpus
from flair.datasets import WNUT_17
from flair.embeddings import TokenEmbeddings, WordEmbeddings, StackedEmbeddings
from typing import List
# Worked example: train a CRF-enabled NER SequenceTagger on a downsampled
# WNUT-17 corpus with GloVe word embeddings.
from flair.models import SequenceTagger
from flair.trainers import ModelTrainer
from flair.visual.training_curves import Plotter

# --- Data -------------------------------------------------------------------
# Load WNUT-17 and downsample it with factor 0.1.
corpus: Corpus = WNUT_17().downsample(0.1)
print(corpus)

# --- Prediction target ------------------------------------------------------
# The annotation layer the tagger is trained to predict.
tag_type = 'ner'

# --- Tag inventory ----------------------------------------------------------
# Collect the tags of the chosen layer from the corpus.
tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)
print(tag_dictionary)

# --- Embeddings -------------------------------------------------------------
# Only GloVe is active. Other token embeddings (for instance
# CharacterEmbeddings(), FlairEmbeddings('news-forward') or
# FlairEmbeddings('news-backward')) can be added to this list to stack them.
embedding_types: List[TokenEmbeddings] = [WordEmbeddings('glove')]
embeddings: StackedEmbeddings = StackedEmbeddings(embeddings=embedding_types)

# --- Model ------------------------------------------------------------------
tagger: SequenceTagger = SequenceTagger(
    hidden_size=256,
    embeddings=embeddings,
    tag_dictionary=tag_dictionary,
    tag_type=tag_type,
    use_crf=True,
)

# --- Training ---------------------------------------------------------------
# Single place that names the output directory used for training and plotting.
output_dir = 'resources/taggers/example-ner'
trainer: ModelTrainer = ModelTrainer(tagger, corpus)
trainer.train(
    output_dir,
    learning_rate=0.1,
    mini_batch_size=32,
    max_epochs=150,
)

# --- Diagnostics (optional) -------------------------------------------------
# Plot the weight traces recorded in the output directory.
plotter = Plotter()
plotter.plot_weights(output_dir + '/weights.txt')