原文取自《How I generated inspirational quotes with less than 20 lines of python code》,
阿里云云栖社区的中文译文标题为《如何用简易代码自动生成经典语录》。
鉴于二者都没有给出完整的代码,故结合 jieba 分词,整合出完整代码如下:
说明:原文代码的数据集是从文件中读取的,所以命名为 dataset_file。本文为了运行和演示的方便,直接用列表存储原始数据集,但命名上仍然沿用原作者的命名方式。
import random
import jieba
# Raw corpus: one sentence per list entry. The name `dataset_file` is kept
# even though the data lives in a list rather than a file.
dataset_file = ["我喜欢吃苹果。", "你吃橘子。"]
print("\n分词前:", dataset_file)

# Segment every sentence with jieba and re-join the tokens with single
# spaces, so that a plain str.split() can recover the tokens later.
# Slice assignment keeps the update in place on the same list object.
dataset_file[:] = [
    " ".join(jieba.cut(sentence)) for sentence in dataset_file
]
print("\n分词后:", dataset_file)
# Build a first-order Markov model from the space-separated sentences.
#
# `model` maps:
#   'START' -> list of tokens that begin a sentence
#   'END'   -> list of tokens that finish a sentence
#   token   -> list of tokens observed immediately after it
# Duplicates are kept on purpose, so a transition seen more often occupies
# more slots in its list.
model = {}
for sentence in dataset_file:
    tokens = sentence.lower().split()
    if not tokens:
        # Blank entry: nothing to learn from it.
        continue

    # Record the opener unconditionally. A one-token sentence is both a
    # start and an end; registering it only as an end would make it
    # impossible to generate and could leave 'START' missing entirely.
    model.setdefault('START', []).append(tokens[0])

    # Record each adjacent pair (token -> follower). setdefault/append
    # mutates the stored list instead of copying it on every insertion.
    for current, following in zip(tokens, tokens[1:]):
        model.setdefault(current, []).append(following)

    model.setdefault('END', []).append(tokens[-1])
print("\n模型:", model)
# Random-walk the model to produce one sentence: seed with the sentence
# openers, then keep following transitions until the most recently emitted
# token is one that has been seen at the end of a sentence.
generated = []
words = model['START']
while True:
    generated.append(random.choice(words))
    if generated[-1] in model['END']:
        break
    # Candidates for the next step are the followers of the latest token.
    words = model[generated[-1]]
# Tokens are concatenated without separators (Chinese text has no spaces).
print("\n生成的一个结果:" + "".join(generated))