def seg_sentence(filename):
    """Return the segmented words of selected chat messages in *filename*.

    The file is read as UTF-8 and scanned for records of the form::

        201Y-MM-DD HH:MM:SS <sender>
        <one line of message text>
        <blank line>

    Only dates whose year starts with ``201`` match, and only a single line
    of message text is captured per record. For each record whose sender
    field equals the name ``str`` as resolved at call time, the text is
    tokenised with ``jieba.cut`` (``cut_all=False``) and the tokens are
    joined with single spaces.

    Args:
        filename: Path of the chat log to read.

    Returns:
        One string holding every kept message's space-joined tokens, each
        followed by a single space. Empty when nothing matches or is kept.
    """
    # Raw string so that ``\d`` is not treated as an (invalid) string escape;
    # the regex engine itself interprets ``\d`` and ``\n``.
    message_pattern = re.compile(
        r"(201\d-\d{2}-\d{2}) (\d{2}:\d{2}:\d{2}) (.*)\n(.*)(?=\n\n)"
    )
    with open(filename, encoding='utf-8') as f:
        messages = message_pattern.findall(f.read())

    pieces = []
    for _date, _time, sender, text in messages:
        # NOTE(review): ``str`` is not defined in this function. Presumably
        # the module rebinds it to the sender name to keep; if it is still
        # the builtin type this comparison is never true and the function
        # always returns ''. Confirm against the rest of the file.
        if not (sender == str):
            continue
        segmented = ' '.join(jieba.cut(text, cut_all=False))
        # Skip results of length 0 or 1 (this also excludes a lone '\t',
        # which the original tested for separately).
        if len(segmented) > 1:
            pieces.append(segmented)
            pieces.append(' ')
    # Join once instead of growing a string with += inside the loop.
    return ''.join(pieces)