# Bilibili video pages:
# https://www.bilibili.com/video/BV1AP4y1J7MA?spm_id_from=333.337.search-card.all.click
# https://www.bilibili.com/video/BV1MY4y1i7Ea/?spm_id_from=autoNext
import jieba
from pyecharts.charts import WordCloud
import requests
import re
import wordcloud
import imageio
# Outline (mask) image for the word cloud; it is passed as `mask=` to
# wordcloud.WordCloud further down.
img = imageio.imread('hudie.png')
def fetch_danmaku(
    url='https://api.bilibili.com/x/v2/dm/web/history/seg.so?type=1&oid=569263349&date=2022-05-04',
    out_path='洛锦桑弹幕.txt',
    cookie=None,
    timeout=10,
):
    """Download danmaku text from Bilibili and append it to a text file.

    This step is NOT executed when the script runs; call it manually once to
    (re)create the text file that the word-cloud steps below read.

    Args:
        url: Danmaku-history endpoint to request (video ``oid`` and ``date``
            are encoded in the query string).
        out_path: UTF-8 text file that receives one comment per line
            (opened in append mode).
        cookie: Value for the ``cookie`` request header. When ``None`` it is
            read from the ``BILIBILI_COOKIE`` environment variable; when that
            is empty too, no cookie header is sent.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The number of lines appended to ``out_path``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    import os

    # SECURITY: a real logged-in session cookie (SESSDATA / bili_jct) used to
    # be hard-coded here. Credentials must never live in source control; the
    # leaked session should be treated as compromised and invalidated.
    if cookie is None:
        cookie = os.environ.get('BILIBILI_COOKIE', '')

    headers = {
        'user-agent': 'Mozilla/5.0 (X11; Linux aarch64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.188 Safari/537.36 CrKey/1.54.250320',
    }
    if cookie:
        headers['cookie'] = cookie

    response = requests.get(url=url, headers=headers, timeout=timeout)
    response.raise_for_status()
    response.encoding = response.apparent_encoding

    # Heuristic extraction: take whatever sits between ':' and '@' in the
    # decoded payload, then drop the first character of each match.
    # NOTE(review): presumably the payload is a binary format and the dropped
    # character is a length/marker byte - confirm against the API.
    content_list = re.findall(r':(.*?)@', response.text)
    if not content_list:
        return 0

    # Open the output file once instead of once per comment.
    with open(out_path, mode='a', encoding='utf-8') as f:
        f.writelines(item[1:] + '\n' for item in content_list)
    return len(content_list)
# 1. Read the saved danmaku text. The context manager closes the file handle
#    as soon as the content has been read.
with open('洛锦桑弹幕.txt', encoding='utf-8') as f:
    txt = f.read()
print(txt)

# 2. Split the text into words with jieba.
txt_list = jieba.lcut(txt)

# Join the word list into one space-separated string, which is the form
# passed to WordCloud.generate() below.
string_txt = ' '.join(txt_list)
print(string_txt)
# 3. Configure the word cloud.
wc = wordcloud.WordCloud(
    # NOTE: per the wordcloud documentation, width/height are ignored when a
    # mask is supplied; the mask's shape is used instead.
    width=1000,
    height=700,
    background_color='white',
    font_path='msyh.ttc',  # a font with CJK glyphs is needed for Chinese text
    scale=15,
    mask=img,
    stopwords={'了', '的'},
    # Must be a number: the library compares it numerically, so the string
    # '5' would fail when the contour is drawn.
    contour_width=5,
    contour_color='red',
)

# 4. Feed in the text (a single space-separated string).
wc.generate(string_txt)

# 5. Write the rendered image to disk.
wc.to_file('666.png')