本项目可以根据网易云音乐任意歌单的 ID,抓取歌单中所有歌曲的信息及歌词,并根据歌词中的词频生成词云图片。项目还会将歌曲信息和歌词保存到本地数据库,详细实现见代码。
GitHub 地址:lyricWordCloud
词云图
1.根据歌单ID 获取歌单中歌曲列表信息
def get163SongList(song_url,headers,timeout=10):
    """Fetch the track list of a playlist.

    Args:
        song_url: URL of the playlist endpoint. The response must be JSON
            containing a ``result`` object with a ``tracks`` entry.
        headers: HTTP headers sent with the GET request.
        timeout: Seconds to wait for the server before giving up.
            Defaults to 10.

    Returns:
        The value stored at ``['result']['tracks']`` in the JSON response.

    Raises:
        requests.RequestException: If the request fails or times out.
        KeyError: If the response JSON lacks ``result`` or ``tracks``.
    """
    # Without a timeout a stalled connection would block the scraper forever.
    res = requests.get(song_url, headers=headers, timeout=timeout)
    return res.json()['result']['tracks']
2.获取每首歌歌词
def getSongLyric(headers,lyric_url,timeout=10):
    """Fetch one song's lyric and strip its timestamp tags.

    Args:
        headers: HTTP headers sent with the GET request.
        lyric_url: URL of the lyric endpoint. The lyric text is read from
            ``['lrc']['lyric']`` in the JSON response.
        timeout: Seconds to wait for the server before giving up.
            Defaults to 10.

    Returns:
        The lyric text with ``[mm:ss]`` / ``[mm:ss.xx]`` style time tags
        removed, or ``''`` when the response carries no lyric.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    # Without a timeout a stalled connection would block the scraper forever.
    res = requests.get(lyric_url, headers=headers, timeout=timeout)
    payload = res.json()  # parse the body once instead of on every access
    if 'lrc' not in payload:
        return ''
    # Tolerate an 'lrc' entry whose 'lyric' field is missing or null.
    lyric = (payload['lrc'] or {}).get('lyric') or ''
    # Remove only the time tags. The previous pattern deleted every digit,
    # colon, dot and bracket, which also mangled the lyric text itself.
    return re.sub(r'\[\d+:\d+(?:[.:]\d+)?\]', '', lyric)
3.根据词频 生成词云
print('根据词频,开始生成词云!')

# Remove the "作词" / "作曲" credit labels from the text in `f` before
# segmentation.
f2 = f
for credit_label in ('作词', '作曲'):
    f2 = f2.replace(credit_label, '')

# Segment the text with jieba (cut_all disabled, HMM enabled) and join the
# tokens with spaces for WordCloud.generate.
tokens = jieba.cut(f2, cut_all=False, HMM=True)
cut_text = " ".join(tokens)

# No mask image is passed, so the cloud is drawn on a plain canvas.
# NOTE(review): "aaa.ttf" is presumably a font with CJK glyphs - confirm.
wc_options = dict(
    font_path="aaa.ttf",
    max_words=100,
    width=2000,
    height=1200,
    margin=2,
)
wc = WordCloud(**wc_options)
wordcloud = wc.generate(cut_text)

# Save the rendered image next to this script.
output_path = os.path.join(os.path.dirname(__file__), "h11.jpg")
wordcloud.to_file(output_path)

# Display the result in a matplotlib window with the axes hidden.
print('打开词云图片')
plt.imshow(wordcloud)
plt.axis("off")
plt.show()
所用到的模块(注意:上面的代码还调用了 requests,但下面的列表中没有,运行前需另外 import requests)
from bs4 import BeautifulSoup
import sqlite3
import sys
import re
import os
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import jieba
from PIL import Image
import numpy as np
效果如下
GitHub 地址:lyricWordCloud