# Python 爬取QQ音乐,并将结果存入Excel——以爬取周杰伦的歌曲相关信息为例,欢迎留言、评论、交流。
# Standard library first, then the third-party modules the script relies on.
import re

import openpyxl
import requests
from bs4 import BeautifulSoup
# Create a new workbook and collect the results in its active sheet.
wb = openpyxl.Workbook()
sheet = wb.active
sheet.title = 'joymusic200'
# Header row in A1..E1: song name, album, duration, link, lyric.
for column, heading in zip('ABCDE', ('歌名', '专辑', '时间', '链接', '歌词')):
    sheet[column + '1'] = heading
# Request headers that disguise the script as an ordinary browser session.
headers = {
    # Where the request originates from; not strictly required in this
    # example, included for demonstration only.
    'origin': 'https://y.qq.com',
    # Referring page; carries more detail than "origin". Also not strictly
    # required in this example, included for demonstration only.
    'referer': 'https://y.qq.com/n/yqq/song/004Z8Ihr0JIu5s.html',
    # Identifies the device and browser the request claims to come from.
    'user-agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/71.0.3578.98 Safari/537.36'
    ),
}
# Search endpoint; each request returns one page of results for the keyword.
url = 'https://c.y.qq.com/soso/fcgi-bin/client_search_cp'
# Lyric endpoint; queried once per song with the song's 'id' field.
url1 = 'https://c.y.qq.com/lyric/fcgi-bin/fcg_query_lyric_yqq.fcg'
# Seconds to wait for a response, so a stalled connection cannot hang the run.
REQUEST_TIMEOUT = 10
# Characters stripped from the lyric text: ASCII letters, digits and a few
# punctuation marks. Compiled once instead of being re-parsed for every song;
# the raw string avoids the invalid-escape warnings of the plain literal.
_LYRIC_NOISE = re.compile(r"[A-Za-z0-9\!\%\[\]\,\。\&\#\;]")


def _fetch_lyric(music_id):
    """Return the raw lyric text for *music_id*, or '' when none is available.

    An empty string is returned when the response body is not valid JSON,
    when the decoded JSON has no 'lyric' entry, or when that entry is not a
    string. Network errors raised by ``requests.get`` propagate to the caller.
    """
    params1 = {
        'nobase64': '1',
        'musicid': music_id,
        '-': 'jsonp1',
        'g_tk': '5381',
        'loginUin': '0',
        'hostUin': '0',
        'format': 'json',
        'inCharset': 'utf8',
        'outCharset': 'utf-8',
        'notice': '0',
        'platform': 'yqq.json',
        'needNewCode': '0',
    }
    res = requests.get(url1, params=params1, headers=headers,
                       timeout=REQUEST_TIMEOUT)
    try:
        json_lyric = res.json()
    except ValueError:
        # Body was not JSON; treat the song as having no lyric.
        return ''
    try:
        str_lyric = json_lyric['lyric']
    except (KeyError, TypeError):
        return ''
    return str_lyric if isinstance(str_lyric, str) else ''


def _clean_lyric(str_lyric):
    """Turn spaces into line breaks and strip the noise characters."""
    # NOTE(review): the replaced text is a single space as found in the
    # source; the author may have meant an HTML entity such as '&#10;' that
    # was lost when the code was pasted -- confirm against real responses.
    return _LYRIC_NOISE.sub("", str_lyric.replace(' ', '\n'))


try:
    # Walk through up to 100 result pages of 20 songs each.
    for x in range(100):
        params = {
            'ct': '24',
            'qqmusic_ver': '1298',
            'new_json': '1',
            'remoteplace': 'sizer.yqq.song_next',
            'searchid': '64405487069162918',
            't': '0',
            'aggr': '1',
            'cr': '1',
            'catZhida': '1',
            'lossless': '0',
            'flag_qc': '0',
            'p': str(x + 1),  # page numbers sent to the API start at 1
            'n': '20',
            'w': '周杰伦',
            'g_tk': '5381',
            'loginUin': '0',
            'hostUin': '0',
            'format': 'json',
            'inCharset': 'utf8',
            'outCharset': 'utf-8',
            'notice': '0',
            'platform': 'yqq.json',
            'needNewCode': '0',
        }
        res_music = requests.get(url, params=params, headers=headers,
                                 timeout=REQUEST_TIMEOUT)
        json_music = res_music.json()
        # Drill down through the nested dictionaries to the list of songs.
        list_music = json_music['data']['song']['list']
        if not list_music:
            # An empty page means the results are exhausted; stop requesting.
            break
        for music in list_music:
            name = music['name']
            album = music['album']['name']
            # 'interval' is written to the sheet with the suffix '秒' (seconds).
            duration = str(music['interval']) + '秒'
            link = 'https://y.qq.com/n/yqq/song/' + music['mid'] + '.html'
            lyric = _clean_lyric(_fetch_lyric(music['id']))
            sheet.append([name, album, duration, link, lyric])
finally:
    # Save whatever was collected, even if a request failed part-way through.
    wb.save('joy_music.xlsx')