# 本人也是初学 Python,正在学习爬虫,不喜勿喷。本文仅作纯技术交流,如被用于非法用途,与本人无关。有些地方的注释可能不够详细,往后有时间再写得详细一点,我也会继续完善这个脚本。希望这些微薄的经验能给你们一些帮助。
#python 3.8.2
#下载网易云音乐MV
import json
import os
import re
import sys
import urllib
import urllib.parse

import requests
def inputInt(content='请输入MV_ID():'):
    """Prompt on stdin for a numeric MV id, allowing up to five attempts.

    Args:
        content: Prompt text passed to ``input()``.

    Returns:
        The entered id as a string of ASCII digits, with surrounding
        whitespace removed.

    Raises:
        SystemExit: If five consecutive entries are not valid ids.
    """
    max_attempts = 5
    for _ in range(max_attempts):
        answer = str(input(content)).strip()
        # Accept digits only. The previous "not purely alphabetic" check let
        # values such as "12ab" or an empty line through to the request URL.
        if answer.isascii() and answer.isdigit():
            return answer
    # Every attempt was invalid: give up, as the original loop did.
    sys.exit()
# Directory the downloaded MV files are written to (Windows path).
SAVE_DIR = "e:\\music\\mv"

# Seconds to wait on each HTTP request before giving up.
REQUEST_TIMEOUT = 30

# Captures the body of the page's ld+json <script> tag.
# re.S (DOTALL) lets "." match newlines as well.
_LD_JSON_PATTERN = re.compile(
    r'<script type="application/ld\+json">(.*?)</script>', re.S)

# Patterns that locate the MV download address in the page, tried in order.
# Group 1 is everything after the literal "http" up to and including the
# terminating marker; group 2 is that marker alone.
# NOTE: anything the third pattern matches is also matched by the first
# (it is the first pattern prefixed with "hurl="); it is kept as a fallback
# only for fidelity with the original script.
_MV_URL_PATTERNS = (
    re.compile(r'&murl=http(.*?(:?&auto))(?!\.)'),
    re.compile(r'hurl=http(.*?(:?&murl))(?!\.)'),
    re.compile(r'hurl=&murl=http(.*?(:?&auto))(?!\.)'),
)

# Characters that cannot appear in a Windows file name.
_UNSAFE_NAME_CHARS = re.compile(r'[\\/:*?"<>|]')


def _extract_title(page_text, fallback):
    """Return the 'title' field of the page's ld+json block, or *fallback*.

    *fallback* is returned when the block is missing, is not valid JSON, or
    has no 'title' key.
    """
    try:
        payload = _LD_JSON_PATTERN.findall(page_text)[0].strip()
        title = json.loads(payload)['title']
    except (IndexError, KeyError, TypeError, ValueError):
        return fallback
    return str(title)


def _extract_mv_url(page_text):
    """Return the first MV download URL found in *page_text*, or None.

    The address is percent-encoded inside the page; the trailing marker
    ("&auto" / "&murl") is removed and the remainder is decoded.
    """
    for candidate in _MV_URL_PATTERNS:
        match = candidate.search(page_text)
        if match:
            encoded_tail = match.group(1)[:-len(match.group(2))]
            return "http" + urllib.parse.unquote(encoded_tail)
    return None


num = inputInt()
url = 'http://music.163.com/mv?id=' + str(num)
headers = {
    'User-Agent': 'Mozilla/5.0(Windows NT 6.3;WOW64) AppleWebKit/537.36(KHTML, like Gecko) Chrome/46.0.0.2490.80 Safari/537.36',
    'Referer': 'http://93.174.95.27',
}
req = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
req.encoding = 'utf-8'

# Fall back to the MV id so that later messages always have a name to show.
file_name = _extract_title(req.text, str(num))

url4 = _extract_mv_url(req.text)
if url4 is None:
    # No download address in the page: report it and stop.
    print(file_name + "因版权问题,无法下载,即将退出程序\n")
    sys.exit()

# The title is used as a file name, so replace characters Windows rejects.
safe_name = _UNSAFE_NAME_CHARS.sub('_', file_name).strip() or str(num)
target_path = os.path.join(SAVE_DIR, safe_name + ".mp4")

try:
    req2 = requests.get(url4, headers=headers, timeout=REQUEST_TIMEOUT)
    # Do not save an HTTP error page as if it were the video.
    req2.raise_for_status()
    os.makedirs(SAVE_DIR, exist_ok=True)
    with open(target_path, 'wb') as f:
        f.write(req2.content)
except (requests.RequestException, OSError) as err:
    # Previously a bare "except:" swallowed this failure without any output.
    print(file_name + "下载失败:" + str(err) + "\n")
    sys.exit(1)

print(" \n %s *已成功下载,尽请聆听!\n" % (file_name))