2020-05-03 爬取网易云音乐,名称和文件下载

今天时间太晚了,先把代码放这,有些地方不得不说很low,没办法,我的水平目前只到这里

其实我也想用 def 定义函数,但还不太熟练,怕报错,就没敢用。
里面用到的 requests 库、XPath 等内容,我还有待进一步学习和提高。

# Download songs from a NetEase Cloud Music playlist
import  requests
import re
# Playlist page whose HTML is scraped for song links and names.
page_url = 'https://music.163.com//playlist?id=3113923112'
# Headers sent with every request made by this script.
headers = {
    'Referer': 'https://music.163.com/',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3756.400 QQBrowser/10.5.4039.400',
}

# Without a timeout requests waits indefinitely for an unresponsive server,
# which would hang the whole script.
page_res = requests.get(page_url, headers=headers, timeout=10)
print(page_res.status_code)
# Raw HTML of the playlist page; the id and name extraction steps parse it.
Song_id_name = page_res.text
# Collect the song ids from the playlist page and build one download URL per id.
from lxml import etree

dom = etree.HTML(Song_id_name)
# Every anchor whose href contains "song?" is taken as a song link.
song_ids = dom.xpath('//a[contains(@href,"song?")]/@href')
head_url_basis = 'https://link.hhtjim.com/163/'
back_url_basis = '.mp3'
box_songid = []
for song_href in song_ids:
    # Keep only what follows "id=". str.strip('/song?id=') removes a *set of
    # characters* from both ends rather than this prefix, so it only worked
    # by accident for purely numeric ids.
    song_id = song_href.split('id=', 1)[-1]
    # Entries containing "$" are skipped (presumably unrendered template
    # placeholders in the page -- TODO confirm).
    if '$' in song_id:
        continue
    box_songid.append(head_url_basis + song_id + back_url_basis)
# NOTE(review): the second entry is removed unconditionally, matching the
# removal of index 1 from the name list; the reason is specific to this
# playlist page and is not visible in the code -- confirm before reusing.
del box_songid[1]


# Collect the song names from the playlist page.
# A candidate is any text found between `">` and `</a></li>` in the HTML.
song_names = re.findall(r'">(.*?)</a></li>', Song_id_name)
# The first candidate is overwritten with a fixed title.
song_names[0] = 'Maps'
# Candidates containing any of these markers are discarded.
_skip_markers = ('$', '享', 'iPhone', 'PC', 'Android')
box_songname = [
    candidate
    for candidate in song_names
    if not any(marker in candidate for marker in _skip_markers)
]
# Remove the last two remaining entries, then the entry at index 1.
del box_songname[-1]
del box_songname[-1]
del box_songname[1]

def validateTitle(title):
    r"""Return *title* with every ``/ \ : * ? " < > |`` replaced by ``_``.

    All other characters are left untouched. The script uses the result as
    the file name of a downloaded song.
    """
    forbidden = re.compile(r"[\/\\\:\*\?\"\<\>\|]")
    return forbidden.sub("_", title)
# Sanitised version of every song name, kept in the same order as the names.
rep_name = [validateTitle(raw_name) for raw_name in box_songname]


# Download every song and save it under the name found at the same position
# in the name list.
import os

# open() does not create missing directories, so make sure the target exists.
os.makedirs('wangyiyun', exist_ok=True)

# zip() silently stops at the shorter list, so report a count mismatch
# instead of quietly dropping songs.
if len(rep_name) != len(box_songid):
    print('warning: {} names but {} download links; unmatched entries are ignored'.format(
        len(rep_name), len(box_songid)))

for num, (real_song_name, real_song_id) in enumerate(zip(rep_name, box_songid), start=1):
    # Announce the download before the blocking request starts.
    print('loading the {}th song,please wait for a moment......'.format(num))
    try:
        # The timeout keeps one unresponsive download from stalling the run.
        song_res = requests.get(real_song_id, headers=headers, timeout=30)
    except requests.RequestException as err:
        # A single failed download should not abort the remaining ones.
        print('skipping {}: {}'.format(real_song_name, err))
        continue
    if not song_res.ok:
        # Do not store an error page under an .mp3 file name.
        print('skipping {}: HTTP {}'.format(real_song_name, song_res.status_code))
        continue
    # The file extension is whatever follows the last dot of the download URL.
    song_call = 'wangyiyun/{0}.{1}'.format(real_song_name, real_song_id.split('.')[-1])
    with open(song_call, 'wb') as f:
        f.write(song_res.content)


# Debug aid: check that the number of ids matches the number of names
# for a in rep_name:
#     print(a)
#
# for b in box_songid:
#     print(b)
狗命要紧,赶紧睡了,大佬有好的建议请留言,感谢您八辈祖宗!给大佬端茶!
©著作权归作者所有,转载或内容合作请联系作者
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。