python实现千千音乐下载
1.目的
对于千千音乐的首页的歌单进行爬取,创建以歌单为名字的文件夹并且下载歌单内的所有歌曲保存至本地
2.功能介绍
不要加入多进程或多线程,以免增加千千音乐服务器的负担;只做类人爬取,仅用于技术练习
爬取的内容请不要用作商业用途
import os
import time
import json
import requests
from lxml import etree
class QianQianMusic:
    """Search Qianqian Music (music.taihe.com) for a song and download one hit.

    Typical use is ``QianQianMusic(keyword).main()``: the search-result page
    is scraped for songs, the user picks one by its 1-based number on the
    console, and the audio is saved as ``./music/<title>.mp3``.
    """

    # Seconds to wait on any single HTTP request instead of hanging forever.
    REQUEST_TIMEOUT = 10

    # Path separators plus the characters Windows rejects in file names.
    _UNSAFE_FILENAME_CHARS = '\\/:*?"<>|'

    def __init__(self, name):
        """Prepare the search URL, API URL template and request headers.

        Args:
            name: Search keyword (song title); it is percent-encoded before
                being placed in the query string.
        """
        # Local import so the class does not depend on a file-level import.
        from urllib.parse import quote

        # Encode the keyword so characters such as '&', '#' or spaces cannot
        # truncate or corrupt the query string.
        self.url = 'http://music.taihe.com/search?key={name}'.format(
            name=quote(str(name), safe=''))
        # Adjacent literals instead of a backslash continuation: a backslash
        # inside the quotes would embed the next line's indentation in the URL.
        self.api_url_temp = (
            'http://musicapi.taihe.com/v1/restserver/ting'
            '?method=baidu.ting.song.playAAC&format=jsonp'
            '&songid={song_id}&_={t}'
        )
        self.headers = {
            'User-Agent': (
                'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                'AppleWebKit/537.36 (KHTML, like Gecko) '
                'Chrome/77.0.3865.90 Safari/537.36'
            ),
        }
        # Millisecond timestamp substituted for the `_` query parameter.
        self.t = int(time.time() * 1000)

    def get_music_info(self):
        """Scrape the search-result page.

        Returns:
            A list of ``(song_id, title, author)`` tuples, one per result row
            that carries a link inside its ``music-icon-hook`` span. ``author``
            is ``''`` when the row lists no author.
        """
        response = requests.get(
            self.url, headers=self.headers, timeout=self.REQUEST_TIMEOUT)
        html = etree.HTML(response.content.decode())
        rows = html.xpath(
            '//div[@class="search-song-list song-list song-list-hook"]/ul/li')
        music_infos = []
        for li in rows:
            # Rows without a link in the icon span are skipped.
            if not li.xpath('.//span[@class="music-icon-hook"]/a/@href'):
                continue
            musicicon = li.xpath(
                './/span[@class="music-icon-hook"]/@data-musicicon')[0]
            song_id = json.loads(musicicon)['id']
            title = ''.join(
                li.xpath('.//span[@class="song-title"]/a//text()'))
            authors = li.xpath('.//span[@class="author_list"]/a/text()')
            # A row without an author must not abort the whole listing.
            author = authors[0] if authors else ''
            music_infos.append((song_id, title, author))
        return music_infos

    @staticmethod
    def _resolve_choice(music_infos, raw_choice):
        """Map the user's typed 1-based number to an entry of *music_infos*.

        Raises:
            SystemExit: if the text is not a decimal number or is out of range.
        """
        choice = raw_choice.strip()
        # isdecimal() rather than isdigit(): isdigit() also accepts characters
        # such as superscript digits, on which int() raises ValueError.
        if not choice.isdecimal():
            raise SystemExit('输入歌曲序号有误')
        number = int(choice)
        if not 0 < number <= len(music_infos):
            raise SystemExit('输入歌曲序号不存在')
        return music_infos[number - 1]

    def choose_music(self, music_infos):
        """List the songs on the console and return the one the user picks.

        Args:
            music_infos: Sequence of ``(song_id, title, author)`` tuples.

        Returns:
            The selected tuple.

        Raises:
            SystemExit: if there is nothing to choose from or the typed
                number is invalid.
        """
        if not music_infos:
            # Prompting for a number would be pointless with an empty list.
            raise SystemExit('没有找到相关歌曲')
        # enumerate() numbers each row by position; list.index() would give
        # duplicate entries the same number and rescan the list every time.
        for number, info in enumerate(music_infos, start=1):
            print(f'歌曲序号: {number}\t歌曲: {info[1]}\t歌手: {info[2]}')
        return self._resolve_choice(music_infos, input('请输入你要下载歌曲序号:'))

    @classmethod
    def _safe_file_name(cls, music_name):
        """Return *music_name* with file-system-hostile characters replaced."""
        return ''.join(
            '_' if ch in cls._UNSAFE_FILENAME_CHARS else ch
            for ch in music_name.strip()
        )

    def mkdir(self, music_name):
        """Ensure ``./music`` exists and return the target ``.mp3`` path.

        Args:
            music_name: Song title; unsafe characters are replaced with ``_``
                so the title cannot escape the directory or be rejected by
                the file system.

        Returns:
            Path of the file to write, inside the ``music`` directory.
        """
        music_dir = os.path.join(os.curdir, 'music')
        # exist_ok avoids the check-then-create race of isdir() + mkdir().
        os.makedirs(music_dir, exist_ok=True)
        file_name = self._safe_file_name(music_name) + '.mp3'
        return os.path.join(music_dir, file_name)

    def get_music_url(self, music_song_id):
        """Ask the song API for the download link of *music_song_id*.

        Returns:
            The value at ``['bitrate']['file_link']`` of the decoded response.

        NOTE(review): the URL requests ``format=jsonp`` yet the body is parsed
        as plain JSON -- confirm against the live API.
        """
        api_url = self.api_url_temp.format(song_id=music_song_id, t=self.t)
        data = requests.get(
            api_url, headers=self.headers, timeout=self.REQUEST_TIMEOUT).json()
        return data['bitrate']['file_link']

    def download(self, music_url, music_name, file_path):
        """Stream *music_url* to *file_path*, printing progress messages.

        Args:
            music_url: Direct link to the audio file.
            music_name: Title used only in the console messages.
            file_path: Destination path, opened in binary write mode.

        Raises:
            requests.HTTPError: if the server answers with an error status.
        """
        # stream=True keeps the body out of memory until it is iterated; the
        # context manager releases the connection afterwards.
        with requests.get(music_url, headers=self.headers, stream=True,
                          timeout=self.REQUEST_TIMEOUT) as response:
            # Fail before creating the file so an error page is never saved
            # under an .mp3 name.
            response.raise_for_status()
            with open(file_path, 'wb') as f:
                print(f'正在下载歌曲:{music_name}')
                for chunk in response.iter_content(1024):
                    f.write(chunk)
        print(f'歌曲下载完成:{music_name}')

    def main(self):
        """Run the full flow: search, choose, resolve the link, download."""
        music_infos = self.get_music_info()
        chosen = self.choose_music(music_infos)
        music_song_id, music_name = chosen[0], chosen[1]
        music_url = self.get_music_url(music_song_id)
        file_path = self.mkdir(music_name)
        self.download(music_url, music_name, file_path)
if __name__ == "__main__":
    # Script entry point: read the search keyword from the console, then run
    # the search / choose / download flow for it.
    keyword = input('请输入你要下载的歌曲:')
    QianQianMusic(keyword).main()
以上就是全部的代码