网易云歌曲爬取(包含Vip歌曲)
歌单批量下载
仅能下载非VIP歌曲,相当于批量点击下载按钮,没有什么难度。运行前需要自己在py文件所在目录下新建一个名为music的文件夹。
# -*- coding:utf-8 -*-
import requests
from bs4 import BeautifulSoup
import os

# Browser-like request headers; per the original author's note, the playlist
# page source cannot be fetched without them.
header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.80 Safari/537.36',
    'Referer': 'http://93.174.95.27',
}

# Playlist URL. The '#' found in the address-bar URL must be removed: this is
# the real URL of the inner frame that carries the song data.
# Other charts that can be used instead:
#   http://music.163.com/playlist?id=2884035    original songs chart
#   http://music.163.com/playlist?id=19723756   rising chart
#   http://music.163.com/playlist?id=3778678    hot songs chart
#   http://music.163.com/playlist?id=3779629    new songs chart
link = "https://music.163.com/playlist?id=3779629"

MAX_SONGS = 100        # download at most this many songs from the playlist
SAVE_DIR = 'music'     # target directory, created on demand
REQUEST_TIMEOUT = 30   # seconds; avoids hanging forever on a stalled request

# Characters that are not allowed in file names on common file systems.
_UNSAFE_FILENAME_CHARS = str.maketrans({ch: '_' for ch in '\\/:*?"<>|'})


def _safe_filename(name, fallback):
    """Return *name* with unsafe characters replaced, or *fallback* if empty."""
    cleaned = name.translate(_UNSAFE_FILENAME_CHARS).strip().rstrip('.')
    return cleaned or fallback


def fetch_songs(playlist_url, limit=MAX_SONGS):
    """Return up to *limit* ``(song_id, song_name)`` pairs from a playlist page.

    The song links are read from the ``<a>`` tags inside the ``<ul>`` whose
    class is ``f-hide``.

    Raises:
        RuntimeError: if the page does not contain that list.
    """
    page = requests.get(playlist_url, headers=header, timeout=REQUEST_TIMEOUT)
    soup = BeautifulSoup(page.content, "html.parser")
    song_list = soup.find("ul", class_="f-hide")
    if song_list is None:
        raise RuntimeError("no <ul class='f-hide'> song list found in " + playlist_url)
    # The original code dropped the first 9 characters of each href to get the
    # id — presumably the hrefs look like '/song?id=<id>'; verify against the page.
    return [(a['href'][9:], a.text) for a in song_list.select("a", limit=limit)]


def download_playlist(playlist_url, save_dir=SAVE_DIR, limit=MAX_SONGS):
    """Download every song of the playlist into *save_dir* as ``<name>.mp3``."""
    os.makedirs(save_dir, exist_ok=True)
    for index, (song_id, song_name) in enumerate(fetch_songs(playlist_url, limit), start=1):
        song_down_link = "http://music.163.com/song/media/outer/url?id=" + song_id + ".mp3"
        print("第 " + str(index) + " 首歌曲:" + song_name)
        print("正在下载...")
        response = requests.get(song_down_link, headers=header, timeout=REQUEST_TIMEOUT)
        # os.path.join keeps the path portable (the original hard-coded 'music\\').
        target = os.path.join(save_dir, _safe_filename(song_name, song_id) + ".mp3")
        with open(target, 'wb') as f:
            f.write(response.content)
        print("下载完成!\n\r")


if __name__ == '__main__':
    download_playlist(link)
运行结果:
歌曲会下载到当前目录下的music文件夹里,实测可以正常播放。
vip音乐下载
重点来了,vip音乐都有加密,而且加密方式经常变,截止2021/05/25,代码好使
其中Crypto库需要安装pycryptodome(pip install pycryptodome;旧的pycrypto不包含代码中用到的Crypto.Util.Padding),导入时模块名仍然是Crypto。
# -*- coding: utf-8 -*-
import requests
from Crypto.Cipher import AES, PKCS1_OAEP
from Crypto.Util.Padding import pad
from Crypto.PublicKey import RSA
from Crypto.Random import get_random_bytes
import random
import base64
import json
import os
class EncryptText:
    """Build the encrypted ``params`` / ``encSecKey`` form fields for a request.

    The plaintext is AES-CBC encrypted twice (first with a fixed nonce, then
    with a random 16-character key) and the random key is RSA encrypted with
    the fixed public exponent and modulus stored on the instance.
    """

    def __init__(self):
        # Alphabet the random AES key is drawn from.
        self.character = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'
        # Fixed AES-CBC initialisation vector (used as text, then encoded).
        self.iv = '0102030405060708'
        # RSA public exponent and modulus, both as hex strings.
        self.public_key = '010001'
        self.modulus = '00e0b509f6259df8642dbc35662901477df22677ec152b' \
                       '5ff68ace615bb7b725152b3ab17a876aea8a5aa76d2e417' \
                       '629ec4ee341f56135fccf695280104e0312ecbda92557c93' \
                       '870114af6c9d05c4f7f0c3685b7a46bee255932575cce10b' \
                       '424d813cfe4875d3e82047b97ddef52741d546b8e289dc69' \
                       '35b3ece0462db0a22b8e7'
        # Fixed key for the first AES pass.
        self.nonce = '0CoJUm6Qyw8W8jud'

    def create16RandomBytes(self):
        """Return a random 16-character string (the original's "function a").

        Characters are sampled from ``self.character`` without replacement,
        so no character repeats.
        """
        return ''.join(random.sample(self.character, 16))

    def AESEncrypt(self, clear_text, key):
        """AES-CBC encrypt *clear_text* (the original's "function b").

        :param clear_text: text to encrypt; it is encoded and padded to the
            AES block size first
        :param key: AES key as text (encoded to bytes before use)
        :return: the ciphertext, base64-encoded and decoded to ``str``
        """
        padded = pad(data_to_pad=clear_text.encode(), block_size=AES.block_size)
        aes = AES.new(key=key.encode(), mode=AES.MODE_CBC, iv=self.iv.encode())
        cipher_text = aes.encrypt(plaintext=padded)
        # bytes -> str so the value can be fed into a second pass / a form field
        return base64.b64encode(cipher_text).decode()

    def RSAEncrypt(self, i, e, n):
        """Unpadded RSA of the reversed string *i* (the original's "function c").

        Computes ``C = M^e mod n`` where ``M`` is the reversed text read as a
        big-endian integer.

        :param i: text to encrypt (the random AES key)
        :param e: public exponent as a hex string
        :param n: modulus as a hex string
        :return: lowercase hex ciphertext, left-padded with zeros to the hex
            width of the modulus
        """
        modulus = int(n, 16)
        message = int(i[::-1].encode().hex(), 16)
        num = pow(message, int(e, 16), modulus)
        # format(num, 'x') alone drops leading zeros and would yield a
        # variable-length key; pad to the modulus width for a stable length.
        # NOTE(review): the endpoint presumably expects the fixed-width form — confirm.
        width = len(format(modulus, 'x'))
        return format(num, 'x').zfill(width)

    def resultEncrypt(self, input_text):
        """Encrypt *input_text* into request form data (the original's "function d").

        :param input_text: serialized request payload
        :return: dict with ``params`` (double-AES ciphertext) and
            ``encSecKey`` (RSA-encrypted random key)
        """
        i = self.create16RandomBytes()
        encText = self.AESEncrypt(input_text, self.nonce)
        encText = self.AESEncrypt(encText, i)
        encSecKey = self.RSAEncrypt(i, self.public_key, self.modulus)
        return {
            'params': encText,
            'encSecKey': encSecKey,
        }
class WangYiYunMusic(object):
    """Fetch search results and download songs using pre-encrypted form data."""

    # Seconds to wait for a response before giving up.
    REQUEST_TIMEOUT = 30
    # Characters that are not allowed in file names on common file systems.
    _UNSAFE_FILENAME_CHARS = str.maketrans({ch: '_' for ch in '\\/:*?"<>|'})

    def __init__(self):
        self.headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                                      'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'}

    def get_html(self, url, method='GET', from_data=None):
        """Request *url* and return the response body as text.

        :param url: address to request
        :param method: ``'GET'`` issues a GET; any other value issues a POST
        :param from_data: form data sent with a POST
        :return: the response text, or the sentinel string ``'请求异常'`` when
            the request fails (the error is printed)
        """
        try:
            if method == 'GET':
                response = requests.get(url, headers=self.headers,
                                        timeout=self.REQUEST_TIMEOUT)
            else:
                response = requests.post(url, from_data, headers=self.headers,
                                         timeout=self.REQUEST_TIMEOUT)
            response.raise_for_status()
            response.encoding = 'utf-8'
            return response.text
        except Exception as err:
            # Callers rely on the sentinel return value, so the broad catch
            # at this boundary is kept deliberately.
            print(err)
            return '请求异常'

    def parse_text(self, text, max_results=8):
        """Print a table of search results and return their key fields.

        :param text: JSON search response; songs are read from
            ``result.songs``
        :param max_results: maximum number of rows to show and return
            (``None`` for all)
        :return: list of ``[song_id, song_name, singer]``; empty when *text*
            is not valid JSON or contains no songs
        """
        try:
            payload = json.loads(text)
        except (TypeError, ValueError):
            # e.g. the '请求异常' sentinel produced by get_html
            print('搜索失败: 返回内容不是合法的JSON')
            return []
        result = payload.get('result') if isinstance(payload, dict) else None
        songs = (result or {}).get('songs') or []

        info_list = []
        print('{:*^80}'.format('搜索结果如下'))
        print('{0:{5}<5}{1:{5}<20}{2:{5}<10}{3:{5}<10}{4:{5}<20}'.format('序号', '歌名', '歌手', '时长(s)', '专辑', chr(12288)))
        print('{:-^84}'.format('-'))
        for index, song in enumerate(songs[:max_results]):
            song_name = song['name']
            song_id = song['id']
            duration = song['dt'] // 1000  # 'dt' is divided by 1000 for the seconds column
            album_name = song['al']['name']
            artists = song['ar']
            singer = artists[0]['name'] if artists else ''
            info_list.append([song_id, song_name, singer])
            # chr(12288) is the full-width space used as the padding character
            print('{0:{5}<5}{1:{5}<20}{2:{5}<10}{3:{5}<10}{4:{5}<20}'.format(index, song_name, singer, duration, album_name,
                                                                             chr(12288)))
        print('{:*^80}'.format('*'))
        return info_list

    def save_file(self, song_text, download_info):
        """Download the song described by *song_text* into ``./music``.

        :param song_text: JSON response whose ``data[0].url`` is the audio URL
        :param download_info: ``[song_id, song_name, singer]``; the file is
            named ``<song_name>-<singer>.mp3``
        """
        filepath = './music'
        os.makedirs(filepath, exist_ok=True)
        try:
            music_url = json.loads(song_text)['data'][0]['url']
        except (TypeError, ValueError, KeyError, IndexError):
            print("下载失败: 无法解析歌曲信息")
            return
        if not music_url:
            # NOTE(review): presumably happens for songs the account may not
            # play — confirm against the API.
            print("下载失败: 未获取到歌曲地址")
            return
        filename = (download_info[1] + '-' + download_info[2]).translate(self._UNSAFE_FILENAME_CHARS)
        response = requests.get(music_url, headers=self.headers,
                                timeout=self.REQUEST_TIMEOUT)
        with open(os.path.join(filepath, filename) + '.mp3', 'wb') as f:
            f.write(response.content)
        print("下载完毕!")
def main():
    """Search for songs interactively and download the ones the user picks."""
    id_url = 'https://music.163.com/weapi/cloudsearch/get/web?csrf_token='
    song_url = 'https://music.163.com/weapi/song/enhance/player/url/v1?csrf_token='
    id_d = {
        "hlpretag": "<span class=\"s-fc7\">",
        "hlposttag": "</span>",
        "s": input("请输入歌名或歌手: "),
        "type": "1",
        "offset": "0",
        "total": "true",
        "limit": "30",
        "csrf_token": ""
    }
    encrypt = EncryptText()
    # Serialize as real JSON; str(dict) produces a Python repr (single quotes),
    # which is not valid JSON.
    id_from_data = encrypt.resultEncrypt(json.dumps(id_d, ensure_ascii=False))
    wyy = WangYiYunMusic()
    id_text = wyy.get_html(id_url, method='POST', from_data=id_from_data)
    info_list = wyy.parse_text(id_text)
    while True:
        raw_index = input("请输入要下载歌曲的序号(-1退出): ").strip()
        # int() instead of eval(): never evaluate text typed by the user.
        try:
            input_index = int(raw_index)
        except ValueError:
            print("请输入有效的数字序号")
            continue
        if input_index == -1:
            break
        # Reject out-of-range values; other negatives would otherwise
        # silently index from the end of the list.
        if not 0 <= input_index < len(info_list):
            print("序号超出范围")
            continue
        download_info = info_list[input_index]
        song_d = {
            "ids": str([download_info[0]]),
            "level": "standard",
            "encodeType": "aac",
            "csrf_token": ""
        }
        song_from_data = encrypt.resultEncrypt(json.dumps(song_d, ensure_ascii=False))
        song_text = wyy.get_html(song_url, method='POST', from_data=song_from_data)
        wyy.save_file(song_text, download_info)


if __name__ == '__main__':
    main()
运行结果:
歌曲会下载到当前目录下的music文件夹里,实测可以正常播放。