# python音乐爬虫程序代码QZQ

"""
程序目标是下载当前页面中所有的歌曲。
通过接口下载: 接口链接 + 歌曲ID
"""
import os
import time
import requests
from lxml import etree

# 1. Request headers sent with every HTTP request made by this script.
headers = {
    # Browser-like User-Agent string.
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0'
    ),
    'referer': 'https://music.163.com/',
}

# The login cookie is a personal session credential, so it must not live in
# the source file. Export it before running the script, e.g.
#     NETEASE_COOKIE='NMTID=...; MUSIC_U=...; __csrf=...'
# The value is stripped because a header value may not contain line breaks.
_cookie = os.environ.get('NETEASE_COOKIE', '').strip()
if _cookie:
    headers['cookie'] = _cookie

# 2. URL of the NetEase Cloud Music playlist to download.
#    Use the plain "/playlist?id=<id>" form. The address-bar form
#    ("https://music.163.com/#/my/m/music/playlist?id=<id>") contains "/#",
#    and requesting it does not return the real playlist page.
#    Other playlist IDs that were tried: 934870683, 8161158913, 2153841685.
url = 'https://music.163.com/playlist?id=7625953680'

# 3. Fetch the playlist page, retrying when the request fails.
max_retries = 3  # maximum number of attempts
for attempt in range(1, max_retries + 1):
    try:
        # A timeout keeps a stalled connection from hanging the script forever.
        response = requests.get(url, headers=headers, timeout=10)
        # Turn HTTP error statuses (4xx/5xx) into exceptions so they are
        # retried as well instead of being parsed as if they were the page.
        response.raise_for_status()
        break  # success: stop retrying
    except requests.RequestException as e:
        print(f"请求出现错误: {e},正在重试…")
        if attempt < max_retries:
            time.sleep(2)  # wait 2 seconds before the next attempt
else:
    # The loop ended without `break`, i.e. every attempt failed.
    print("多次重试后仍无法获取网页数据,程序终止")
    raise SystemExit(1)  # non-zero exit status signals the failure

# Dump the raw page text (debug output).
print(response.text)

# 4. Parse the HTML text of the page into an element tree.
html = etree.HTML(response.text)

# 5. Select the target data: every <a> element whose href contains "/song?".
#    The song ID is later read from the href and the song name from the
#    element's text.
music_label_list = html.xpath('//a[contains(@href,"/song?")]')

# Show the matched elements (debug output).
print(music_label_list)

# Create the output folder with the os module.
# exist_ok=True makes this a no-op when the folder already exists and removes
# the gap between a separate existence check and the creation.
os.makedirs('音乐', exist_ok=True)

# 6. Walk the matched <a> elements and download one song per element.
for music_label in music_label_list:
    # The href carries the song ID after the "=" sign. partition() yields an
    # empty string when there is no "=", where split('=')[1] would raise.
    href = music_label.xpath('./@href')[0]
    music_id = href.partition('=')[2]

    # Only a purely numeric value is a real song ID; skip anything else.
    if not music_id.isdigit():
        continue
    print('ID数据:', music_id)

    # The song name is the link's own text; skip links that have none
    # instead of failing with an IndexError.
    name_nodes = music_label.xpath('./text()')
    if not name_nodes:
        continue
    music_name = name_nodes[0]
    print('歌曲名字:', music_name)

    # Download endpoint: interface link + song ID.
    music_url = 'http://music.163.com/song/media/outer/url?id=' + music_id

    # Fetch the song data. A separate variable keeps the playlist page's
    # `response` intact, and a failed download skips this song only.
    try:
        song_response = requests.get(music_url, headers=headers, timeout=30)
        song_response.raise_for_status()
    except requests.RequestException as e:
        print(f'《{music_name}》下载失败: {e}')
        time.sleep(1)
        continue

    # An HTML body is an error page, not audio; do not save it as .mp3.
    if song_response.headers.get('Content-Type', '').startswith('text/html'):
        print(f'《{music_name}》下载失败: 返回的不是音频数据')
        time.sleep(1)
        continue

    # Replace characters that are path separators or invalid in file names,
    # so a song title cannot escape the output folder or break open().
    safe_name = ''.join(
        '_' if ch in '\\/:*?"<>|' else ch for ch in music_name
    ).strip() or music_id

    # Save the song data to an mp3 file.
    with open(f'./音乐/{safe_name}.mp3', 'wb') as file:
        file.write(song_response.content)

    print(f'《{music_name}》下载成功......')

    # Pause 1 second after each song.
    time.sleep(1)

# 作者:EYYLTV

# 物联沃分享整理
# 物联沃-IOTWORD物联网 » python音乐爬虫程序代码QZQ

# 发表回复