Python 爬取樱花动漫视频并保存
项目说明
- 目标站点:https://www.yinghuadongman.cn/
- 具有自动去重功能:不会出现重复下载同一个视频的情况
- 遇到视频下载有障碍,支持自动换源
- 程序会根据用户输入的orig_url通过网页解析自动补全其余集数url
- 使用之前先创建一个文件夹,用于存放视频文件(我的是 `E:\动漫视频`,请根据需要自行修改代码中的路径)
- 将代码封装到了一个class中,便于后期维护(如果有必要的话)
代码
DongMan.py
import os
import time
import requests
from bs4 import BeautifulSoup
from selenium.webdriver import Chrome
from selenium.webdriver import ChromeOptions
from selenium.webdriver.common.by import By
class YinHuaDongMan():
    """Download every episode of a series from www.yinghuadongman.cn.

    Given the URL of one episode page, ``main`` collects the URLs of all
    episodes listed on that page, resolves each one to a direct video link
    with a headless Chrome session, and saves it as ``<episode id>.mp4``
    under ``SAVE_ROOT/<file_name>``. Episodes whose ``.mp4`` already exists
    are skipped; when the primary source fails, a spare source found in the
    player page is tried once.
    """

    # Site root that the relative hrefs scraped from pages are appended to.
    BASE_URL = 'https://www.yinghuadongman.cn'
    # Folder that holds one sub-folder per series; change to suit your machine.
    SAVE_ROOT = 'E:\\动漫视频'
    # Seconds to wait for the server to respond before a request is abandoned.
    REQUEST_TIMEOUT = 30

    def GetUrlList(self, url):
        """Expand one episode URL into the URLs of every listed episode.

        Args:
            url: URL of any episode page of the series.

        Returns:
            A list of absolute episode URLs, in page order.

        Raises:
            AttributeError: if the page has no play-list ``div``.
        """
        resp = requests.get(url=url, timeout=self.REQUEST_TIMEOUT)
        soup = BeautifulSoup(resp.text, 'html.parser')
        play_list = soup.find(
            'div',
            attrs={'class': 'module-play-list-content module-play-list-base'},
        )
        return [self.BASE_URL + a.get('href') for a in play_list.find_all('a')]

    def get_link(self, url):
        """Resolve an episode page to the direct URL of its video file.

        Args:
            url: URL of the episode page.

        Returns:
            A ``(src, html)`` tuple: ``src`` is the ``src`` attribute of the
            ``<video>`` element inside the player iframe (``None`` when no
            such element or attribute exists) and ``html`` is the page source
            captured after switching into that iframe.
        """
        options = ChromeOptions()
        options.add_argument('--headless')
        # The original passed '--disable-cpu', which is not a Chrome switch;
        # '--disable-gpu' is the flag normally paired with headless mode.
        options.add_argument('--disable-gpu')
        driver = Chrome(options=options)
        try:
            driver.get(url=url)
            player_frame = driver.find_element(
                by=By.XPATH, value='//*[@id="playleft"]/iframe'
            )
            driver.switch_to.frame(player_frame)
            # Give the player a moment to inject the <video> element.
            time.sleep(1)
            page = driver.page_source
        finally:
            # quit() ends the browser process even when a step above raised,
            # so failed episodes do not pile up orphaned Chrome instances.
            driver.quit()
        video = BeautifulSoup(page, 'html.parser').find('video')
        src = video.get('src') if video is not None else None
        return (src, page)

    def downloading(self, link, id, file_name):
        """Download ``link`` and store it as ``<id>.mp4`` in the series folder.

        The body is streamed to a ``.part`` file and renamed only once the
        transfer finished, so an interrupted download is never mistaken for
        a finished episode by the existence check in ``main``.

        Args:
            link: Direct URL of the video file.
            id: Episode identifier used as the file name.
            file_name: Name of the series sub-folder below ``SAVE_ROOT``.

        Raises:
            requests.RequestException: on connection errors or a non-2xx
                response.
            OSError: if the target file cannot be written.
        """
        target_path = os.path.join(self.SAVE_ROOT, file_name, f'{id}.mp4')
        partial_path = target_path + '.part'
        try:
            with requests.get(
                link, stream=True, timeout=self.REQUEST_TIMEOUT
            ) as resp:
                # Do not save an HTML error page under an .mp4 name.
                resp.raise_for_status()
                with open(partial_path, 'wb') as fp:
                    for chunk in resp.iter_content(chunk_size=1024 * 1024):
                        if chunk:
                            fp.write(chunk)
            os.replace(partial_path, target_path)
        except Exception:
            if os.path.exists(partial_path):
                os.remove(partial_path)
            raise
        print("下载完成\n")

    def HuanYuan(self, html):
        """Look for a spare source link in a player page.

        Args:
            html: Page source as returned by ``get_link``.

        Returns:
            The absolute URL of the spare source, or ``0`` when the page
            contains no matching anchor with an ``href``.
        """
        soup = BeautifulSoup(html, 'html.parser')
        # NOTE(review): 'slide{1}' is matched literally; confirm this is the
        # real attribute value and not a leftover format placeholder.
        anchor = soup.find('a', attrs={'data-hash': 'slide{1}'})
        href = anchor.get('href') if anchor is not None else None
        if href is None:
            return 0
        return self.BASE_URL + href

    def _announce_and_download(self, link, id, file_name):
        """Print the resolved link, then download it via ``downloading``."""
        if not link:
            raise ValueError('player page exposed no video src')
        print(f"link='{link}'")
        print("-获取到link", end=" ")
        self.downloading(link=link, id=id, file_name=file_name)

    def main(self, orig_url, file_name):
        """Download all episodes of the series that ``orig_url`` belongs to.

        Args:
            orig_url: URL of any episode page of the series.
            file_name: Name of the sub-folder (below ``SAVE_ROOT``) that
                receives the ``.mp4`` files; created when missing.
        """
        series_dir = os.path.join(self.SAVE_ROOT, file_name)
        if os.path.isdir(series_dir):
            print("文件夹已存在\n")
        else:
            os.makedirs(series_dir)

        for url in self.GetUrlList(orig_url):
            episode_id = url.split('-')[-1].replace('.html', '')
            if os.path.exists(os.path.join(series_dir, f'{episode_id}.mp4')):
                print("文件存在")
                continue

            # Reset per episode: the spare-source lookup must never see the
            # previous episode's page, nor an unbound name when get_link
            # itself is what failed.
            html = None
            try:
                link, html = self.get_link(url=url)
                self._announce_and_download(link, episode_id, file_name)
            except Exception as exc:
                print(f"下载失败,尝试换源: {exc}")
            else:
                continue

            spare_url = self.HuanYuan(html=html) if html is not None else 0
            if spare_url == 0:
                print("没有可用的其他源")
                continue
            try:
                link = self.get_link(url=spare_url)[0]
                self._announce_and_download(link, episode_id, file_name)
            except Exception as exc:
                # Keep going: a later run retries this episode because no
                # .mp4 was written for it.
                print(f"备用源下载失败: {exc}")
class AgeDongMan():
    """Empty placeholder class; it defines no attributes or methods.

    NOTE(review): presumably reserved for a scraper of a second site.
    Confirm with the author before relying on it.
    """
在DongMan.py的同级目录创建start.py,内容如下
# Entry script: asks for an episode URL and a folder name, then downloads
# the whole series with YinHuaDongMan.
from DongMan import YinHuaDongMan

# Example input: https://www.yinghuadongman.cn/play_tpH8888H-2-2.html
orig_url = input("orig_url=")
# Example input: 爆笑虫子
file_name = input("file_name=")

handle = YinHuaDongMan()
# The original called the non-existent method `maDongManin`; the class
# defines `main`.
handle.main(orig_url=orig_url, file_name=file_name)
运行start.py即可
成果展示
作者:都市最强牛爷爷