python 爬取樱花动漫视频并保存

项目说明

  1. 目标站点:https://www.yinghuadongman.cn/
  2. 具有自动去重功能:不会出现重复下载同一个视频的情况
  3. 遇到视频下载有障碍,支持自动换源
  4. 程序会根据用户输入的orig_url通过网页解析自动补全其余集数url
  5. 使用之前先创建一个用于存放视频文件的文件夹(我用的是‘E:\动漫视频’,请根据需要自行修改代码中的路径)
  6. 将代码封装到了一个class中,便于后期维护(如果有必要的话)

代码

DongMan.py 

import os
import time
import requests
from bs4 import BeautifulSoup
from selenium.webdriver import Chrome
from selenium.webdriver import ChromeOptions
from selenium.webdriver.common.by import By


class YinHuaDongMan():
    """Download every episode of a series from www.yinghuadongman.cn.

    Episodes are stored as ``<save_dir>/<file_name>/<episode id>.mp4``. An
    episode whose file already exists is skipped, and when the primary
    source fails the alternate source linked from the player page is tried.
    """

    # Site root that relative hrefs found in the pages are appended to.
    BASE_URL = 'https://www.yinghuadongman.cn'
    # Root folder for downloads; override per instance via the constructor.
    save_dir = 'E:\\动漫视频'
    # Seconds to wait for the server before a request is abandoned.
    REQUEST_TIMEOUT = 30
    # Bytes written per chunk while streaming a video to disk.
    CHUNK_SIZE = 1024 * 1024

    def __init__(self, save_dir=None):
        """Create a downloader.

        Args:
            save_dir: Root folder for downloaded series. ``None`` keeps the
                class default.
        """
        if save_dir is not None:
            self.save_dir = save_dir

    def GetUrlList(self, url):
        """Expand one episode page URL into the URLs of all listed episodes.

        Args:
            url: URL of any play page of the series.

        Returns:
            Absolute episode page URLs in page order; an empty list when the
            play-list container is not present in the page.

        Raises:
            requests.RequestException: The page could not be fetched.
        """
        resp = requests.get(url=url, timeout=self.REQUEST_TIMEOUT)
        resp.raise_for_status()
        soup = BeautifulSoup(resp.text, 'html.parser')
        container = soup.find(
            'div',
            attrs={'class': 'module-play-list-content module-play-list-base'},
        )
        if container is None:
            return []
        hrefs = (anchor.get('href') for anchor in container.find_all('a'))
        return [self.BASE_URL + href for href in hrefs if href]

    def get_link(self, url):
        """Render a play page in headless Chrome and read the video address.

        Args:
            url: Episode play page URL.

        Returns:
            A ``(src, html)`` tuple: the ``src`` attribute of the ``<video>``
            element (``None`` when there is no such element) and the page
            source captured after switching into the player iframe.
        """
        options = ChromeOptions()
        options.add_argument('--headless')
        # '--disable-cpu' is not a Chromium switch; '--disable-gpu' is the
        # switch that was evidently intended.
        options.add_argument('--disable-gpu')
        driver = Chrome(options=options)
        try:
            driver.get(url=url)
            player_frame = driver.find_element(
                by=By.XPATH, value='//*[@id="playleft"]/iframe')
            driver.switch_to.frame(player_frame)
            # Give the player a moment to inject the <video> element.
            time.sleep(1)
            page = driver.page_source
        finally:
            # quit() ends the whole browser session even if a step above
            # raised, so no Chrome process is left behind.
            driver.quit()
        video = BeautifulSoup(page, 'html.parser').find('video')
        src = video.get('src') if video is not None else None
        return (src, page)

    def downloading(self, link, id, file_name):
        """Stream the video at ``link`` into the series folder.

        The data is written to a ``.part`` file that is renamed only after
        the transfer finished, so an interrupted download is never mistaken
        for a complete episode on the next run.

        Args:
            link: Direct URL of the video file.
            id: Episode identifier, used as the file name.
            file_name: Series folder name below ``save_dir``.

        Raises:
            requests.RequestException: The request failed or the server
                answered with an error status.
            OSError: The file could not be written.
        """
        target = os.path.join(self.save_dir, file_name, f'{id}.mp4')
        partial = target + '.part'
        try:
            with requests.get(link, stream=True,
                              timeout=self.REQUEST_TIMEOUT) as resp:
                resp.raise_for_status()
                with open(partial, 'wb') as fp:
                    for chunk in resp.iter_content(chunk_size=self.CHUNK_SIZE):
                        if chunk:
                            fp.write(chunk)
            os.replace(partial, target)
        finally:
            # After a successful rename the partial file no longer exists;
            # otherwise remove the incomplete data.
            if os.path.exists(partial):
                os.remove(partial)
        print("下载完成\n")

    def HuanYuan(self, html):
        """Find the alternate-source page linked from a player page.

        Args:
            html: Page source to search; may be ``None`` or empty.

        Returns:
            The absolute URL of the alternate source, or ``0`` when none is
            available (the sentinel callers compare against).
        """
        if not html:
            return 0
        soup = BeautifulSoup(html, 'html.parser')
        anchor = soup.find('a', attrs={'data-hash': 'slide{1}'})
        if anchor is None or anchor.get('href') is None:
            return 0
        return self.BASE_URL + anchor['href']

    @staticmethod
    def _episode_id(url):
        """Return the text after the last '-' of ``url`` without '.html'."""
        return url.split('-')[-1].replace('.html', '')

    def _download_from(self, link, episode, file_name):
        """Report ``link`` and download it; reject a missing video address."""
        if not link:
            raise ValueError('no video src found on the page')
        print(f"link='{link}'")
        print("-获取到link", end=" ")
        self.downloading(link=link, id=episode, file_name=file_name)

    def main(self, orig_url, file_name):
        """Download all episodes of the series that ``orig_url`` belongs to.

        Args:
            orig_url: URL of any play page of the series.
            file_name: Folder name (below ``save_dir``) for this series.
        """
        series_dir = os.path.join(self.save_dir, file_name)
        if os.path.isdir(series_dir):
            print("文件夹已存在\n")
        os.makedirs(series_dir, exist_ok=True)

        for url in self.GetUrlList(orig_url):
            episode = self._episode_id(url)
            if os.path.exists(os.path.join(series_dir, f'{episode}.mp4')):
                print("文件存在")
                continue

            # Reset per episode so a failure can never fall back to the
            # page source of a previous episode.
            html = None
            try:
                link, html = self.get_link(url=url)
                self._download_from(link, episode, file_name)
                continue
            except Exception as err:
                # Per-episode boundary: browser, network and disk errors all
                # mean "try the other source", not "abort the whole series".
                print(f"下载失败,尝试换源: {err}")

            spare_url = self.HuanYuan(html=html)
            if spare_url == 0:
                print("没有可用的其他源")
                continue
            try:
                link = self.get_link(url=spare_url)[0]
                self._download_from(link, episode, file_name)
            except Exception as err:
                print(f"备用源下载失败: {err}")
                
class AgeDongMan:
    """Empty placeholder class; it defines no attributes or methods."""




在DongMan.py的同级目录创建start.py,内容如下

"""Interactive entry point: ask for a play-page URL and a folder name, then download."""
from DongMan import YinHuaDongMan

if __name__ == '__main__':
    # Example input: https://www.yinghuadongman.cn/play_tpH8888H-2-2.html
    orig_url = input("orig_url=")
    # Example input: 爆笑虫子 (passed to main() as the series folder name)
    file_name = input("file_name=")

    handle = YinHuaDongMan()
    # The original called the non-existent 'maDongManin'; the method is 'main'.
    handle.main(orig_url=orig_url, file_name=file_name)

运行start.py即可

成果展示

作者:都市最强牛爷爷

物联沃分享整理
物联沃-IOTWORD物联网 » python 爬取樱花动漫视频并保存

发表回复