王者荣耀壁纸爬虫

原创 zayki 2022-05-01
346
import requests
from lxml import etree
import re
import os

# Hero list page that the crawl starts from.
URL = "https://pvp.qq.com/web201605/herolist.shtml"

# Browser-like User-Agent header for outgoing requests.
headers = {
    "User-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36"
}

# Output directory for the downloaded wallpapers. ``exist_ok=True`` replaces
# the separate existence check, which could race with another process
# creating the directory between the check and the ``mkdir`` call.
os.makedirs("./王者荣耀英雄皮肤", exist_ok=True)

# Collect the address of every hero page
def all_url():
    """Fetch the hero list page and download the skins of every hero on it.

    The ``href`` of each link inside the ``herolist clearfix`` list is
    extracted and the resulting list is handed to ``down_picture``.

    Raises:
        requests.RequestException: if the hero list page cannot be fetched
            (including when the 10 second timeout expires).
    """
    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.get(URL, headers=headers, timeout=10).text
    html = etree.HTML(response)
    href = html.xpath("//ul[@class='herolist clearfix']/li/a/@href")
    down_picture(href)

def _skin_url_parts(page):
    """Split the hero page's background image address around its skin number.

    Args:
        page: HTML text of a hero detail page.

    Returns:
        A ``(prefix, suffix)`` tuple such that ``prefix + str(n) + suffix`` is
        the scheme-less address of skin ``n``, or ``None`` when the page has
        no matching background declaration or the address does not end in
        ``<digits>.<extension>``.
    """
    found = re.findall('<div class="zk-con1 zk-con" style="background:url(.*?) center 0">', page)
    if not found:
        return None
    # The capture still carries the CSS ``url(...)`` parentheses and quotes.
    address = found[0].strip("()'\" ")
    # Drop a leading scheme or protocol-relative "//"; "https://" is added
    # back by the caller.
    address = re.sub(r"^(?:https?:)?//", "", address)
    # The skin number is the last run of digits right before the extension,
    # so the split does not depend on the length of the address.
    number = re.search(r"\d+(?=\.[A-Za-z0-9]+$)", address)
    if number is None:
        return None
    return address[:number.start()], address[number.end():]


def _skin_names(page):
    """Return the skin names from the ``data-imgname`` attribute of *page*.

    The attribute value is a ``|``-separated list of ``name&value`` entries;
    only the part before ``&`` is kept, whatever the value after it is.
    """
    raw = "".join(etree.HTML(page).xpath('//div[@class="pic-pf"]//ul//@data-imgname'))
    return [entry.split("&")[0] for entry in raw.split("|")]


def down_picture(href):
    """Download the skin wallpapers of every hero page listed in *href*.

    For each relative address the hero page is fetched and decoded as GBK,
    the background image address and the skin names are read from it, and
    one image per skin name is saved as ``王者荣耀英雄皮肤/<name>.jpg``.

    A hero page that cannot be fetched or parsed, and an image that cannot
    be downloaded or written, is reported on stdout and skipped so the
    remaining downloads still run.

    Args:
        href: iterable of hero page addresses relative to
            ``https://pvp.qq.com/web201605/``.
    """
    for hero_href in href:
        page_url = "https://pvp.qq.com/web201605/" + hero_href
        try:
            reply = requests.get(page_url, headers=headers, timeout=10)
        except requests.RequestException as exc:
            print("英雄页面请求失败：{} ({})".format(page_url, exc))
            continue
        reply.encoding = "gbk"
        page = reply.text

        parts = _skin_url_parts(page)
        if parts is None:
            print("未找到皮肤图片地址：" + page_url)
            continue
        prefix, suffix = parts

        # Skin names on the page
        title = _skin_names(page)
        print(title)

        # Skins are numbered from 1 in the same order as their names, so the
        # name list decides how many images to request.
        for number, name in enumerate(title, start=1):
            if not name:
                continue
            skin_url = "https://{}{}{}".format(prefix, number, suffix)
            try:
                picture = requests.get(skin_url, headers=headers, timeout=10)
                if picture.status_code != 200:
                    continue
                with open("王者荣耀英雄皮肤/" + name + ".jpg", 'wb') as f:
                    f.write(picture.content)
            except (requests.RequestException, OSError) as exc:
                print("图片下载失败：{} ({})".format(skin_url, exc))
        print("图片已下载完成")


# Start the crawl only when this file is run as a script, not when imported.
if __name__ == '__main__':
    all_url()
python
「喜欢这篇文章，您的关注和赞赏是给作者最好的鼓励」
关注作者
王者荣耀壁纸爬虫

评论