欢迎光临UUpython
最大、最新、最全的Python代码收集站

爬取彼岸图的4K图片自动保存本地

1.加入进程池

import os.path
import random
import time
import requests
from lxml import etree
import threading
from multiprocessing import Pool
# Base address of the wallpaper site; listing-page URLs are built from it.
url = 'https://pic.netbian.com'

# Browser-like headers sent with the page requests.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/106.0.0.0 Safari/537.36'
    ),
}

def huoqu(urll):
    """Download every image listed on one gallery page into ``彼岸图网/``.

    Parses the ``<li>`` entries under ``<ul class="clearfix">``. For each
    entry the ``<img alt>`` text becomes the file name and the ``<img src>``
    (a site-relative path) is fetched and written to ``彼岸图网/<alt>.jpg``.

    Failed requests and non-2xx responses are reported on stdout and the
    affected page or image is skipped, so a single bad request does not
    abort the rest of the crawl.

    Args:
        urll: URL of the listing page to process.
    """
    print('我已经运行了')

    # Fetch the listing page. The timeout keeps a stalled connection from
    # blocking the calling worker forever.
    try:
        respones = requests.get(urll, headers=headers, timeout=30)
    except requests.RequestException as exc:
        print(f'{urll},请求失败: {exc}')
        return
    if not respones.ok:
        print(f'{urll},请求失败: HTTP {respones.status_code}')
        return

    # Decode as GBK; undecodable bytes are replaced instead of raising.
    mg = respones.content.decode("gbk", errors="replace")
    tree = etree.HTML(mg)
    # One <li> per picture entry.
    img_url = tree.xpath('//ul[@class="clearfix"]//li')

    # Create the output directory once. exist_ok avoids the check-then-create
    # race when several worker processes reach this point together.
    os.makedirs('彼岸图网', exist_ok=True)

    for a in img_url:
        alt = a.xpath('./a//img/@alt')
        img_mg = a.xpath('./a//img/@src')
        if not alt or not img_mg:
            # No <img> with both alt and src in this entry: nothing to save.
            continue

        # The alt text is used as the file name, so replace path separators
        # and characters that Windows does not allow in file names.
        c = ''.join('_' if ch in '\\/:*?"<>|' else ch for ch in alt[0])

        # Send the same browser-like headers as for the listing page.
        try:
            img_mgg = requests.get('https://pic.netbian.com' + img_mg[0],
                                   headers=headers, timeout=30)
        except requests.RequestException as exc:
            print(f'彼岸图网/{c}.jpg,下载失败: {exc}')
            continue
        if not img_mgg.ok:
            # Do not store an error page under a .jpg name.
            print(f'彼岸图网/{c}.jpg,下载失败: HTTP {img_mgg.status_code}')
            continue

        with open(f'彼岸图网/{c}.jpg', 'wb') as f:
            f.write(img_mgg.content)
        print(f'彼岸图网/{c}.jpg,保存成功')

def zongpage(url):
    """Return the total number of listing pages as a string of digits.

    Fetches ``url``, reads the link texts inside ``<div class="page">`` and
    takes the largest numeric one as the page count. The count is also
    printed to stdout.

    Args:
        url: URL of a listing page that carries the pagination bar.

    Returns:
        The highest page number found among the pagination links, as a
        string; ``'1'`` when the page has no numbered pagination links.

    Raises:
        requests.RequestException: if the request fails, times out, or the
            server answers with an error status.
    """
    # Fetch the page; fail loudly here because the page count is required.
    respones = requests.get(url, headers=headers, timeout=30)
    respones.raise_for_status()
    # Decode as GBK; undecodable bytes are replaced instead of raising.
    mg = respones.content.decode("gbk", errors="replace")
    tree = etree.HTML(mg)
    # Link texts of the pagination bar (page numbers plus navigation labels).
    page = tree.xpath('//div[@class="page"]/a/text()')

    # Take the largest numeric label rather than a fixed position, so a
    # short or reordered pagination bar cannot raise IndexError.
    labels = (text.strip() for text in page)
    numbers = [int(label) for label in labels if label.isdecimal()]
    total = str(max(numbers)) if numbers else '1'

    print('总共:' + total + '页')
    return total

def main():
    """Crawl every listing page of the site with a pool of worker processes.

    Looks up the total page count, builds the URL of each listing page and
    hands the URLs to ``huoqu`` through a process pool.
    """
    # Look up the page count here rather than at import time: with the
    # "spawn" start method each worker re-imports this module, and a
    # module-level call would repeat the request in every worker.
    zongpagee = zongpage(url)

    # Page 1 is the site root; page n (n >= 2) is index_<n>.html.
    # range() excludes its upper bound, hence +1 to include the last page.
    threads = [url]
    for a in range(2, int(zongpagee) + 1):
        uu = f'{url}/index_{a}.html'
        print(uu)
        threads.append(uu)

    # Pool(5) sets the number of worker processes. With no argument Pool
    # uses one worker per CPU, which the original author warns may freeze
    # the machine; the author recommends staying at or below 10.
    pool = Pool(5)
    try:
        # map() blocks until every page has been processed, so it is called
        # exactly once; the pool is closed only after that.
        pool.map(huoqu, threads)
    finally:
        pool.close()
        pool.join()
    print("完事")


if __name__ == '__main__':
    main()
赞(0) 打赏
未经允许不得转载:UUpython » 爬取彼岸图的4K图片自动保存本地
分享到: 更多 (0)

评论 抢沙发

评论前必须登录!