2024-01-25 Downloading images with a thread pool

While writing this, I used Alt+Enter to import the random package and it pulled in random.random instead of random; that mistake cost me half a day of debugging.
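For context, the failure is easy to reproduce: if the quick-fix generates something like a from-import of random.random instead of importing the module, the name random ends up bound to a function, and the later call to random.randint(...) raises an AttributeError. A minimal sketch of the two forms (assuming the auto-import produced the from-import variant):

# what the Alt+Enter quick-fix presumably generated
from random import random
# random.randint(1000, 9999)  # AttributeError: the name points at a function, not the module

# what the script actually needs
import random
print(random.randint(1000, 9999))

The full script: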

import random
import time

import requests
import matt_utils.utils as matt
from lxml import etree
import os
from concurrent.futures import ThreadPoolExecutor


def makeDir(base_dir):
    if not os.path.exists(base_dir):
        os.mkdir(base_dir)
    return base_dir


def login():
    login_url = 'https://www.kelagirls.com/api/user/tologin'
    data = {
        "userName": matt.email,
        "pwd": matt.password
    }
    headers = {
        "Referer": "https://www.kelagirls.com/login",
        "User-Agent": matt.headers.get('User-Agent')
    }
    session = requests.session()
    # send the credentials (the tologin endpoint presumably expects a POST body);
    # the session keeps the resulting login cookies for the later requests
    session.post(url=login_url, data=data, headers=headers)
    return session

num = 0


def get_pic_url_list(root_url, type_url, session):
    global num  # running counter across all albums, used to name the files
    res = session.get(url=type_url).text
    # print(res)
    tree = etree.HTML(res)
    # each album card exposes its id in the albumid attribute
    id_list = tree.xpath('//div[@class="album"]/div/@albumid')
    print(id_list)
    url_list = [f'https://www.kelagirls.com/albums/album-{i}.html' for i in id_list]
    pic_list = []

    for album_url in url_list:
        pic_tree = etree.HTML(session.get(album_url).text)
        pic_part_url = pic_tree.xpath('//div[@id="gallery"]/img/@src')
        pic_url_list = [root_url + part for part in pic_part_url]
        for url in pic_url_list:
            # number every picture globally so file names never collide
            pic_list.append((num, url))
            num = num + 1
    print(pic_list)
    return pic_list


def download_pic(url, session, base_dir, beauty_type):
    print('downloading', url)
    r = session.get(url[1])
    print(r.status_code)
    # random suffix, currently unused; the global counter in url[0]
    # already keeps the file names unique
    i = random.randint(1000, 9999)
    print(base_dir, beauty_type)
    pic_name = os.path.join(base_dir, beauty_type + str(url[0]) + '.jpg')
    with open(pic_name, 'wb') as f:
        f.write(r.content)
        print(f"{pic_name} is downloaded.")


def main(beauty_type, type_url, dr):
    root_url = 'https://www.kelagirls.com'
    session = login()
    base_dir = makeDir(dr)

    ulist = get_pic_url_list(root_url, type_url, session)

    pool = ThreadPoolExecutor(max_workers=8)

    # one task per picture; shutdown() blocks until every submitted download finishes
    for url in ulist:
        pool.submit(download_pic, url, session, base_dir, beauty_type)
    pool.shutdown()

sports_url = 'https://www.kelagirls.com/albums-7.html#'
sexy_url = 'https://www.kelagirls.com/albums-1.html'
wetty_url = 'https://www.kelagirls.com/albums-3.html'
sch_url = 'https://www.kelagirls.com/albums-4.html'
curve_url = 'https://www.kelagirls.com/albums-6.html'
s_url = 'https://www.kelagirls.com/albums-2.html'
t = time.time()
main('wetty', wetty_url, 'wetty_url')
print(time.time() - t)
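One detail worth a follow-up: exceptions raised inside download_pic are stored on the returned futures and never re-raised, so a failed download passes silently. A minimal sketch of a variant that surfaces them, using the executor as a context manager (fetch and urls below are placeholder names, not part of the script above):

from concurrent.futures import ThreadPoolExecutor, as_completed


def fetch(url):
    # stand-in for the real download work
    return url


urls = ['https://example.com/a.jpg', 'https://example.com/b.jpg']  # hypothetical
with ThreadPoolExecutor(max_workers=8) as pool:  # shutdown() happens automatically on exit
    futures = [pool.submit(fetch, u) for u in urls]
    for fut in as_completed(futures):
        print(fut.result())  # result() re-raises any exception from the worker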
