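# Download pictures using a thread pool.
# Note: while writing this, the IDE's Alt+Enter auto-import brought in `random.random` instead of the `random` module; tracking that down cost a long debugging session.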
import random
import time
import requests
import matt_utils.utils as matt
from lxml import etree
import os
from concurrent.futures import ThreadPoolExecutor
def makeDir(base_dir):
    """Ensure that the directory *base_dir* exists and return its path.

    Missing parent directories are created as well, and an already
    existing directory is not an error.

    Args:
        base_dir: Path of the directory to create.

    Returns:
        ``base_dir`` unchanged, so the call can be used inline.
    """
    # exist_ok avoids the check-then-create race of os.path.exists + os.mkdir,
    # and makedirs (unlike mkdir) also handles nested paths.
    os.makedirs(base_dir, exist_ok=True)
    return base_dir
def login():
    """Create a requests session and send the login request to the site.

    The credentials (``matt.email`` / ``matt.password``) and the base request
    headers (``matt.headers``) are read from the ``matt_utils.utils`` module.
    The login response is not inspected, so a rejected login is not detected
    here.

    Returns:
        The ``requests`` session that issued the login request.
    """
    login_url = 'https://www.kelagirls.com/api/user/tologin'
    data = {
        "userName": matt.email,
        "pwd": matt.password
    }
    # Send everything matt.headers already carried plus the login-page
    # Referer; previously the Referer dict was built but never passed on.
    headers = dict(matt.headers)
    headers["Referer"] = "https://www.kelagirls.com/login"

    session = requests.session()
    # NOTE(review): the credentials travel as the body of a GET request;
    # confirm the endpoint does not actually expect a POST.
    session.get(url=login_url, data=data, headers=headers)
    return session
# Running index handed out to every picture URL collected so far. It lives at
# module level so numbering continues across calls to get_pic_url_list.
num = 0


def get_pic_url_list(root_url, type_url, session):
    """Collect the numbered picture URLs of every album on a listing page.

    The listing page at *type_url* is fetched, the album ids are read from
    it, and each album page is then fetched to extract the ``src`` of the
    images inside its gallery element.

    Args:
        root_url: Site root that the image ``src`` values are resolved
            against.
        type_url: URL of the album listing page.
        session: Object with a ``get(url)`` method whose result exposes
            ``.text`` (a requests session in this script).

    Returns:
        A list of ``(index, picture_url)`` tuples, where ``index`` is taken
        from the module-level counter ``num``.
    """
    from urllib.parse import urljoin

    global num

    res = session.get(url=type_url).text
    tree = etree.HTML(res)
    id_list = tree.xpath('//div[@class="album"]/div/@albumid')
    print(id_list)

    pic_list = []
    for album_id in id_list:
        album_url = f'https://www.kelagirls.com/albums/album-{album_id}.html'
        pic_tree = etree.HTML(session.get(album_url).text)
        for src in pic_tree.xpath('//div[@id="gallery"]/img/@src'):
            # urljoin yields the same result as plain concatenation for
            # root-relative paths, and also copes with relative or absolute
            # src values that concatenation would turn into a broken URL.
            pic_list.append((num, urljoin(root_url, src)))
            num += 1
    print(pic_list)
    return pic_list
def download_pic(url, session, base_dir, beauty_type):
    """Download one picture and store it as ``<beauty_type><index>.jpg``.

    Args:
        url: ``(index, picture_url)`` tuple; the index becomes part of the
            file name and the second item is the address that is fetched.
        session: Object with a ``get(url)`` method whose result exposes
            ``.status_code`` and ``.content``.
        base_dir: Directory the file is written into.
        beauty_type: Prefix used for the file name.

    Returns:
        None. Responses with a 4xx/5xx status are reported and skipped
        instead of being saved as a ``.jpg`` file.
    """
    print('i am downing')
    print(url)
    index, pic_url = url[0], url[1]
    r = session.get(pic_url)
    print(r.status_code)
    # Print the actual values; the old call printed the literal variable names.
    print(base_dir, beauty_type)
    if r.status_code >= 400:
        # An error page is not image data, so do not write it to disk.
        print(f"{pic_url} skipped, status {r.status_code}.")
        return
    pic_name = os.path.join(base_dir, beauty_type + str(index) + '.jpg')
    with open(pic_name, 'wb') as f:
        f.write(r.content)
    print(f"{pic_name} is downloaded.")
def main(beauty_type, type_url, dr):
    """Log in, collect the picture URLs of one listing page and download them.

    Downloads are submitted to a pool of 8 worker threads that all use the
    same session object.

    Args:
        beauty_type: Prefix used for the downloaded file names.
        type_url: URL of the album listing page to scrape.
        dr: Directory the pictures are stored in; created when missing.
    """
    root_url = 'https://www.kelagirls.com'
    session = login()
    base_dir = makeDir(dr)
    ulist = get_pic_url_list(root_url, type_url, session)

    # The with-block waits for all workers and shuts the pool down even when
    # submitting a task raises.
    with ThreadPoolExecutor(max_workers=8) as pool:
        futures = {
            pool.submit(download_pic, url, session, base_dir, beauty_type): url
            for url in ulist
        }

    # An exception raised inside a worker is stored on its future and would
    # otherwise vanish silently; report it without aborting the whole run.
    for future, url in futures.items():
        error = future.exception()
        if error is not None:
            print(f"{url} failed: {error!r}")
# Album listing pages on kelagirls.com. Only wetty_url is used by the call
# below; the others are not referenced in the visible code.
sports_url = 'https://www.kelagirls.com/albums-7.html#'
sexy_url = 'https://www.kelagirls.com/albums-1.html'
wetty_url = 'https://www.kelagirls.com/albums-3.html'
sch_url = 'https://www.kelagirls.com/albums-4.html'
curve_url = 'https://www.kelagirls.com/albums-6.html'
s_url = 'https://www.kelagirls.com/albums-2.html'
# Run the download for the wetty listing and print the elapsed wall-clock
# time in seconds.
# NOTE(review): this runs at import time because there is no
# `if __name__ == "__main__":` guard.
t = time.time()
main('wetty', wetty_url, 'wetty_url')
print(time.time() - t)