My Code
from bs4 import BeautifulSoup
import requests
import time
import urllib.request  # urllib.request must be imported explicitly for urlretrieve

url = 'https://knewone.com/discover?page='

def get_page(url, data=None):
    wb_data = requests.get(url)
    soup = BeautifulSoup(wb_data.text, 'lxml')
    # grab every cover image on the listing page
    imgs = soup.select('a.cover-inner > img')
    download_links = []
    folder_path = '/Users/zhoujiangfeng/Pictures/jandan/'
    for img in imgs:
        img_link = img.get('src')
        download_links.append(img_link)
    for item in download_links:
        # download the image to the local folder with urllib;
        # the position-based slice pulls a short chunk of the URL to use as the file name
        urllib.request.urlretrieve(item, folder_path + item[-21:-16])
    print('done')

def get_more_pages(start, end):
    for one in range(start, end):
        get_page(url + str(one))
        time.sleep(2)

get_more_pages(1, 10)
Summary
- Pay attention to the structure of the image URLs: the file should be saved with its image format extension in the name (the code above slices a fixed number of characters out of the link; see the sketch below for a more robust alternative).
- Use the urllib library (urllib.request.urlretrieve) to download the images to a local folder.
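
The position-based slice only works if every image URL has exactly the same length. Below is a minimal sketch of a more robust alternative: it takes the last path segment of the URL as the file name, so the extension is kept automatically. The save_image helper, the sample URL, and the target folder are made up for illustration and are not part of the original script.

import os
import urllib.request
from urllib.parse import urlsplit

def save_image(img_url, folder_path):
    # last path segment of the URL, e.g. '.../photos/abcde.jpg' -> 'abcde.jpg'
    file_name = os.path.basename(urlsplit(img_url).path)
    urllib.request.urlretrieve(img_url, os.path.join(folder_path, file_name))

# usage (hypothetical URL and folder):
# save_image('https://example.com/photos/abcde.jpg', '/Users/zhoujiangfeng/Pictures/jandan')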