问题:
用 requests 下载图片太麻烦了。虽然 urlretrieve 可以一步搞定,但是我想统一使用 requests 库。下面代码中被注释掉的部分就是用 requests 写的版本。
from urllib.request import urlretrieve
import requests
from bs4 import BeautifulSoup
# Download the landing page and parse it. The parser is named explicitly so the
# result does not depend on which optional parsers happen to be installed (and
# so BeautifulSoup does not emit its "no parser was explicitly specified"
# warning).
r = requests.get("http://www.pythonscraping.com")
bs = BeautifulSoup(r.text, "html.parser")
# The logo is the <img> nested inside the anchor whose id is "logo".
image = bs.find("a", {"id": "logo"}).find("img")["src"]
# urlretrieve fetches the URL and writes it to the given file in a single call.
urlretrieve(image, "logo.jpg")
# requests-based alternative (left commented out): stream the response and
# write it to disk chunk by chunk.
# ir = requests.get(image, stream=True)
# if ir.status_code == 200:
#     with open('logo.jpg', 'wb') as f:
#         for chunk in ir:
#             f.write(chunk)
回答:
import requests
from bs4 import BeautifulSoup
# Fetch the landing page and parse it so the logo link can be located.
r = requests.get("http://www.pythonscraping.com")
bs = BeautifulSoup(r.text, 'lxml')
# The logo is the <img> nested inside the anchor whose id is "logo".
image = bs.find("a", {"id": "logo"}).find("img")["src"]
# Without stream=True the whole body is available as ir.content, so no
# chunk loop is needed to save it.
ir = requests.get(image)
# Only save on HTTP 200 so an error page is never written out as the image.
if ir.status_code == 200:
    # The with-block closes the file as soon as the write finishes instead of
    # leaving the handle to be reclaimed by the garbage collector.
    with open('logo.jpg', 'wb') as f:
        f.write(ir.content)
或者用正则表达式:
import re, requests
# Fetch the landing page; the logo URL is pulled straight out of the raw HTML.
r = requests.get("http://www.pythonscraping.com")
# Capture the src attribute of the first <img> that follows the opening
# <a ... id="logo" ...> tag.
p = re.compile(r'<a[^>]*?id="logo"[^<]*?<img[^>]*?src="([^"]*)')
m = p.search(r.text)
if m is None:
    # Fail with a clear message instead of an IndexError on an empty match list.
    raise SystemExit('logo image not found in page')
image = m.group(1)
ir = requests.get(image)
# Only save on HTTP 200 so an error page is never written out as the image.
if ir.status_code == 200:
    # The with-block closes the file promptly; write() returns the number of
    # bytes written, which is reported below.
    with open('logo.jpg', 'wb') as f:
        sz = f.write(ir.content)
    print('logo.jpg', sz, 'bytes')
else:
    print('logo.jpg', 'not saved, HTTP status', ir.status_code)