环境
Python 3.5.1
Windows 10
直接上代码
# -*- coding:utf-8 -*-
from requests import Session
from bs4 import BeautifulSoup
import json
from PIL import Image
# Root URL of the Zhihu site.
baseurl = "https://www.zhihu.com"
# Endpoint used for logging in with an e-mail address.
email_login = baseurl + "/login/email"
# httpbin echo endpoint, useful for inspecting the format of a POST request.
testurl = "https://httpbin.org/post"
# Account credentials submitted with the login form (sample values).
email, password = "123@123.com", "123456"
# A Session stores cookies and passes them along across requests.
s = Session()
# Initialise the session-wide default headers.
s.headers["Content-Type"] = "application/x-www-form-urlencoded; charset=UTF-8"
# Override the default User-Agent with a desktop Chrome string.
s.headers["User-Agent"] = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
    " AppleWebKit/537.36 (KHTML, like Gecko)"
    " Chrome/51.0.2704.106 Safari/537.36"
)
# Per the original author's note, later form POSTs need an _xsrf token, while
# GET requests and the login itself do not. If it is needed, it can be read
# from the first <input> of the home page and added to the form:
#     soup = BeautifulSoup(s.get(baseurl).text, "lxml")
#     xsrf = soup.input["value"]
#     form_data["_xsrf"] = xsrf
# Fields submitted with the login request; the captcha is added later.
form_data = {
    "password": password,
    "email": email,
    "remember_me": "true",
}
# Download the login captcha image through the session.
captcha_response = s.get(baseurl + "/captcha.gif?" + "type=login")
# Fail early on an HTTP error instead of saving an error page as an image.
captcha_response.raise_for_status()
# Save the image to disk; the file is closed before it is reopened below.
with open("cap.gif", "wb") as f:
    f.write(captcha_response.content)
# Display the saved captcha so it can be read by a human.
img = Image.open("cap.gif")
img.show()
# The captcha is typed in manually; surrounding whitespace is dropped so a
# stray space does not invalidate the answer.
captcha = input("输入验证码: ").strip()
form_data["captcha"] = captcha
# Submit the login form (credentials plus captcha) through the session.
login_response = s.post(email_login, data=form_data)
# The response body is parsed as JSON and its "msg" field is printed.
login_result = json.loads(login_response.text)
print(login_result["msg"])
# Fetch the profile settings page and extract the nickname from it.
r = s.get("https://www.zhihu.com/settings/profile")
soup = BeautifulSoup(r.text, "lxml")
rename_section = soup.find(id="rename-section")
# find() returns None when the element is absent (presumably when the login
# did not succeed -- confirm), so guard before touching its children.
nickname_span = rename_section.span if rename_section is not None else None
if nickname_span is None:
    # Exit with a clear message instead of an AttributeError on None.
    raise SystemExit("未能获取昵称, 请确认登录是否成功")
# get_text() always yields a str, whereas .string is None when the span has
# more than one child node, which would break the concatenation.
print("你的昵称: " + nickname_span.get_text())
输出结果
d:\workspace\test>python requests_test.py
输入验证码: emvx
登录成功
你的昵称: ***