学习了《Python爬虫(七)——Scrapy模拟登录》中的POST模拟登录方法后,自己写了一段模拟登录知乎首页的代码。
from scrapy.spiders import CrawlSpider
from scrapy.selector import Selector
from scrapy.http import Request,FormRequest
class zhihu_login(CrawlSpider):
    """Spider that attempts to log in to Zhihu by submitting the sign-in form.

    The flow is: fetch the sign-in page, read the ``_xsrf`` token from it,
    POST the credentials together with the token, then print the body of
    the page returned after the POST.

    NOTE(review): Scrapy's documentation advises against using ``parse`` as
    a callback on a ``CrawlSpider``. No ``rules`` are defined here, so the
    base class is kept unchanged; consider switching to a plain ``Spider``.
    """

    name = 'zhihu'
    allowed_domains = ['www.zhihu.com']
    start_urls = ['https://www.zhihu.com/#signin']

    def parse(self, response):
        """Extract the ``_xsrf`` token and submit the login form.

        Args:
            response: The response for the sign-in page.

        Returns:
            A ``FormRequest`` whose callback is ``after_login``, or ``None``
            when the page contains no ``_xsrf`` input.
        """
        tokens = response.xpath(
            '//html/body/input[@name="_xsrf"]/@value'
        ).extract()
        if not tokens:
            # Without the token the POST cannot be built; stop here instead
            # of failing with an IndexError on an empty result list.
            self.logger.error('No _xsrf token found on %s', response.url)
            return None

        _xsrf = tokens[0]
        print(_xsrf)

        password = '***'  # account password (placeholder)
        captcha_type = 'cn'
        phone_num = '***'  # account phone number (placeholder)
        formdata = {
            '_xsrf': _xsrf,
            'password': password,
            'captcha_type': captcha_type,
            'phone_num': phone_num,
        }

        # POST the credentials to the Zhihu login form found in the response.
        return FormRequest.from_response(
            response,
            formdata=formdata,
            callback=self.after_login,
            dont_filter=True,
        )

    def after_login(self, response):
        """Print the raw body of the page returned after the login POST.

        Args:
            response: The response to the login form submission.
        """
        # ``body`` is a bytes attribute, not a method; calling it raised
        # TypeError in the original code.
        print(response.body)
测试后发现上述代码无效。