1. settings.py
- Add a proxy address pool (a quick liveness-check sketch follows the list below)
# Define a setting that holds the proxies we have collected
IPPOOL = [
{"ip":"113.16.160.101:8118"},
{"ip":"119.29.119.64:8080"},
{"ip":"202.112.237.102:3128"},
{"ip":"119.31.210.170:7777"},
{"ip":"183.129.207.83:10800"},
{"ip":"183.129.207.73:14823"}
]
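Public proxies like these go stale quickly, so it can help to filter the pool before a crawl. Below is a minimal sketch, assuming the requests library is installed; the check_proxies helper name and the httpbin.org test URL are illustrative choices, not part of the project.

import requests

def check_proxies(pool, timeout=5):
    # Keep only the entries that successfully relay a test request
    alive = []
    for entry in pool:
        proxy = "http://" + entry["ip"]
        try:
            requests.get("http://httpbin.org/ip",
                         proxies={"http": proxy, "https": proxy},
                         timeout=timeout)
            alive.append(entry)
        except requests.RequestException:
            pass
    return alive

# Example usage in settings.py: IPPOOL = check_proxies(IPPOOL)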
- Enable the downloader middlewares and register the proxy middleware
DOWNLOADER_MIDDLEWARES = {
"scrapy.downloadermiddlewares.httpproxy.HttpProxyMiddleware":123,
'IPPOOLDemo.middlewares.IPPOOLS': 125
}
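To confirm that the middleware is actually applied, a throwaway spider that fetches httpbin.org/ip and logs the origin address is enough. A minimal sketch; the spider name "ipcheck" is a hypothetical choice, and the file would live in the project's spiders directory.

import json

import scrapy

class IPCheckSpider(scrapy.Spider):
    name = "ipcheck"
    # httpbin echoes back the IP address it sees, so with a working proxy
    # the logged address should match an entry from IPPOOL, not your own IP
    start_urls = ["http://httpbin.org/ip"]

    def parse(self, response):
        self.logger.info("Seen as: %s", json.loads(response.text)["origin"])

Run it with scrapy crawl ipcheck and compare the logged address against the pool.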
2. middlewares.py
Clear the file and rewrite it from scratch.
- Import the random module and IPPOOL from the settings file
import random
from .settings import IPPOOL
- Import the built-in HttpProxyMiddleware (the old scrapy.contrib path was removed in recent Scrapy versions)
from scrapy.downloadermiddlewares.httpproxy import HttpProxyMiddleware
# Create a proxy middleware class that inherits from the built-in proxy middleware
class IPPOOLS(HttpProxyMiddleware):
    # Override the initializer
    def __init__(self, ip=''):
        self.ip = ip

    # Override the request-processing hook
    def process_request(self, request, spider):
        # Randomly pick one proxy from the IP pool
        current_ip = random.choice(IPPOOL)
        print("Current proxy IP:", current_ip["ip"])
        # Point the request at the chosen proxy; these are plain HTTP proxies, so use the http:// scheme
        request.meta["proxy"] = "http://" + current_ip["ip"]
        # At this point the proxy IP is injected into the downloader
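Importing IPPOOL straight from settings.py works, but a more idiomatic Scrapy variant reads the pool through the crawler's settings object, so the middleware is not tied to one project. This is a sketch of that alternative, not part of the original code; the class name SettingsIPPool is hypothetical.

import random

from scrapy.downloadermiddlewares.httpproxy import HttpProxyMiddleware

class SettingsIPPool(HttpProxyMiddleware):
    def __init__(self, pool):
        self.pool = pool

    @classmethod
    def from_crawler(cls, crawler):
        # Scrapy calls this hook when it builds the middleware and hands over the
        # running crawler, so the pool comes from whatever settings module is active
        return cls(crawler.settings.getlist("IPPOOL"))

    def process_request(self, request, spider):
        if self.pool:
            current_ip = random.choice(self.pool)
            request.meta["proxy"] = "http://" + current_ip["ip"]

Registering it only requires pointing the DOWNLOADER_MIDDLEWARES entry at the new class path.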