存储数据至 MongoDB 中
在 pipelines.py 中创建对应的管道类
在 settings.py 中写好连接 MongoDB 所需的配置项(MONGO_LOCA、MONGO_PORT、USERNAME、PASSWORD、MONGO_DB、MONGO_COLLECTION)
class QuotesMongoPipelin:
    """Scrapy item pipeline that stores every scraped item in a MongoDB collection.

    The connection parameters are read from the crawler settings by
    ``from_crawler``. The MongoDB client is opened when the pipeline is
    constructed and closed again in ``close_spider``.
    """

    def __init__(self, local, port, username, password, mongo_db, mongo_collection):
        """Store the connection settings and open the MongoDB connection.

        Args:
            local: Host passed to ``pymongo.MongoClient``.
            port: Port passed to ``pymongo.MongoClient``.
            username: User name used for authentication.
            password: Password used for authentication.
            mongo_db: Name of the database to write to.
            mongo_collection: Name of the collection to write to.
        """
        self.local = local
        self.port = port
        self.username = username
        self.pwd = password
        self.mongodb = mongo_db
        self.collection = mongo_collection
        # Keep the client on its own attribute: it is the object that has to
        # be closed when the spider finishes, so it must not be overwritten
        # by the collection handle.
        self.client = pymongo.MongoClient(
            host=self.local,
            port=self.port,
            username=self.username,
            password=self.pwd,
        )
        # Handle of the target collection; process_item() inserts through it.
        self.connection = self.client[self.mongodb][self.collection]

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline from the values found in the crawler settings.

        Args:
            crawler: Object whose ``settings.get`` is used to look up the
                MongoDB connection parameters.

        Returns:
            A pipeline instance configured from those settings.
        """
        return cls(
            local=crawler.settings.get('MONGO_LOCA'),
            port=crawler.settings.get('MONGO_PORT'),
            username=crawler.settings.get('USERNAME'),
            password=crawler.settings.get('PASSWORD'),
            mongo_db=crawler.settings.get('MONGO_DB'),
            mongo_collection=crawler.settings.get('MONGO_COLLECTION'),
        )

    def close_spider(self, spider):
        """Close the MongoDB client when the spider is closed."""
        self.client.close()

    def process_item(self, item, spider):
        """Validate ``item`` and insert it into the MongoDB collection.

        Args:
            item: The scraped item; iterating it yields its field names.
            spider: The spider that produced the item (unused).

        Returns:
            The unchanged ``item``.

        Raises:
            DropItem: If any field of the item holds an empty (falsy) value.
        """
        for field in item:
            # Check the field's value, not its name: the name is never empty,
            # so testing it would let incomplete items through.
            if not item[field]:
                raise DropItem("missing{0}".format(field))
        # insert_one() replaces Collection.insert(), which PyMongo 4 removed.
        self.connection.insert_one(dict(item))
        return item
最后在 settings.py 的 ITEM_PIPELINES 中注册该管道类,即可激活该管道