phantomjs 配置
macweb = webdriver.PhantomJS('/Users/apple/xinjiang/phantomjs-2.1.1-macosx/bin/phantomjs')
firefox 配置
browser = webdriver.Firefox('/Users/apple/xinjiang/phantomjs-2.1.1-macosx/bin/')
火狐浏览器需要 geckodriver 放在同一文件夹下。
driver = webdriver.Chrome('/Users/apple/xinjiang/phantomjs-2.1.1-macosx/bin/chromedriver') 谷歌浏览器
新疆首页
http://xj.gsxt.gov.cn/sydq/loginSydqAction!sydq.dhtml
============================================
>>> from selenium import webdriver
>>> from selenium.webdriver.common.action_chains import ActionChains
>>> from selenium.webdriver.common.keys import Keys
>>> browser = webdriver.Firefox('/Users/apple/xinjiang/phantomjs-2.1.1-macosx/bin/')
>>> browser = webdriver.Firefox('/Users/apple/xinjiang/phantomjs-2.1.1-macosx/bin/')
>>> browser.get('http://xj.gsxt.gov.cn/sydq/loginSydqAction!sydq.dhtml')
>>> yy=browser.find_element_by_css_selector('#keyword_qycx')
>>> yy.click()
>>> yy.send_keys("新疆建设大厦".decode('utf-8'))
=====================
middleware
class middle(object):
    """Middleware that renders each request in PhantomJS and returns the HTML.

    The ``process_request(request, spider)`` hook and the ``HtmlResponse``
    return value follow the shape of a Scrapy downloader middleware
    (NOTE(review): confirm it is registered as one in the project settings).
    """

    def process_request(self, request, spider):
        """Load ``request.url`` in PhantomJS and wrap the page source.

        Args:
            request: The outgoing request; only ``request.url`` is read.
            spider: The spider issuing the request (unused here).

        Returns:
            An ``HtmlResponse`` for ``request.url``:
            * status 200 with the rendered page source when it is non-empty;
            * status 503 with an empty body when the page source is empty or
              any exception is raised while driving the browser.
        """
        driver = None
        try:
            driver = webdriver.PhantomJS()
            driver.get(request.url)
            # Fixed pause before reading the page source -- presumably to let
            # the page's JavaScript finish rendering.
            time.sleep(10)
            content = driver.page_source.encode('utf-8')
            print('xxxxxxxx')
            # Truthiness check works for both the Python 2 str and the
            # Python 3 bytes produced by encode().
            if not content:
                logger.info('content is empty :503')
                return HtmlResponse(request.url, encoding='utf-8',
                                    status=503, body='')
            logger.info('content get success:200')
            print('ok')
            # Report success as 200 (matching the log line above); the
            # original returned 503 here, marking good pages as failures.
            return HtmlResponse(request.url, encoding='utf-8',
                                status=200, body=content)
        except Exception as e:
            # Best-effort boundary: any browser failure becomes a 503 response
            # instead of propagating.
            logger.warning(e)
            logger.info('Exception content is empty :503')
            return HtmlResponse(request.url, encoding='utf-8',
                                status=503, body='')
        finally:
            # Always shut the browser down so a PhantomJS process is not
            # leaked for every request handled.
            if driver is not None:
                try:
                    driver.quit()
                except Exception as e:
                    # A failed shutdown must not replace the response
                    # being returned.
                    logger.warning(e)
# Snippet: format the current local time as a 'YYYY-MM-DD HH:MM:SS' string.
# The result is not assigned or printed on this line.
time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time()))