import requests
import re
import json
from lxml import etree
import urllib.parse
import urllib
import pymysql,random,time
# Shared MySQL connection and cursor, opened once when the module is loaded.
# NOTE(review): the credentials are hard-coded here; consider moving them to
# configuration before this script is shared or deployed.
conn = pymysql.connect(
    host='127.0.0.1',
    user='root',
    password='bc123',
    db='leshop',
    charset='utf8',
)
cur = conn.cursor()

# HTTP headers sent with every page request (a desktop Chrome user agent).
header = {
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36",
}
def qingqiu(url, timeout=10):
    """Crawl the index page at *url* and process every link selected from it.

    The page is fetched and parsed, then for each element matching
    ``//*[@id="pcUL"]/dl[1]/dd/div/ul`` the ``href`` of every ``<a>`` found
    under ``.//li[1]/p`` is resolved against the final response URL and
    passed to ``yuedxq``.

    Args:
        url: Address of the index page to crawl.
        timeout: Seconds to wait for the server before ``requests`` gives up.
            Without a timeout a stalled connection would block forever.

    Raises:
        requests.exceptions.RequestException: Propagated from
            ``requests.get`` when the page cannot be fetched in time.
    """
    response = requests.get(url, headers=header, timeout=timeout)
    response.encoding = 'utf-8'
    tree = etree.HTML(response.text)
    if tree is None:
        # The parser may produce no root element when the body holds no
        # usable markup; there is nothing to follow in that case.
        return
    for menu in tree.xpath('//*[@id="pcUL"]/dl[1]/dd/div/ul'):
        for paragraph in menu.xpath('.//li[1]/p'):
            for href in paragraph.xpath('.//a/@href'):
                # hrefs can be relative, so resolve them against the URL
                # that was actually served (after any redirects).
                yuedxq(urllib.parse.urljoin(response.url, href))
def yuedxq(url, timeout=10):
    """Fetch the page at *url* and insert its title into ``goods_goodscategory``.

    The name stored is the last value matched by
    ``//*[@id="5"]/a[2]/@title``. One row is inserted and committed per call,
    stamped with today's local date. Pages without a matching title are
    skipped, because inserting would otherwise fail on an unbound name.

    The page also exposes per-item data under ``#listShowStyleBody`` (``em``
    text, ``h4`` link titles, image ``src``), but none of it is persisted.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before ``requests`` gives up.
            Without a timeout a stalled connection would block forever.

    Raises:
        requests.exceptions.RequestException: Propagated from
            ``requests.get`` when the page cannot be fetched in time.
    """
    response = requests.get(url, headers=header, timeout=timeout)
    response.encoding = 'utf-8'
    tree = etree.HTML(response.text)
    if tree is None:
        # The parser may produce no root element when the body holds no
        # usable markup; nothing can be extracted from such a page.
        return

    titles = tree.xpath('//*[@id="5"]/a[2]/@title')
    if not titles:
        # No title on this page: skip it rather than crash on a missing name.
        return
    # When several titles match, the last one wins.
    name = titles[-1]

    sql = '''insert into goods_goodscategory(id,name,code,`desc`,category_type,is_tab,add_time,parent_category_id) values(0,%s,1,0,3,1,%s,1)'''
    add_time = time.strftime('%Y-%m-%d', time.localtime(time.time()))
    # Values are passed as parameters so the driver escapes the scraped text.
    cur.execute(sql, (name, add_time))
    conn.commit()
if __name__ == '__main__':
    # Script entry point: start the crawl from the site's index page.
    url = "http://xjbh.net/index.html"
    qingqiu(url)
杂类爬取香江百货 硬存数据库
©著作权归作者所有,转载或内容合作请联系作者
- 文/潘晓璐 我一进店门,熙熙楼的掌柜王于贵愁眉苦脸地迎上来,“玉大人,你说我怎么就摊上这事。” “怎么了?”我有些...
- 文/花漫 我一把揭开白布。 她就那样静静地躺着,像睡着了一般。 火红的嫁衣衬着肌肤如雪。 梳的纹丝不乱的头发上,一...
- 文/苍兰香墨 我猛地睁开眼,长吁一口气:“原来是场噩梦啊……” “哼!你这毒妇竟也来了?” 一声冷哼从身侧响起,我...
推荐阅读更多精彩内容
- 这是全部的调试过程,我已经整理成为笔记,这里分享给大家:python爬取豆瓣两千万图书简介信息:(一)目标API分...
- 本人长期出售超大量微博数据、旅游网站评论数据,并提供各种指定数据爬取服务,Message to YuboonaZh...
- talk is cheap,show you the code 'use strict'; const http ...
- 'use strict'; const http = require('http'); const cheerio...