需要引入的模块
#!/usr/bin/env python3
#-*-encoding:utf-8-*-
from urllib import request,parse
import re
import os
import time
URL 请求：获取页面内容并按 GBK 解码
def open_url(url):
    """Fetch *url* and return the response body decoded as GBK text.

    Args:
        url: Address to request; passed unchanged to ``request.Request``.

    Returns:
        The full response body as a ``str``, decoded with the ``gbk`` codec.

    Raises:
        urllib.error.URLError: If the request cannot be completed.
        UnicodeDecodeError: If the body is not valid GBK.
    """
    req = request.Request(url)
    # The context manager closes the connection even when read()/decode() fails,
    # so repeated calls do not leak sockets.
    with request.urlopen(req) as response:
        return response.read().decode('gbk')
获取首页中各详情页的 URL
def get_urls():
    """Collect the detail-page links found on the site's home page.

    Returns:
        A list of strings: for every ``<a target="_blank" ...>`` tag that
        matches, the part of its ``href`` that follows
        ``http://www.mm131.com/``.
    """
    home_page = open_url('http://www.mm131.com/')
    link_re = re.compile(r'<a target="_blank" href="http://www.mm131.com/(.*?)">')
    return link_re.findall(home_page)
保存：逐页下载每个详情页中的图片
def _safe_name(name):
    """Return *name* with path separators replaced so it stays one path component."""
    return name.replace('/', '_').replace('\\', '_')


def save(urls, base_dir='/home/yzw/mm131/', delay=5):
    """Download the image on every page of every gallery listed in *urls*.

    For each gallery the detail page is fetched, its title and page count are
    extracted, a folder named after the title is created under *base_dir*,
    and the first ``<img alt=... src=...>`` of each page is saved into that
    folder using the ``alt`` text as the file name. Files are written through
    full paths; the process working directory is not changed.

    Args:
        urls: Iterable of gallery paths relative to ``http://www.mm131.com/``.
            Each is expected to end in ``.html`` (the last five characters
            are stripped to build the URLs of pages 2..N).
        base_dir: Directory under which one folder per gallery is created.
            Missing directories are created as needed.
        delay: Seconds to sleep after each detail-page request and after
            each image download.

    Returns:
        None.

    Raises:
        urllib.error.URLError: If fetching a gallery page fails (propagated
            from ``open_url``). Failures while downloading an individual
            image are skipped instead.
    """
    site = 'http://www.mm131.com/'
    img_pattern = re.compile(r'<img alt="(.*?)" src="(.*?)"')
    page_pattern = re.compile(r'<span class="page-ch">共(.*?)页</span>')
    # Closing tag is "</h5>"; writing it as "<\h5>" makes re.compile raise
    # "bad escape \h" on current Python versions.
    title_pattern = re.compile(r'<h5>(.*?)</h5>')

    for rel_url in urls:
        url = site + rel_url
        detail_html = open_url(url)
        time.sleep(delay)

        page_total = page_pattern.findall(detail_html)
        titles = title_pattern.findall(detail_html)
        if not page_total or not titles:
            # Page layout not recognised: nothing reliable to download.
            continue
        try:
            page_count = int(page_total[0])
        except ValueError:
            continue

        # Titles come from the remote page, so keep them to one path component.
        gallery_dir = os.path.join(base_dir, _safe_name(titles[0]))
        try:
            os.makedirs(gallery_dir, exist_ok=True)
        except OSError:
            continue

        stem = rel_url[:-5]  # drop the trailing ".html"
        for page in range(1, page_count + 1):
            if page == 1:
                # Page 1 is the detail page that was already fetched above.
                page_html = detail_html
            else:
                page_html = open_url(site + stem + '_' + str(page) + '.html')

            img_title = img_pattern.findall(page_html)
            print(img_title)
            if not img_title:
                continue

            title, img = img_title[0]
            target = os.path.join(gallery_dir, _safe_name(title))
            try:
                request.urlretrieve(img, target)
            except (OSError, ValueError):
                # Network/file-system failure or malformed image URL:
                # skip this image and carry on with the next page.
                continue
            time.sleep(delay)
开始抓取
if __name__ == '__main__':
    # Script entry point: collect the gallery links, make sure the local
    # "mm131" working folder exists, switch into it and start downloading.
    urls = get_urls()
    title = 'mm131'
    # exist_ok=True lets the script be re-run; a plain os.mkdir would raise
    # FileExistsError on every run after the first.
    os.makedirs(title, exist_ok=True)
    os.chdir(title)
    save(urls)
抓取结果
.
.
.
[('纹身小妹夏美酱酥胸半露诱惑十足(图45)', 'http://img1.mm131.com/pic/2274/45.jpg')]
[('纹身小妹夏美酱酥胸半露诱惑十足(图46)', 'http://img1.mm131.com/pic/2274/46.jpg')]
[('纹身小妹夏美酱酥胸半露诱惑十足(图47)', 'http://img1.mm131.com/pic/2274/47.jpg')]
[('纹身小妹夏美酱酥胸半露诱惑十足(图48)', 'http://img1.mm131.com/pic/2274/48.jpg')]
[('纹身小妹夏美酱酥胸半露诱惑十足(图49)', 'http://img1.mm131.com/pic/2274/49.jpg')]
[('纹身小妹夏美酱酥胸半露诱惑十足(图50)', 'http://img1.mm131.com/pic/2274/50.jpg')]
[('纹身小妹夏美酱酥胸半露诱惑十足(图51)', 'http://img1.mm131.com/pic/2274/51.jpg')]
[('美护士沈梦瑶制服写真大胆张腿很诱惑(图1)', 'http://img1.mm131.com/pic/2746/1.jpg')]
[('美护士沈梦瑶制服写真大胆张腿很诱惑(图2)', 'http://img1.mm131.com/pic/2746/2.jpg')]
[('美护士沈梦瑶制服写真大胆张腿很诱惑(图3)', 'http://img1.mm131.com/pic/2746/3.jpg')]
[('美护士沈梦瑶制服写真大胆张腿很诱惑(图4)', 'http://img1.mm131.com/pic/2746/4.jpg')]
.
.
.