import requests
import re
import time
from multiprocessing import Pool
# Wall-clock time at import, formatted as HHMMSS (no separators).
# It is prepended to the names of the output files built in final().
time1 = time.strftime("%H%M%S")
def get_baidu_dasou(keyword='剑圣'):
    """Fetch Baidu search-box suggestions for ``keyword``.

    Two variants of the Baidu suggestion endpoint are queried (one with
    ``p=3``, one with ``sugmode=2``). The suggestions extracted from both
    responses are concatenated, first endpoint first. Duplicates are not
    removed here.

    Args:
        keyword: Search term to expand; sent as the ``wd`` query parameter.

    Returns:
        A list of suggestion strings, or ``None`` when any step fails. On
        failure the function sleeps 5 seconds and prints the error.
    """
    # The fixed part of each query stays in the URL; the keyword is passed via
    # ``params`` so requests percent-encodes it and characters such as '&' or
    # '#' cannot corrupt the query string. The first URL previously ended in
    # "p=3?wd=", which glued the keyword onto the ``p`` parameter instead of
    # sending it as ``wd``.
    endpoints = (
        'http://suggestion.baidu.com/su?json=1&p=3',
        'http://suggestion.baidu.com/su?sugmode=2&json=1',
    )
    suggestion_pattern = re.compile(r'"st": \{ "q": "(.*?)", "new":')
    final_longtaikeywords = []
    try:
        for endpoint in endpoints:
            # A timeout keeps one stalled connection from hanging the run.
            body = requests.get(
                endpoint, params={'wd': keyword}, timeout=10
            ).text
            final_longtaikeywords.extend(suggestion_pattern.findall(body))
        print(str(keyword)+':'+str(final_longtaikeywords))
        return final_longtaikeywords
    except Exception as e:
        # Deliberate best-effort: back off, report, and let the caller skip
        # this keyword rather than abort the whole crawl.
        time.sleep(5)
        print(e)
        return None
def get_sougou(keyword='剑圣'):
    """Fetch Sogou search-box suggestions for ``keyword``.

    Args:
        keyword: Search term to expand; sent as the ``key`` query parameter.

    Returns:
        A list of suggestion strings, or ``None`` when any step fails
        (including a response that has fewer than two ``[`` characters). On
        failure the function sleeps 5 seconds and prints the error.
    """
    # The keyword goes through ``params`` so requests percent-encodes it;
    # the fixed parameters stay in the URL unchanged.
    endpoint = 'https://www.sogou.com/suggnew/ajajjson?type=web&ori=yes&pr=web'
    quoted_item = re.compile(r'"(.*?)",')
    final_longtaikeywords = []
    try:
        body = requests.get(
            endpoint, params={'key': keyword}, timeout=10
        ).text
        # The text between the second and third '[' is taken as the list of
        # long-tail suggestions.
        suggestion_segment = body.split('[')[2]
        # NOTE(review): only entries followed by `",` are captured; if the
        # last entry of the segment is followed by `]` it is not matched.
        # Confirm against a live response before changing the pattern.
        final_longtaikeywords.extend(quoted_item.findall(suggestion_segment))
        print(str(keyword)+':'+str(final_longtaikeywords))
        return final_longtaikeywords
    except Exception as e:
        # Deliberate best-effort: back off, report, and let the caller skip
        # this keyword rather than abort the whole crawl.
        time.sleep(5)
        print(e)
        return None
def get_360(keyword='剑圣'):
    """Fetch 360 (so.com) search-box suggestions for ``keyword``.

    Args:
        keyword: Search term to expand; sent as the ``word`` query parameter.

    Returns:
        A list of suggestion strings, or ``None`` when any step fails. On
        failure the function sleeps 5 seconds and prints the error.
    """
    # Fixed parameters stay in the URL byte-for-byte (including the literal
    # comma in ``fields``); only the keyword is passed via ``params`` so that
    # requests percent-encodes it.
    endpoint = ('http://sug.so.360.cn/suggest?encodein=utf-8&encodeout=utf-8'
                '&format=json&fields=word,obdata')
    word_pattern = re.compile(r'\{"word":"(.*?)"},')
    final_longtaikeywords = []
    try:
        body = requests.get(
            endpoint, params={'word': keyword}, timeout=10
        ).text
        for match in word_pattern.findall(body):
            # A match may carry a trailing `","obdata":"{...}` payload;
            # strip it so only the suggestion text remains.
            final_longtaikeywords.append(
                re.sub('","obdata":"{.*}', '', match)
            )
        print(str(keyword)+':'+str(final_longtaikeywords))
        return final_longtaikeywords
    except Exception as e:
        # Deliberate best-effort: back off, report, and let the caller skip
        # this keyword rather than abort the whole crawl.
        time.sleep(5)
        print(e)
        return None
def read(startpath,zm1s,resultpath):
    """Expand every keyword in ``startpath`` and append the results to a file.

    For each non-blank line ``kw`` of the input file and each affix ``zm1``
    in ``zm1s`` the suggestion services are queried as follows:

    * Baidu with ``kw + zm1`` and with ``zm1 + kw``
    * Sogou with ``kw + zm1`` only
    * 360 with ``kw + zm1`` and with ``zm1 + kw``

    All returned suggestions are flattened, de-duplicated while keeping the
    order of first appearance, printed, and appended to ``resultpath`` one per
    line.

    Args:
        startpath: UTF-8 text file with one seed keyword per line.
        zm1s: Iterable of strings appended/prepended to each keyword.
        resultpath: UTF-8 text file the suggestions are appended to; it is
            created if missing, even when there is nothing to write.

    Returns:
        None.
    """
    # The file is only read, so plain read mode is enough ('r+' would also
    # demand write permission on the seed file).
    with open(startpath, 'r', encoding='utf-8') as f:
        seed_keywords = [line.strip() for line in f]

    final_longtais = []
    for kw in seed_keywords:
        if not kw:
            # Skip blank lines: an empty keyword would otherwise be sent to
            # the services as just the bare affix.
            continue
        for zm1 in zm1s:
            print('百度搜索:'+kw+zm1)
            final_longtais.append(get_baidu_dasou(kw+zm1))
            print('百度搜索:' + zm1+ kw)
            final_longtais.append(get_baidu_dasou(zm1+kw))
            print('搜狗搜索:' + kw + zm1)
            # Sogou is queried with the suffix form only.
            final_longtais.append(get_sougou(kw + zm1))
            print('360搜索:' + kw + zm1)
            final_longtais.append(get_360(kw + zm1))
            print('360搜索:' + zm1+ kw)
            final_longtais.append(get_360(zm1+ kw))

    # Flatten the per-query lists and de-duplicate in one pass, keeping the
    # first occurrence of each word. A failed lookup yields None and is
    # skipped.
    seen = set()
    last_longtais2 = []
    for final_longtai in final_longtais:
        if final_longtai is None:
            continue
        for each_word in final_longtai:
            if each_word not in seen:
                seen.add(each_word)
                last_longtais2.append(each_word)

    print(last_longtais2)
    print(len(last_longtais2))
    with open(resultpath, 'a', encoding='utf-8') as text:
        text.writelines(last_longtai + '\n' for last_longtai in last_longtais2)
    # Original author's note: write the results into a database (not done here).
def final(startpath1, output_dir=r'C:/Users/Administrator/Desktop/result/xiala/'):
    """Run the two-pass keyword expansion and write the merged result.

    Pass 1 expands the seed keywords in ``startpath1`` with every affix in
    ``zm1s`` and writes ``<time1>resultkeywords.txt``. Pass 2 feeds that file
    back through ``read`` with the affixes ``''`` and ``' '`` and writes
    ``<time1>finalkeywords.txt``. Both files are then merged, de-duplicated
    in order of first appearance, and appended to ``<time1>lastpath.txt``.
    The elapsed time is printed at the end.

    Args:
        startpath1: UTF-8 text file with one seed keyword per line.
        output_dir: Directory that receives the three output files. Defaults
            to the location the script has always used.

    Returns:
        None.
    """
    import os  # local import: only this function needs path joining

    # time.clock() was removed in Python 3.8; perf_counter() is the
    # replacement for measuring elapsed time.
    start = time.perf_counter()

    resultpath = os.path.join(output_dir, time1 + 'resultkeywords.txt')
    finalpath = os.path.join(output_dir, time1 + 'finalkeywords.txt')
    lastpath = os.path.join(output_dir, time1 + 'lastpath.txt')

    # Affixes for the first pass: empty, space, letters and digits.
    # The letters i, u and v are absent -- presumably because no pinyin
    # syllable starts with them; confirm before adding them.
    zm1s = ['', ' ', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'j', 'k', 'l',
            'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'w', 'x', 'y', 'z',
            '0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
    # Affixes for the second pass.
    zm2s = ['', ' ']

    read(startpath1, zm1s, resultpath)
    read(resultpath, zm2s, finalpath)

    # Merge pass-1 and pass-2 output, keeping the first occurrence of each
    # keyword in a single linear pass.
    seen = set()
    lastlist2 = []
    for path in (resultpath, finalpath):
        with open(path, 'r', encoding='utf-8') as text:
            for line in text:
                kw = line.strip()
                if kw not in seen:
                    seen.add(kw)
                    lastlist2.append(kw)

    print(len(lastlist2))
    with open(lastpath, 'a', encoding='utf-8') as text:
        text.writelines(singlelastlist + '\n' for singlelastlist in lastlist2)

    end = time.perf_counter()
    print("read: %f s" % (end - start))
if __name__ == '__main__':
    # Run the pipeline only when executed as a script. Without this guard the
    # whole crawl would start on every import of the module, including the
    # re-import that multiprocessing performs in child processes on Windows.
    final(startpath1=r'C:\Users\Administrator\Desktop/result\xiala\testforkeywords.txt')
根据搜索引擎下拉框,扩展关键词(单)
最后编辑于 :
©著作权归作者所有,转载或内容合作请联系作者
- 文/潘晓璐 我一进店门,熙熙楼的掌柜王于贵愁眉苦脸地迎上来,“玉大人,你说我怎么就摊上这事。” “怎么了?”我有些...
- 文/花漫 我一把揭开白布。 她就那样静静地躺着,像睡着了一般。 火红的嫁衣衬着肌肤如雪。 梳的纹丝不乱的头发上,一...
- 文/苍兰香墨 我猛地睁开眼,长吁一口气:“原来是场噩梦啊……” “哼!你这毒妇竟也来了?” 一声冷哼从身侧响起,我...
推荐阅读更多精彩内容
- 成果 并没有处理好循环中的重复采集问题,实际上是不能用于多进程的。根据关键词txt(一行一个关键词),以下拉框为标准...
- 今天我要带给你的是一个小技术的营销策略,就是如何让你的品牌关键词持续曝光在搜索引擎的下拉框中? 各位,你们有...
- 10:04 2013/11/20 星期三 by依山居 userscripts可以称为用户脚本,userscript...