100字范文,内容丰富有趣,生活中的好帮手!
100字范文 > Python 爬虫实战(三) 获取百度地图搜索结果

Python 爬虫实战(三) 获取百度地图搜索结果

时间:2019-12-07 00:46:35

相关推荐

Python 爬虫实战(三) 获取百度地图搜索结果

百度地图其实有个 API 给开发者使用:http://lbsyun.baidu.com/index.php?title=webapi/place-suggestion-api

但是呢,有一些缺点

1、需要申请ak码才能使用

2、搜索结果比百度地图的搜索结果少,有一些地址在API中根本搜索不到

所以呢,还是自己重新写了一个百度地图爬虫

map_web.py:爬虫主程序

# -*- coding: utf-8 -*-
"""Scrape place-search results from the Baidu Maps web front end.

Created on Tue Aug 20 15:22:02

@author: Eric
"""
import logging
import re

import requests

from map_class import Address

logger = logging.getLogger(__name__)

# NOTE(review): the published listing had ``url = '/'``, which has no scheme
# or host, so ``requests.get`` raised and every search returned [].  The Baidu
# Maps web host below is the presumed original value -- confirm before use.
_BAIDU_MAP_URL = 'https://map.baidu.com/'
_PAGE_SIZE = 10  # matches the ``nn = page * 10`` offset sent with each request
_TIMEOUT = 10  # seconds; without a timeout a stalled connection hangs forever
_CONTENT_MARKER = ',"content":'


def search_params(query, city, page=0):
    """Build the query-string parameters for one page of a search request.

    Args:
        query: Search keyword, sent as ``wd``.
        city: Value sent as ``c`` (the region code, per the usage example at
            the bottom of this module).
        page: Zero-based page index, sent as ``pn``; ``nn`` is ``page * 10``.

    Returns:
        A dict suitable for ``requests.get(..., params=...)``.
    """
    return {
        'newmap': 1,
        'reqflag': 'pcmap',
        'biz': 1,
        'from': 'webmap',
        'da_par': 'direct',
        'pcevaname': 'pc4.1',
        'qt': 's',
        'da_src': 'searchBox.button',
        'wd': query,
        'wd2': '',
        'c': city,
        'src': 0,
        'pn': page,
        'sug': 0,
        'db': 0,
        'addr': 0,
        # Sent as JSON text: a dict value would be serialised by requests as
        # repeated keys (biz_forward=scaler&biz_forward=styles).
        'biz_forward': '{"scaler":1,"styles":"pl"}',
        # NOTE(review): 'auth' and 't' are hard-coded values, presumably
        # captured from a browser session -- they may need refreshing.
        'auth': '2dZB4vFJNWZ8@9fL6v99La95@FOJRvx=uxHLLBNVLLztComRB199Ay1uVt1GgvPUDZYOYIZuEt2gz4yYxGccZcuVtPWv3GuRBtR9KxXwUvhgMZSguxzBEHLNRTVtcEWe1GD8zv7ucvY1SGpuxxti0XEI=1mDLYClnDjnCENRRHN@Z@EBfiKKvCMuGllhIQT',
        'device_ratio': 1,
        'tn': 'B_NORMAL_MAP',
        'nn': page * 10,
        'ie': 'utf-8',
        't': '1566370557403',
    }


def reduce(stri):
    """Return the first brace-balanced ``{...}`` substring of ``stri``.

    Scanning starts at the first ``'{'`` and stops at the ``'}'`` that closes
    it.  If the braces never balance, everything from the first ``'{'`` to the
    end of the string is returned.

    Args:
        stri: Text to scan.

    Returns:
        The extracted substring, or ``False`` when ``stri`` has no ``'{'``.
    """
    start = stri.find('{')
    if start == -1:
        return False
    depth = 0
    for pos in range(start, len(stri)):
        char = stri[pos]
        if char == '{':
            depth += 1
        elif char == '}':
            depth -= 1
            if depth == 0:
                return stri[start:pos + 1]
    return stri[start:]


def _fetch_text(query, city, page):
    """Request one result page and return its body with all spaces removed."""
    response = requests.get(
        _BAIDU_MAP_URL,
        params=search_params(query, city, page),
        timeout=_TIMEOUT,
    )
    response.encoding = 'unicode_escape'  # decode the escaped body text
    return response.text.replace(' ', '')


def _page_results(text):
    """Return the raw ``{...}`` snippets from the ``content`` section of a page.

    A page without the content marker yields no snippets; otherwise the whole
    response would be mistaken for a single result.
    """
    if _CONTENT_MARKER not in text:
        return []
    remaining = text.split(_CONTENT_MARKER)[-1].split(',"current_city"')[0]
    snippets = []
    while True:
        snippet = reduce(remaining)
        if not snippet:
            return snippets
        snippets.append(snippet)
        # Advance past this snippet only; removing every occurrence could
        # cut text out of later results.
        remaining = remaining[remaining.index(snippet) + len(snippet):]


def search(query, city, findall=False, debug=False):
    """Search Baidu Maps and return the matches as ``Address`` objects.

    Args:
        query: Search keyword.
        city: Region code passed through to ``search_params``.
        findall: ``True`` fetches every result page; ``False`` fetches only
            the first page.
        debug: When true, print each parsed address.

    Returns:
        A list of ``Address`` objects, or ``[]`` when a request fails or the
        response cannot be parsed.
    """
    try:
        first_page = _fetch_text(query, city, 0)
        # Number of results reported by the service.
        total = int(re.findall('total":(.*?),', first_page)[0])
        if findall:
            # Ceiling division: ``total // 10 + 1`` asked for one page too
            # many whenever ``total`` was an exact multiple of the page size.
            page_count = max(1, (total + _PAGE_SIZE - 1) // _PAGE_SIZE)
        else:
            page_count = 1
        # The first page is already downloaded, so reuse it.
        raw_results = _page_results(first_page)
        for page in range(1, page_count):
            raw_results.extend(_page_results(_fetch_text(query, city, page)))
    except (requests.RequestException, IndexError, ValueError) as err:
        # Best effort: callers get an empty list, but the failure is logged.
        logger.warning('Baidu Maps search for %r failed: %s', query, err)
        return []

    addresses = []
    for raw in raw_results:
        address = Address()
        address.fill(raw)
        addresses.append(address)
        if debug:
            print(address)
    return addresses


if __name__ == '__main__':
    # ``city`` is the region code; it can be looked up via Baidu search.
    results_list = search('文峰', 289)

map_class.py:通过正则表达式匹配需要的信息

# -*- coding: utf-8 -*-
"""Address record filled from a raw search-result snippet via regexes.

Created on Tue Aug 20 16:01:41

@author: win 10
"""
import re


class Address:
    """One search result; every field defaults to an empty string."""

    # Compiled once at class level.  Raw strings keep the backslash escapes
    # valid (the non-raw '\[' form triggers a SyntaxWarning on Python 3.12+).
    _NAME_RE = re.compile(r'"geo_type":.*,"name":"(.*?)","navi_update_time')
    _STD_TAG_RE = re.compile(r'std_tag":"(.*?)"')
    _DI_TAG_RE = re.compile(r'di_tag":"(.*?)"')
    _ADDR_RE = re.compile(r'addr":"(.*?)"')
    _AREA_RE = re.compile(r'area_name":"(.*?)","city_id')
    _CITY_RE = re.compile(r'city_name":"(.*?)"')
    _PROV_RE = re.compile(r'\[(.*?)\(.*\|')

    def __init__(self):
        self.name = ''  # place name
        self.addr = ''  # street address
        self.area = ''  # district
        self.city = ''  # city
        self.tag = ''   # category tag
        self.prov = ''  # province

    def fill(self, info):
        """Extract the fields from ``info`` and store them on this instance.

        A field whose pattern does not match keeps its current value.

        Args:
            info: Raw text of a single search result.
        """
        name = self._NAME_RE.findall(info)
        if name:
            self.name = name[0]

        # Prefer ``std_tag``; fall back to ``di_tag`` when it is absent or empty.
        tag = self._STD_TAG_RE.findall(info)
        if not tag or tag[0] == '':
            tag = self._DI_TAG_RE.findall(info)
        if tag:
            self.tag = tag[0]

        addr = self._ADDR_RE.findall(info)
        if addr:
            self.addr = addr[0]

        area = self._AREA_RE.findall(info)
        if area:
            self.area = area[0]

        city = self._CITY_RE.findall(info)
        if city:
            self.city = city[0]

        # Text between '[' and the next '(' in the result snippet.
        prov = self._PROV_RE.findall(info)
        if prov:
            self.prov = prov[0]

    def __str__(self):
        """Return name, address, province+city+district and tag, one per line."""
        region = self.prov + self.city + self.area
        return '\n'.join([self.name, self.addr, region, self.tag])

本内容不代表本网观点和政治立场,如有侵犯你的权益请联系我们处理。
网友评论
网友评论仅供其表达个人看法,并不表明网站立场。