java爬取百度地图矢量图_百度地图爬取数据 - osc_bg1xj59w的个人空间 - OSCHINA - 中文开源技术交流社区...

# -*- coding:utf-8 -*-import requestsimport reimport xlwtimport demjsonimport timeimport jsonclass get_location():def __init__(self):self.i = 0self.li = []self.dict1 = {}self.li_ak = 'U0QGae7viQsN0yLB

weixin_39542043

677人浏览 · 2021-02-28 06:04:37

weixin_39542043 · 2021-02-28 06:04:37 发布

# -*- coding:utf-8 -*-

import requests

import re

import xlwt

import demjson

import time

import json

class get_location():
    """Collect place names and record those without a Baidu panorama image.

    Workflow, as driven by the script at the bottom of this file:

    1. ``get_html`` downloads a catalogue of places (a JSON list of objects
       that each carry a ``name``).
    2. ``parse`` turns every interesting name into a Baidu Place-search URL
       and stores it in ``self.li``.
    3. ``red_dot_parse`` reads each search result, builds a panorama URL per
       returned coordinate and hands it to ``Judge``.
    4. ``Judge`` writes the names whose panorama request does not return a
       JPEG into the spreadsheet through ``excel_write``.
    """

    # (keyword, region prefix) pairs, in the order they are checked.
    # A catalogue name containing the keyword yields one search URL whose
    # ``region`` parameter is ``prefix + name``.  Counties ("县") are
    # searched without the city prefix.
    _KEYWORD_REGION_PREFIX = (
        ("区", "宝鸡市"),
        ("街道", "宝鸡市"),
        ("县", ""),
        ("镇", "宝鸡市"),
        ("公园", "宝鸡市"),
        ("法门寺", "宝鸡市"),
    )

    def __init__(self, ak='U0QGae7viQsN0yLBirGsRD90XI0tlcGO',
                 output_path=r'C:\Users\85740\Desktop\get_ip.xls'):
        """Set up crawl state, request headers and the output workbook.

        :param ak: Baidu Map API key appended to every generated URL.
        :param output_path: where ``excel_write`` saves the workbook.
        """
        self.i = 0          # next spreadsheet row to write
        self.li = []        # Place-search URLs produced by ``parse``
        self.dict1 = {}     # panorama URL -> place name
        self.li_ak = ak
        self.output_path = output_path
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.81 Safari/537.36',
            'Cookie': 'JSESSIONID=15BF07D8D4F4515A5C2247D5606AECB2'
        }
        self.f = xlwt.Workbook(encoding='utf-8')
        self.sheet1 = self.f.add_sheet(u'sheet1', cell_overwrite_ok=True)

    def get_html(self, url, max_attempts=5, timeout=10):
        """Download ``url`` and return the response body as text.

        :param url: address to request with ``self.headers``.
        :param max_attempts: how many times to try while the server answers
            with a non-200 status (the original retried without any bound).
        :param timeout: seconds to wait for the server before giving up.
        :return: the decoded body, or ``None`` when the request raised or
            every attempt returned a non-200 status.
        """
        attempts = max(1, max_attempts)
        status = None
        for attempt in range(attempts):
            try:
                res = requests.get(url, headers=self.headers, timeout=timeout)
            except requests.RequestException as e:
                # Network-level failure: report it and give up on this URL.
                print("问题是", e)
                return None
            if res.status_code == 200:
                res.encoding = res.apparent_encoding
                return res.text
            status = res.status_code
            if attempt + 1 < attempts:
                time.sleep(0.1)
        print("问题是", "HTTP {} for {}".format(status, url))
        return None

    def parse(self, response, url_location_detail):
        """Build Place-search URLs from the catalogue and add them to ``self.li``.

        :param response: catalogue page source, a JSON list such as
            ``[{"id": 610300, "lat": 34.36784, "lng": 107.24291, "name": "宝鸡市"}]``.
            ``None`` or an empty string (a failed download) is ignored.
        :param url_location_detail: base URL of the search endpoint; the
            query string is appended to it.
        :return: ``None``; results are accumulated in ``self.li``.
        """
        if not response:
            return
        for entry in json.loads(response):
            if not isinstance(entry, dict):
                continue
            name = entry.get('name') or ''
            for keyword, region_prefix in self._KEYWORD_REGION_PREFIX:
                if keyword not in name:
                    continue
                url = "{}?query={}&region={}{}&output=json&ak={}".format(
                    url_location_detail, name, region_prefix, name, self.li_ak)
                # A name matching several keywords would otherwise queue the
                # very same URL (and later the same spreadsheet rows) twice.
                if url not in self.li:
                    self.li.append(url)

    def red_dot_parse(self, response):
        """Build a panorama URL for every search hit and check it with ``Judge``.

        :param response: JSON text of a Place-search answer; the hits are
            read from its ``results`` list.  ``None`` or an empty string
            (a failed download) is ignored.
        :return: ``None``; ``self.dict1`` maps each panorama URL to its name.
        """
        if not response:
            return
        view_url = "http://api.map.baidu.com/panorama/v2?ak={}&width=512&height=256&location={},{}&fov=180"
        results = json.loads(response).get("results") or []
        for place in results:
            location = place.get('location') or {}
            lat = location.get('lat')
            lng = location.get('lng')
            if lat is None or lng is None:
                # Hit without usable coordinates: nothing to check.
                continue
            detail_view_url = view_url.format(self.li_ak, lng, lat)
            self.dict1[detail_view_url] = place.get("name")
            try:
                self.Judge(detail_view_url)
            except Exception as e:
                # Best effort: one failing point must not abort the crawl,
                # but the failure is reported instead of being hidden.
                print("问题是", e)

    def Judge(self, detail_view_url):
        """Request the panorama URL and record the place if no image comes back.

        A response whose ``Content-Type`` is JPEG is treated as "panorama
        available" and only printed; any other response causes the place
        name to be written to the spreadsheet.

        :param detail_view_url: panorama URL previously stored in ``self.dict1``.
        :raises KeyError: if ``detail_view_url`` was never stored in ``self.dict1``.
        """
        res = requests.get(detail_view_url, headers=self.headers, timeout=10)
        # Look the name up by the URL we built: ``res.url`` can differ after
        # normalisation or a redirect and would then miss the dictionary key.
        name = self.dict1[detail_view_url]
        content_type = res.headers.get('Content-Type', '').lower()
        # ``startswith`` also accepts a header carrying extra parameters.
        if content_type.startswith('image/jpeg'):
            print("有全景的地方", name)
        else:
            self.excel_write(name)

    def excel_write(self, text_ip):
        """Write ``text_ip`` into the next free row of column 0 and save the workbook.

        The workbook is saved after every row so that progress survives an
        interrupted run.

        :param text_ip: cell value to store (here: a place name).
        """
        self.sheet1.write(self.i, 0, text_ip)
        self.i += 1
        self.f.save(self.output_path)

if __name__ == '__main__':
    # Endpoint returning the catalogue: a JSON list of objects with a "name".
    catalog_url = "http://imapway.cn:8098/bjzs/video/getVideoCatalog"
    # Base URL of the Baidu Place-search endpoint used for every name.
    url_location_detail = 'http://api.map.baidu.com/place/v2/search'

    a = get_location()

    # get_html returns None when the download fails; skip parsing then
    # instead of handing None to json.loads.
    catalog_html = a.get_html(catalog_url)
    if catalog_html:
        a.parse(catalog_html, url_location_detail)  # fills a.li with search URLs

    for search_url in a.li:
        search_html = a.get_html(search_url)
        if search_html:
            a.red_dot_parse(search_html)  # extract coordinates and check panoramas

DAMO开发者矩阵

DAMO开发者矩阵，由阿里巴巴达摩院和中国互联网协会联合发起，致力于探讨最前沿的技术趋势与应用成果，搭建高质量的交流与分享平台，推动技术创新与产业应用链接，围绕“人工智能与新型计算”构建开放共享的开发者生态。

更多推荐

TongSIM：智能机器仿真通用平台

DAMO开发者矩阵

机器人诊断系统十年演进

摘要：机器人诊断系统十年演进（2015-2025）从救火式运维发展为Robot SRE闭环治理体系。核心演进包括：诊断对象从单机扩展到服务SLA，证据从经验升级为结构化数据链（metrics/logs/traces/replay），处置从人肉运维进化为自愈联动，治理从被动救火转变为防复发闭环。第三代系统通过五大模块（证据采集、事件编排、根因推理、自愈处置、防复发）实现"检测-定位-处置-

DAMO开发者矩阵

非结构化数据处理的容错机制设计

非结构化数据是指没有固定结构、无法用传统数据库（如SQL）直接存储和查询的数据，主要包括：文本（用户评论、新闻文章、社交媒体内容）；图像（用户上传的照片、产品图片、医疗影像）；音频/视频（语音留言、直播片段、短视频）；其他（PDF、Word文档、日志文件）。它的特点是“不按常理出牌”——没有统一的 schema，格式千变万化，质量参差不齐。