python怎么爬数据_python简单爬数据(这两个成功了)
摘要:本文给出一个简单的 Python 爬虫脚本,使用 requests 向 NASA CCMC 的 NRLMSISE-00 模型页面(https://ccmc.gsfc.nasa.gov/modelweb/models/nrlmsise00.php)提交表单并保存返回结果;通过配置 sel 为 'hour'、'month'、'latitude' 或 'longitude',选择要逐项遍历抓取的参数。
#!/usr/bin/env python3
# Scrape NRLMSISE-00 model output from the NASA CCMC ModelWeb form.
#
# One form field (chosen with `sel`) is swept over a range of values. For
# every value the form is POSTed and the raw response text is appended to a
# single text file, framed by marker lines that carry the swept value.
import os
import sys
import time

import requests

# -------- Configuration: choose which form field to sweep.
# One of: 'hour', 'month', 'latitude', 'longitude'
sel = 'longitude'
# --------

web_url = r'https://ccmc.gsfc.nasa.gov/modelweb/models/nrlmsise00.php'
request_url = r'https://ccmc.gsfc.nasa.gov/cgi-bin/modelweb/models/vitmo_model.cgi'

# Network robustness knobs. Without a timeout a stalled connection blocks
# forever, and without a cap/pause a failing server is retried in a tight,
# endless loop.
REQUEST_TIMEOUT = 60   # seconds allowed for a single request
MAX_ATTEMPTS = 10      # attempts per swept value before giving up
RETRY_DELAY = 5        # seconds to wait between failed attempts

# Alternative, timestamped output name:
# filepath = os.path.join(sys.path[0], 'dataaa_' + time.strftime("%Y%m%d%H%M%S", time.localtime()) + '.txt')
# os.path.join keeps the file inside the script directory on every platform
# (a literal '\\' separator only works on Windows).
filepath = os.path.join(sys.path[0], 'data_nrmlsise_raw_' + sel + '.txt')
print(filepath)

# Headers copied from a browser session:
# POST /cgi-bin/modelweb/models/vitmo_model.cgi HTTP/1.1
# 'Content-Length' is deliberately not set: requests derives it from the
# encoded form body, and a fixed value goes stale when the payload changes.
# NOTE(review): 'br' is advertised below; decoding Brotli responses needs a
# Brotli package installed next to requests/urllib3 - confirm or drop 'br'.
headers = {
    'Host': 'ccmc.gsfc.nasa.gov',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:53.0) Gecko/20100101 Firefox/53.0',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
    'Accept-Encoding': 'gzip, deflate, br',
    'Content-Type': 'application/x-www-form-urlencoded',
    'Referer': web_url,
    'Cookie': '__utma=35212851.490003371.1494462808.1494462808.1494462808.1; __utmb=35212851.12.10.1494462808; __utmc=35212851; __utmz=35212851.1494462808.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); __utmt=1',
    'Connection': 'keep-alive',
    'Upgrade-Insecure-Requests': '1',
    'Cache-Control': 'max-age=0',
}

# Form fields submitted with every request.
payload = {
    'model': 'nrlmsise',
    'year': '2016',
    'month': '12',
    'day': '01',
    'time_flag': '1',
    'hour': '8',
    'geo_flag': '0.',
    'latitude': '60',
    'longitude': '120',
    'height': '100.',
    'profile': '1',
    'start': '60.',
    'stop': '1000.',
    'step': '10.',
    'f10_7': '',
    'f10_7_3': '',
    'ap': '',
    'format': '0',
    'vars': ['08', '09', '10'],  # O, N2, O2: atomic oxygen, molecular nitrogen, molecular oxygen
    'linestyle': 'solid',
    'charsize': '1.0',
    'symbol': '2',
    'symsize': '1.0',
    'yscale': 'Lin',
    'xscale': 'Lin',
    'imagex': '640',
    'imagey': '480',
}

# Per-run overrides of the defaults above; the field named by `sel` is
# overwritten again for every request in the loop below.
payload['year'] = '2016'
payload['month'] = '12'
payload['day'] = '01'
payload['hour'] = '8'
payload['longitude'] = '120'
payload['latitude'] = '60'
payload['start'] = '60'
payload['stop'] = '1000'
payload['step'] = '1'

count = 0  # total number of POST attempts, retries included

# Values swept for each selectable field.
hours = range(1, 25)
months = range(1, 13)
latitudes = range(-90, 100, 10)
longitudes = range(0, 360, 10)
dic = {
    'hour': hours,
    'month': months,
    'latitude': latitudes,
    'longitude': longitudes,
}
items = dic[sel]
itemname = sel

# The context manager closes the file even when a request finally fails.
with open(filepath, 'w', encoding='utf-8') as fid:
    for item in items:
        payload[itemname] = str(item)
        fid.write('\n#=====================' + str(item) + '=====================\n')
        for attempt in range(1, MAX_ATTEMPTS + 1):
            print('\n=====================' + str(item) + '=====================\n')
            count = count + 1
            print('count :' + str(count))
            try:
                r = requests.post(request_url, data=payload, headers=headers,
                                  timeout=REQUEST_TIMEOUT)
                # Treat HTTP error statuses as failures so an error page is
                # retried instead of being stored as model output.
                r.raise_for_status()
            except requests.RequestException as e:
                # Only network/HTTP failures are retried; local errors such
                # as a failed file write propagate immediately.
                print(e)
                if attempt == MAX_ATTEMPTS:
                    raise
                time.sleep(RETRY_DELAY)
            else:
                fid.write(r.text)
                break
        fid.write('\n---------------------' + str(item) + '---------------------\n')
DAMO开发者矩阵,由阿里巴巴达摩院和中国互联网协会联合发起,致力于探讨最前沿的技术趋势与应用成果,搭建高质量的交流与分享平台,推动技术创新与产业应用链接,围绕“人工智能与新型计算”构建开放共享的开发者生态。
更多推荐



所有评论(0)