Python 爬取网页数据(模拟用户登录)
   简介:使用 Python 模拟用户登录,并抓取登录后页面中的数据。

# -*- coding: utf-8 -*-

import requests
import json
import http.cookiejar as cookielib
from bs4 import BeautifulSoup
from lxml import etree
import re
import xlwt

# Browser-like User-Agent sent with every request.
userAgent = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/80.0.3987.132 Safari/537.36"
)

# Extra request headers passed explicitly to each POST made by Login().
headers = {
    "Referer": "http://192.168.14.2:88/leadsec-cvs/cvs/checkItem/itemDetail?ci_ids=4659&alias=/server/Linux",
    'User-Agent': userAgent,
}

# Shared HTTP session; its cookies live in a file-backed jar so that
# ``wangyuSession.cookies.save()`` can persist them to "wangyuCookies".
wangyuSession = requests.Session()
wangyuSession.cookies = cookielib.LWPCookieJar("wangyuCookies")

def Login(ci_ids=range(1, 10000), output_path='wangyu1.xls'):
    """Sign in once per check-item id and export its baseline data to an .xls file.

    For every id in *ci_ids* the function POSTs the sign-in form with a
    ``redirect`` pointing at that item's detail page, extracts the
    ``baselineDataStr`` JSON object embedded in the returned HTML, and writes
    one spreadsheet row per entry of its ``beans`` list.

    Columns written (row 0 is left empty):
        0: running row number, 1: value of the ``benchmark`` input,
        2: zero-based index of the bean within its item, 3: ``info``,
        4: ``opSign``, 5: ``valueItems``,
        6: the item's ``relation`` (only on the item's last bean row).

    Ids whose response contains no parsable ``baselineDataStr`` are skipped
    instead of aborting the whole crawl.

    Args:
        ci_ids: Iterable of check-item ids to fetch. Defaults to 1..9999.
        output_path: Path of the workbook; it is re-saved after every
            exported item so partial results survive an interruption.
    """
    wookbook = xlwt.Workbook()
    sheet = wookbook.add_sheet('sheet2', cell_overwrite_ok=True)

    # Loop invariants: the sign-in endpoint and the pattern that captures the
    # JSON object assigned to ``baselineDataStr`` in the page's script.
    postUrl = "http://192.168.14.2:88/leadsec-cvs/signin"
    baseline_re = re.compile(r'baselineDataStr\s=\s({(?:.|\n)*})?\r\n\tvar')

    row = 0
    for i in ci_ids:
        print("网页登录")
        postData = {
            # NOTE(review): these are the literal strings 'username' and
            # 'password' -- presumably placeholders for real credentials.
            "username": 'username',
            "password": 'password',
            # NOTE(review): the trailing "https://hao.360.com/..." part looks
            # like a copy/paste artifact; left untouched because it is sent to
            # the server -- confirm before removing.
            "redirect": '/cvs/checkItem/itemDetail?ci_ids={}&alias=/server/Linuxhttps://hao.360.com/2020.html?src=lm&ls=n478bfd1a95'.format(i),
            'pwd-encrypted': 'True',
        }
        # Form-encoded POST through the shared session. The former
        # ``json=True`` argument was dropped: requests ignores ``json``
        # whenever ``data`` is supplied.
        responseRes = wangyuSession.post(postUrl, data=postData, headers=headers)
        print(f"statusCode = {responseRes.status_code}")
        wangyuSession.cookies.save()
        mes = responseRes.content.decode(encoding='utf-8')

        # Extract the embedded JSON; skip this id when it is absent or broken
        # rather than letting json.loads('') abort the entire run.
        found = baseline_re.search(mes)
        if found is None or not found.group(1):
            print(f"ci_ids={i}: no baselineDataStr found, skipping")
            continue
        try:
            json_result = json.loads(found.group(1))
        except ValueError as err:  # json.JSONDecodeError subclasses ValueError
            print(f"ci_ids={i}: invalid baselineDataStr JSON ({err}), skipping")
            continue

        # xpath() returns a list of matches; store a plain string in the cell.
        html = etree.HTML(mes)
        linux_id = ''.join(
            html.xpath("/html/body/div[1]/form/div[2]/div[1]/ul/li[3]/div/input[@id='benchmark']/@value")
        )

        beans = json_result['beans']
        for index, bean in enumerate(beans):
            row += 1
            sheet.write(row, 0, row)
            sheet.write(row, 1, linux_id)
            sheet.write(row, 2, index)
            sheet.write(row, 3, bean['info'])
            sheet.write(row, 4, bean['opSign'])
            sheet.write(row, 5, bean['valueItems'])
        if beans:
            # Attach the relation to this item's last row. Without the guard
            # an item with no beans would overwrite the previous item's value.
            sheet.write(row, 6, json_result['relation'])

        # Checkpoint after every exported item.
        wookbook.save(output_path)
if __name__ == "__main__":
    # Start the crawl only when the file is executed as a script.
    Login()


Logo

DAMO开发者矩阵,由阿里巴巴达摩院和中国互联网协会联合发起,致力于探讨最前沿的技术趋势与应用成果,搭建高质量的交流与分享平台,推动技术创新与产业应用链接,围绕“人工智能与新型计算”构建开放共享的开发者生态。

更多推荐