Python pyppeteer通过cookie获取数据(cookie爬虫)
1. 利用 EditThisCookie 插件获取页面 cookie;2. 源码实例:"""set browser""" browser = await launch({"headless": False, "executablePath": "F:\\chrome-win32\\chrome.exe", "args": ["--disable-gpu", …
·
1. 利用EditThisCookie插件获取页面cookie
2. 源码实例
"""set browser"""
browser = await launch({
"headless": False,
"executablePath": "F:\\chrome-win32\\chrome.exe",
"args": [
"--disable-gpu",
"--disable-web-security",
"--disable-xss-auditor", # 关闭 XSS Auditor
"--no-sandbox",
"--disable-setuid-sandbox",
"--allow-running-insecure-content", # 允许不安全内容
"--disable-webgl",
],
"ignoreHTTPSErrors": True # 忽略证书错误
})
width, height = screen_size()
page = await browser.newPage()
tasks = [
# 设置UA
asyncio.ensure_future(page.setUserAgent(random.choice(user_agents))),
# 启用JS,不开的话无法执行JS
asyncio.ensure_future(page.setJavaScriptEnabled(True)),
# 关闭缓存
asyncio.ensure_future(page.setCacheEnabled(False)),
# 设置窗口大小
asyncio.ensure_future(page.setViewport({"width": width, "height": height}))
]
await asyncio.wait(tasks)
cookies = [
{
},
{
}
]
for item in cookies:
await page.setCookie(item)
"""Search"""
url = "https://www.abc.cn"
await page.goto(url, {"waitUntil": "networkidle2", "timeout": 30000})

DAMO开发者矩阵,由阿里巴巴达摩院和中国互联网协会联合发起,致力于探讨最前沿的技术趋势与应用成果,搭建高质量的交流与分享平台,推动技术创新与产业应用链接,围绕“人工智能与新型计算”构建开放共享的开发者生态。
更多推荐
所有评论(0)