#!/usr/bin/env python3  
# -*- coding: UTF-8 -*-  
  
import json
import re

from DrissionPage import ChromiumOptions, ChromiumPage
  
# NOTE(review): this proxy URL embeds a username and password directly in the
# source. Rotate these credentials and load the value from configuration or
# the environment instead of committing it.
PROXY_URL = 'http://1077764809707376640:pMkeLLTz@http-dynamic.xiaoxiangdaili.com:10030'

HOME_URL = "https://www.lagou.com"
KEY_WORDS = ["爬虫", "数据分析", "python"]
CITY_LIST = ["北京", "上海", "武汉", "郑州", "广州", "深圳"]
# Result pages requested for every (city, keyword) pair: 1 through 29.
PAGE_NUMBERS = range(1, 30)

# Captures the JSON payload of the page's __NEXT_DATA__ <script> tag.
# DOTALL lets "." cross newlines so a pretty-printed payload is still matched;
# the non-greedy group stops at the first closing </script>.
NEXT_DATA_RE = re.compile(
    r"<script id=\"__NEXT_DATA__\" type=\"application/json\">(.*?)</script>",
    re.DOTALL,
)


def extract_next_data(html):
    """Return the decoded ``__NEXT_DATA__`` JSON payload found in *html*.

    The payload is decoded with ``json.loads`` rather than ``eval``: the text
    comes from a remote page, so it must never be executed, and textual
    substitution of ``null``/``true``/``false`` would also corrupt string
    values that merely contain those words.

    Args:
        html: Full HTML source of a page.

    Returns:
        The decoded JSON value, or ``None`` when the page has no
        ``__NEXT_DATA__`` script tag.

    Raises:
        json.JSONDecodeError: If the tag is present but its content is not
            valid JSON.
    """
    match = NEXT_DATA_RE.search(html)
    if match is None:
        return None
    return json.loads(match.group(1))


def build_options():
    """Create the ChromiumOptions used to launch the browser."""
    co = ChromiumOptions()
    # Run without the sandbox.
    co.set_argument('--no-sandbox')
    # Block all pop-up windows.
    co.set_pref(arg='profile.default_content_settings.popups', value='0')
    # Hide the "save password?" prompt.
    co.set_pref('credentials_enable_service', False)
    # Route traffic through the proxy.
    co.set_proxy(PROXY_URL)
    # Uncomment to run without a visible browser window:
    # co.headless(True)
    return co


def main():
    """Crawl job-list pages for every city/keyword pair and save each as JSON.

    For each page that exposes a ``__NEXT_DATA__`` payload, the decoded data
    is printed and written to ``{city}_{key}_{page}.json`` in the current
    working directory. Pages without the payload, or with an undecodable one,
    are skipped.
    """
    page = ChromiumPage(build_options())

    # Open the home page first, before requesting any listing page.
    page.get(HOME_URL)

    for city in CITY_LIST:
        for key in KEY_WORDS:
            for page_no in PAGE_NUMBERS:
                page.get(f"https://www.lagou.com/wn/jobs?pn={page_no}&kd={key}&city={city}")

                try:
                    data = extract_next_data(page.html)
                except json.JSONDecodeError as exc:
                    # One malformed page should not abort the whole crawl.
                    print(f"skip {city}/{key}/page {page_no}: invalid __NEXT_DATA__ JSON ({exc})")
                    continue
                if data is None:
                    continue

                print(data)
                with open(f"{city}_{key}_{page_no}.json", "w", encoding="utf-8") as f:
                    json.dump(data, f, ensure_ascii=False, indent=4)


if __name__ == "__main__":
    main()
Logo

DAMO开发者矩阵,由阿里巴巴达摩院和中国互联网协会联合发起,致力于探讨最前沿的技术趋势与应用成果,搭建高质量的交流与分享平台,推动技术创新与产业应用链接,围绕“人工智能与新型计算”构建开放共享的开发者生态。

更多推荐