from email import header
import requests
from hyper.contrib import HTTP20Adapter #导入HTTP2的模块
import json
import math
import time
#import xlwt #导入excel官方模块,用于将字典生成excel
import pandas as pd #将字典列表转换为DataFrame
import re
def _cookie_to_dict(cookie):
    """Parse a ``name=value; name2=value2`` cookie string into a dict."""
    cookie_dict = {}
    for fragment in cookie.split(';'):
        # partition() splits on the FIRST '=' only, so values that themselves
        # contain '=' are kept whole instead of being truncated.
        name, separator, value = fragment.strip().partition('=')
        if not separator:
            continue  # empty or malformed fragment: nothing to record
        cookie_dict[name] = value
    return cookie_dict


def _compute_bkn(skey):
    """Hash the ``skey`` cookie value into the ``bkn`` form field value."""
    token = 5381
    for char in skey:
        token += (token << 5) + ord(char)
    # Keep only the low 31 bits.
    return 2147483647 & token


def load_data(st, end):
    """POST a member-search request and return the raw response text.

    Args:
        st: value sent as the ``st`` (start position) form field.
        end: value sent as the ``end`` (end position) form field.

    Returns:
        The response body as text. It is returned unmodified so that callers
        can hand it straight to ``json.loads``, which decodes ``\\uXXXX``
        escapes itself; un-escaping the whole body beforehand would also
        un-escape ``\\"`` and ``\\\\`` and make the JSON unparseable.

    Raises:
        KeyError: if the cookie string has no ``skey`` entry.

    Side effects:
        Sets the module-level ``gc`` (group number).
    """
    url = 'https://qun.qq.com/cgi-bin/qun_mgr/search_group_members'
    cookie = '您的cookie'
    headers = {
        ":authority": "qun.qq.com",
        ":method": "POST",
        ":path": "/cgi-bin/qun_mgr/search_group_members",
        ":scheme": "https",
        "accept": "application/json, text/javascript, */*; q=0.01",
        "accept-encoding": "gzip, deflate, br",
        "accept-language": "zh-CN,zh;q=0.9",
        "content-length": "45",
        "content-type": "application/x-www-form-urlencoded; charset=UTF-8",
        "cookie": cookie,
        "origin": "https://qun.qq.com",
        "referer": "https://qun.qq.com/member.html",
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3947.100 Safari/537.36",
        "x-requested-with": "XMLHttpRequest",
    }

    # The group number is published as a module-level global because
    # export_excel reads it to build the output file name.
    global gc
    gc = '599500635'  # group number

    data = {
        "gc": gc,
        "st": st,
        "end": end,
        "sort": "0",
        "bkn": _compute_bkn(_cookie_to_dict(cookie)['skey']),
    }

    sessions = requests.session()
    # Route requests for this host through the HTTP/2 adapter.
    sessions.mount('https://qun.qq.com', HTTP20Adapter())
    response = sessions.post(url, headers=headers, data=data)
    return response.text
def get_qq_member_count():
    """Return the ``count`` field (group member total) from a 0..0 query."""
    # Only the 'count' field of the reply is used here, so the smallest
    # window (st=0, end=0) is requested.
    payload = json.loads(load_data(0, 0))
    return payload['count']
# Module-level list of group-member records; get_qq_member_list() appends one
# dict per member to it.
qq_qun_info = list()
def _member_to_row(member, num):
    """Convert one raw member record into the row dict stored in qq_qun_info."""
    time_format = "%Y-%m-%d %H:%M:%S"
    # 'g' encodes gender: 0 = male, 1 = female, -1 = unknown; anything else
    # is labelled as an error.
    sex_labels = {0: '男', 1: '女', -1: '未知'}
    return {
        'num': num,
        # Emoji in nicknames are replaced with '???'.
        'qq_name': filter_emoji(member['nick'], '???'),
        'qq_qun_name': filter_emoji(member['card'], '???'),
        'qq_number': str(member['uin']),
        'sex': sex_labels.get(member['g'], '错误'),
        'qq_age': member['qage'],
        # join_time / last_speak_time are passed to time.localtime() as epoch
        # seconds and rendered in local time.
        'join_qun_time': time.strftime(
            time_format, time.localtime(member['join_time'])),
        'last_speak_time': time.strftime(
            time_format, time.localtime(member['last_speak_time'])),
    }


def get_qq_member_list():
    """Fetch every group member page by page and append rows to qq_qun_info.

    Members are requested in windows of 21 (0-20, 21-41, 42-62, ...) so that
    no index is fetched twice. Each member becomes one dict with a running
    1-based ``num``. The collected list is printed when done.

    Returns:
        None. Results accumulate in the module-level ``qq_qun_info`` list.
    """
    page_size = 21
    page_count = math.ceil(get_qq_member_count() / page_size)

    num = 1  # running sequence number written into each row
    for page in range(page_count):
        start = page * page_size
        response = load_data(start, start + page_size - 1)
        # A reply without 'mems' is treated as an empty page instead of
        # aborting the whole run with a KeyError.
        members = json.loads(response).get('mems') or []
        for member in members:
            qq_qun_info.append(_member_to_row(member, num))
            num += 1

    print(qq_qun_info)
    # NOTE: the Excel export is currently disabled; call
    # export_excel(qq_qun_info) here to write the rows to a workbook.
    print('导出表格完成')
def export_excel(export, file_name=None):
    """Write member rows to an Excel workbook with Chinese column headers.

    Args:
        export: iterable of row dicts using the keys listed in ``order``
            below (the shape get_qq_member_list appends to qq_qun_info).
        file_name: output path. When omitted, ``gc + '.xlsx'`` is used, where
            ``gc`` is the module-level group number; that global only exists
            after load_data() has been called at least once.

    Raises:
        NameError: if ``file_name`` is omitted and ``gc`` has not been set.
    """
    # Column order of the exported sheet.
    order = ['num', 'qq_name', 'qq_qun_name', 'qq_number', 'sex', 'qq_age',
             'join_qun_time', 'last_speak_time']
    # Internal key -> header shown in the sheet.
    columns_map = {
        'num': '序号',
        'qq_name': 'qq昵称',
        'qq_qun_name': 'qq群昵称',
        'qq_number': 'qq号码',
        'sex': '性别',
        'qq_age': 'Q龄',
        'join_qun_time': '入群时间',
        'last_speak_time': '最近发言时间',
    }

    # Passing columns= fixes the column order up front and still yields a
    # well-formed (empty) frame when there are no rows, where selecting
    # pf[order] afterwards would raise KeyError.
    pf = pd.DataFrame(list(export), columns=order)
    pf = pf.rename(columns=columns_map)
    # Blank out missing cells.
    pf = pf.fillna(' ')

    if file_name is None:
        file_name = gc + '.xlsx'

    # The context manager saves and closes the workbook; the old
    # ExcelWriter.save() and to_excel(encoding=...) are gone in pandas 2.x.
    with pd.ExcelWriter(file_name) as writer:
        pf.to_excel(writer, index=False)
# Matches any character outside the Basic Multilingual Plane (where emoji
# live) as well as any UTF-16 surrogate code point. Compiled once at import
# time rather than on every call.
_EMOJI_PATTERN = re.compile(
    r'[\U00010000-\U0010ffff\uD800-\uDBFF\uDC00-\uDFFF]')


def filter_emoji(desstr, restr=''):
    """Replace emoji-like characters in a string.

    Args:
        desstr: the string to filter.
        restr: replacement inserted for each matched character; the default
            empty string removes them.

    Returns:
        ``desstr`` with every astral-plane character and every surrogate
        code point replaced by ``restr``.
    """
    return _EMOJI_PATTERN.sub(restr, desstr)
# Script entry point: collect the member list only when run directly,
# not when this module is imported.
if __name__ == '__main__':
    get_qq_member_list()
# (Footer text from the web page this script was copied from: "所有评论(0)" — not part of the program.)