如何用python爬虫抓取网页:Python爬虫实战 QQ空间全自动点赞工具

  • QQ空间秒赞


首先既然是对 QQ空间的一系列操作,自然是先解决登陆方面,在这篇文章里面我就不过多介绍了,因为我上几期之前对QQ空间已经做了一定的介绍了。直接放出链接就好。欢迎看博主以前的文章

def search_cookie():
    """Make sure a saved cookie file exists and parse it.

    Prompts for the QQ number, runs the browser login via
    get_cookie_json() when 'cookie_dict.txt' is missing, then parses that
    file as JSON.

    Returns:
        True once the cookie file has been parsed.
    """
    qq_number = input('请输入qq号:')
    if not __import__('os').path.exists('cookie_dict.txt'):
        get_cookie_json(qq_number)
    with open('cookie_dict.txt', 'r') as f:
        # NOTE(review): in this excerpt the parsed dict is only bound to a
        # local name; the complete listing stores it on self.cookie.
        cookie = json.load(f)
    return True


def get_cookie_json(qq_number):
    """Log in to i.qq.com in headless Chrome and save the session cookies.

    Args:
        qq_number: account number typed into the login form.

    Returns:
        True after the cookies were written to 'cookie_dict.txt' as a
        JSON object mapping cookie name to cookie value.
    """
    password = __import__('getpass').getpass('请输入密码:')
    from selenium import webdriver
    from selenium.webdriver.chrome.options import Options
    from selenium.webdriver.common.by import By

    login_url = 'https://i.qq.com/'
    chrome_options = Options()
    chrome_options.add_argument('--headless')
    driver = webdriver.Chrome(options=chrome_options)
    try:
        driver.get(login_url)
        # switch_to.frame / find_element(By.XPATH, ...) are available in
        # both Selenium 3 and 4, unlike the removed switch_to_frame and
        # find_element_by_xpath helpers.
        driver.switch_to.frame('login_frame')
        driver.find_element(By.XPATH, '//*[@id="switcher_plogin"]').click()
        time.sleep(1)
        driver.find_element(By.XPATH, '//*[@id="u"]').send_keys(qq_number)
        driver.find_element(By.XPATH, '//*[@id="p"]').send_keys(password)
        time.sleep(1)
        driver.find_element(By.XPATH, '//*[@id="login_button"]').click()
        time.sleep(1)
        cookie_list = driver.get_cookies()
    finally:
        # Always release the browser process, even when login fails.
        driver.quit()

    cookie_dict = {}
    for cookie in cookie_list:
        if 'name' in cookie and 'value' in cookie:
            cookie_dict[cookie['name']] = cookie['value']
    with open('cookie_dict.txt', 'w') as f:
        json.dump(cookie_dict, f)
    return True


def get_g_tk():
    """Hash the 'p_skey' cookie into the g_tk request parameter."""
    # NOTE(review): excerpt of a class method -- `self` is not a parameter
    # here; the complete listing defines this as get_g_tk(self).
    p_skey = self.cookie['p_skey']
    h = 5381
    for i in p_skey:
        h += (h << 5) + ord(i)
    # Keep only the low 31 bits of the hash.
    g_tk = h & 2147483647

寻找XML


  • uin:
  • scope:
  • view:
  • daylist:
  • uinlist:
  • gid:
  • flag:
  • filter:
  • applist:
  • refresh:
  • aisortEndTime:
  • aisortOffset:
  • getAisort:
  • aisortBeginTime:
  • pagenum:
  • externparam:
  • firstGetGroup:
  • icServerTime:
  • mixnocache:
  • scene:
  • begintime:
  • count:
  • dayspac:
  • sidomain:
  • useutf8:
  • outputhtmlfeed:
  • rd:
  • usertime:
  • windowId:
  • g_tk:
  • qzonetoken:
  • g_tk:


  • qzonetoken
  • windowId
  • rd
  • usertime
  • g_tk
  1. qzonetoken 参数在源码中是个可变的“定值”,因为每次刷新这个参数都会变,但是源码中却给出了他的具体值。直接获取即可。

def get_space():
    """Fetch the user's Qzone home page and pull qzonetoken out of it.

    Returns:
        True after the request has been made.
    """
    # NOTE(review): qq_number, headers and cookie are not defined in this
    # excerpt; the complete listing reads them from self.
    your_url = 'https://user.qzone.qq.com/' + str(qq_number)
    html = requests.get(your_url, headers=headers, cookies=cookie)
    if html.status_code == 200:
        # Take the second `window.g_qzonetoken = ...;` match and keep the
        # first double-quoted string inside it.
        qzonetoken = re.findall('window.g_qzonetoken =(.*?);', html.text, re.S)[1].split('"')[1]
    return True

  1. windowId 与 rd 虽说每次刷新结果都不同,但是经过博主多次实验得出,这两个参数对整体并没有什么影响,可以直接抄下来。

'rd': '0.9311604844249088' 'windowId': '0.51158950324406'

  1. usertime 参数看似很眼熟,是个时间戳参数,因为位数不对,说明应该是被放大了一千倍。

# Current Unix time in milliseconds, sent as a string.
'usertime': str(round(time.time() * 1000))

  1. g_tk 参数上次教程已给出。在JavaScript中分析即可获得。

def get_g_tk():
    """Hash the 'p_skey' cookie into the g_tk request parameter."""
    # NOTE(review): excerpt of a class method -- `self` is not a parameter
    # here; the complete listing defines this as get_g_tk(self).
    p_skey = self.cookie['p_skey']
    h = 5381
    for i in p_skey:
        h += (h << 5) + ord(i)
    # Keep only the low 31 bits of the hash.
    g_tk = h & 2147483647

获取第一个空间动态


demjson 可以解析格式不规范的 JSON 数据。


encode:将 Python 对象编码成 JSON 字符串;decode:将已编码的 JSON 字符串解码为 Python 对象。

# Example
# -*- coding: utf-8 -*-
import demjson

# Three spellings of the same object: unquoted keys, single-quoted keys,
# and standard double-quoted keys.
js_json = "{x:1, y:2, z:3}"
py_json1 = "{'x':1, 'y':2, 'z':3}"
py_json2 = '{"x":1, "y":2, "z":3}'

data = demjson.decode(js_json)
print(data)
# {'y': 2, 'x': 1, 'z': 3}

data = demjson.decode(py_json1)
print(data)
# {'y': 2, 'x': 1, 'z': 3}

data = demjson.decode(py_json2)
print(data)
# {'y': 2, 'x': 1, 'z': 3}


# Drop the first 10 and last 2 characters of the response (presumably the
# callback wrapper around the payload -- TODO confirm), then remove every
# space and newline before decoding.
text = html.text[10:-2].replace(" ", "").replace('\n', '')
json_list = demjson.decode(text)['data']['data']
# The first entry is the newest feed item shown on the page.
qq_spaces = json_list[0]

在 qq_spaces 中有一个很长、也很特殊的字段 html。它的内容看起来是一段常规的网页代码,应该是由 JavaScript 写入到页面中的。既然它不是完整的页面代码,就只能用正则表达式从中提取我们需要的内容了。

content = str(qq_spaces['html']) try:zanshu = re.findall('<spanclass="f-like-cnt">(.*?)</span>人觉得很赞</div>' content re.S)[0] except:return None time_out = str(qq_spaces['feedstime']) print("名字:" str(qq_spaces['nickname'])) print("QQ号:" str(qq_spaces['opuin'])) print("时间:" time_out) print('赞数:' zanshu) times = qq_spaces['abstime'] his_url = re.findall('data-curkey="(.*?)"' content re.S)[0]寻找点赞所需的URL


  1. qzreferrer参数为自己QQ空间的网址,表示从哪里来的链接地址。
  2. opuin参数为自己的QQ号,可以直接在代码提取。
  3. unikey参数与curkey参数为被点赞方的链接,即说说链接,刚才已获取。
  4. abstime参数为被点赞方说说的发布时间的时间戳。
  5. fid参数为被点赞方的链接后缀。


def get_zan(times, his_url):
    """Send the "like" request for one feed item and print the outcome.

    Args:
        times: value of the feed item's 'abstime' field.
        his_url: value of the item's data-curkey attribute; sent as both
            'unikey' and 'curkey', and its last path segment as 'fid'.
    """
    # NOTE(review): g_tk, qzonetoken, qq_number, headers and cookie are not
    # defined in this excerpt; the complete listing reads them from self.
    data = {'g_tk': g_tk, 'qzonetoken': qzonetoken}
    post_data = {
        'qzreferrer': 'https://user.qzone.qq.com/' + str(qq_number),
        'opuin': str(qq_number),
        'unikey': str(his_url),
        'curkey': str(his_url),
        'from': '1',
        'appid': '311',
        'typeid': '0',
        'abstime': str(times),
        'fid': str(his_url).split('/')[-1],
        'active': '0',
        'fupdate': '1',
    }
    url = 'https://user.qzone.qq.com/proxy/domain/w.qzone.qq.com/cgi-bin/likes/internal_dolike_app?'
    url = url + urllib.parse.urlencode(data)
    html = requests.post(url, headers=headers, cookies=cookie, data=post_data)
    if html.status_code == 200:
        # Success is recognised only by the response body being exactly
        # 469 characters long.
        print("点赞成功" if len(html.text) == 469 else "点赞失败")

功能提升到秒赞


  1. 在本地建立一个文件,负责写入最后一条说说所产生的时间戳。
  2. 比对当前时间戳与空间第一条说说是否相同,若相同则无更新。
  3. 点赞后重写文件,以便下次使用代码即可秒赞。

def run_tolike():
    """Load the last saved marker, then poll the feed forever."""
    if os.path.exists('time_out.txt'):
        with open('time_out.txt', 'r') as f:
            time_out = f.read()
    else:
        time_out = None
    while True:
        get_friends_list()
        # Wait a random 0-5 seconds between polls.
        time.sleep(__import__('random').randint(0, 5))  # instant like?

if not time_out or time_out != time_out: time_out = time_out get_zan(times his_url) return True else:log('说说无更新 等待中...')

# Persist the liked item's timestamp so the next run can skip it.
with open('time_out.txt', 'w') as f:
    f.write(str(times))


import getpass
import json
import os
import random
import re
import time
import urllib
import urllib.parse

import demjson
import requests
from lxml import etree


def log(content):
    """Print *content* prefixed with the current local time as [HH:MM:SS]."""
    this_time = time.strftime('%H:%M:%S', time.localtime(time.time()))
    # Passed as a separate argument so non-string values (e.g. an HTTP
    # status code) can be logged too.
    print("[" + str(this_time) + "]", content)


class QQ_like:
    """Poll the Qzone feed and like the newest item whenever it changes.

    Constructing an instance loads (or creates) the login cookies, derives
    the request tokens and then enters an endless polling loop, so the
    constructor does not return.
    """

    def __init__(self, qq_number):
        """Store the account number, prepare tokens and start polling.

        Args:
            qq_number: the QQ account number used for login and requests.
        """
        self.headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'}
        self.qq_number = qq_number
        self.get_preparameter()
        self.run_tolike()

    def get_preparameter(self):
        """Load cookies, then compute g_tk and fetch qzonetoken."""
        self.search_cookie()
        self.get_g_tk()
        self.get_space()

    def run_tolike(self):
        """Load the last liked item's marker and poll the feed forever."""
        if os.path.exists('time_out.txt'):
            with open('time_out.txt', 'r') as f:
                # strip() so a trailing newline cannot defeat the comparison.
                self.time_out = f.read().strip()
        else:
            self.time_out = None
        while True:
            self.get_friends_list()
            # Wait a random 0-5 seconds between polls.
            time.sleep(random.randint(0, 5))

    def search_cookie(self):
        """Load cookies from 'cookie_dict.txt', logging in first if needed.

        Returns:
            True once self.cookie has been populated.
        """
        if not os.path.exists('cookie_dict.txt'):
            self.get_cookie_json()
        with open('cookie_dict.txt', 'r') as f:
            self.cookie = json.load(f)
        return True

    def get_cookie_json(self):
        """Log in to i.qq.com in headless Chrome and save the cookies.

        Returns:
            True after the cookies were written to 'cookie_dict.txt' as a
            JSON object mapping cookie name to cookie value.
        """
        password = getpass.getpass('请输入密码:')
        from selenium import webdriver
        from selenium.webdriver.chrome.options import Options
        from selenium.webdriver.common.by import By

        login_url = 'https://i.qq.com/'
        chrome_options = Options()
        chrome_options.add_argument('--headless')
        driver = webdriver.Chrome(options=chrome_options)
        try:
            driver.get(login_url)
            # switch_to.frame / find_element(By.XPATH, ...) are available
            # in both Selenium 3 and 4, unlike the removed switch_to_frame
            # and find_element_by_xpath helpers.
            driver.switch_to.frame('login_frame')
            driver.find_element(By.XPATH, '//*[@id="switcher_plogin"]').click()
            time.sleep(1)
            driver.find_element(By.XPATH, '//*[@id="u"]').send_keys(self.qq_number)
            driver.find_element(By.XPATH, '//*[@id="p"]').send_keys(password)
            time.sleep(1)
            driver.find_element(By.XPATH, '//*[@id="login_button"]').click()
            time.sleep(1)
            cookie_list = driver.get_cookies()
        finally:
            # Always release the browser process, even when login fails.
            driver.quit()

        cookie_dict = {}
        for cookie in cookie_list:
            if 'name' in cookie and 'value' in cookie:
                cookie_dict[cookie['name']] = cookie['value']
        with open('cookie_dict.txt', 'w') as f:
            json.dump(cookie_dict, f)
        return True

    def get_g_tk(self):
        """Hash the 'p_skey' cookie into self.g_tk."""
        p_skey = self.cookie['p_skey']
        h = 5381
        for i in p_skey:
            h += (h << 5) + ord(i)
        # Keep only the low 31 bits of the hash.
        self.g_tk = h & 2147483647

    def get_space(self):
        """Fetch the user's Qzone home page and store qzonetoken.

        Returns:
            True once self.qzonetoken has been set.

        Raises:
            RuntimeError: if the home page does not answer with HTTP 200,
                since every later request needs the token.
        """
        your_url = 'https://user.qzone.qq.com/' + str(self.qq_number)
        html = requests.get(your_url, headers=self.headers, cookies=self.cookie)
        if html.status_code != 200:
            raise RuntimeError(
                'unexpected HTTP status %s from %s' % (html.status_code, your_url)
            )
        # Take the second `window.g_qzonetoken = ...;` match and keep the
        # first double-quoted string inside it.
        self.qzonetoken = re.findall('window.g_qzonetoken =(.*?);', html.text, re.S)[1].split('"')[1]
        return True

    def get_friends_list(self):
        """Fetch the feed, log its newest item and like it if it is new.

        Returns:
            True when a like request was sent for a new item, otherwise
            None (request failed, item could not be parsed, or no update).
        """
        url = "https://user.qzone.qq.com/proxy/domain/ic2.qzone.qq.com/cgi-bin/feeds/feeds3_html_more?"
        data = {
            'uin': self.qq_number,
            'scope': '0',
            'view': '1',
            'daylist': '',
            'uinlist': '',
            'gid': '',
            'flag': '1',
            'filter': 'all',
            'applist': 'all',
            'refresh': '0',
            'aisortEndTime': '0',
            'aisortOffset': '0',
            'getAisort': '0',
            'aisortBeginTime': '0',
            'pagenum': '1',
            'externparam': 'undefined',
            'firstGetGroup': '0',
            'icServerTime': '0',
            'mixnocache': '0',
            'scene': '0',
            'begintime': 'undefined',
            'count': '10',
            'dayspac': 'undefined',
            'sidomain': 'qzonestyle.gtimg.cn',
            'useutf8': '1',
            'outputhtmlfeed': '1',
            'rd': '0.9311604844249088',
            'usertime': str(round(time.time() * 1000)),
            'windowId': '0.51158950324406',
            'g_tk': self.g_tk,
            'qzonetoken': self.qzonetoken,
        }
        # g_tk is deliberately sent twice, as in the original request.
        url = url + urllib.parse.urlencode(data) + '&g_tk=' + str(self.g_tk)
        html = requests.get(url, headers=self.headers, cookies=self.cookie)
        if html.status_code != 200:
            log(html.status_code)
            return None

        # Drop the first 10 and last 2 characters of the response
        # (presumably the callback wrapper -- TODO confirm), then remove
        # every space and newline before decoding.
        text = html.text[10:-2].replace(" ", "").replace('\n', '')
        json_list = demjson.decode(text)['data']['data']
        if not json_list:
            # Empty feed: nothing to like on this poll.
            log('说说无更新 等待中...')
            return None
        qq_spaces = json_list[0]
        content = str(qq_spaces['html'])
        try:
            # Spaces were stripped above, hence "<spanclass".
            zanshu = re.findall('<spanclass="f-like-cnt">(.*?)</span>人觉得很赞</div>', content, re.S)[0]
            his_url = re.findall('data-curkey="(.*?)"', content, re.S)[0]
        except IndexError:
            # The item has no like counter or no key: skip it.
            return None
        log("名字:" + str(qq_spaces['nickname']))
        log("QQ号:" + str(qq_spaces['opuin']))
        log("时间:" + str(qq_spaces['feedstime']))
        log('赞数:' + zanshu)
        times = qq_spaces['abstime']

        # Compare against 'abstime' because that is the value get_zan()
        # persists to time_out.txt; comparing the saved value with
        # 'feedstime' would never match after a restart.
        marker = str(times)
        if not self.time_out or self.time_out != marker:
            self.time_out = marker
            self.get_zan(times, his_url)
            return True
        log('说说无更新 等待中...')
        return None

    def get_zan(self, times, his_url):
        """Send the "like" request for one feed item and record its time.

        Args:
            times: value of the feed item's 'abstime' field.
            his_url: value of the item's data-curkey attribute; sent as
                both 'unikey' and 'curkey', and its last path segment as
                'fid'.
        """
        data = {'g_tk': self.g_tk, 'qzonetoken': self.qzonetoken}
        post_data = {
            # Use the instance's number rather than a module-level global,
            # so the class also works when imported.
            'qzreferrer': 'https://user.qzone.qq.com/' + str(self.qq_number),
            'opuin': str(self.qq_number),
            'unikey': str(his_url),
            'curkey': str(his_url),
            'from': '1',
            'appid': '311',
            'typeid': '0',
            'abstime': str(times),
            'fid': str(his_url).split('/')[-1],
            'active': '0',
            'fupdate': '1',
        }
        url = 'https://user.qzone.qq.com/proxy/domain/w.qzone.qq.com/cgi-bin/likes/internal_dolike_app?'
        url = url + urllib.parse.urlencode(data)
        html = requests.post(url, headers=self.headers, cookies=self.cookie, data=post_data)
        if html.status_code == 200:
            # Success is recognised only by the response body being
            # exactly 469 characters long.
            log("点赞成功" if len(html.text) == 469 else "点赞失败")
        # Persist the item's timestamp so the next run can skip it.
        with open('time_out.txt', 'w') as f:
            f.write(str(times))


if __name__ == "__main__":
    qq_number = input('请输入qq号:')
    QQ_like(qq_number)
