|
| 1 | +# _*_ coding: utf-8 _*_ |
| 2 | + |
| 3 | +import re |
| 4 | +import rsa |
| 5 | +import ssl |
| 6 | +import time |
| 7 | +import json |
| 8 | +import base64 |
| 9 | +import logging |
| 10 | +import binascii |
| 11 | +import urllib.parse |
| 12 | + |
| 13 | +# Reference: the PSpider project |
| 14 | +import spider |
# NOTE(review): this replaces the ssl module's default HTTPS context factory
# with the unverified one, so every HTTPS request in this process that relies
# on that default skips certificate verification. Both names are private ssl
# APIs; confirm this is still wanted before reusing the module elsewhere.
ssl._create_default_https_context = ssl._create_unverified_context
| 16 | + |
| 17 | + |
class WeiBoLogin(object):
    """
    class of WeiBoLogin, to login weibo.com

    After a successful login() the instance keeps the account's unique ID and
    nickname, together with the cookie jar and opener used for the session.
    """

    # Captures everything between the first "(" and the last ")" on a line,
    # i.e. the payload of a JSONP-style response such as callback({...}).
    _JSONP_PATTERN = re.compile(r"\((?P<data>.*)\)")

    # Prelogin fields login() needs to encrypt the password and build the post data
    _PRELOGIN_KEYS = ("servertime", "nonce", "pubkey", "rsakv")

    def __init__(self):
        """
        constructor
        """
        self.user_name = None       # login user name
        self.pass_word = None       # login password (plain text)
        self.user_uniqueid = None   # unique user ID, set by a successful login()
        self.user_nick = None       # user nickname, set by a successful login()

        self.cookie_jar, self.opener = None, None

    def login(self, user_name, pass_word, proxies=None):
        """
        login weibo.com, return True or False

        user_name: account name, stored on the instance and encoded by get_username()
        pass_word: plain-text password, stored on the instance and encrypted by get_password()
        proxies: handed unchanged to spider.make_cookiejar_opener()

        Returns True only when both the unique ID and the nickname were obtained.
        Incomplete or malformed server answers are logged and yield False.
        Exceptions raised by the opener outside get_json_data() are not caught here.
        """
        # reset the per-login state
        self.user_name = user_name
        self.pass_word = pass_word
        self.user_uniqueid = None
        self.user_nick = None

        # Build cookie_jar and opener (honouring `proxies`), so that cookies are
        # carried through the whole flow without further handling here.
        self.cookie_jar, self.opener = spider.make_cookiejar_opener(is_cookie=True, proxies=proxies)
        # urllib documents addheaders as a list of (name, value) tuples, so
        # materialise the dict view instead of storing it directly.
        self.opener.addheaders = list(spider.make_headers(
            user_agent="pc",
            host="weibo.com",
            referer="http://weibo.com/",
            accept="text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
            accept_encoding="gzip, deflate",
            accept_language="zh-CN,zh;q=0.8"
        ).items())

        # (1) open weibo.com/login.php first to pick up the cookies needed later;
        #     the response body itself is not used
        self.opener.open("http://weibo.com/login.php")

        # (2) encode the user name
        s_user_name = self.get_username()

        # (3) fetch the prelogin data (json) for the encoded user name
        json_data = self.get_json_data(su_value=s_user_name)
        if not json_data:
            return False
        missing_keys = [key for key in self._PRELOGIN_KEYS if key not in json_data]
        if missing_keys:
            logging.error("WeiBoLogin get_json_data lacks keys %s: %s", missing_keys, json_data)
            return False

        # (4) encrypt the password with the values from step (3)
        s_pass_word = self.get_password(json_data["servertime"], json_data["nonce"], json_data["pubkey"])

        # (5) build the post data used for the login request
        post_dict = self._build_post_dict(json_data, s_user_name, s_pass_word)

        # (6) when the server asks for a captcha, fetch it and let the user type it in
        if json_data.get("showpin", None) == 1:
            pcid = json_data.get("pcid")
            if pcid is None:
                logging.error("WeiBoLogin captcha requested without pcid: %s", json_data)
                return False
            post_dict["pcid"] = pcid
            post_dict["door"] = self._ask_captcha(pcid)

        # (7) post the login data
        login_url_1 = "http://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.4.18)&_=%d" % int(time.time())
        response_1 = self.opener.open(login_url_1, data=spider.make_post_data(post_dict))
        try:
            json_data_1 = json.loads(spider.get_html_content(response_1))
        except (TypeError, ValueError) as excep:
            logging.warning("WeiBoLogin failed: %s", excep)
            return False
        if json_data_1.get("retcode") != "0" or "ticket" not in json_data_1:
            logging.warning("WeiBoLogin failed: %s", json_data_1)
            return False

        # a redirect follows the login: build its query string from the ticket
        query_dict = {
            "callback": "sinaSSOController.callbackLoginStatus",
            "ticket": json_data_1["ticket"],
            "ssosavestate": int(time.time()),
            "client": "ssologin.js(v1.4.18)",
            "_": int(time.time()*1000),
        }
        login_url_2 = "https://passport.weibo.com/wbsso/login?" + urllib.parse.urlencode(query_dict)
        html_data = spider.get_html_content(self.opener.open(login_url_2), charset="gbk")
        try:
            json_data_2 = self._parse_jsonp(html_data)
        except ValueError as excep:
            logging.warning("WeiBoLogin failed: %s", excep)
            return False

        # check the outcome and pick up the unique ID and nickname
        if json_data_2.get("result") is True:
            user_info = json_data_2.get("userinfo") or {}
            self.user_uniqueid = user_info.get("uniqueid")
            self.user_nick = user_info.get("displayname")

        succeeded = bool(self.user_uniqueid and self.user_nick)
        if succeeded:
            logging.warning("WeiBoLogin succeed: %s", json_data_2)
        else:
            logging.warning("WeiBoLogin failed: %s", json_data_2)
        return succeeded

    @staticmethod
    def _build_post_dict(json_data, s_user_name, s_pass_word):
        """
        build the form fields of the login request from the prelogin data and the encoded credentials
        """
        return {
            "entry": "weibo",
            "gateway": "1",
            "from": "",
            "savestate": "7",
            "userticket": "1",
            "vsnf": "1",
            "service": "miniblog",
            "encoding": "UTF-8",
            "pwencode": "rsa2",
            "sr": "1280*800",
            "prelt": "529",
            "url": "http://weibo.com/ajaxlogin.php?framelogin=1&callback=parent.sinaSSOController.feedBackUrlCallBack",
            "rsakv": json_data["rsakv"],
            "servertime": json_data["servertime"],
            "nonce": json_data["nonce"],
            "su": s_user_name,
            "sp": s_pass_word,
            "returntype": "TEXT",
        }

    def _ask_captcha(self, pcid):
        """
        download the captcha image to captcha.jpeg and return the code typed in by the user
        """
        url = "http://login.sina.com.cn/cgi/pin.php?r=%d&s=0&p=%s" % (int(time.time()), pcid)
        # fetch before opening the file, so a failed download leaves no empty image behind
        image = self.opener.open(url).read()
        with open("captcha.jpeg", "wb") as file_out:
            file_out.write(image)
        return input("请输入验证码:")

    @classmethod
    def _parse_jsonp(cls, text):
        """
        decode the JSON payload of a JSONP-style response such as callback({...})

        Raises ValueError when text holds no parenthesised payload or the payload is not valid JSON.
        """
        matched = cls._JSONP_PATTERN.search(text or "")
        if matched is None:
            raise ValueError("no JSONP payload found in response: %r" % (text,))
        return json.loads(matched.group("data"))

    def get_username(self):
        """
        get username, encrypt file: http://tjs.sjs.sinajs.cn/t5/register/js/page/remote/loginLayer.js

        Returns self.user_name url-quoted (quote_plus) and then base64-encoded, as a str.
        """
        username_quote = urllib.parse.quote_plus(self.user_name)
        username_base64 = base64.b64encode(username_quote.encode("utf-8"))
        return username_base64.decode("utf-8")

    def get_json_data(self, su_value):
        """
        get the value of "servertime", "nonce", "pubkey", "rsakv" and "showpin", etc

        su_value: the encoded user name returned by get_username()

        Returns the decoded prelogin data, or an empty dict when the request or
        the parsing failed (the error is logged, not raised).
        """
        query = urllib.parse.urlencode({
            "entry": "weibo",
            "callback": "sinaSSOController.preloginCallBack",
            "rsakt": "mod",
            "checkpin": "1",
            "client": "ssologin.js(v1.4.18)",
            "su": su_value,
            "_": int(time.time()*1000),
        })

        try:
            response = self.opener.open('http://login.sina.com.cn/sso/prelogin.php?'+query)
            data = spider.get_html_content(response, charset="utf-8")
            json_data = self._parse_jsonp(data)
        except Exception as excep:
            # deliberately broad: login() treats an empty dict as "prelogin failed"
            json_data = {}
            logging.error("WeiBoLogin get_json_data error: %s", excep)

        logging.debug("WeiBoLogin get_json_data: %s", json_data)
        return json_data

    def get_password(self, servertime, nonce, pubkey):
        """
        get legal password, encrypt file: http://tjs.sjs.sinajs.cn/t5/register/js/page/remote/loginLayer.js

        servertime, nonce: values from the prelogin data, mixed into the plain text
        pubkey: RSA modulus as a hexadecimal string

        Returns the RSA-encrypted password as a hexadecimal str.
        """
        # plain text layout: "<servertime>\t<nonce>\n<password>"
        string = (str(servertime) + '\t' + str(nonce) + '\n' + str(self.pass_word)).encode("utf-8")
        # public exponent is 0x10001 (65537)
        public_key = rsa.PublicKey(int(pubkey, 16), int("10001", 16))
        password = rsa.encrypt(string, public_key)
        password = binascii.b2a_hex(password)
        return password.decode()
| 173 | + |
| 174 | + |
if __name__ == '__main__':
    # Debug-level, tab-separated log records for the manual run below
    logging.basicConfig(
        format="%(asctime)s\t%(levelname)s\t%(message)s",
        level=logging.DEBUG,
    )

    # Manual login check: replace the placeholders with a real weibo user name and password
    weibo = WeiBoLogin()
    weibo.login("username", "password")
0 commit comments