Skip to content

Commit 9e316b8

Browse files
committed
add python_weibo
1 parent ce3af98 commit 9e316b8

File tree

2 files changed

+182
-1
lines changed

2 files changed

+182
-1
lines changed

README.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,14 @@
11
# LearnPython
2-
以撸代码的形式学习Python
2+
以撸代码的形式学习Python, 具体说明在[知乎专栏-撸代码,学知识](https://zhuanlan.zhihu.com/pythoner)
33

44
============================================================
55
### python_base.py: 千行代码入门Python
66

77
### python_visual.py: 15张图入门Matplotlib
88

99
### python_spider.py: 一个很“水”的Python爬虫入门代码文件
10+
11+
### python_weibo.py: “史上最详细”的Python模拟登录新浪微博流程
1012
============================================================
1113

1214
### 您可以fork该项目,并在修改后提交Pull request

python_weibo.py

Lines changed: 179 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,179 @@
1+
# _*_ coding: utf-8 _*_
2+
3+
import re
4+
import rsa
5+
import ssl
6+
import time
7+
import json
8+
import base64
9+
import logging
10+
import binascii
11+
import urllib.parse
12+
13+
# Reference: the PSpider project
14+
import spider
15+
# NOTE(review): this swaps the ssl module's default HTTPS context factory for the
# unverified one, which turns off certificate verification for clients that rely
# on the default context. Both names are private ssl helpers and the assignment
# is process-wide, not limited to this file - confirm this is acceptable.
ssl._create_default_https_context = ssl._create_unverified_context
16+
17+
18+
class WeiBoLogin(object):
    """
    class of WeiBoLogin, to login weibo.com

    The login flow is: fetch the initial cookies, run the "prelogin" request to
    obtain the server time / nonce / RSA public key, post the encrypted
    credentials, then follow the final SSO redirect and read the user info.
    Networking helpers come from the project-local ``spider`` module.
    """

    # Captures the payload of a JSONP reply such as ``callback({...})``.
    # A raw string is required: ``\(`` is not a valid escape in a normal literal.
    _JSONP_PATTERN = re.compile(r"\((?P<payload>.*)\)")

    # Prelogin fields the rest of the flow cannot work without.
    _PRELOGIN_KEYS = ("servertime", "nonce", "pubkey", "rsakv")

    def __init__(self):
        """
        constructor
        """
        self.user_name = None       # login user name
        self.pass_word = None       # login password
        self.user_uniqueid = None   # unique ID of the logged-in user
        self.user_nick = None       # display name of the logged-in user

        # Both are created by login(); until then there is no session.
        self.cookie_jar, self.opener = None, None

    @staticmethod
    def _parse_jsonp(text):
        """
        decode the JSON payload of a JSONP response such as ``callback({...})``

        :param text: response body as str
        :return: the decoded JSON value
        :raises ValueError: if no parenthesised payload exists or it is not valid JSON
        """
        match = WeiBoLogin._JSONP_PATTERN.search(text)
        if match is None:
            raise ValueError("no JSONP payload found in response: %r" % text[:200])
        return json.loads(match.group("payload"))

    def _read_captcha(self, pcid):
        """
        download the captcha image for pcid to "captcha.jpeg" and ask the user to type it

        :param pcid: captcha ID taken from the prelogin data
        :return: the text entered by the user
        """
        url = "http://login.sina.com.cn/cgi/pin.php?r=%d&s=0&p=%s" % (int(time.time()), pcid)
        with open("captcha.jpeg", "wb") as file_out:
            file_out.write(self.opener.open(url).read())
        return input("请输入验证码:")

    def login(self, user_name, pass_word, proxies=None):
        """
        login weibo.com, return True or False

        :param user_name: account name used to log in
        :param pass_word: plain-text password; it is RSA-encrypted before being sent
        :param proxies: passed through unchanged to spider.make_cookiejar_opener
        :return: True if both user_uniqueid and user_nick were obtained, else False

        Unexpected or unparsable server replies are logged and reported as False.
        Errors raised by the HTTP requests themselves are not caught here.
        """
        # reset the per-login state
        self.user_name = user_name
        self.pass_word = pass_word
        self.user_uniqueid = None
        self.user_nick = None

        # Build the cookie jar and opener (through the given proxies, if any), so
        # that cookies are carried across every request of the flow.
        self.cookie_jar, self.opener = spider.make_cookiejar_opener(is_cookie=True, proxies=proxies)
        # A real list rather than a dict view, in case the opener needs list behaviour.
        self.opener.addheaders = list(spider.make_headers(
            user_agent="pc",
            host="weibo.com",
            referer="http://weibo.com/",
            accept="text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
            accept_encoding="gzip, deflate",
            accept_language="zh-CN,zh;q=0.8"
        ).items())

        # (1) open weibo.com/login.php first, to pick up the required cookies
        self.opener.open("http://weibo.com/login.php")

        # (2) encode the user name
        s_user_name = self.get_username()

        # (3) use the encoded user name to fetch the prelogin data (JSON)
        json_data = self.get_json_data(su_value=s_user_name)
        if not json_data or not isinstance(json_data, dict):
            return False
        missing = [key for key in self._PRELOGIN_KEYS if key not in json_data]
        if missing:
            # e.g. an error payload: report failure instead of raising KeyError
            logging.warning("WeiBoLogin failed: prelogin data lacks %s: %s", missing, json_data)
            return False

        # (4) encrypt the password with the prelogin data
        s_pass_word = self.get_password(json_data["servertime"], json_data["nonce"], json_data["pubkey"])

        # (5) build the post data of the login request
        post_dict = {
            "entry": "weibo",
            "gateway": "1",
            "from": "",
            "savestate": "7",
            "userticket": "1",
            "vsnf": "1",
            "service": "miniblog",
            "encoding": "UTF-8",
            "pwencode": "rsa2",
            "sr": "1280*800",
            "prelt": "529",
            "url": "http://weibo.com/ajaxlogin.php?framelogin=1&callback=parent.sinaSSOController.feedBackUrlCallBack",
            "rsakv": json_data["rsakv"],
            "servertime": json_data["servertime"],
            "nonce": json_data["nonce"],
            "su": s_user_name,
            "sp": s_pass_word,
            "returntype": "TEXT",
        }

        # (6) if the server asks for a captcha, fetch it and let the user solve it
        if json_data.get("showpin") == 1:
            post_dict["door"] = self._read_captcha(json_data["pcid"])
            post_dict["pcid"] = json_data["pcid"]

        # (7) post the login data
        login_url_1 = "http://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.4.18)&_=%d" % int(time.time())
        response_1 = self.opener.open(login_url_1, data=spider.make_post_data(post_dict))
        try:
            json_data_1 = json.loads(spider.get_html_content(response_1))
        except (TypeError, ValueError) as excep:
            logging.warning("WeiBoLogin failed: unparsable login response: %s", excep)
            return False

        if not isinstance(json_data_1, dict) or json_data_1.get("retcode") != "0" or "ticket" not in json_data_1:
            logging.warning("WeiBoLogin failed: %s", json_data_1)
            return False

        # (8) a successful login is followed by a redirect; build its query string
        redirect_query = {
            "callback": "sinaSSOController.callbackLoginStatus",
            "ticket": json_data_1["ticket"],
            "ssosavestate": int(time.time()),
            "client": "ssologin.js(v1.4.18)",
            "_": int(time.time()*1000),
        }
        login_url_2 = "https://passport.weibo.com/wbsso/login?" + urllib.parse.urlencode(redirect_query)
        html_data = spider.get_html_content(self.opener.open(login_url_2), charset="gbk")
        try:
            json_data_2 = self._parse_jsonp(html_data)
        except (TypeError, ValueError) as excep:
            logging.warning("WeiBoLogin failed: unparsable redirect response: %s", excep)
            return False

        # (9) check the result and read the unique ID and nick name of the user
        if isinstance(json_data_2, dict) and json_data_2.get("result") is True:
            user_info = json_data_2.get("userinfo") or {}
            self.user_uniqueid = user_info.get("uniqueid")
            self.user_nick = user_info.get("displayname")

        succeeded = bool(self.user_uniqueid and self.user_nick)
        if succeeded:
            logging.warning("WeiBoLogin succeed: %s", json_data_2)
        else:
            logging.warning("WeiBoLogin failed: %s", json_data_2)
        return succeeded

    def get_username(self):
        """
        get username, encrypt file: http://tjs.sjs.sinajs.cn/t5/register/js/page/remote/loginLayer.js

        :return: self.user_name, URL-quoted (quote_plus) and then base64-encoded, as str
        """
        username_quote = urllib.parse.quote_plus(self.user_name)
        username_base64 = base64.b64encode(username_quote.encode("utf-8"))
        return username_base64.decode("utf-8")

    def get_json_data(self, su_value):
        """
        get the value of "servertime", "nonce", "pubkey", "rsakv" and "showpin", etc

        :param su_value: encoded user name, as returned by get_username()
        :return: the decoded prelogin JSON, or {} if the request or the parsing failed
        """
        query = urllib.parse.urlencode({
            "entry": "weibo",
            "callback": "sinaSSOController.preloginCallBack",
            "rsakt": "mod",
            "checkpin": "1",
            "client": "ssologin.js(v1.4.18)",
            "su": su_value,
            "_": int(time.time()*1000),
        })

        try:
            response = self.opener.open('http://login.sina.com.cn/sso/prelogin.php?'+query)
            data = spider.get_html_content(response, charset="utf-8")
            json_data = self._parse_jsonp(data)
        except Exception as excep:
            # Deliberately broad: any failure here is logged and reported as "no data".
            json_data = {}
            logging.error("WeiBoLogin get_json_data error: %s", excep)

        logging.debug("WeiBoLogin get_json_data: %s", json_data)
        return json_data

    def get_password(self, servertime, nonce, pubkey):
        """
        get legal password, encrypt file: http://tjs.sjs.sinajs.cn/t5/register/js/page/remote/loginLayer.js

        :param servertime: "servertime" value of the prelogin data
        :param nonce: "nonce" value of the prelogin data
        :param pubkey: RSA modulus as a hexadecimal string ("pubkey" of the prelogin data)
        :return: hex string of RSA("<servertime>\\t<nonce>\\n<password>") under that key
        """
        string = (str(servertime) + '\t' + str(nonce) + '\n' + str(self.pass_word)).encode("utf-8")
        # public exponent 0x10001 (65537), paired with the server-supplied modulus
        public_key = rsa.PublicKey(int(pubkey, 16), 0x10001)
        password = rsa.encrypt(string, public_key)
        return binascii.b2a_hex(password).decode()
173+
174+
175+
if __name__ == '__main__':
    # Tab-separated DEBUG logging, so every step of the login flow is visible.
    log_format = "%(asctime)s\t%(levelname)s\t%(message)s"
    logging.basicConfig(format=log_format, level=logging.DEBUG)

    # Login smoke test: put a real Weibo user name and password here.
    login_client = WeiBoLogin()
    login_client.login("username", "password")

0 commit comments

Comments
 (0)