
Commit 6ba9d92

Merge remote-tracking branch 'xianhu/master'
2 parents: 5eefad4 + bccf920

File tree

5 files changed: +271 -5 lines changed


README.md

Lines changed: 8 additions & 2 deletions
@@ -1,7 +1,7 @@
 # LearnPython
 Learn Python by writing code; the details are explained in the Zhihu column [撸代码,学知识](https://zhuanlan.zhihu.com/pythoner)

-===============================================================================
+===================================================================================================
 ### python_base.py: Getting started with Python in a thousand lines of code

 ### python_visual.py: Getting started with Matplotlib in 15 charts
@@ -23,6 +23,12 @@
 ### python_decorator.py: Advanced Python: decorators explained through worked examples (code included)

 ### python_datetime.py: Do you really understand date and time handling in Python?
-===============================================================================
+
+### python_metaclass.py: Advanced Python: understanding Python metaclasses step by step
+
+### python_coroutine.py: Advanced Python: understanding async IO and coroutines in Python, with an application to web crawling
+
+### python_aiohttp.py: Code examples for Aiohttp, the best asynchronous crawling library in Python
+===================================================================================================

 ### You are welcome to fork this project and submit a pull request after making changes

python_aiohttp.py

Lines changed: 116 additions & 0 deletions
@@ -0,0 +1,116 @@
# _*_ coding: utf-8 _*_

"""
python_aiohttp.py by xianhu
"""

import asyncio
import aiohttp


# Simple example
async def aiohttp_test01(url):
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as resp:
            print(resp.status)
            print(await resp.text())

loop = asyncio.get_event_loop()
tasks = [aiohttp_test01("https://api.github.com/events")]
loop.run_until_complete(asyncio.wait(tasks))
loop.close()

# Other HTTP methods
# session.post('http://httpbin.org/post', data=b'data')
# session.put('http://httpbin.org/put', data=b'data')
# session.delete('http://httpbin.org/delete')
# session.head('http://httpbin.org/get')
# session.options('http://httpbin.org/get')
# session.patch('http://httpbin.org/patch', data=b'data')

# Custom headers
# payload = {'some': 'data'}
# headers = {'content-type': 'application/json'}
# await session.post(url, data=json.dumps(payload), headers=headers)

# Custom cookies
# cookies = {'cookies_are': 'working'}
# async with ClientSession(cookies=cookies) as session:
# Access the cookies via session.cookie_jar

# Passing parameters in URLs
# 1. params = {'key1': 'value1', 'key2': 'value2'}
# 2. params = [('key', 'value1'), ('key', 'value2')]
# async with session.get('http://httpbin.org/get', params=params) as resp:
#     assert resp.url == 'http://httpbin.org/get?key2=value2&key1=value1'

# Sending data
# payload = {'key1': 'value1', 'key2': 'value2'}
# async with session.post('http://httpbin.org/post', data=payload) as resp:
# async with session.post(url, data=json.dumps(payload)) as resp:
#     print(await resp.text())

# Sending files (1)
# files = {'file': open('report.xls', 'rb')}
# await session.post(url, data=files)

# Sending files (2): FormData
# data = FormData()
# data.add_field('file',
#                open('report.xls', 'rb'),
#                filename='report.xls',
#                content_type='application/vnd.ms-excel')
# await session.post(url, data=data)
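
# A minimal runnable sketch, assuming http://httpbin.org is reachable: it combines the
# commented params/headers/data snippets above into working code. The header value and
# query parameter are chosen only for illustration.
async def aiohttp_post_demo():
    payload = {'key1': 'value1', 'key2': 'value2'}
    headers = {'user-agent': 'aiohttp-demo'}
    params = {'show_env': '1'}
    async with aiohttp.ClientSession() as session:
        async with session.post("http://httpbin.org/post",
                                data=payload, params=params, headers=headers) as resp:
            print(resp.status)
            print(await resp.json())

# The loop above was already closed, so run this on a fresh one:
# loop = asyncio.new_event_loop(); asyncio.set_event_loop(loop)
# loop.run_until_complete(aiohttp_post_demo())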
# Timeouts
# async with session.get('https://github.com', timeout=60) as r:

# Proxy support
# async with aiohttp.ClientSession() as session:
#     async with session.get("http://python.org", proxy="http://some.proxy.com") as resp:
#         print(resp.status)

# async with aiohttp.ClientSession() as session:
#     proxy_auth = aiohttp.BasicAuth('user', 'pass')
#     async with session.get("http://python.org", proxy="http://some.proxy.com", proxy_auth=proxy_auth) as resp:
#         print(resp.status)
# session.get("http://python.org", proxy="http://user:pass@some.proxy.com")

# Response content
# async with session.get('https://api.github.com/events') as resp:
#     print(await resp.text())
#     print(await resp.text(encoding='gbk'))
#     print(await resp.read())
#     print(await resp.json())

# Large responses: read in chunks
# with open(filename, 'wb') as fd:
#     while True:
#         chunk = await resp.content.read(chunk_size)
#         if not chunk:
#             break
#         fd.write(chunk)

# Other response attributes
# async with session.get('http://httpbin.org/get') as resp:
#     print(resp.status)       # status code
#     print(resp.headers)      # headers
#     print(resp.raw_headers)  # raw headers
#     print(resp.cookies)      # cookies returned by the server

# Redirect history
# resp = await session.get('http://example.com/some/redirect/')
# resp: <ClientResponse(http://example.com/some/other/url/) [200]>
# resp.history: (<ClientResponse(http://example.com/some/redirect/) [301]>,)

# Releasing a response
# 1. async with session.get(url) as resp: pass
# 2. await resp.release()

# Connectors
# conn = aiohttp.TCPConnector()
# session = aiohttp.ClientSession(connector=conn)

# Limiting the connection pool size:
# conn = aiohttp.TCPConnector(limit=30)
# conn = aiohttp.TCPConnector(limit=None)
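
The chunked-read and connector snippets above fit together naturally. Below is a minimal sketch, assuming a reachable URL and a writable local file name, that streams a response body to disk through a size-limited connection pool.

import asyncio
import aiohttp

async def download(url, filename, chunk_size=1024):
    conn = aiohttp.TCPConnector(limit=30)                 # cap the pool at 30 connections
    async with aiohttp.ClientSession(connector=conn) as session:
        async with session.get(url) as resp:
            with open(filename, "wb") as fd:
                while True:                               # read the body chunk by chunk
                    chunk = await resp.content.read(chunk_size)
                    if not chunk:
                        break
                    fd.write(chunk)

# loop = asyncio.new_event_loop(); asyncio.set_event_loop(loop)
# loop.run_until_complete(download("https://api.github.com/events", "events.json"))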

python_coroutine.py

Lines changed: 77 additions & 0 deletions
@@ -0,0 +1,77 @@
# _*_ coding: utf-8 _*_

"""
python_coroutine.py by xianhu
"""

import asyncio
import aiohttp
import threading


# Producer/consumer example
def consumer():            # The consumer: because of the yield keyword it is a generator
    print("[Consumer] Init Consumer ......")
    r = "init ok"          # Initial result, handed back to the producer when the consumer is started
    while True:
        n = yield r        # Receive the producer's message via yield and return the previous result to it
        print("[Consumer] consume n = %s, r = %s" % (n, r))
        r = "consume %s OK" % n  # Result of this round, returned to the producer on the next loop


def produce(c):            # The producer; c is a generator
    print("[Producer] Init Producer ......")
    r = c.send(None)       # Start the consumer generator and receive its first result
    print("[Producer] Start Consumer, return %s" % r)
    n = 0
    while n < 5:
        n += 1
        print("[Producer] While, Producing %s ......" % n)
        r = c.send(n)      # Send a message to the consumer and wait for its result; execution switches to the consumer
        print("[Producer] Consumer return: %s" % r)
    c.close()              # Close the consumer generator
    print("[Producer] Close Producer ......")

# produce(consumer())


# Async IO example for Python 3.4, using the asyncio library
@asyncio.coroutine
def hello(index):          # Define a coroutine with the asyncio.coroutine decorator
    print('Hello world! index=%s, thread=%s' % (index, threading.currentThread()))
    yield from asyncio.sleep(1)  # Simulate an IO task
    print('Hello again! index=%s, thread=%s' % (index, threading.currentThread()))

loop = asyncio.get_event_loop()               # Get an event loop
tasks = [hello(1), hello(2)]                  # Build the task list
loop.run_until_complete(asyncio.wait(tasks))  # Run the tasks
loop.close()                                  # Close the event loop


# Async IO example for Python 3.5, using the async and await keywords
async def hello1(index):   # Define a coroutine with the async keyword
    print('Hello world! index=%s, thread=%s' % (index, threading.currentThread()))
    await asyncio.sleep(1)  # Simulate an IO task
    print('Hello again! index=%s, thread=%s' % (index, threading.currentThread()))

loop = asyncio.new_event_loop()               # Create a fresh event loop (the previous one was closed)
asyncio.set_event_loop(loop)                  # and make it the current loop so asyncio.wait uses it
tasks = [hello1(1), hello1(2)]                # Build the task list
loop.run_until_complete(asyncio.wait(tasks))  # Run the tasks
loop.close()                                  # Close the event loop


# aiohttp example
async def get(url):
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as resp:
            print(url, resp.status)
            print(url, await resp.text())

loop = asyncio.new_event_loop()               # Create a fresh event loop (the previous one was closed)
asyncio.set_event_loop(loop)
tasks = [                                     # Build the task list
    get("http://zhushou.360.cn/detail/index/soft_id/3283370"),
    get("http://zhushou.360.cn/detail/index/soft_id/3264775"),
    get("http://zhushou.360.cn/detail/index/soft_id/705490")
]
loop.run_until_complete(asyncio.wait(tasks))  # Run the tasks
loop.close()                                  # Close the event loop
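
asyncio.wait returns sets of finished and pending tasks rather than results in order; when the crawl needs the page data back, asyncio.gather is the usual alternative. A minimal sketch along those lines, assuming the same URLs are reachable and Python 3.5+:

import asyncio
import aiohttp

async def fetch(url):
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as resp:
            return url, resp.status, len(await resp.text())

loop = asyncio.new_event_loop()        # a fresh loop, so the sketch also works after the examples above
asyncio.set_event_loop(loop)
results = loop.run_until_complete(asyncio.gather(
    fetch("http://zhushou.360.cn/detail/index/soft_id/3283370"),
    fetch("http://zhushou.360.cn/detail/index/soft_id/3264775"),
))
loop.close()
for url, status, length in results:    # gather preserves the order of its arguments
    print(url, status, length)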

python_metaclass.py

Lines changed: 55 additions & 0 deletions
@@ -0,0 +1,55 @@
# _*_ coding: utf-8 _*_

"""
python_metaclass.py by xianhu
"""


class Foo:
    def hello(self):
        print("hello world!")
        return

foo = Foo()
print(type(foo))        # <class '__main__.Foo'>
print(type(foo.hello))  # <class 'method'>
print(type(Foo))        # <class 'type'>

temp = Foo              # a class can be assigned to another variable,
Foo.var = 11            # have attributes added to it,
print(Foo)              # and be passed as a function argument (here, to print)


# ========================================================================
def init(self, name):
    self.name = name
    return


def hello(self):
    print("hello %s" % self.name)
    return

Foo = type("Foo", (object,), {"__init__": init, "hello": hello, "cls_var": 10})
foo = Foo("xianhu")
print(foo.hello())
print(Foo.cls_var)

print(foo.__class__)
print(Foo.__class__)
print(type.__class__)
# ========================================================================


class Author(type):
    def __new__(mcs, name, bases, dict):
        # Add an author attribute to every class created by this metaclass
        dict["author"] = "xianhu"
        return super(Author, mcs).__new__(mcs, name, bases, dict)


class Foo(object, metaclass=Author):
    pass

foo = Foo()
print(foo.author)
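
One point the example leaves implicit: the metaclass also governs subclasses, and the class object itself is an instance of Author rather than of plain type. A short check, assuming the Author and Foo definitions above:

class Bar(Foo):                  # subclasses of Foo are also created by the Author metaclass
    pass

print(type(Foo) is Author)       # True: Foo is an instance of Author, not of type
print(type(Bar) is Author)       # True: the metaclass is inherited by subclasses
print(Bar.author)                # "xianhu": injected by Author.__new__ when Bar was created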

python_requests.py

Lines changed: 15 additions & 3 deletions
@@ -4,7 +4,7 @@
 python_requests.py by xianhu
 """

-import requests
+import requests.adapters

 # Fetch page content in different ways; returns a Response object. The request argument can be a URL or a Request object
 r0 = requests.get("https://github.com/timeline.json")
@@ -181,7 +181,7 @@
 # If a request exceeds the configured maximum number of redirects, a TooManyRedirects exception is raised
 # All exceptions explicitly raised by Requests inherit from requests.exceptions.RequestException

-# All exceptions
+# All exceptions:
 # exception requests.RequestException(*args, **kwargs): There was an ambiguous exception that occurred while handling your request.
 # exception requests.ConnectionError(*args, **kwargs): A Connection error occurred.
 # exception requests.HTTPError(*args, **kwargs): An HTTP error occurred.
@@ -223,13 +223,25 @@ def print_url(resp):
 requests.get("http://example.org", proxies=proxies)
 # If the proxy requires HTTP Basic Auth, use the http://user:password@host:port/ form, e.g. "http": "http://user:pass@10.10.1.10:3128/"

-# Besides plain HTTP proxies, Requests also supports SOCKS proxies
+# Besides plain HTTP proxies, Requests also supports SOCKS proxies; this requires a separate install:
 # $ pip install requests[socks]
 proxies = {
     "http": "socks5://user:pass@host:port",
     "https": "socks5://user:pass@host:port"
 }
 requests.get("http://example.org", proxies=proxies)

+# Requests transport adapters
+# Since v1.0.0, Requests has used a modular internal design, partly in order to support transport adapters.
+# A transport adapter defines how to talk to an HTTP service; in particular, it lets you apply per-service configuration.
+# Requests ships with a single transport adapter, HTTPAdapter, which uses urllib3 to provide the default HTTP and HTTPS behaviour.
+# Whenever a Session is initialized, one adapter is mounted on it for HTTP and another for HTTPS.
+# You can create and mount your own transport adapters to get special behaviour; when mounting, you tell the session which web service the adapter applies to.
+s = requests.Session()
+s.mount("http://baidu.com", requests.adapters.HTTPAdapter())
+
+# For the error "Connection pool is full, discarding connection: xxxx.com", enlarge the pool:
+s.mount('https://', requests.adapters.HTTPAdapter(pool_connections=100, pool_maxsize=100))
+
 # Disable InsecurePlatformWarning
 # requests.packages.urllib3.disable_warnings()
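
The transport-adapter mechanism described above is also the usual place to configure retries and pool sizes in one go: HTTPAdapter accepts a max_retries argument that can be a urllib3 Retry object. A minimal sketch, with the retry counts, status codes, and mounted prefixes chosen only for illustration:

import requests
import requests.adapters
from urllib3.util.retry import Retry

# Retry up to 3 times with exponential backoff on common transient server errors
retry = Retry(total=3, backoff_factor=0.5, status_forcelist=[500, 502, 503, 504])
adapter = requests.adapters.HTTPAdapter(max_retries=retry,
                                        pool_connections=100, pool_maxsize=100)

s = requests.Session()
s.mount("http://", adapter)      # applies to every http:// URL requested through this session
s.mount("https://", adapter)     # and to every https:// URL
# r = s.get("http://example.org")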
