-
- Notifications
You must be signed in to change notification settings - Fork 9.5k
Description
make requests respect no_proxy settings
bugfix attached
Expected Result
HTTP requests to whitelisted URLs should bypass all proxies.
Whitelisted URLs are those matched by the no_proxy environment variable.
Actual Result
proxies are not bypassed
the sample script will raise
requests.exceptions.ConnectionError: SOCKSHTTPConnectionPool ....: Max retries exceeded with url: / (Caused by NewConnectionError('<urllib3.contrib.socks.SOCKSConnection object at ....>: Failed to establish a new connection: 0x01: General SOCKS server failure',))
Reproduction Steps
use case: torify python requests, but also allow requests to localhost etc.
sample script
#!/usr/bin/python2
# license = public domain
#
# Reproduction script: route python-requests through a Tor SOCKS proxy while
# expecting requests to local/private addresses to bypass the proxy.
# The script starts a one-shot local HTTP server, sends it a request through
# the Tor-configured session, and prints the headers the server received.

import os
import random
import time
import requests
import BaseHTTPServer
import thread
import bs4

tor_host = '127.0.0.1'
#tor_port = 9050 # system-wide tor
tor_port = 9150 # torbrowser tor

# do not use tor to connect to local or private hosts
# see https://en.wikipedia.org/wiki/Reserved_IP_addresses
no_proxy_list = [
    # hostnames are not resolved locally with socks5h proxy
    'localhost',
    'localhost.localdomain',

    # IPv4
    '127.0.0.0/8', # localhost
    # subnets
    '169.254.0.0/16',
    '255.255.255.255',
    # LAN aka private networks
    '10.0.0.0/8',
    '100.64.0.0/10',
    '172.16.0.0/12',
    '192.0.0.0/24',
    '192.168.0.0/16',
    '198.18.0.0/15',

    # IPv6
    '::1/128', # localhost
    'fc00::/7', # LAN
    'fe80::/10', # link-local
]

# variant 1: whitelist via the environment variable
os.environ['no_proxy'] = ','.join(no_proxy_list)


def get_tor_session(tor_host='127.0.0.1', tor_port=9050,
                    torbrowser_headers=None, no_proxy_list=None):
    """Return a requests session that sends traffic through a Tor SOCKS proxy.

    :param tor_host: host of the Tor SOCKS proxy.
    :param tor_port: port of the Tor SOCKS proxy.
    :param torbrowser_headers: list of (name, value) header pairs to set on
        the session; when empty or omitted a warning is printed and a
        built-in set of headers is used instead.
    :param no_proxy_list: list of hosts/networks that should bypass the
        proxy; stored comma-joined under the 'no' key of session.proxies.
    :return: the configured requests session.
    """
    # None instead of [] as default: avoids sharing one mutable list object
    # between calls.
    if no_proxy_list is None:
        no_proxy_list = []

    session = requests.session()

    # variant 1
    session.trust_env = True
    #session.trust_env = False # ignore environment variables

    # socks5h scheme = remote DNS = no DNS leaks
    p = 'socks5h://{0}:{1}'.format(tor_host, tor_port)
    session.proxies = {
        'http' : p,
        'https': p,
        # variant 2: whitelist via the session's proxies dict
        'no': ','.join(no_proxy_list),
    }

    if not torbrowser_headers:
        print('warning. got no torbrowser_headers')
        # at least imitate torbrowser from year 2018
        torbrowser_headers = [
            ('accept-language', 'en-US,en;q=0.5'),
            ('accept', 'text/html,application/xhtml+xml,'
                + 'application/xml;q=0.9,*/*;q=0.8'),
            ('user-agent', 'Mozilla/5.0 (Windows NT 6.1; rv:60.0) '
                + 'Gecko/20100101 Firefox/60.0'),
            ('upgrade-insecure-requests', '1'),
        ]

    for k, v in torbrowser_headers:
        # header 'host' is dynamic
        # header 'connection' = 'keep-alive' is set internally
        if k not in ['host', 'connection']:
            session.headers[k] = v

    return session


tor = get_tor_session(tor_host, tor_port, [], no_proxy_list)

test_host = '127.0.0.1'
test_port = random.randint(8000, 16000)
test_url = 'http://{0}:{1}/'.format(test_host, test_port)


def test_tor_get(test_url):
    """Request test_url through the tor session, after a short delay."""
    time.sleep(2) # wait for http server to start
    tor.get(test_url)


thread.start_new_thread(test_tor_get, (test_url,))

test_headers = [] # global, filled in by test_handler.do_GET


class test_handler(BaseHTTPServer.BaseHTTPRequestHandler):
    """Request handler that records the headers of the request it receives."""

    def do_GET(self):
        # handle GET request
        global test_headers
        test_headers = self.headers.items()
        self.send_response(204, 'No Content')
        self.end_headers()


serv = BaseHTTPServer.HTTPServer((test_host, test_port), test_handler)
serv.handle_request() # handle one request
del serv

print('tor.get headers')
for k, v in test_headers:
    print('header %s: %s' % (k, v))

#print('tor ip '+tor.get("http://httpbin.org/ip").text)

print('tor check ' +
    bs4.BeautifulSoup(
        tor.get("https://check.torproject.org/").text,
        'html.parser'
    ).title.string.strip())
System Information
- python2
- current git-version of requests
Bugfix Quickfix
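The bug is in sessions.py, in this line:
proxies = merge_setting(proxies, self.proxies)
With os.environ['no_proxy'] = '127.0.0.1', utils.get_environ_proxies sets the request-level proxies to {},
but merge_setting then fills proxies in again from the session-level proxies (self.proxies),
so the proxy is used even though the URL is whitelisted.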
this bugfix will respect both
- no_proxy environment variable aka os.environ['no_proxy']
- proxies['no'] parameter for requests.get and requests.session
patch
--- a/utils.py
+++ b/utils.py
@@ -757,7 +757,7 @@
     :rtype: dict
     """
     if should_bypass_proxies(url, no_proxy=no_proxy):
-        return {}
+        return {'__bypass_proxies': True}
     else:
         return getproxies()
 
--- a/sessions.py
+++ b/sessions.py
@@ -698,8 +698,15 @@
                 verify = (os.environ.get('REQUESTS_CA_BUNDLE') or
                           os.environ.get('CURL_CA_BUNDLE'))
 
+        if 'no' in self.proxies:
+            if should_bypass_proxies(url, no_proxy=self.proxies['no']):
+                proxies = {'__bypass_proxies': True}
+
         # Merge all the kwargs.
-        proxies = merge_setting(proxies, self.proxies)
+        if '__bypass_proxies' in proxies:
+            proxies = {} # bypass proxies for this request
+        else:
+            proxies = merge_setting(proxies, self.proxies)
         stream = merge_setting(stream, self.stream)
         verify = merge_setting(verify, self.verify)
         cert = merge_setting(cert, self.cert)