Skip to content

Commit

Permalink
[3.8] gh-115197: Stop resolving host in urllib.request proxy bypass (G…
Browse files Browse the repository at this point in the history
…H-115210) (GH-116069)

Use of a proxy is intended to defer DNS resolution of the target hosts to the proxy itself, rather than risk an information leak from the client host performing DNS resolution itself for any reason.  Proxy bypass lists are strictly name based.  Most implementations of proxy support agree.
(cherry picked from commit c43b26d)

Co-authored-by: Weii Wang <[email protected]>
  • Loading branch information
miss-islington and weiiwang01 authored Mar 19, 2024
1 parent 854f645 commit 41be376
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 44 deletions.
29 changes: 27 additions & 2 deletions Lib/test/test_urllib2.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,11 @@
import subprocess

import urllib.request
# The proxy bypass method imported below has logic specific to the OSX
# proxy config data structure but is testable on all platforms.
# The proxy bypass method imported below has logic specific to the
# corresponding system but is testable on all platforms.
from urllib.request import (Request, OpenerDirector, HTTPBasicAuthHandler,
HTTPPasswordMgrWithPriorAuth, _parse_proxy,
_proxy_bypass_winreg_override,
_proxy_bypass_macosx_sysconf,
AbstractDigestAuthHandler)
from urllib.parse import urlparse
Expand Down Expand Up @@ -1438,6 +1439,30 @@ def test_proxy_https_proxy_authorization(self):
self.assertEqual(req.host, "proxy.example.com:3128")
self.assertEqual(req.get_header("Proxy-authorization"), "FooBar")

@unittest.skipUnless(os.name == "nt", "only relevant for Windows")
def test_winreg_proxy_bypass(self):
    """Check name-based matching of the Windows ProxyOverride value.

    Exercises _proxy_bypass_winreg_override() with exact names, glob
    patterns, literal IP addresses and the special "<local>" entry.
    """
    proxy_override = "www.example.com;*.example.net; 192.168.0.1"
    proxy_bypass = _proxy_bypass_winreg_override

    # Hosts that match an entry of the override list.
    for host in ("www.example.com", "www.example.net", "192.168.0.1"):
        self.assertTrue(proxy_bypass(host, proxy_override),
                        "expected bypass of %s to be true" % host)

    # Hosts that match no entry of the override list.
    for host in ("example.com", "www.example.org", "example.net",
                 "192.168.0.2"):
        self.assertFalse(proxy_bypass(host, proxy_override),
                         "expected bypass of %s to be False" % host)

    # check intranet address bypass
    proxy_override = "example.com; <local>"
    # Bind the host under test explicitly so a failure message names the
    # host that was actually checked, not the last one of the loop above.
    host = "example.com"
    self.assertTrue(proxy_bypass(host, proxy_override),
                    "expected bypass of %s to be true" % host)
    host = "example.net"
    self.assertFalse(proxy_bypass(host, proxy_override),
                     "expected bypass of %s to be False" % host)
    # Dot-less names are treated as intranet addresses by "<local>".
    for host in ("test", "localhost"):
        self.assertTrue(proxy_bypass(host, proxy_override),
                        "expect <local> to bypass intranet address '%s'"
                        % host)

@unittest.skipUnless(sys.platform == 'darwin', "only relevant for OSX")
def test_osx_proxy_bypass(self):
bypass = {
Expand Down
77 changes: 35 additions & 42 deletions Lib/urllib/request.py
Original file line number Diff line number Diff line change
Expand Up @@ -2572,6 +2572,7 @@ def _proxy_bypass_macosx_sysconf(host, proxy_settings):
}
"""
from fnmatch import fnmatch
from ipaddress import AddressValueError, IPv4Address

hostonly, port = _splitport(host)

Expand All @@ -2588,20 +2589,17 @@ def ip2num(ipAddr):
return True

hostIP = None
try:
hostIP = int(IPv4Address(hostonly))
except AddressValueError:
pass

for value in proxy_settings.get('exceptions', ()):
# Items in the list are strings like these: *.local, 169.254/16
if not value: continue

m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value)
if m is not None:
if hostIP is None:
try:
hostIP = socket.gethostbyname(hostonly)
hostIP = ip2num(hostIP)
except OSError:
continue

if m is not None and hostIP is not None:
base = ip2num(m.group(1))
mask = m.group(2)
if mask is None:
Expand All @@ -2624,6 +2622,31 @@ def ip2num(ipAddr):
return False


# Same as _proxy_bypass_macosx_sysconf, testable on all platforms
def _proxy_bypass_winreg_override(host, override):
"""Return True if the host should bypass the proxy server.
The proxy override list is obtained from the Windows
Internet settings proxy override registry value.
An example of a proxy override value is:
"www.example.com;*.example.net; 192.168.0.1"
"""
from fnmatch import fnmatch

host, _ = _splitport(host)
proxy_override = override.split(';')
for test in proxy_override:
test = test.strip()
# "<local>" should bypass the proxy server for all intranet addresses
if test == '<local>':
if '.' not in host:
return True
elif fnmatch(host, test):
return True
return False


if sys.platform == 'darwin':
from _scproxy import _get_proxy_settings, _get_proxies

Expand Down Expand Up @@ -2718,7 +2741,7 @@ def proxy_bypass_registry(host):
import winreg
except ImportError:
# Std modules, so should be around - but you never know!
return 0
return False
try:
internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER,
r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
Expand All @@ -2728,40 +2751,10 @@ def proxy_bypass_registry(host):
'ProxyOverride')[0])
# ^^^^ Returned as Unicode but problems if not converted to ASCII
except OSError:
return 0
return False
if not proxyEnable or not proxyOverride:
return 0
# try to make a host list from name and IP address.
rawHost, port = _splitport(host)
host = [rawHost]
try:
addr = socket.gethostbyname(rawHost)
if addr != rawHost:
host.append(addr)
except OSError:
pass
try:
fqdn = socket.getfqdn(rawHost)
if fqdn != rawHost:
host.append(fqdn)
except OSError:
pass
# make a check value list from the registry entry: replace the
# '<local>' string by the localhost entry and the corresponding
# canonical entry.
proxyOverride = proxyOverride.split(';')
# now check if we match one of the registry values.
for test in proxyOverride:
if test == '<local>':
if '.' not in rawHost:
return 1
test = test.replace(".", r"\.") # mask dots
test = test.replace("*", r".*") # change glob sequence
test = test.replace("?", r".") # change glob char
for val in host:
if re.match(test, val, re.I):
return 1
return 0
return False
return _proxy_bypass_winreg_override(host, proxyOverride)

def proxy_bypass(host):
"""Return True, if host should be bypassed.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
``urllib.request`` no longer resolves the hostname before checking it
against the system's proxy bypass list on macOS and Windows.

0 comments on commit 41be376

Please sign in to comment.