fix: Correctly create origin from URL in CORS CSRF middleware (#29366)

Deleting all instances of the path from the URL meant that referers like
`https://learning.edx.org/` were turned into `https:learning.edx.org`. The
solution here is to use `urlunparse` to put the URL back together, but only
with the desired components (scheme and authority/netloc).

This relates to our previous upgrade to django-cors-headers 3.x, which
changed to use origins instead of domains in its whitelist setting:

36df86d829 (diff-811d60a3e1d60ff694eace0242e77d6b810d8e9c63c36d7b3c2591a08ebbb94bR58)

Added regression test (fails on master, passes on branch.)

Also:

- Replace word "domain" with "origin" in few places to use the correct
  term. (We should probably change this more broadly in names and comments
  in this module as some point.)
- Simplify logging to just output what we know, and not try to recapitulate
  the logic too much.

ref: BOM-2961
This commit is contained in:
Tim McCormack
2021-11-22 17:49:57 +00:00
committed by GitHub
parent 74bda16638
commit 4efd2d161a
2 changed files with 18 additions and 29 deletions

View File

@@ -27,7 +27,6 @@ def is_cross_domain_request_allowed(request):
"""
referer = request.META.get('HTTP_REFERER')
referer_parts = urllib.parse.urlparse(referer) if referer else None
referer_hostname = referer_parts.hostname if referer_parts is not None else None
# Use CORS_ALLOW_INSECURE *only* for development and testing environments;
# it should never be enabled in production.
@@ -48,36 +47,25 @@ def is_cross_domain_request_allowed(request):
log.debug("Referer '%s' must have the scheme 'https'")
return False
scheme_with_host = referer
# if url is like `https://www.foo.bar/baz/` following check will return `https://www.foo.bar`
if referer and referer_parts.scheme and referer_parts.path:
scheme_with_host = referer.replace(referer_parts.path, '')
# Reduce the referer URL to just the scheme and authority
# components (no path, query, or fragment).
if referer_parts:
origin_parts = (referer_parts.scheme, referer_parts.netloc, '', '', '', '')
referer_origin = urllib.parse.urlunparse(origin_parts)
else:
referer_origin = None
domain_is_whitelisted = (
getattr(settings, 'CORS_ORIGIN_ALLOW_ALL', False) or
scheme_with_host in getattr(settings, 'CORS_ORIGIN_WHITELIST', [])
allow_all = getattr(settings, 'CORS_ORIGIN_ALLOW_ALL', False)
origin_is_whitelisted = (
allow_all or
referer_origin in getattr(settings, 'CORS_ORIGIN_WHITELIST', [])
)
if not domain_is_whitelisted:
if referer_hostname is None:
# If no referer is specified, we can't check if it's a cross-domain
# request or not.
log.debug("Referrer hostname is `None`, so it is not on the whitelist.")
elif referer_hostname != request.get_host():
log.info(
(
"Domain '%s' is not on the cross domain whitelist. "
"Add the domain to `CORS_ORIGIN_WHITELIST` or set "
"`CORS_ORIGIN_ALLOW_ALL` to True in the settings."
), referer_hostname
)
log.info("Request host is '%s' and referer is '%s'", request.get_host(), referer)
else:
log.debug(
(
"Domain '%s' is the same as the hostname in the request, "
"so we are not going to treat it as a cross-domain request."
), referer_hostname
)
if not origin_is_whitelisted:
log.info(
f"Origin {referer_origin!r} was not in `CORS_ORIGIN_WHITELIST`; "
f"full referer was {referer!r} and requested host was {request.get_host()!r}; "
f"CORS_ORIGIN_ALLOW_ALL={allow_all}"
)
return False
return True

View File

@@ -69,6 +69,7 @@ class TestCorsMiddlewareProcessRequest(TestCase):
'https://foo.com', 'https://www.foo.com', 'https://learning.edge.foo.bar']
)
@ddt.data(
'https://foo.com', 'https://foo.com/',
'https://foo.com/bar/', 'https://foo.com/bar/baz/', 'https://www.foo.com/bar/baz/',
'https://learning.edge.foo.bar', 'https://learning.edge.foo.bar/foo'
)