Dark launch: Fix site shadowing and 1+N queries in third party auth config fetching (#23824)

This performs a dark launch compare of the existing implementation (still in use) for fetching TPA provider configs and a new implementation, recording metrics on exceptions and mismatches. The new implementation should have two benefits, once we're switched over: - Fix 1+N queries on login page view where the site for each config was fetched in a loop (ARCHBOM-1139) - Don't allow configs with the same key on different sites to interfere with each other (regression test added) The new impl does not use TieredCache, but only the request cache, which we may want to adjust later.
2020-05-06 09:54:32 -04:00
parent 4f32e5728b
commit 79420640d5
4 changed files with 179 additions and 3 deletions
--- a/common/djangoapps/third_party_auth/provider.py
+++ b/common/djangoapps/third_party_auth/provider.py
@@ -4,8 +4,11 @@ Third-party auth provider configuration API.


 from django.contrib.sites.models import Site
+import edx_django_utils.monitoring as monitoring_utils
+from more_itertools import unique_everseen

 from openedx.core.djangoapps.theming.helpers import get_current_request
+from openedx.core.lib.cache_utils import request_cached

 from .models import (
    _LTI_BACKENDS,
@@ -17,6 +20,29 @@ from .models import (
    SAMLProviderConfig
 )

+CACHE_NAMESPACE = 'third_party_auth.provider'
+
+
+@request_cached(namespace=CACHE_NAMESPACE)
+def current_configs_for_site(provider_cls, site_id):
+    """
+    Like ConfigurationModel.current() but returns the current config in every
+    partition as delineated by KEY_FIELDS, but only looking at configs in the
+    specified site.
+
+    This is a hack to support these provider configs, which do not include site
+    in their KEY_FIELDS but need to be partitioned on site. See comment in
+    ``_enabled_providers`` for more details.
+    """
+
+    def partition_key(obj):
+        """Compute the partition key for this object."""
+        return tuple(getattr(obj, k) for k in provider_cls.KEY_FIELDS)
+
+    configs = provider_cls.objects.filter(site_id=str(site_id))
+    # Get the most recent record in each partition
+    return unique_everseen(configs.order_by('-change_date'), partition_key)
+

 class Registry(object):
    """
@@ -27,9 +53,40 @@ class Registry(object):
    @classmethod
    def _enabled_providers(cls):
        """
-        Helper method that returns a generator used to iterate over all providers
-        of the current site.
+        Generator returning all enabled providers of the current site.
+
+        Provider configurations are partitioned on site + some key (backend
+        name in the case of OAuth, slug for SAML, and consumer key for LTI).
        """
+        try:
+            old_values = set(cls._enabled_providers_old())
+        except:
+            # Make sure we have an error baseline to compare to
+            monitoring_utils.increment("temp_tpa_enabled_all_dark_launch_old_threw")
+            raise
+
+        # Dark launch call and metrics
+        try:
+            new_values = set(cls._enabled_providers_new())
+
+            try:
+                if old_values != new_values:
+                    data = "old[%s]new[%s]" % (
+                        ','.join([str(p.id) for p in old_values]),
+                        ','.join([str(p.id) for p in new_values]))
+                    monitoring_utils.set_custom_metric("temp_tpa_enabled_all_dark_launch_mismatch", data)
+            except:  # pylint: disable=bare-except
+                pass
+        except:  # pylint: disable=bare-except
+            monitoring_utils.increment("temp_tpa_enabled_all_dark_launch_new_threw")
+
+        # Could return old_values, but lets keep it a generator for consistency
+        for config in old_values:
+            yield config
+
+    @classmethod
+    def _enabled_providers_old(cls):
+        """Old implementation of _enabled_providers"""
        oauth2_backend_names = OAuth2ProviderConfig.key_values('backend_name', flat=True)
        for oauth2_backend_name in oauth2_backend_names:
            provider = OAuth2ProviderConfig.current(oauth2_backend_name)
@@ -46,6 +103,45 @@ class Registry(object):
            if provider.enabled_for_current_site and provider.backend_name in _LTI_BACKENDS:
                yield provider

+    @classmethod
+    def _enabled_providers_new(cls):
+        """New implementation of _enabled_providers for dark launch"""
+        site = Site.objects.get_current(get_current_request())
+
+        # Note that site is added as an explicit filter. 'site_id' isn't in the
+        # KEY_FIELDS for these models, but it should be, since we want "the most
+        # recent version of the config" to be most recent for a given key *and*
+        # site. Since site is not in KEY_FIELDS, looking up a config by just its
+        # key could result in finding a config for a different site. (The
+        # previous code here would then have returned nothing, having had a
+        # site-filtering step.) In short, we need to be careful not to let two
+        # configs belonging to two different sites but with the same key
+        # "shadow" one another.
+        #
+        # It's not clear if we can safely add site_id to the KEY_FIELDS, and
+        # it's also not clear if we're even going to want to keep support for
+        # sites here long term, so in the meantime we just only ask for
+        # configs within the current request's site.
+
+        # Get all OAuth2 provider configs for this site...
+        for config in current_configs_for_site(OAuth2ProviderConfig, site.pk):
+            # Ignore the config if it's disabled or we don't have a backend
+            # class for it. (Not having a backend class for a config should
+            # be pretty rare if it happens at all.)
+            if config.enabled and config.backend_name in _PSA_OAUTH2_BACKENDS:
+                yield config
+
+        if SAMLConfiguration.is_enabled(site, 'default'):
+            # ...followed by SAML configs, if feature is enabled...
+            for config in current_configs_for_site(SAMLProviderConfig, site.pk):
+                if config.enabled and config.backend_name in _PSA_SAML_BACKENDS:
+                    yield config
+
+        # ...and finally LTI configs
+        for config in current_configs_for_site(LTIProviderConfig, site.pk):
+            if config.enabled and config.backend_name in _LTI_BACKENDS:
+                yield config
+
    @classmethod
    def enabled(cls):
        """Returns list of enabled providers."""
@@ -113,6 +209,36 @@ class Registry(object):
        Yields:
            Instances of ProviderConfig.
        """
+        try:
+            old_values = set(cls._get_enabled_by_backend_name_old(backend_name))
+        except:
+            # Make sure we have an error baseline to compare to
+            monitoring_utils.increment("temp_tpa_by_backend_dark_launch_old_threw")
+            raise
+
+        # Dark launch call and metrics
+        try:
+            new_values = set(cls._get_enabled_by_backend_name_new(backend_name))
+
+            try:
+                if old_values != new_values:
+                    data = "backend[%s]old[%s]new[%s]" % (
+                        backend_name,
+                        ','.join([str(p.id) for p in old_values]),
+                        ','.join([str(p.id) for p in new_values]))
+                    monitoring_utils.set_custom_metric("temp_tpa_by_backend_dark_launch_mismatch", data)
+            except:  # pylint: disable=bare-except
+                pass
+        except:  # pylint: disable=bare-except
+            monitoring_utils.increment("temp_tpa_by_backend_dark_launch_new_threw")
+
+        # Could return old_values, but lets keep it a generator for consistency
+        for config in old_values:
+            yield config
+
+    @classmethod
+    def _get_enabled_by_backend_name_old(cls, backend_name):
+        """Old implementation of get_enabled_by_backend_name"""
        if backend_name in _PSA_OAUTH2_BACKENDS:
            oauth2_backend_names = OAuth2ProviderConfig.key_values('backend_name', flat=True)
            for oauth2_backend_name in oauth2_backend_names:
@@ -131,3 +257,10 @@ class Registry(object):
                provider = LTIProviderConfig.current(consumer_key)
                if provider.backend_name == backend_name and provider.enabled_for_current_site:
                    yield provider
+
+    @classmethod
+    def _get_enabled_by_backend_name_new(cls, backend_name):
+        """New implementation of get_enabled_by_backend_name for dark launch"""
+        for provider in cls._enabled_providers():
+            if provider.backend_name == backend_name:
+                yield provider
--- a/common/djangoapps/third_party_auth/tests/test_provider.py
+++ b/common/djangoapps/third_party_auth/tests/test_provider.py
@@ -147,6 +147,48 @@ class RegistryTest(testutil.TestCase):
        prov = self.configure_google_provider(visible=True, enabled=True, site=site_b)
        self.assertEqual(prov.enabled_for_current_site, False)

+    @with_site_configuration(SITE_DOMAIN_A)
+    @patch('edx_django_utils.monitoring.set_custom_metric')
+    def test_providers_with_same_key_independent_across_sites(self, mock_set_custom_metric):
+        """
+        Verify that having two providers configured with the same key
+        but for different sites do not shadow each other on retrieval.
+        """
+        site_a = Site.objects.get_or_create(domain=SITE_DOMAIN_A, name=SITE_DOMAIN_A)[0]
+        site_b = Site.objects.get_or_create(domain=SITE_DOMAIN_B, name=SITE_DOMAIN_B)[0]
+
+        self.enable_saml(site=site_a)
+        self.enable_saml(site=site_b)
+        configs = [
+            self.configure_saml_provider(enabled=True, site=site_b, slug="x", name="Site B first"),
+            self.configure_saml_provider(enabled=True, site=site_a, slug="x", name="Site A first"),
+            self.configure_saml_provider(enabled=True, site=site_a, slug="x", name="Site A second"),
+            self.configure_saml_provider(enabled=True, site=site_b, slug="x", name="Site B second"),
+        ]
+
+        # If sites are not partitioned in retrieval, a site_b record can appear
+        # to be the "current" config, and then be ignored by site_a (resulting
+        # in zero enabled providers for site_a).
+
+        # TODO(ARCHBOM-1139) Uncomment when finished with dark launch in provider.py
+        # self.assertEqual([p.name for p in provider.Registry.enabled()], ["Site A second"])
+        # Until then, just run the generator to its end and show that the dark launch would complain here
+        list(provider.Registry.enabled())
+        mock_set_custom_metric.assert_any_call('temp_tpa_enabled_all_dark_launch_mismatch',
+                                               'old[]new[%s]' % configs[2].id)
+
+    def test_mixed_providers(self):
+        """
+        Verify that multiple providers types can be configured at same time.
+        """
+        self.enable_saml()
+        self.configure_saml_provider(enabled=True, slug="saml-slug", name="Some SAML")
+        self.configure_google_provider(enabled=True)
+        self.configure_lti_provider(enabled=True)
+
+        configs = provider.Registry.enabled()
+        self.assertEqual({p.backend_name for p in configs}, {'tpa-saml', 'google-oauth2', 'lti'})
+
    def test_get_returns_enabled_provider(self):
        google_provider = self.configure_google_provider(enabled=True)
        self.assertEqual(google_provider.id, provider.Registry.get(google_provider.provider_id).id)
--- a/requirements/edx/base.in
+++ b/requirements/edx/base.in
@@ -103,6 +103,7 @@ mailsnake                           # Needed for mailchimp (mailing djangoapp)
 mako==1.0.2                         # Primary template language used for server-side page rendering
 Markdown                            # Convert text markup to HTML; used in capa problems, forums, and course wikis
 mongoengine==0.10.0                 # Object-document mapper for MongoDB, used in the LMS dashboard
+more-itertools                      # Additional utilities for working with iterators
 mysqlclient                         # Driver for the default production relational database
 newrelic                            # New Relic agent for performance monitoring
 nodeenv                             # Utility for managing Node.js environments; we use this for deployments and testing
--- a/requirements/edx/base.txt
+++ b/requirements/edx/base.txt
@@ -160,7 +160,7 @@ maxminddb==1.5.3          # via geoip2
 mock==3.0.5               # via -c requirements/edx/../constraints.txt, -r requirements/edx/paver.txt, xblock-drag-and-drop-v2, xblock-poll
 git+https://github.com/edx/MongoDBProxy.git@d92bafe9888d2940f647a7b2b2383b29c752f35a#egg=MongoDBProxy==0.1.0+edx.2  # via -r requirements/edx/github.in
 mongoengine==0.10.0       # via -r requirements/edx/base.in
-more-itertools==8.2.0     # via -r requirements/edx/paver.txt, zipp
+more-itertools==8.2.0     # via -r requirements/edx/base.in, -r requirements/edx/paver.txt, zipp
 mpmath==1.1.0             # via sympy
 mysqlclient==1.4.6        # via -r requirements/edx/base.in
 newrelic==5.12.1.141      # via -r requirements/edx/base.in, edx-django-utils