Merge pull request #14399 from edx/ormsbee/csm_crawler_locking
Disable student state writes for crawlers.
This commit is contained in:
@@ -687,7 +687,7 @@ class FieldDataCache(object):
|
||||
A cache of django model objects needed to supply the data
|
||||
for a module and its descendants
|
||||
"""
|
||||
def __init__(self, descriptors, course_id, user, select_for_update=False, asides=None):
|
||||
def __init__(self, descriptors, course_id, user, asides=None, read_only=False):
|
||||
"""
|
||||
Find any courseware.models objects that are needed by any descriptor
|
||||
in descriptors. Attempts to minimize the number of queries to the database.
|
||||
@@ -698,8 +698,8 @@ class FieldDataCache(object):
|
||||
descriptors: A list of XModuleDescriptors.
|
||||
course_id: The id of the current course
|
||||
user: The user for which to cache data
|
||||
select_for_update: Ignored
|
||||
asides: The list of aside types to load, or None to prefetch no asides.
|
||||
read_only: We should not perform writes (they become a no-op).
|
||||
"""
|
||||
if asides is None:
|
||||
self.asides = []
|
||||
@@ -709,6 +709,7 @@ class FieldDataCache(object):
|
||||
assert isinstance(course_id, CourseKey)
|
||||
self.course_id = course_id
|
||||
self.user = user
|
||||
self.read_only = read_only
|
||||
|
||||
self.cache = {
|
||||
Scope.user_state: UserStateCache(
|
||||
@@ -783,7 +784,7 @@ class FieldDataCache(object):
|
||||
@classmethod
|
||||
def cache_for_descriptor_descendents(cls, course_id, user, descriptor, depth=None,
|
||||
descriptor_filter=lambda descriptor: True,
|
||||
select_for_update=False, asides=None):
|
||||
asides=None, read_only=False):
|
||||
"""
|
||||
course_id: the course in the context of which we want StudentModules.
|
||||
user: the django user for whom to load modules.
|
||||
@@ -792,9 +793,8 @@ class FieldDataCache(object):
|
||||
the supplied descriptor. If depth is None, load all descendant StudentModules
|
||||
descriptor_filter is a function that accepts a descriptor and return whether the field data
|
||||
should be cached
|
||||
select_for_update: Ignored
|
||||
"""
|
||||
cache = FieldDataCache([], course_id, user, select_for_update, asides=asides)
|
||||
cache = FieldDataCache([], course_id, user, asides=asides, read_only=read_only)
|
||||
cache.add_descriptor_descendents(descriptor, depth, descriptor_filter)
|
||||
return cache
|
||||
|
||||
@@ -840,6 +840,8 @@ class FieldDataCache(object):
|
||||
kv_dict (dict): dict mapping from `DjangoKeyValueStore.Key`s to field values
|
||||
Raises: DatabaseError if any fields fail to save
|
||||
"""
|
||||
if self.read_only:
|
||||
return
|
||||
|
||||
saved_fields = []
|
||||
by_scope = defaultdict(dict)
|
||||
@@ -875,6 +877,8 @@ class FieldDataCache(object):
|
||||
|
||||
Raises: KeyError if key isn't found in the cache
|
||||
"""
|
||||
if self.read_only:
|
||||
return
|
||||
|
||||
if key.scope.user == UserScope.ONE and not self.user.is_anonymous():
|
||||
# If we're getting user data, we expect that the key matches the
|
||||
|
||||
@@ -49,6 +49,7 @@ from lms.djangoapps.lms_xblock.models import XBlockAsidesConfig
|
||||
from lms.djangoapps.lms_xblock.runtime import LmsModuleSystem
|
||||
from lms.djangoapps.verify_student.services import VerificationService, ReverificationService
|
||||
from openedx.core.djangoapps.bookmarks.services import BookmarksService
|
||||
from openedx.core.djangoapps.crawlers.models import CrawlersConfig
|
||||
from openedx.core.djangoapps.credit.services import CreditService
|
||||
from openedx.core.djangoapps.util.user_utils import SystemUser
|
||||
from openedx.core.lib.xblock_utils import (
|
||||
@@ -917,7 +918,8 @@ def get_module_by_usage_id(request, course_id, usage_id, disable_staff_debug_inf
|
||||
field_data_cache = FieldDataCache.cache_for_descriptor_descendents(
|
||||
course_id,
|
||||
user,
|
||||
descriptor
|
||||
descriptor,
|
||||
read_only=CrawlersConfig.is_crawler(request),
|
||||
)
|
||||
instance = get_module_for_descriptor(
|
||||
user,
|
||||
|
||||
@@ -50,6 +50,7 @@ from lms.djangoapps.commerce.utils import EcommerceService # pylint: disable=im
|
||||
from milestones.tests.utils import MilestonesTestCaseMixin
|
||||
from openedx.core.djangoapps.self_paced.models import SelfPacedConfiguration
|
||||
from openedx.core.lib.gating import api as gating_api
|
||||
from openedx.core.djangoapps.crawlers.models import CrawlersConfig
|
||||
from student.models import CourseEnrollment
|
||||
from student.tests.factories import AdminFactory, UserFactory, CourseEnrollmentFactory
|
||||
from util.tests.test_date_utils import fake_ugettext, fake_pgettext
|
||||
@@ -58,7 +59,7 @@ from util.views import ensure_valid_course_key
|
||||
from xmodule.modulestore import ModuleStoreEnum
|
||||
from xmodule.modulestore.django import modulestore
|
||||
from xmodule.modulestore.tests.django_utils import TEST_DATA_MIXED_MODULESTORE
|
||||
from xmodule.modulestore.tests.django_utils import ModuleStoreTestCase
|
||||
from xmodule.modulestore.tests.django_utils import ModuleStoreTestCase, SharedModuleStoreTestCase
|
||||
from xmodule.modulestore.tests.factories import CourseFactory, ItemFactory, check_mongo_calls
|
||||
from openedx.core.djangoapps.credit.api import set_credit_requirements
|
||||
from openedx.core.djangoapps.credit.models import CreditCourse, CreditProvider
|
||||
@@ -2065,3 +2066,83 @@ class TestRenderXBlockSelfPaced(TestRenderXBlock):
|
||||
|
||||
def course_options(self):
|
||||
return {'self_paced': True}
|
||||
|
||||
|
||||
class TestIndexViewCrawlerStudentStateWrites(SharedModuleStoreTestCase):
|
||||
"""
|
||||
Ensure that courseware index requests do not trigger student state writes.
|
||||
|
||||
This is to prevent locking issues that have caused latency spikes in the
|
||||
courseware_studentmodule table when concurrent requests each try to update
|
||||
the same rows for sequence, section, and course positions.
|
||||
"""
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
"""Set up the simplest course possible."""
|
||||
# setUpClassAndTestData() already calls setUpClass on SharedModuleStoreTestCase
|
||||
# pylint: disable=super-method-not-called
|
||||
with super(TestIndexViewCrawlerStudentStateWrites, cls).setUpClassAndTestData():
|
||||
cls.course = CourseFactory.create()
|
||||
with cls.store.bulk_operations(cls.course.id):
|
||||
cls.chapter = ItemFactory.create(category='chapter', parent_location=cls.course.location)
|
||||
cls.section = ItemFactory.create(category='sequential', parent_location=cls.chapter.location)
|
||||
cls.vertical = ItemFactory.create(category='vertical', parent_location=cls.section.location)
|
||||
|
||||
@classmethod
|
||||
def setUpTestData(cls):
|
||||
"""Set up and enroll our fake user in the course."""
|
||||
cls.password = 'test'
|
||||
cls.user = UserFactory(password=cls.password)
|
||||
CourseEnrollment.enroll(cls.user, cls.course.id)
|
||||
|
||||
def setUp(self):
|
||||
"""Do the client login."""
|
||||
super(TestIndexViewCrawlerStudentStateWrites, self).setUp()
|
||||
self.client.login(username=self.user.username, password=self.password)
|
||||
|
||||
def test_write_by_default(self):
|
||||
"""By default, always write student state, regardless of user agent."""
|
||||
with patch('courseware.model_data.UserStateCache.set_many') as patched_state_client_set_many:
|
||||
# Simulate someone using Chrome
|
||||
self._load_courseware('Mozilla/5.0 AppleWebKit/537.36')
|
||||
self.assertTrue(patched_state_client_set_many.called)
|
||||
patched_state_client_set_many.reset_mock()
|
||||
|
||||
# Common crawler user agent
|
||||
self._load_courseware('edX-downloader/0.1')
|
||||
self.assertTrue(patched_state_client_set_many.called)
|
||||
|
||||
def test_writes_with_config(self):
|
||||
"""Test state writes (or lack thereof) based on config values."""
|
||||
CrawlersConfig.objects.create(known_user_agents='edX-downloader,crawler_foo', enabled=True)
|
||||
with patch('courseware.model_data.UserStateCache.set_many') as patched_state_client_set_many:
|
||||
# Exact matching of crawler user agent
|
||||
self._load_courseware('crawler_foo')
|
||||
self.assertFalse(patched_state_client_set_many.called)
|
||||
|
||||
# Partial matching of crawler user agent
|
||||
self._load_courseware('edX-downloader/0.1')
|
||||
self.assertFalse(patched_state_client_set_many.called)
|
||||
|
||||
# Simulate an actual browser hitting it (we should write)
|
||||
self._load_courseware('Mozilla/5.0 AppleWebKit/537.36')
|
||||
self.assertTrue(patched_state_client_set_many.called)
|
||||
|
||||
# Disabling the crawlers config should revert us to default behavior
|
||||
CrawlersConfig.objects.create(enabled=False)
|
||||
self.test_write_by_default()
|
||||
|
||||
def _load_courseware(self, user_agent):
|
||||
"""Helper to load the actual courseware page."""
|
||||
url = reverse(
|
||||
'courseware_section',
|
||||
kwargs={
|
||||
'course_id': unicode(self.course.id),
|
||||
'chapter': unicode(self.chapter.location.name),
|
||||
'section': unicode(self.section.location.name),
|
||||
}
|
||||
)
|
||||
response = self.client.get(url, HTTP_USER_AGENT=user_agent)
|
||||
# Make sure we get back an actual 200, and aren't redirected because we
|
||||
# messed up the setup somehow (e.g. didn't enroll properly)
|
||||
self.assertEqual(response.status_code, 200)
|
||||
|
||||
@@ -26,6 +26,7 @@ from xblock.fragment import Fragment
|
||||
from opaque_keys.edx.keys import CourseKey
|
||||
from openedx.core.djangoapps.lang_pref import LANGUAGE_KEY
|
||||
from openedx.core.djangoapps.user_api.preferences.api import get_user_preference
|
||||
from openedx.core.djangoapps.crawlers.models import CrawlersConfig
|
||||
from shoppingcart.models import CourseRegistrationCode
|
||||
from student.models import CourseEnrollment
|
||||
from student.views import is_course_blocked
|
||||
@@ -101,7 +102,7 @@ class CoursewareIndex(View):
|
||||
self.course = get_course_with_access(request.user, 'load', self.course_key, depth=CONTENT_DEPTH)
|
||||
self.is_staff = has_access(request.user, 'staff', self.course)
|
||||
self._setup_masquerade_for_effective_user()
|
||||
return self._get()
|
||||
return self._get(request)
|
||||
except Redirect as redirect_error:
|
||||
return redirect(redirect_error.url)
|
||||
except UnicodeEncodeError:
|
||||
@@ -127,12 +128,12 @@ class CoursewareIndex(View):
|
||||
# Set the user in the request to the effective user.
|
||||
self.request.user = self.effective_user
|
||||
|
||||
def _get(self):
|
||||
def _get(self, request):
|
||||
"""
|
||||
Render the index page.
|
||||
"""
|
||||
self._redirect_if_needed_to_access_course()
|
||||
self._prefetch_and_bind_course()
|
||||
self._prefetch_and_bind_course(request)
|
||||
|
||||
if self.course.has_children_at_depth(CONTENT_DEPTH):
|
||||
self._reset_section_to_exam_if_required()
|
||||
@@ -324,13 +325,17 @@ class CoursewareIndex(View):
|
||||
if self.chapter:
|
||||
return self._find_block(self.chapter, self.section_url_name, 'section')
|
||||
|
||||
def _prefetch_and_bind_course(self):
|
||||
def _prefetch_and_bind_course(self, request):
|
||||
"""
|
||||
Prefetches all descendant data for the requested section and
|
||||
sets up the runtime, which binds the request user to the section.
|
||||
"""
|
||||
self.field_data_cache = FieldDataCache.cache_for_descriptor_descendents(
|
||||
self.course_key, self.effective_user, self.course, depth=CONTENT_DEPTH,
|
||||
self.course_key,
|
||||
self.effective_user,
|
||||
self.course,
|
||||
depth=CONTENT_DEPTH,
|
||||
read_only=CrawlersConfig.is_crawler(request),
|
||||
)
|
||||
|
||||
self.course = get_module_for_descriptor(
|
||||
|
||||
@@ -2160,6 +2160,9 @@ INSTALLED_APPS = (
|
||||
# Customized celery tasks, including persisting failed tasks so they can
|
||||
# be retried
|
||||
'openedx.core.djangoapps.celery_utils',
|
||||
|
||||
# Ability to detect and special-case crawler behavior
|
||||
'openedx.core.djangoapps.crawlers',
|
||||
)
|
||||
|
||||
# Migrations which are not in the standard module "migrations"
|
||||
|
||||
0
openedx/core/djangoapps/crawlers/__init__.py
Normal file
0
openedx/core/djangoapps/crawlers/__init__.py
Normal file
7
openedx/core/djangoapps/crawlers/admin.py
Normal file
7
openedx/core/djangoapps/crawlers/admin.py
Normal file
@@ -0,0 +1,7 @@
|
||||
"""Admin panel for configuring which user agents we consider to be Crawlers."""
|
||||
from django.contrib import admin
|
||||
|
||||
from config_models.admin import ConfigurationModelAdmin
|
||||
from .models import CrawlersConfig
|
||||
|
||||
admin.site.register(CrawlersConfig, ConfigurationModelAdmin)
|
||||
30
openedx/core/djangoapps/crawlers/migrations/0001_initial.py
Normal file
30
openedx/core/djangoapps/crawlers/migrations/0001_initial.py
Normal file
@@ -0,0 +1,30 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from django.db import migrations, models
|
||||
import django.db.models.deletion
|
||||
from django.conf import settings
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.CreateModel(
|
||||
name='CrawlersConfig',
|
||||
fields=[
|
||||
('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)),
|
||||
('change_date', models.DateTimeField(auto_now_add=True, verbose_name='Change date')),
|
||||
('enabled', models.BooleanField(default=False, verbose_name='Enabled')),
|
||||
('known_user_agents', models.TextField(default=b'edX-downloader', help_text=b'A comma-separated list of known crawler user agents.', blank=True)),
|
||||
('changed_by', models.ForeignKey(on_delete=django.db.models.deletion.PROTECT, editable=False, to=settings.AUTH_USER_MODEL, null=True, verbose_name='Changed by')),
|
||||
],
|
||||
options={
|
||||
'ordering': ('-change_date',),
|
||||
'abstract': False,
|
||||
},
|
||||
),
|
||||
]
|
||||
45
openedx/core/djangoapps/crawlers/models.py
Normal file
45
openedx/core/djangoapps/crawlers/models.py
Normal file
@@ -0,0 +1,45 @@
|
||||
"""
|
||||
This module handles the detection of crawlers, so that we can handle them
|
||||
appropriately in other parts of the code.
|
||||
"""
|
||||
from django.db import models
|
||||
|
||||
from config_models.models import ConfigurationModel
|
||||
|
||||
|
||||
class CrawlersConfig(ConfigurationModel):
|
||||
"""Configuration for the crawlers django app."""
|
||||
known_user_agents = models.TextField(
|
||||
blank=True,
|
||||
help_text="A comma-separated list of known crawler user agents.",
|
||||
default='edX-downloader',
|
||||
)
|
||||
|
||||
def __unicode__(self):
|
||||
return u'CrawlersConfig("{}")'.format(self.known_user_agents)
|
||||
|
||||
@classmethod
|
||||
def is_crawler(cls, request):
|
||||
"""Determine if the request came from a crawler or not.
|
||||
|
||||
This method is simplistic and only looks at the user agent header at the
|
||||
moment, but could later be improved to be smarter about detection.
|
||||
"""
|
||||
current = cls.current()
|
||||
if not current.enabled:
|
||||
return False
|
||||
|
||||
req_user_agent = request.META.get('HTTP_USER_AGENT')
|
||||
crawler_agents = current.known_user_agents.split(",")
|
||||
|
||||
# If there was no user agent detected or no crawler agents configured,
|
||||
# then just return False.
|
||||
if (not req_user_agent) or (not crawler_agents):
|
||||
return False
|
||||
|
||||
# We perform prefix matching of the crawler agent here so that we don't
|
||||
# have to worry about version bumps.
|
||||
return any(
|
||||
req_user_agent.startswith(crawler_agent)
|
||||
for crawler_agent in crawler_agents
|
||||
)
|
||||
0
openedx/core/djangoapps/crawlers/tests.py
Normal file
0
openedx/core/djangoapps/crawlers/tests.py
Normal file
Reference in New Issue
Block a user