# diff --git a/openedx/core/djangoapps/content/course_overviews/management/commands/simulate_publish.py b/openedx/core/djangoapps/content/course_overviews/management/commands/simulate_publish.py
# new file mode 100644
# index 0000000000..cd2dd76639
# --- /dev/null
# +++ b/openedx/core/djangoapps/content/course_overviews/management/commands/simulate_publish.py
# @@ -0,0 +1,308 @@
"""
Many apps in the LMS maintain their own optimized data structures that they
update whenever a course publish is detected. To do this, they listen for the
SignalHandler.course_published signal. Sometimes we want to rebuild the data on
these apps regardless of an actual change in course content, either to recover
from a bug or to bootstrap a new app we're rolling out for the first time. To
date, each app has implemented its own management command for this kind of
bootstrapping work (e.g. generate_course_overviews, generate_course_blocks).

This management command will emit the SignalHandler.course_published signal for
some subset of courses and signal listeners, and then rely on existing listener
behavior to trigger the necessary data updates.
"""
from __future__ import print_function
import copy
import logging
import os
import sys
import textwrap
import time
import weakref

from django.core.management.base import BaseCommand
from opaque_keys import InvalidKeyError
from opaque_keys.edx.keys import CourseKey

from lms.djangoapps.ccx.tasks import course_published_handler as ccx_receiver_fn
from xmodule.modulestore.django import modulestore, SignalHandler


log = logging.getLogger('simulate_publish')


class Command(BaseCommand):
    r"""
    Emit course_published for a chosen set of courses and receivers.

    Example usage:

        # Send the course_published signal to all receivers and courses with 10
        # seconds between courses. We might use a delay like this to make sure we
        # don't flood the queue and unnecessarily delay normal publishing via
        # Studio.
        $ ./manage.py lms --settings=devstack_docker simulate_publish --delay 10

        # Find all available receivers
        $ ./manage.py lms --settings=devstack_docker simulate_publish --show-receivers

        # Send the publish signal to two courses and two receivers
        $ ./manage.py lms --settings=devstack_docker simulate_publish --receivers \
            openedx.core.djangoapps.content.course_overviews.signals._listen_for_course_publish \
            openedx.core.djangoapps.bookmarks.signals.trigger_update_xblocks_cache_task \
            --courses course-v1:edX+DemoX+Demo_Course edX/MODULESTORE_100/2018

    A Dry Run will produce output that looks like:

        DRY-RUN: This command would have sent course_published to...
        1 Receivers:
            openedx.core.djangoapps.content.course_overviews.signals._listen_for_course_publish
        27 Courses:
            course-v1:DEV_153+A2E_CHINESE+JAN2018
            course-v1:edX+100+MITPhysics
            course-v1:edX+DemoX+Demo_Course
            course-v1:edX+E2E-101+course
            course-v1:edX+MEMORY+2018
            course-v1:edX+MK101+2018
            edX/MODULESTORE_100/2018_1
            edX/MODULESTORE_100/2018_2
            edX/MODULESTORE_100/2018_3
            edX/MODULESTORE_100/2018_4
            (+ 17 more)
    """
    help = (
        u"Simulate course publish signals without actually modifying course "
        u"content. This command is useful for triggering various async tasks "
        u"that listen for course_published signals."
    )

    # Having this be a class attribute makes it easier to substitute during
    # tests, and thereby avoid global side-effects that will mysteriously fail
    # tests that need signal handling later on.
    course_published_signal = copy.copy(SignalHandler.course_published)

    def add_arguments(self, parser):
        """Register the command line flags understood by this command."""
        parser.add_argument(
            '--show-receivers',
            dest='show_receivers',
            action='store_true',
            help=(u'Display the list of possible receiver functions and exit.')
        )
        parser.add_argument(
            '--dry-run',
            dest='dry_run',
            action='store_true',
            help=(
                u"Just show a preview of what would happen. This may make an "
                u"expensive modulestore query to find courses, but it will "
                u"not emit any signals."
            )
        )
        parser.add_argument(
            '--receivers',
            dest='receivers',
            action='store',
            nargs='+',
            help=(
                u'Send course_published to specific receivers. If this flag is '
                u'not present, course_published will be sent to all receivers. '
                u'The CCX receiver is always included unless --skip-ccx is '
                u'explicitly passed (otherwise CCX courses would never get '
                u'called for any signal).'
            )
        )
        parser.add_argument(
            '--courses',
            dest='courses',
            action='store',
            nargs='+',
            help=(
                u'Send course_published for specific courses. If this flag is '
                u'not present, course_published will be sent to all courses.'
            )
        )
        parser.add_argument(
            '--delay',
            dest='delay',
            action='store',
            type=int,
            default=0,
            help=(
                u"Number of seconds to sleep between emitting course_published "
                u"signals, so that we don't flood our queues."
            )
        )
        parser.add_argument(
            '--force-lms',
            dest='force_lms',
            action='store_true',
            help=(
                u"This command should be run under cms (Studio), not LMS. "
                u"Regular publishes happen via Studio, and this script will "
                u"exit with an error if you attempt to run it in an LMS "
                u"process. However, if you know what you're doing and need to "
                u"override that behavior, use this flag."
            )
        )
        parser.add_argument(
            '--skip-ccx',
            dest='skip_ccx',
            action='store_true',
            help=(
                u"CCX receivers are special echoing receivers that relay "
                u"the course_published signal to all CCX courses derived from "
                u"a modulestore-stored course. That means we almost always "
                u"want to emit to them (even when using --receivers), or none "
                u"of our signals will reach any CCX derived courses. However, "
                u"if you know what you're doing, you can disable this behavior "
                u"with this flag, so that CCX receivers are omitted."
            )
        )

    def handle(self, *args, **options):
        """
        Run the command: validate options, trim receivers, emit signals.

        Exits the process with status 1 if run under an LMS service variant
        without --force-lms, or if a receiver name or course key is invalid.
        """
        if options['show_receivers']:
            return self.print_show_receivers()

        log.info(
            "simulate_publish starting, dry-run=%s, delay=%d seconds",
            options['dry_run'],
            options['delay']
        )

        # When SERVICE_VARIANT is unset we assume cms, i.e. we allow the run.
        if os.environ.get('SERVICE_VARIANT', 'cms').startswith('lms'):
            if options['force_lms']:
                log.info("Forcing simulate_publish to run in LMS process.")
            else:
                log.fatal(
                    "simulate_publish should be run as a CMS (Studio) "
                    "command, not %s (override with --force-lms).",
                    os.environ.get('SERVICE_VARIANT')
                )
                sys.exit(1)

        if options['receivers']:
            self.modify_receivers(options['receivers'], options['skip_ccx'])
        elif options['skip_ccx']:
            log.info("Disconnecting CCX handler (--skip-ccx is True)")
            self.course_published_signal.disconnect(ccx_receiver_fn)

        course_keys = self.get_course_keys(options['courses'])

        if options['dry_run']:
            return self.print_dry_run(course_keys)

        # Now that our signal receivers and courses are set up properly, do the
        # actual work of emitting signals.
        for i, course_key in enumerate(course_keys, start=1):
            log.info(
                "Emitting course_published signal (%d of %d) for course %s",
                i, len(course_keys), course_key
            )
            if options['delay']:
                time.sleep(options['delay'])
            responses = self.course_published_signal.send_robust(sender=self, course_key=course_key)
            # send_robust() catches receiver exceptions and hands them back as
            # the response value; surface them so failures are not silent.
            for receiver_fn, response in responses or []:
                if isinstance(response, Exception):
                    log.error(
                        "Receiver %s raised an error for course %s: %r",
                        name_from_fn(receiver_fn), course_key, response
                    )

    def modify_receivers(self, receiver_names, skip_ccx):
        """
        Modify our signal to only have the user-specified receivers.

        This method modifies the process global SignalHandler.course_published
        to disconnect any receivers that were not in the `receiver_names` list.
        If any of the receiver_names is not found (i.e. is not in the list of
        receivers printed in self.print_show_receivers), it is a fatal error and
        we will exit the process.

        The CCX receiver is left connected even when it is not listed, unless
        `skip_ccx` is True.
        """
        all_receiver_names = get_receiver_names()
        receiver_names_set = set(receiver_names)
        unknown_receiver_names = receiver_names_set - all_receiver_names
        if unknown_receiver_names:
            log.fatal(
                "The following receivers were specified but not recognized: %s",
                ", ".join(sorted(unknown_receiver_names))
            )
            log.fatal("Known receivers: %s", ", ".join(sorted(all_receiver_names)))
            sys.exit(1)
        log.info("%d receivers specified: %s", len(receiver_names), ", ".join(receiver_names))
        for receiver_fn in get_receiver_fns():
            if receiver_fn == ccx_receiver_fn and not skip_ccx:
                continue
            fn_name = name_from_fn(receiver_fn)
            if fn_name not in receiver_names_set:
                log.info("Disconnecting %s", fn_name)
                self.course_published_signal.disconnect(receiver_fn)

    def get_course_keys(self, courses):
        """
        Return a list of CourseKeys that we will emit signals to.

        `courses` is an optional list of strings that can be parsed into
        CourseKeys. If `courses` is empty or None, we will default to returning
        all courses in the modulestore (which can be very expensive). If one of
        the strings passed in the list for `courses` does not parse correctly,
        it is a fatal error and will cause us to exit the entire process.
        """
        # Use specific courses if specified, but fall back to all courses.
        course_keys = []
        if courses:
            log.info("%d courses specified: %s", len(courses), ", ".join(courses))
            for course_id in courses:
                try:
                    course_keys.append(CourseKey.from_string(course_id))
                except InvalidKeyError:
                    log.fatal("%s is not a parseable CourseKey", course_id)
                    sys.exit(1)
        else:
            log.info("No courses specified, reading all courses from modulestore...")
            course_keys = sorted(
                (course.id for course in modulestore().get_course_summaries()),
                key=unicode  # Different types of CourseKeys can't be compared without this.
            )
            log.info("%d courses read from modulestore.", len(course_keys))

        return course_keys

    def print_show_receivers(self):
        """Print receivers with accompanying docstrings for context."""
        receivers = {name_from_fn(fn): fn for fn in get_receiver_fns()}
        print(len(receivers), "receivers found:")
        for receiver_name, receiver_fn in sorted(receivers.items()):
            print(" ", receiver_name)
            docstring = textwrap.dedent(receiver_fn.__doc__ or "[No docstring]").strip()
            for line in docstring.split('\n'):
                print(" ", line)

    def print_dry_run(self, course_keys):
        """Give a preview of what courses and signals we will emit to."""
        print("DRY-RUN: This command would have sent course_published to...")
        dry_run_receiver_names = sorted(get_receiver_names())
        ccx_receiver_name = name_from_fn(ccx_receiver_fn)
        print(len(dry_run_receiver_names), "Receivers:")
        for name in dry_run_receiver_names:
            if name == ccx_receiver_name:
                print(" ", name, "(automatically added, use --skip-ccx to omit)")
            else:
                print(" ", name)
        # Only preview the first few courses; the full list can be very long.
        max_courses_shown = 10
        print(len(course_keys), "Courses:")
        for course_key in course_keys[:max_courses_shown]:
            print(" ", course_key)
        if len(course_keys) > max_courses_shown:
            print(" (+ {} more)".format(len(course_keys) - max_courses_shown))


def get_receiver_names():
    """Return an unordered set of receiver names (full.module.path.function)"""
    return set(name_from_fn(fn) for fn in get_receiver_fns())


def get_receiver_fns():
    """
    Return the list of live receiver functions connected to our signal.

    Entries in the signal's receiver list are either weak references (the
    default) or, for receivers connected with weak=False, the callable itself.
    Only weak references are dereferenced, and ones whose target has already
    been garbage collected are skipped.
    """
    receiver_fns = []
    for _, receiver in Command.course_published_signal.receivers:
        if isinstance(receiver, weakref.ReferenceType):
            receiver = receiver()  # Dereference; None means the target is gone.
            if receiver is None:
                continue
        receiver_fns.append(receiver)
    return receiver_fns


def name_from_fn(fn):
    """Human readable module.function name."""
    return u"{}.{}".format(fn.__module__, fn.__name__)


# diff --git a/openedx/core/djangoapps/content/course_overviews/management/commands/tests/test_simulate_publish.py
# b/openedx/core/djangoapps/content/course_overviews/management/commands/tests/test_simulate_publish.py
# new file mode 100644
# index 0000000000..459db8ef56
# --- /dev/null
# +++ b/openedx/core/djangoapps/content/course_overviews/management/commands/tests/test_simulate_publish.py
# @@ -0,0 +1,147 @@
"""
Tests the simulate_publish management command.
"""
from openedx.core.djangoapps.content.course_overviews.management.commands.simulate_publish import (
    Command, name_from_fn
)
from openedx.core.djangoapps.content.course_overviews.models import CourseOverview
import openedx.core.djangoapps.content.course_overviews.signals
import lms.djangoapps.ccx.tasks
from xmodule.modulestore import ModuleStoreEnum
from xmodule.modulestore.django import SwitchedSignal
from xmodule.modulestore.tests.django_utils import SharedModuleStoreTestCase
from xmodule.modulestore.tests.factories import CourseFactory


class TestSimulatePublish(SharedModuleStoreTestCase):
    """Test simulate_publish, our fake course-publish signal command."""

    @classmethod
    def setUpClass(cls):
        """
        Create courses in modulestore.

        Modulestore signals are suppressed by ModuleStoreIsolationMixin, so this
        method should not trigger things like CourseOverview creation.
        """
        super(TestSimulatePublish, cls).setUpClass()
        cls.command = Command()
        # org.0/course_0/Run_0
        cls.course_key_1 = CourseFactory.create(default_store=ModuleStoreEnum.Type.mongo).id
        # course-v1:org.1+course_1+Run_1
        cls.course_key_2 = CourseFactory.create(default_store=ModuleStoreEnum.Type.split).id
        # course-v1:org.2+course_2+Run_2
        cls.course_key_3 = CourseFactory.create(default_store=ModuleStoreEnum.Type.split).id

    def setUp(self):
        """
        Most of this is isolating and re-initializing our signal handler. It
        might look like you can move this to setUpClass, but be very careful if
        doing so, to make sure side-effects don't leak out between tests.
        """
        super(TestSimulatePublish, self).setUp()

        # Remember the signal the command class had before this test so that
        # tearDown can put it back; otherwise our test signal would stay
        # installed on Command for every test that runs after us.
        self._original_signal = Command.course_published_signal

        # Instead of using the process global SignalHandler.course_published, we
        # create our own SwitchedSignal to manually send to.
        Command.course_published_signal = SwitchedSignal('test_course_publish')

        # Course Overviews Handler
        # pylint: disable=protected-access
        Command.course_published_signal.connect(
            openedx.core.djangoapps.content.course_overviews.signals._listen_for_course_publish
        )
        # CCX Handler
        Command.course_published_signal.connect(
            lms.djangoapps.ccx.tasks.course_published_handler
        )
        Command.course_published_signal.connect(self.sample_receiver_1)
        Command.course_published_signal.connect(self.sample_receiver_2)

        self.received_1 = []
        self.received_2 = []

    def tearDown(self):
        """Clean up our signals and restore the command's original signal."""
        # pylint: disable=protected-access
        Command.course_published_signal.disconnect(
            openedx.core.djangoapps.content.course_overviews.signals._listen_for_course_publish
        )
        Command.course_published_signal.disconnect(
            lms.djangoapps.ccx.tasks.course_published_handler
        )
        Command.course_published_signal.disconnect(self.sample_receiver_1)
        Command.course_published_signal.disconnect(self.sample_receiver_2)
        Command.course_published_signal = self._original_signal
        super(TestSimulatePublish, self).tearDown()

    def options(self, **kwargs):
        """
        Return an options dict that can be passed to self.command.handle()

        Passed in **kwargs will override existing defaults. Most defaults are
        the same as they are for running the management command manually (e.g.
        dry_run is False, show_receivers is False), except that the list of
        receivers is by default limited to the two that exist in this test
        class. We do this to keep these tests faster and more self contained.
        """
        default_receivers = [
            name_from_fn(self.sample_receiver_1),
            name_from_fn(self.sample_receiver_2),
        ]
        default_options = dict(
            show_receivers=False,
            dry_run=False,
            receivers=default_receivers,
            courses=None,
            delay=0,
            force_lms=False,
            skip_ccx=False,
        )
        default_options.update(kwargs)
        return default_options

    def test_specific_courses(self):
        """Test sending only to specific courses."""
        self.command.handle(
            **self.options(
                courses=[unicode(self.course_key_1), unicode(self.course_key_2)]
            )
        )
        self.assertIn(self.course_key_1, self.received_1)
        self.assertIn(self.course_key_2, self.received_1)
        self.assertNotIn(self.course_key_3, self.received_1)
        self.assertEqual(self.received_1, self.received_2)

    def test_specific_receivers(self):
        """Test sending only to specific receivers."""
        self.command.handle(
            **self.options(
                receivers=[name_from_fn(self.sample_receiver_1)]
            )
        )
        self.assertIn(self.course_key_1, self.received_1)
        self.assertIn(self.course_key_2, self.received_1)
        self.assertIn(self.course_key_3, self.received_1)
        self.assertEqual(self.received_2, [])

    def test_course_overviews(self):
        """Integration test with CourseOverviews."""
        self.assertEqual(CourseOverview.objects.all().count(), 0)
        # pylint: disable=protected-access
        self.command.handle(
            **self.options(
                receivers=[
                    name_from_fn(openedx.core.djangoapps.content.course_overviews.signals._listen_for_course_publish)
                ]
            )
        )
        self.assertEqual(CourseOverview.objects.all().count(), 3)
        self.assertEqual(self.received_1, [])
        self.assertEqual(self.received_2, [])

    def sample_receiver_1(self, sender, course_key, **kwargs):  # pylint: disable=unused-argument
        """Custom receiver for testing."""
        self.received_1.append(course_key)

    def sample_receiver_2(self, sender, course_key, **kwargs):  # pylint: disable=unused-argument
        """Custom receiver for testing."""
        self.received_2.append(course_key)