From ac39692a8a34a40b1b9037e793a29caca6450c02 Mon Sep 17 00:00:00 2001 From: Braden MacDonald Date: Wed, 13 Nov 2019 16:51:49 -0800 Subject: [PATCH] Fix: don't store .children usage keys in definition data cache I needed this change because I found a bug: 1. Create a block with children in a content library 2. Delete that content library 3. Create a new identical block with children in a new content library. 4. If the OLX is identical to the original block, this new block will not load in the LMS. The reason for the bug is that the .children field contains usage keys (which encode the library, for example), but the values were being stored in BlockstoreFieldData which caches really aggressively and caches based on the hash of the OLX. Since the OLX is identical, it assumes the .children values should be identical as well. The fix was to move children to a children-specific field data store, and only store the part of the child data that is encoded by the OLX (the data) in BlockstoreFieldData. This is a better match for the way the caching works and cleaned up a hacky part of the runtime (at least it's slightly less hacky now). --- .../content_libraries/tests/test_runtime.py | 38 +++++- openedx/core/djangoapps/xblock/apps.py | 2 - .../xblock/runtime/blockstore_field_data.py | 109 ++++++++++++++++-- .../xblock/runtime/blockstore_runtime.py | 37 +----- .../core/djangoapps/xblock/runtime/runtime.py | 5 +- 5 files changed, 145 insertions(+), 46 deletions(-) diff --git a/openedx/core/djangoapps/content_libraries/tests/test_runtime.py b/openedx/core/djangoapps/content_libraries/tests/test_runtime.py index 43778856d7..92c742e95d 100644 --- a/openedx/core/djangoapps/content_libraries/tests/test_runtime.py +++ b/openedx/core/djangoapps/content_libraries/tests/test_runtime.py @@ -20,7 +20,7 @@ from openedx.core.djangoapps.content_libraries.tests.base import ( ) from openedx.core.djangoapps.content_libraries.tests.user_state_block import UserStateTestBlock from openedx.core.djangoapps.xblock import api as xblock_api -from openedx.core.djangolib.testing.utils import skip_unless_lms +from openedx.core.djangolib.testing.utils import skip_unless_lms, skip_unless_cms from openedx.core.lib import blockstore_api from student.tests.factories import UserFactory from xmodule.unit_block import UnitBlock @@ -62,13 +62,45 @@ class ContentLibraryRuntimeTest(ContentLibraryContentTestMixin, TestCase): content library. """ + @skip_unless_cms # creating child blocks only works properly in Studio + def test_identical_olx(self): + """ + Test library blocks with children that also have identical OLX. Since + the blockstore runtime caches authored field data based on the hash of + the OLX, this can catch some potential bugs, especially given that the + "children" field stores usage IDs, not definition IDs. + """ + # Create a unit containing a + unit_block_key = library_api.create_library_block(self.library.key, "unit", "u1").usage_key + library_api.create_library_block_child(unit_block_key, "problem", "p1") + library_api.publish_changes(self.library.key) + # Now do the same in a different library: + library2 = library_api.create_library( + collection_uuid=self.collection.uuid, + org=self.organization, + slug="idolx", + title=("Identical OLX Test Lib 2"), + description="", + ) + unit_block2_key = library_api.create_library_block(library2.key, "unit", "u1").usage_key + library_api.create_library_block_child(unit_block2_key, "problem", "p1") + library_api.publish_changes(library2.key) + # Load both blocks: + unit_block = xblock_api.load_block(unit_block_key, self.student_a) + unit_block2 = xblock_api.load_block(unit_block2_key, self.student_a) + self.assertEqual( + library_api.get_library_block_olx(unit_block_key), + library_api.get_library_block_olx(unit_block2_key), + ) + self.assertNotEqual(unit_block.children, unit_block2.children) + def test_has_score(self): """ Test that the LMS-specific 'has_score' attribute is getting added to blocks. """ - unit_block_key = library_api.create_library_block(self.library.key, "unit", "u1").usage_key - problem_block_key = library_api.create_library_block(self.library.key, "problem", "p1").usage_key + unit_block_key = library_api.create_library_block(self.library.key, "unit", "score-unit1").usage_key + problem_block_key = library_api.create_library_block(self.library.key, "problem", "score-prob1").usage_key library_api.publish_changes(self.library.key) unit_block = xblock_api.load_block(unit_block_key, self.student_a) problem_block = xblock_api.load_block(problem_block_key, self.student_a) diff --git a/openedx/core/djangoapps/xblock/apps.py b/openedx/core/djangoapps/xblock/apps.py index 5a4f72be43..04d9e76a00 100644 --- a/openedx/core/djangoapps/xblock/apps.py +++ b/openedx/core/djangoapps/xblock/apps.py @@ -5,10 +5,8 @@ Django app configuration for the XBlock Runtime django app from django.apps import AppConfig, apps from django.conf import settings -from xblock.runtime import DictKeyValueStore, KvsFieldData from openedx.core.djangoapps.site_configuration import helpers as configuration_helpers -from openedx.core.djangoapps.xblock.runtime.blockstore_field_data import BlockstoreFieldData class XBlockAppConfig(AppConfig): diff --git a/openedx/core/djangoapps/xblock/runtime/blockstore_field_data.py b/openedx/core/djangoapps/xblock/runtime/blockstore_field_data.py index 4b35abc687..d849bfcfb9 100644 --- a/openedx/core/djangoapps/xblock/runtime/blockstore_field_data.py +++ b/openedx/core/djangoapps/xblock/runtime/blockstore_field_data.py @@ -10,6 +10,7 @@ from xblock.exceptions import InvalidScopeError, NoSuchDefinition from xblock.fields import Field, BlockScope, Scope, UserScope, Sentinel from xblock.field_data import FieldData +from openedx.core.djangoapps.xblock.learning_context.manager import get_learning_context_impl from openedx.core.djangolib.blockstore_cache import ( get_bundle_version_files_cached, get_bundle_draft_files_cached, @@ -20,6 +21,7 @@ log = logging.getLogger(__name__) ActiveBlock = namedtuple('ActiveBlock', ['olx_hash', 'changed_fields']) DELETED = Sentinel('DELETED') # Special value indicating a field was reset to its default value +CHILDREN_INCLUDES = Sentinel('CHILDREN_INCLUDES') # Key for a pseudo-field that stores the XBlock's children info MAX_DEFINITIONS_LOADED = 100 # How many of the most recently used XBlocks' field data to keep in memory at max. @@ -120,14 +122,13 @@ class BlockstoreFieldData(FieldData): Given a block and the name of one of its fields, check that we will be able to read/write it. """ + if name == CHILDREN_INCLUDES: + return # This is a pseudo-field used in conjunction with BlockstoreChildrenData field = self._getfield(block, name) - if field.scope == Scope.children: - if name != 'children': - raise InvalidScopeError("Expect Scope.children only for field named 'children', not '{}'".format(name)) - elif field.scope == Scope.parent: - # This field data store is focused on definition-level field data, and parent is mostly - # relevant at the usage level. Luckily this doesn't even seem to be used? - raise NotImplementedError("Setting Scope.parent is not supported by BlockstoreFieldData.") + if field.scope in (Scope.children, Scope.parent): + # This field data store is focused on definition-level field data, and children/parent is mostly + # relevant at the usage level. Scope.parent doesn't even seem to be used? + raise NotImplementedError("Setting Scope.children/parent is not supported by BlockstoreFieldData.") else: if field.scope.user != UserScope.NONE: raise InvalidScopeError("BlockstoreFieldData only supports UserScope.NONE fields") @@ -172,7 +173,7 @@ class BlockstoreFieldData(FieldData): try: saved_fields = self.loaded_definitions[entry.olx_hash] except KeyError: - if name == 'children': + if name == CHILDREN_INCLUDES: # Special case: parse_xml calls add_node_as_child which calls 'block.children.append()' # BEFORE parse_xml is done, and .append() needs to read the value of children. So return [] # start with an empty list, it will get filled in. @@ -255,3 +256,95 @@ class BlockstoreFieldData(FieldData): # we have only half as many as MAX_DEFINITIONS_LOADED in memory, if possible. while olx_hashes_safe_to_delete and (len(self.loaded_definitions) > MAX_DEFINITIONS_LOADED / 2): del self.loaded_definitions[olx_hashes_safe_to_delete.pop()] + + +class BlockstoreChildrenData(FieldData): + """ + An XBlock FieldData implementation that reads 'children' data out of + the definition fields in BlockstoreFieldData. + + The children field contains usage keys and so is usage-specific; the + BlockstoreFieldData can only store field data that is not usage-specific. So + we store data about the elements that define the children + in BlockstoreFieldData (since that is not usage-specific), and this field + data implementation loads that data and transforms it + into the usage keys that comprise the standard .children field. + """ + def __init__(self, blockstore_field_data): + """ + Initialize this BlockstoreChildrenData instance. + """ + # The data store that holds Scope.usage and Scope.definition data: + self.authored_data_store = blockstore_field_data + super(BlockstoreChildrenData, self).__init__() + + def _check_field(self, block, name): # pylint: disable=unused-argument + """ + Given a block and the name of one of its fields, check that we will be + able to read/write it. + """ + if name != 'children': + raise InvalidScopeError("BlockstoreChildrenData can only read/write from a field named 'children'") + + def get(self, block, name): + """ + Get the "children' field value. + + We do this by reading the parsed values from + the regular authored data store and then transforming them to usage IDs. + """ + self._check_field(block, name) + children_includes = self.get_includes(block) + if not children_includes: + return [] + # Now the .children field is required to be a list of usage IDs: + learning_context = get_learning_context_impl(block.scope_ids.usage_id) + child_usages = [] + for parsed_include in children_includes: + child_usages.append( + learning_context.usage_for_child_include( + block.scope_ids.usage_id, block.scope_ids.def_id, parsed_include, + ) + ) + return child_usages + + def set(self, block, name, value): + """ + Set the value of the field; requires name='children' + """ + self._check_field(block, name) + children_includes = self.authored_data_store.get(block, CHILDREN_INCLUDES) + if len(value) != len(children_includes): + raise RuntimeError( + "This runtime does not allow changing .children directly - use runtime.add_child_include instead." + ) + # This is a no-op; the value of 'children' is derived from CHILDREN_INCLUDES + # so we never write to the children field directly. All we do is make sure it + # looks like it's still in sync with CHILDREN_INCLUDES + + def get_includes(self, block): + """ + Get the list of elements representing this XBlock's + children. + """ + try: + return self.authored_data_store.get(block, CHILDREN_INCLUDES) + except KeyError: + # KeyError raised by an XBlock field data store means "use the + # default value", and the default value for the children field is an + # empty list. + return [] + + def append_include(self, block, parsed_include): + """ + Append an element to this XBlock's list of children + """ + self.authored_data_store.set(block, CHILDREN_INCLUDES, self.get_includes(block) + [parsed_include]) + + def delete(self, block, name): + """ + Reset the value of the field named `name` to the default + """ + self._check_field(block, name) + self.authored_data_store.set(block, CHILDREN_INCLUDES, []) + self.set(block, name, []) diff --git a/openedx/core/djangoapps/xblock/runtime/blockstore_runtime.py b/openedx/core/djangoapps/xblock/runtime/blockstore_runtime.py index e49b48b7c0..821a2232d9 100644 --- a/openedx/core/djangoapps/xblock/runtime/blockstore_runtime.py +++ b/openedx/core/djangoapps/xblock/runtime/blockstore_runtime.py @@ -85,15 +85,8 @@ class BlockstoreXBlockRuntime(XBlockRuntime): This runtime API should normally be used via runtime.get_block() -> block.parse_xml() -> runtime.add_node_as_child """ - parent_usage = block.scope_ids.usage_id - parent_definition = block.scope_ids.def_id - learning_context = get_learning_context_impl(parent_usage) parsed_include = parse_xblock_include(node) - usage_key = learning_context.usage_for_child_include(parent_usage, parent_definition, parsed_include) - block.children.append(usage_key) - if parent_definition.draft_name: - # Store the data which we'll need later if saving changes to this block - self.child_includes_of(block).append(parsed_include) + self.add_child_include(block, parsed_include) def add_child_include(self, block, parsed_include): """ @@ -103,33 +96,15 @@ class BlockstoreXBlockRuntime(XBlockRuntime): modify block.children to append a usage ID, since that doesn't provide enough information to serialize the block's elements. """ - learning_context = get_learning_context_impl(block.scope_ids.usage_id) - child_usage_key = learning_context.usage_for_child_include( - block.scope_ids.usage_id, block.scope_ids.def_id, parsed_include, - ) - block.children.append(child_usage_key) - self.child_includes_of(block).append(parsed_include) + self.system.children_data_store.append_include(block, parsed_include) + block.children = self.system.children_data_store.get(block, 'children') def child_includes_of(self, block): """ - Get the (mutable) list of directives that define the - children of this block's definition. + Get the list of directives that define the children + of this block's definition. """ - # A hack: when serializing an XBlock, we need to re-create the - # elements that were in its original XML. But doing so requires the usage="..." hint attribute, which is - # technically part of the parent definition but which is not otherwise stored anywhere; we only have the derived - # usage_key, but asking the learning context to transform the usage_key back to the usage="..." hint attribute - # is non-trivial and could lead to bugs, because it could happen differently if the same parent definition is - # used in a library compared to a course (each would have different usage keys for the same usage hint). - # So, if this is a draft XBlock (we are editing it), we store the actual parsed so we can - # re-use them exactly when serializing this block back to XML. - # This implies that changes to an XBlock's children cannot be made by manipulating the .children field and - # then calling save(). - assert block.scope_ids.def_id.draft_name, "Manipulating includes is only relevant for draft XBlocks." - attr_name = "children_includes_{}".format(id(block)) # Force use of this accessor method - if not hasattr(block, attr_name): - setattr(block, attr_name, []) - return getattr(block, attr_name) + return self.system.children_data_store.get_includes(block) def save_block(self, block): """ diff --git a/openedx/core/djangoapps/xblock/runtime/runtime.py b/openedx/core/djangoapps/xblock/runtime/runtime.py index 10186ebc9f..22c0efef93 100644 --- a/openedx/core/djangoapps/xblock/runtime/runtime.py +++ b/openedx/core/djangoapps/xblock/runtime/runtime.py @@ -24,7 +24,7 @@ from web_fragments.fragment import Fragment from lms.djangoapps.courseware.model_data import DjangoKeyValueStore, FieldDataCache from lms.djangoapps.grades.api import signals as grades_signals from openedx.core.djangoapps.xblock.apps import get_xblock_app_config -from openedx.core.djangoapps.xblock.runtime.blockstore_field_data import BlockstoreFieldData +from openedx.core.djangoapps.xblock.runtime.blockstore_field_data import BlockstoreFieldData, BlockstoreChildrenData from openedx.core.djangoapps.xblock.runtime.ephemeral_field_data import EphemeralKeyValueStore from openedx.core.djangoapps.xblock.runtime.mixin import LmsBlockMixin from openedx.core.djangoapps.xblock.utils import get_xblock_id_for_anonymous_user @@ -269,7 +269,7 @@ class XBlockRuntime(RuntimeShim, Runtime): Scope.content: self.system.authored_data_store, Scope.settings: self.system.authored_data_store, Scope.parent: self.system.authored_data_store, - Scope.children: self.system.authored_data_store, + Scope.children: self.system.children_data_store, Scope.user_state_summary: student_data_store, Scope.user_state: student_data_store, Scope.user_info: student_data_store, @@ -395,6 +395,7 @@ class XBlockRuntimeSystem(object): self.id_generator = MemoryIdManager() # We don't really use id_generator until we need to support asides self.runtime_class = runtime_class self.authored_data_store = BlockstoreFieldData() + self.children_data_store = BlockstoreChildrenData(self.authored_data_store) assert student_data_mode in (self.STUDENT_DATA_EPHEMERAL, self.STUDENT_DATA_PERSISTED) self.student_data_mode = student_data_mode self._error_trackers = {}