Merge pull request #32273 from open-craft/field-data-refactor

Make Split Mongo's CachedDescriptorSystem load field-data service on demand
This commit is contained in:
Jeremy Ristau
2023-06-13 08:05:49 -04:00
committed by GitHub
9 changed files with 233 additions and 133 deletions

View File

@@ -487,11 +487,10 @@ class BlockRenderTestCase(SharedModuleStoreTestCase, LoginEnrollmentTestCase):
self.mock_user, request, block, field_data_cache, course.id, course=course
)
# check that _unwrapped_field_data is the same as the original
# check that block.runtime.service(block, 'field-data-unbound') is the same as the original
# _field_data, but now _field_data as been reset.
# pylint: disable=protected-access
assert block._unwrapped_field_data is original_field_data # lint-amnesty, pylint: disable=no-member
assert block._unwrapped_field_data is not block._field_data # lint-amnesty, pylint: disable=no-member
assert block.runtime.service(block, 'field-data-unbound') is original_field_data
assert block.runtime.service(block, 'field-data-unbound') is not block._field_data # pylint: disable=protected-access, line-too-long
# now bind this block to a few other students
for user in [UserFactory(), UserFactory(), self.mock_user]:
@@ -513,7 +512,8 @@ class BlockRenderTestCase(SharedModuleStoreTestCase, LoginEnrollmentTestCase):
# the OverrideFieldData should point to the date FieldData
assert isinstance(block._field_data._authored_data._source.fallback, DateLookupFieldData) # lint-amnesty, pylint: disable=no-member, line-too-long
assert block._field_data._authored_data._source.fallback._defaults is block._unwrapped_field_data # lint-amnesty, pylint: disable=no-member, line-too-long
assert block._field_data._authored_data._source.fallback._defaults \
is block.runtime.service(block, 'field-data-unbound')
def test_hash_resource(self):
"""

View File

@@ -52,11 +52,9 @@ class TestGatedContent(MilestonesTestCaseMixin, SharedModuleStoreTestCase):
org='edX',
number='EDX101',
run='EDX101_RUN1',
display_name='edX 101'
)
with modulestore().bulk_operations(course.id):
course.enable_subsection_gating = True
grading_policy = {
display_name='edX 101',
enable_subsection_gating=True,
grading_policy={
"GRADER": [{
"type": "Homework",
"min_count": 3,
@@ -64,10 +62,9 @@ class TestGatedContent(MilestonesTestCaseMixin, SharedModuleStoreTestCase):
"short_label": "HW",
"weight": 1.0
}]
}
course.grading_policy = grading_policy
course.save()
},
)
with modulestore().bulk_operations(course.id):
# create chapter
cls.chapter1 = BlockFactory.create(
parent=course,

View File

@@ -3,11 +3,13 @@ Support for inheritance of fields down an XBlock hierarchy.
"""
import warnings
from django.utils import timezone
from xblock.core import XBlockMixin
from xblock.fields import Boolean, Dict, Float, Integer, List, Scope, String
from xblock.runtime import KeyValueStore, KvsFieldData
from xmodule.error_block import ErrorBlock
from xmodule.fields import Date, Timedelta
from xmodule.partitions.partitions import UserPartition
@@ -280,7 +282,10 @@ def compute_inherited_metadata(block):
NOTE: This means that there is no such thing as lazy loading at the
moment--this accesses all the children."""
if block.has_children:
parent_metadata = block.xblock_kvs.inherited_settings.copy()
if isinstance(block.xblock_kvs, InheritanceKeyValueStore):
parent_metadata = block.xblock_kvs.inherited_settings.copy()
else:
parent_metadata = {}
# add any of block's explicitly set fields to the inheriting list
for field in InheritanceMixin.fields.values(): # lint-amnesty, pylint: disable=no-member
if field.is_set_on(block):
@@ -301,10 +306,23 @@ def inherit_metadata(block, inherited_data):
`inherited_data`: A dictionary mapping field names to the values that
they should inherit
"""
try:
if isinstance(block, ErrorBlock):
return
block_type = block.scope_ids.block_type
if isinstance(block.xblock_kvs, InheritanceKeyValueStore):
# This XBlock's field_data is backed by InheritanceKeyValueStore, which supports pre-computed inherited fields
block.xblock_kvs.inherited_settings = inherited_data
except AttributeError: # the kvs doesn't have inherited_settings probably b/c it's an error block
pass
else:
# We cannot apply pre-computed field data to this XBlock during import, but inheritance should still work
# normally when it's used in Studio/LMS, which use a different runtime.
# Though if someone ever needs a hacky temporary fix here, it's possible here to force it with:
# init_dict = {key: getattr(block, key) for key in block.fields.keys()}
# block._field_data = InheritanceKeyValueStore(init_dict)
warnings.warn(
f'Cannot inherit metadata to {block_type} block with KVS {block.xblock_kvs}',
stacklevel=2,
)
def own_metadata(block):
@@ -316,7 +334,17 @@ def own_metadata(block):
class InheritingFieldData(KvsFieldData):
"""A `FieldData` implementation that can inherit value from parents to children."""
"""
A `FieldData` implementation that can inherit value from parents to children.
This wraps a KeyValueStore, and will work fine with any KVS implementation.
Sometimes this wraps a subclass of InheritanceKeyValueStore, but that's not
a requirement.
This class is the way that inheritance "normally" works in modulestore.
During XML import/export, however, a different mechanism is used:
InheritanceKeyValueStore.
"""
def __init__(self, inheritable_names, **kwargs):
"""
@@ -379,6 +407,15 @@ class InheritanceKeyValueStore(KeyValueStore):
dict-based storage of fields and lookup of inherited values.
Note: inherited_settings is a dict of key to json values (internal xblock field repr)
Using this KVS is an alternative to using InheritingFieldData(). That one works with any KVS, like
DictKeyValueStore, and doesn't require any special behavior. On the other hand, this InheritanceKeyValueStore only
does inheritance properly if you first use compute_inherited_metadata() to walk the tree of XBlocks and pre-compute
the inherited metadata for the whole tree, storing it in the inherited_settings field of each instance of this KVS.
🟥 Warning: Unlike the base class, this KVS makes the assumption that you're using a completely separate KVS
instance for every XBlock, so that we only have to look at the "field_name" part of the key. You cannot use this
as a drop-in replacement for DictKeyValueStore for this reason.
"""
def __init__(self, initial_values=None, inherited_settings=None):
super().__init__()

View File

@@ -170,6 +170,12 @@ class CachingDescriptorSystem(MakoDescriptorSystem, EditInfoRuntimeMixin): # li
A system that has a cache of block json that it will use to load blocks
from, with a backup of calling to the underlying modulestore for more data
"""
# This CachingDescriptorSystem runtime sets block._field_data on each block via construct_xblock_from_class(),
# rather than the newer approach of providing a "field-data" service via runtime.service(). As a result, during
# bind_for_student() we can't just set ._bound_field_data; we must overwrite block._field_data.
uses_deprecated_field_data = True
def __repr__(self):
return "CachingDescriptorSystem{!r}".format((
self.modulestore,
@@ -303,6 +309,18 @@ class CachingDescriptorSystem(MakoDescriptorSystem, EditInfoRuntimeMixin): # li
error_msg=exc_info_to_str(sys.exc_info())
)
def service(self, block, service_name):
"""
Return a service, or None.
Services are objects implementing arbitrary other interfaces.
"""
# A very minimal shim for compatibility with the new API for how we access field data in split mongo:
if service_name == 'field-data-unbound':
return block._field_data # pylint: disable=protected-access
elif service_name == 'field-data':
return block._bound_field_data if hasattr(block, "_bound_field_data") else block._field_data
return super().service(block, service_name)
def _convert_reference_to_key(self, ref_string):
"""
Convert a single serialized UsageKey string in a ReferenceField into a UsageKey.

View File

@@ -2,6 +2,7 @@
import logging
import sys
import weakref
from fs.osfs import OSFS
from lazy import lazy
@@ -13,6 +14,7 @@ from xmodule.error_block import ErrorBlock
from xmodule.errortracker import exc_info_to_str
from xmodule.library_tools import LibraryToolsService
from xmodule.mako_block import MakoDescriptorSystem
from xmodule.modulestore import BlockData
from xmodule.modulestore.edit_info import EditInfoRuntimeMixin
from xmodule.modulestore.exceptions import ItemNotFoundError
from xmodule.modulestore.inheritance import InheritanceMixin, inheriting_field_data
@@ -73,6 +75,8 @@ class CachingDescriptorSystem(MakoDescriptorSystem, EditInfoRuntimeMixin): # li
self.default_class = default_class
self.local_modules = {}
self._services['library_tools'] = LibraryToolsService(modulestore, user_id=None)
# Cache of block field datas, keyed by the XBlock instance (since the ScopeId changes!)
self.block_field_datas = weakref.WeakKeyDictionary()
@lazy
def _parent_map(self): # lint-amnesty, pylint: disable=missing-function-docstring
@@ -159,7 +163,7 @@ class CachingDescriptorSystem(MakoDescriptorSystem, EditInfoRuntimeMixin): # li
# is the intended one when not given a course_entry_override; thus, the caching of the last branch/course id.
def xblock_from_json(self, class_, course_key, block_key, block_data, course_entry_override=None, **kwargs):
"""
Load and return block info.
Instantiate an XBlock of the specified class, using the provided data.
"""
if course_entry_override is None:
course_entry_override = self.course_entry
@@ -167,37 +171,93 @@ class CachingDescriptorSystem(MakoDescriptorSystem, EditInfoRuntimeMixin): # li
# most recent retrieval is most likely the right one for next caller (see comment above fn)
self.course_entry = CourseEnvelope(course_entry_override.course_key, self.course_entry.structure)
definition_id = block_data.definition
# If no usage id is provided, generate an in-memory id
if block_key is None:
block_key = BlockKey(block_data.block_type, LocalId())
# Construct the Block Usage Locator:
block_locator = course_key.make_usage_key(block_type=block_key.type, block_id=block_key.id)
# If no definition id is provided, generate an in-memory id
definition_id = block_data.definition or LocalId()
try:
block = self.construct_xblock_from_class(
class_,
ScopeIds(None, block_key.type, definition_id, block_locator),
for_parent=kwargs.get('for_parent'),
# Pass this tuple on for use by _init_field_data_for_block() when field data is initialized.
cds_init_args=(block_data, definition_id, kwargs.get("field_decorator")),
# Passing field_data here is deprecated, so we don't. Get it via block.service(block, "field-data").
# https://github.com/openedx/XBlock/blob/e89cbc5/xblock/mixins.py#L200-L207
)
except Exception: # pylint: disable=broad-except
log.warning("Failed to load descriptor", exc_info=True)
return ErrorBlock.from_json(
block_data,
self,
course_entry_override.course_key.make_usage_key(
block_type='error',
block_id=block_key.id
),
error_msg=exc_info_to_str(sys.exc_info())
)
edit_info = block_data.edit_info
block._edited_by = edit_info.edited_by # pylint: disable=protected-access
block._edited_on = edit_info.edited_on # pylint: disable=protected-access
block.previous_version = edit_info.previous_version
block.update_version = edit_info.update_version
block.source_version = edit_info.source_version
block.definition_locator = DefinitionLocator(block_key.type, definition_id)
# If this is an in-memory block, store it in this system
if isinstance(block_locator.block_id, LocalId):
self.local_modules[block_locator] = block
return block
def service(self, block, service_name):
"""
Return a service, or None.
Services are objects implementing arbitrary other interfaces.
"""
# Implement field data service:
if service_name in ('field-data', 'field-data-unbound'):
if hasattr(block, "_bound_field_data") and service_name != "field-data-unbound":
# Return the user-specific wrapped field data that gets set onto the block during XModule.bind_for_user
# This complexity is due to XModule heritage. If we didn't load the block as one step, then sometimes
# "rebind" it to be user-specific as a later step, we could load the field data and overrides at once.
return block._bound_field_data # pylint: disable=protected-access
if block not in self.block_field_datas:
try:
self._init_field_data_for_block(block)
except:
# Don't try again pointlessly every time another field is accessed
self.block_field_datas[block] = None
raise
return self.block_field_datas[block]
return super().service(block, service_name)
def _init_field_data_for_block(self, block):
"""
Initialize the field-data service for the given block.
"""
block_key = BlockKey.from_usage_key(block.scope_ids.usage_id)
course_key = block.scope_ids.usage_id.context_key
try:
(block_data, definition_id, field_decorator) = block.get_cds_init_args()
except KeyError:
# This block was not instantiate via xblock_from_json()/modulestore but rather via the "direct" XBlock API
# such as using construct_xblock_from_class(). It probably doesn't yet exist in modulestore.
block_data = BlockData()
definition_id = LocalId()
field_decorator = None
class_ = self.load_block_type(block.scope_ids.block_type)
convert_fields = lambda field: self.modulestore.convert_references_to_keys(
course_key, class_, field, self.course_entry.structure['blocks'],
)
if definition_id is not None and not block_data.definition_loaded:
definition_loader = DefinitionLazyLoader(
self.modulestore,
course_key,
block_key.type,
definition_id,
convert_fields,
)
else:
definition_loader = None
# If no definition id is provide, generate an in-memory id
if definition_id is None:
definition_id = LocalId()
# Construct the Block Usage Locator:
block_locator = course_key.make_usage_key(
block_type=block_key.type,
block_id=block_key.id,
)
converted_fields = convert_fields(block_data.fields)
converted_defaults = convert_fields(block_data.defaults)
if block_key in self._parent_map:
@@ -218,58 +278,42 @@ class CachingDescriptorSystem(MakoDescriptorSystem, EditInfoRuntimeMixin): # li
except AttributeError:
pass
try:
kvs = SplitMongoKVS(
definition_loader,
converted_fields,
converted_defaults,
parent=parent,
aside_fields=aside_fields,
field_decorator=kwargs.get('field_decorator')
if not isinstance(definition_id, LocalId) and not block_data.definition_loaded:
definition_loader = DefinitionLazyLoader(
self.modulestore,
course_key,
block_key.type,
definition_id,
convert_fields,
)
else:
definition_loader = None
if InheritanceMixin in self.modulestore.xblock_mixins:
field_data = inheriting_field_data(kvs)
else:
field_data = KvsFieldData(kvs)
kvs = SplitMongoKVS(
definition_loader,
converted_fields,
converted_defaults,
parent=parent,
aside_fields=aside_fields,
field_decorator=field_decorator,
)
block = self.construct_xblock_from_class(
class_,
ScopeIds(None, block_key.type, definition_id, block_locator),
field_data,
for_parent=kwargs.get('for_parent')
)
except Exception: # pylint: disable=broad-except
log.warning("Failed to load descriptor", exc_info=True)
return ErrorBlock.from_json(
block_data,
self,
course_entry_override.course_key.make_usage_key(
block_type='error',
block_id=block_key.id
),
error_msg=exc_info_to_str(sys.exc_info())
)
edit_info = block_data.edit_info
block._edited_by = edit_info.edited_by # pylint: disable=protected-access
block._edited_on = edit_info.edited_on # pylint: disable=protected-access
block.previous_version = edit_info.previous_version
block.update_version = edit_info.update_version
block.source_version = edit_info.source_version
block.definition_locator = DefinitionLocator(block_key.type, definition_id)
if InheritanceMixin in self.modulestore.xblock_mixins:
field_data = inheriting_field_data(kvs)
else:
field_data = KvsFieldData(kvs)
# Save the field_data now, because some of the wrappers will try to read fields from the block while they
# determine if they want to wrap the field_data or not.
self.block_field_datas[block] = field_data
# Now add in any wrappers that want to be added:
for wrapper in self.modulestore.xblock_field_data_wrappers:
block._field_data = wrapper(block, block._field_data) # pylint: disable=protected-access
self.block_field_datas[block] = wrapper(block, self.block_field_datas[block])
# decache any pending field settings
block.save()
# If this is an in-memory block, store it in this system
if isinstance(block_locator.block_id, LocalId):
self.local_modules[block_locator] = block
return block
# Note: user-specific wrappers like LmsFieldData or OverrideFieldData do not get set here. They get set later
# during bind_for_student(), which wraps the field-data and sets block._bound_field_data. Calling
# runtime.service(block, "field-data") or block._field_data (deprecated) will load block._bound_field_data if
# the block has been bound to a user, otherwise it returns the wrapped field data we just created above.
def get_edited_by(self, xblock):
"""

View File

@@ -7,13 +7,13 @@ import json
import logging
import os
import re
import warnings
import sys
from collections import defaultdict
from contextlib import contextmanager
from importlib import import_module
from fs.osfs import OSFS
from lazy import lazy
from lxml import etree
from opaque_keys.edx.keys import CourseKey
from opaque_keys.edx.locator import BlockUsageLocator, CourseLocator, LibraryLocator
@@ -37,7 +37,7 @@ from xmodule.x_module import ( # lint-amnesty, pylint: disable=unused-import
)
from .exceptions import ItemNotFoundError
from .inheritance import InheritanceKeyValueStore, compute_inherited_metadata, inheriting_field_data
from .inheritance import compute_inherited_metadata, inheriting_field_data
edx_xml_parser = etree.XMLParser(dtd_validation=False, load_dtd=False, remove_blank_text=True)
@@ -239,6 +239,24 @@ class ImportSystem(XMLParsingSystem, MakoDescriptorSystem): # lint-amnesty, pyl
**kwargs
)
# pylint: disable=keyword-arg-before-vararg
def construct_xblock_from_class(self, cls, scope_ids, field_data=None, *args, **kwargs):
"""
Construct a new xblock of type cls, mixing in the mixins
defined for this application.
"""
if field_data:
# Currently, *some* XBlocks (those with XmlMixin) use XmlMixin.parse_xml() which instantiates
# its own key value store for field data. That is something which should be left to the runtime, for
# consistent behavior across all XBlocks, not controlled by individual XBlock implementations.
# We cannot just ignore field_data here though, because parse_xml may have pre-loaded data into it that we
# would otherwise lose.
warnings.warn(
'XBlocks should not instantiate their own field_data store during parse_xml()',
DeprecationWarning, stacklevel=2,
)
return super().construct_xblock_from_class(cls, scope_ids, field_data, *args, **kwargs)
# id_generator is ignored, because each ImportSystem is already local to
# a course, and has it's own id_generator already in place
def add_node_as_child(self, block, node, id_generator): # lint-amnesty, pylint: disable=signature-differs
@@ -899,26 +917,10 @@ class LibraryXMLModuleStore(XMLModuleStore):
"""
return LibraryLocator(org=org, library=library)
@staticmethod
def patch_block_kvs(library_block):
"""
Metadata inheritance can be done purely through XBlocks, but in the import phase
a root block with an InheritanceKeyValueStore is assumed to be at the top of the hierarchy.
This should change in the future, but as XBlocks don't have this KVS, we have to patch it
here manually.
"""
init_dict = {key: getattr(library_block, key) for key in library_block.fields.keys()}
# if set, invalidate '_unwrapped_field_data' so it will be reset
# the next time it will be called
lazy.invalidate(library_block, '_unwrapped_field_data')
# pylint: disable=protected-access
library_block._field_data = inheriting_field_data(InheritanceKeyValueStore(init_dict))
def content_importers(self, system, course_block, course_dir, url_name):
"""
Handle Metadata inheritance for Libraries.
"""
self.patch_block_kvs(course_block)
compute_inherited_metadata(course_block)
def get_library(self, library_id, depth=0, **kwargs): # pylint: disable=unused-argument

View File

@@ -276,12 +276,6 @@ class ImportTestCase(BaseCourseTestCase): # lint-amnesty, pylint: disable=missi
)
block = system.process_xml(start_xml)
# pylint: disable=protected-access
original_unwrapped = block._unwrapped_field_data
LibraryXMLModuleStore.patch_block_kvs(block)
# '_unwrapped_field_data' is reset in `patch_block_kvs`
# pylint: disable=protected-access
assert original_unwrapped is not block._unwrapped_field_data
compute_inherited_metadata(block)
# Check the course block, since it has inheritance
block = block.get_children()[0]
@@ -322,7 +316,6 @@ class ImportTestCase(BaseCourseTestCase): # lint-amnesty, pylint: disable=missi
</course>
</library>'''.format(org=ORG, course=COURSE, url_name=url_name)
block = system.process_xml(start_xml)
LibraryXMLModuleStore.patch_block_kvs(block)
compute_inherited_metadata(block)
# Run the checks on the course node instead.
block = block.get_children()[0]
@@ -390,7 +383,6 @@ class ImportTestCase(BaseCourseTestCase): # lint-amnesty, pylint: disable=missi
</course>
</library>'''.format(due=course_due, org=ORG, course=COURSE, url_name=url_name)
block = system.process_xml(start_xml)
LibraryXMLModuleStore.patch_block_kvs(block)
# Chapter is two levels down here.
child = block.get_children()[0].get_children()[0]
# pylint: disable=protected-access

View File

@@ -10,7 +10,6 @@ from functools import partial
import yaml
from django.conf import settings
from lazy import lazy
from lxml import etree
from opaque_keys.edx.asides import AsideDefinitionKeyV2, AsideUsageKeyV2
from opaque_keys.edx.keys import UsageKey
@@ -314,9 +313,22 @@ class XModuleMixin(XModuleFields, XBlock):
def __init__(self, *args, **kwargs):
self._asides = []
# Initialization data used by CachingDescriptorSystem to defer FieldData initialization
self._cds_init_args = kwargs.pop("cds_init_args", None)
super().__init__(*args, **kwargs)
def get_cds_init_args(self):
""" Get initialization data used by CachingDescriptorSystem to defer FieldData initialization """
if self._cds_init_args is None:
raise KeyError("cds_init_args was not provided for this XBlock")
if self._cds_init_args is False:
raise RuntimeError("Tried to get CachingDescriptorSystem cds_init_args twice for the same XBlock.")
args = self._cds_init_args
# Free the memory and set this False to flag any double-access bugs. This only needs to be read once.
self._cds_init_args = False
return args
@property
def runtime(self):
return self._runtime
@@ -367,7 +379,7 @@ class XModuleMixin(XModuleFields, XBlock):
def location(self, value):
assert isinstance(value, UsageKey)
self.scope_ids = self.scope_ids._replace(
def_id=value,
def_id=value, # Note: assigning a UsageKey as def_id is OK in old mongo / import system but wrong in split
usage_id=value,
)
@@ -417,14 +429,6 @@ class XModuleMixin(XModuleFields, XBlock):
self.save()
return self._field_data._kvs # pylint: disable=protected-access
@lazy
def _unwrapped_field_data(self):
"""
This property hold the value _field_data here before we wrap it in
the LmsFieldData or OverrideFieldData classes.
"""
return self._field_data
def add_aside(self, aside):
"""
save connected asides
@@ -651,14 +655,17 @@ class XModuleMixin(XModuleFields, XBlock):
if field in self._dirty_fields:
del self._dirty_fields[field]
if wrappers is None:
wrappers = []
wrapped_field_data = self._unwrapped_field_data
for wrapper in wrappers:
wrapped_field_data = wrapper(wrapped_field_data)
self._field_data = wrapped_field_data
if wrappers:
# Put user-specific wrappers around the field-data service for this block.
# Note that these are different from modulestore.xblock_field_data_wrappers, which are not user-specific.
wrapped_field_data = self.runtime.service(self, 'field-data-unbound')
for wrapper in wrappers:
wrapped_field_data = wrapper(wrapped_field_data)
self._bound_field_data = wrapped_field_data
if getattr(self.runtime, "uses_deprecated_field_data", False):
# This approach is deprecated but old mongo's CachingDescriptorSystem still requires it.
# For Split mongo's CachingDescriptor system, don't set ._field_data this way.
self._field_data = wrapped_field_data
@property
def non_editable_metadata_fields(self):

View File

@@ -360,6 +360,9 @@ class XmlMixin:
field_data['children'] = children
field_data['xml_attributes']['filename'] = definition.get('filename', ['', None]) # for git link
# TODO: we shouldn't be instantiating our own field data instance here, but rather just call to
# runtime.construct_xblock_from_class() and then set fields on the returned block.
# See the base XBlock class (XmlSerializationMixin.parse_xml) for how it should be done.
kvs = InheritanceKeyValueStore(initial_values=field_data)
field_data = KvsFieldData(kvs)