From 7f325aee730dfcd1117363f73de6472daaad58e0 Mon Sep 17 00:00:00 2001 From: Nimisha Asthagiri Date: Mon, 26 Oct 2015 11:26:21 -0400 Subject: [PATCH] Block Cache framework. --- lms/envs/test.py | 4 + openedx/core/lib/block_cache/__init__.py | 59 ++ openedx/core/lib/block_cache/block_cache.py | 84 +++ .../core/lib/block_cache/block_structure.py | 606 ++++++++++++++++++ .../block_cache/block_structure_factory.py | 197 ++++++ openedx/core/lib/block_cache/exceptions.py | 10 + .../core/lib/block_cache/tests/__init__.py | 0 .../lib/block_cache/tests/test_block_cache.py | 113 ++++ .../block_cache/tests/test_block_structure.py | 218 +++++++ .../tests/test_block_structure_factory.py | 114 ++++ .../tests/test_transformer_registry.py | 68 ++ .../core/lib/block_cache/tests/test_utils.py | 234 +++++++ openedx/core/lib/block_cache/transformer.py | 128 ++++ .../lib/block_cache/transformer_registry.py | 46 ++ openedx/core/lib/cache_utils.py | 13 +- 15 files changed, 1893 insertions(+), 1 deletion(-) create mode 100644 openedx/core/lib/block_cache/__init__.py create mode 100644 openedx/core/lib/block_cache/block_cache.py create mode 100644 openedx/core/lib/block_cache/block_structure.py create mode 100644 openedx/core/lib/block_cache/block_structure_factory.py create mode 100644 openedx/core/lib/block_cache/exceptions.py create mode 100644 openedx/core/lib/block_cache/tests/__init__.py create mode 100644 openedx/core/lib/block_cache/tests/test_block_cache.py create mode 100644 openedx/core/lib/block_cache/tests/test_block_structure.py create mode 100644 openedx/core/lib/block_cache/tests/test_block_structure_factory.py create mode 100644 openedx/core/lib/block_cache/tests/test_transformer_registry.py create mode 100644 openedx/core/lib/block_cache/tests/test_utils.py create mode 100644 openedx/core/lib/block_cache/transformer.py create mode 100644 openedx/core/lib/block_cache/transformer_registry.py diff --git a/lms/envs/test.py b/lms/envs/test.py index 0ed5986cf7..ba943ab5e6 
100644 --- a/lms/envs/test.py +++ b/lms/envs/test.py @@ -219,6 +219,10 @@ CACHES = { 'course_structure_cache': { 'BACKEND': 'django.core.cache.backends.dummy.DummyCache', }, + 'block_cache': { + 'BACKEND': 'django.core.cache.backends.locmem.LocMemCache', + 'LOCATION': 'edx_location_block_cache', + }, } # Dummy secret key for dev diff --git a/openedx/core/lib/block_cache/__init__.py b/openedx/core/lib/block_cache/__init__.py new file mode 100644 index 0000000000..28ba7eaae0 --- /dev/null +++ b/openedx/core/lib/block_cache/__init__.py @@ -0,0 +1,59 @@ +""" +The block_cache django app provides an extensible framework for caching +data of block structures from the modulestore. + +Dual-Phase. The framework is meant to be used in 2 phases. + + * Collect Phase (for expensive and full-tree traversals) - In the + first phase, the "collect" phase, any and all data from the + modulestore should be collected and cached for later access to + the block structure. Instantiating any and all xBlocks in the block + structure is also done at this phase, since that is also (currently) + a costly operation. + + Any full tree traversals should also be done during this phase. For + example, if data for a block depends on its parents, the traversal + should happen during the collection phase and any required data + for the block should be percolated down the tree and stored as + aggregate values on the descendants. This allows for faster and + direct access to blocks in the Transform phase. + + * Transform Phase (for fast access to blocks) - In the second + phase, the "transform" phase, only the previously collected and + cached data should be accessed. There should be no access to the + modulestore or instantiation of xBlocks in this phase. + + +To make this framework extensible, the Transformer and +Extensibility design patterns are used. This django app only +provides the underlying framework for Block Structure Transformers +and a Transformer Registry. 
Clients are expected to provide actual +implementations of Transformers or add them to the extensible Registry. + +Transformers. As inspired by +http://www.ccs.neu.edu/home/riccardo/courses/csu370-fa07/lect18.pdf, +a Block Structure Transformer takes in a block structure (or tree) and +manipulates the structure and the data of its blocks according to its +own requirements. Its output can then be used for further +transformations by other transformers down the pipeline. + +Note: For performance and space optimization, our implementation +differs from the paper in that our transformers mutate the block +structure in-place rather than returning a modified copy of it. + +Block Structure. The BlockStructure and its family of classes +provided with this framework are the base data types for accessing +and manipulating block structures. BlockStructures are constructed +using the BlockStructureFactory and then used as the currency across +Transformers. + +Registry. Transformers are registered using the platform's +PluginManager (e.g., Stevedore). This is currently done by updating +setup.py. Only registered transformers are called during the Collect +Phase. And only registered transformers can be used during the +Transform phase. + +Note: A partial subset (as an ordered list) of the registered +transformers can be requested during the Transform phase, allowing +the client to manipulate exactly which transformers to call. +""" diff --git a/openedx/core/lib/block_cache/block_cache.py b/openedx/core/lib/block_cache/block_cache.py new file mode 100644 index 0000000000..dba7adc608 --- /dev/null +++ b/openedx/core/lib/block_cache/block_cache.py @@ -0,0 +1,84 @@ +""" +Top-level module for the Block Cache framework with higher order +functions for getting and clearing cached blocks. 
def get_blocks(cache, modulestore, usage_info, root_block_usage_key, transformers):
    """
    Entry point of the Block Cache framework.

    Looks up the block structure rooted at root_block_usage_key in the
    cache.  When nothing usable is cached, the structure is rebuilt from
    the modulestore, every registered transformer collects its data on
    it, and the result is written back to the cache (Collect phase).
    The requested transformers are then applied in order (Transform
    phase) and blocks left unreachable by them are pruned.

    Arguments:
        cache (django.core.cache.backends.base.BaseCache) - The
            cache to use for storing/retrieving the block structure's
            collected data.

        modulestore (ModuleStoreRead) - The modulestore that
            contains the data for the xBlock objects corresponding to
            the block structure.  Only consulted on a cache miss.

        usage_info (any negotiated type) - A usage-specific object
            that is forwarded unchanged to the transform method of
            each requested transformer (for example, an object
            carrying the user for whom the transform is applied).

        root_block_usage_key (UsageKey) - The usage_key for the root
            of the block structure that is being accessed.

        transformers ([BlockStructureTransformer]) - The ordered list
            of transformers whose transform methods are to be called.
            Every entry must be registered in the Transformer Registry.

    Returns:
        The transformed and pruned block structure.

    Raises:
        TransformerException - If any of the requested transformers
            is not registered in the Transformer Registry.
    """
    # Reject the request up front if it names transformers that the
    # Transformer Registry does not know about.
    not_registered = TransformerRegistry.find_unregistered(transformers)
    if not_registered:
        raise TransformerException(
            "The following requested transformers are not registered: {}".format(not_registered)
        )

    # Try the cache first.
    block_structure = BlockStructureFactory.create_from_cache(
        root_block_usage_key,
        cache,
        transformers,
    )

    if not block_structure:
        # Cache miss: run the collect phase against the modulestore.
        block_structure = BlockStructureFactory.create_from_modulestore(
            root_block_usage_key,
            modulestore,
        )

        # Every *registered* transformer collects, not only the
        # requested ones, so the cached data can serve later requests.
        for registered in TransformerRegistry.get_registered_transformers():
            block_structure._add_transformer(registered)  # pylint: disable=protected-access
            registered.collect(block_structure)

        # Gather the xBlock fields the transformers asked for, then
        # persist everything that was collected.
        block_structure._collect_requested_xblock_fields()  # pylint: disable=protected-access
        BlockStructureFactory.serialize_to_cache(block_structure, cache)

    # Transform phase: apply the requested transformers in order.
    for requested in transformers:
        requested.transform(usage_info, block_structure)

    # Drop whatever the transformers disconnected from the root.
    block_structure._prune_unreachable()  # pylint: disable=protected-access

    return block_structure


def clear_block_cache(cache, root_block_usage_key):
    """
    Evicts from the given cache the block structure stored for the
    given root block usage key.
    """
    BlockStructureFactory.remove_from_cache(root_block_usage_key, cache)
class _BlockRelations(object):
    """
    Data structure to encapsulate relationships for a single block,
    including its children and parents.
    """
    def __init__(self):

        # List of usage keys of this block's parents.
        # list [UsageKey]
        self.parents = []

        # List of usage keys of this block's children.
        # list [UsageKey]
        self.children = []


class BlockStructure(object):
    """
    Base class for a block structure. BlockStructures are constructed
    using the BlockStructureFactory and then used as the currency across
    Transformers.

    This base class keeps track of the block structure's root_block_usage_key,
    the existence of the blocks, and their parents and children
    relationships (graph nodes and edges).
    """
    def __init__(self, root_block_usage_key):

        # The usage key of the root block for this structure.
        # UsageKey
        self.root_block_usage_key = root_block_usage_key

        # Map of a block's usage key to its block relations. The
        # existence of a block in the structure is determined by its
        # presence in this map.  Because this is a defaultdict, merely
        # indexing it with an unknown key creates that block; read
        # paths therefore check has_block first.
        # defaultdict {UsageKey: _BlockRelations}
        self._block_relations = defaultdict(_BlockRelations)

        # Add the root block.
        self._add_block(self._block_relations, root_block_usage_key)

    def __iter__(self):
        """
        The default iterator for a block structure is a topological
        traversal since it's the more common case and we currently
        need to support DAGs.
        """
        return self.topological_traversal()

    #--- Block structure relation methods ---#

    def get_parents(self, usage_key):
        """
        Returns the parents of the block identified by the given
        usage_key.

        Arguments:
            usage_key - The usage key of the block whose parents
                are to be returned.

        Returns:
            [UsageKey] - A list of usage keys of the block's parents;
                an empty list if the block is not in the structure.
        """
        # Guard with has_block so that the lookup does not insert the
        # key into the defaultdict.
        return self._block_relations[usage_key].parents if self.has_block(usage_key) else []

    def get_children(self, usage_key):
        """
        Returns the children of the block identified by the given
        usage_key.

        Arguments:
            usage_key - The usage key of the block whose children
                are to be returned.

        Returns:
            [UsageKey] - A list of usage keys of the block's children;
                an empty list if the block is not in the structure.
        """
        # Guard with has_block so that the lookup does not insert the
        # key into the defaultdict.
        return self._block_relations[usage_key].children if self.has_block(usage_key) else []

    def has_block(self, usage_key):
        """
        Returns whether a block with the given usage_key is in this
        block structure.

        Arguments:
            usage_key - The usage key of the block whose presence in
                the structure is to be checked.

        Returns:
            bool - Whether or not a block with the given usage_key
                is present in this block structure.
        """
        return usage_key in self._block_relations

    #--- Block structure traversal methods ---#

    def topological_traversal(
            self,
            filter_func=None,
            yield_descendants_of_unyielded=False,
    ):
        """
        Performs a topological sort of the block structure and yields
        the usage_key of each block as it is encountered.

        Arguments:
            See the description in
            openedx.core.lib.graph_traversals.traverse_topologically.

        Returns:
            generator - A generator object created from the
                traverse_topologically method.
        """
        return traverse_topologically(
            start_node=self.root_block_usage_key,
            get_parents=self.get_parents,
            get_children=self.get_children,
            filter_func=filter_func,
            yield_descendants_of_unyielded=yield_descendants_of_unyielded,
        )

    def post_order_traversal(
            self,
            filter_func=None,
    ):
        """
        Performs a post-order sort of the block structure and yields
        the usage_key of each block as it is encountered.

        Arguments:
            See the description in
            openedx.core.lib.graph_traversals.traverse_post_order.

        Returns:
            generator - A generator object created from the
                traverse_post_order method.
        """
        return traverse_post_order(
            start_node=self.root_block_usage_key,
            get_children=self.get_children,
            filter_func=filter_func,
        )

    #--- Internal methods ---#
    # To be used within the block_cache framework or by tests.

    def _prune_unreachable(self):
        """
        Mutates this block structure by removing any unreachable blocks.
        """

        # Create a new block relations map to store only those blocks
        # that are still linked
        pruned_block_relations = defaultdict(_BlockRelations)
        old_block_relations = self._block_relations

        # Build the structure from the leaves up by doing a post-order
        # traversal of the old structure, thereby encountering only
        # reachable blocks.
        for block_key in self.post_order_traversal():
            # If the block is in the old structure,
            if block_key in old_block_relations:
                # Add it to the new pruned structure
                self._add_block(pruned_block_relations, block_key)

                # Add a relationship to only those old children that
                # were also added to the new pruned structure.
                for child in old_block_relations[block_key].children:
                    if child in pruned_block_relations:
                        self._add_to_relations(pruned_block_relations, block_key, child)

        # Replace this structure's relations with the newly pruned one.
        self._block_relations = pruned_block_relations

    def _get_block_keys(self):
        """
        Returns an iterator of all the block keys in the block
        structure.
        """
        # iter() over the mapping yields its keys on both Python 2 and
        # Python 3, unlike the Python-2-only dict.iterkeys().
        return iter(self._block_relations)

    def _add_relation(self, parent_key, child_key):
        """
        Adds a parent to child relationship in this block structure.

        Arguments:
            parent_key (UsageKey) - Usage key of the parent block.
            child_key (UsageKey) - Usage key of the child block.
        """
        self._add_to_relations(self._block_relations, parent_key, child_key)

    @staticmethod
    def _add_to_relations(block_relations, parent_key, child_key):
        """
        Adds a parent to child relationship in the given block
        relations map.  Either block is created in the map if it is
        not already present.  No check is made for an existing edge,
        so adding the same relationship twice records it twice.

        Arguments:
            block_relations (defaultdict({UsageKey: _BlockRelations})) -
                Internal map of a block's usage key to its
                parents/children relations.

            parent_key (UsageKey) - Usage key of the parent block.

            child_key (UsageKey) - Usage key of the child block.
        """
        block_relations[child_key].parents.append(parent_key)
        block_relations[parent_key].children.append(child_key)

    @staticmethod
    def _add_block(block_relations, usage_key):
        """
        Adds the given usage_key to the given block_relations map.
        Any relations already recorded for that key are replaced by
        an empty _BlockRelations.

        Arguments:
            block_relations (defaultdict({UsageKey: _BlockRelations})) -
                Internal map of a block's usage key to its
                parents/children relations.

            usage_key (UsageKey) - Usage key of the block that is to
                be added to the given block_relations.
        """
        block_relations[usage_key] = _BlockRelations()
class _BlockData(object):
    """
    Data structure to encapsulate collected data for a single block.
    """
    def __init__(self):
        # Map of xblock field name to the field's value for this block.
        # dict {string: any picklable type}
        self.xblock_fields = {}

        # Map of transformer name to the transformer's data for this
        # block.
        # defaultdict {string: dict}
        self.transformer_data = defaultdict(dict)


class BlockStructureBlockData(BlockStructure):
    """
    Subclass of BlockStructure that is responsible for managing block
    and transformer data.
    """
    def __init__(self, root_block_usage_key):
        super(BlockStructureBlockData, self).__init__(root_block_usage_key)

        # Map of a block's usage key to its collected data, including
        # its xBlock fields and block-specific transformer data.
        # defaultdict {UsageKey: _BlockData}
        self._block_data_map = defaultdict(_BlockData)

        # Map of a transformer's name to its non-block-specific data.
        # defaultdict {string: dict}
        self._transformer_data = defaultdict(dict)

    def get_xblock_field(self, usage_key, field_name, default=None):
        """
        Returns the collected value of the xBlock field for the
        requested block for the requested field_name; returns default if
        not found.

        Arguments:
            usage_key (UsageKey) - Usage key of the block whose xBlock
                field is requested.

            field_name (string) - The name of the field that is
                requested.

            default (any type) - The value to return if a field value is
                not found.
        """
        # .get() avoids creating an entry in the defaultdict on a miss.
        block_data = self._block_data_map.get(usage_key)
        return block_data.xblock_fields.get(field_name, default) if block_data else default

    def get_transformer_data(self, transformer, key, default=None):
        """
        Returns the value associated with the given key from the given
        transformer's data dictionary; returns default if not found.

        Arguments:
            transformer (BlockStructureTransformer) - The transformer
                whose collected data is requested.

            key (string) - A dictionary key to the transformer's data
                that is requested.

            default (any type) - The value to return if a dictionary
                entry is not found.
        """
        return self._transformer_data.get(transformer.name(), {}).get(key, default)

    def set_transformer_data(self, transformer, key, value):
        """
        Updates the given transformer's data dictionary with the given
        key and value.

        Arguments:
            transformer (BlockStructureTransformer) - The transformer
                whose data is to be updated.

            key (string) - A dictionary key to the transformer's data.

            value (any picklable type) - The value to associate with the
                given key for the given transformer's data.
        """
        self._transformer_data[transformer.name()][key] = value

    def get_transformer_block_field(self, usage_key, transformer, key, default=None):
        """
        Returns the value associated with the given key for the given
        transformer for the block identified by the given usage_key;
        returns default if not found.

        Arguments:
            usage_key (UsageKey) - Usage key of the block whose
                transformer data is requested.

            transformer (BlockStructureTransformer) - The transformer
                whose dictionary data is requested.

            key (string) - A dictionary key to the transformer's data
                that is requested.

            default (any type) - The value to return if a dictionary
                entry is not found.
        """
        transformer_data = self.get_transformer_block_data(usage_key, transformer)
        return transformer_data.get(key, default)

    def set_transformer_block_field(self, usage_key, transformer, key, value):
        """
        Updates the given transformer's data dictionary with the given
        key and value for the block identified by the given usage_key.

        Arguments:
            usage_key (UsageKey) - Usage key of the block whose
                transformer data is to be updated.

            transformer (BlockStructureTransformer) - The transformer
                whose data is to be updated.

            key (string) - A dictionary key to the transformer's data.

            value (any picklable type) - The value to associate with the
                given key for the given transformer's data for the
                requested block.
        """
        self._block_data_map[usage_key].transformer_data[transformer.name()][key] = value

    def get_transformer_block_data(self, usage_key, transformer):
        """
        Returns the entire transformer data dict for the given
        transformer for the block identified by the given usage_key;
        returns an empty dict {} if not found.

        Note: The empty dict returned on a miss is a fresh object that
        is not stored in the structure, so writing to it has no effect.

        Arguments:
            usage_key (UsageKey) - Usage key of the block whose
                transformer data is requested.

            transformer (BlockStructureTransformer) - The transformer
                whose dictionary data is requested.
        """
        # .get() avoids creating an entry in the defaultdict on a miss.
        block_data = self._block_data_map.get(usage_key)
        if block_data is None:
            return {}
        return block_data.transformer_data.get(transformer.name(), {})

    def remove_transformer_block_data(self, usage_key, transformer):
        """
        Deletes the given transformer's entire data dict for the
        block identified by the given usage_key.  Does nothing if no
        data has been recorded for that block or transformer.

        Arguments:
            usage_key (UsageKey) - Usage key of the block whose
                transformer data is to be deleted.

            transformer (BlockStructureTransformer) - The transformer
                whose data entry is to be deleted.
        """
        # Look the block up with .get() rather than indexing the
        # defaultdict, so that a removal never inserts an empty
        # _BlockData entry for a block that had no data.
        block_data = self._block_data_map.get(usage_key)
        if block_data is not None:
            block_data.transformer_data.pop(transformer.name(), None)

    def remove_block(self, usage_key, keep_descendants):
        """
        Removes the block identified by the usage_key and all of its
        related data from the block structure.  If descendants of the
        removed block are to be kept, the structure's relations are
        updated to reconnect the block's parents with its children.

        Note: While the immediate relations of the block are updated
        (removed), all descendants of the block will remain in the
        structure unless the _prune_unreachable method is called.

        Arguments:
            usage_key (UsageKey) - Usage key of the block that is to be
                removed.

            keep_descendants (bool) - If True, the block structure's
                relations (graph edges) are updated such that the
                removed block's children become children of the
                removed block's parents.
        """
        # These are the removed block's own lists; they stay valid
        # after the block itself is popped from the map below.
        children = self._block_relations[usage_key].children
        parents = self._block_relations[usage_key].parents

        # Remove block from its children.
        for child in children:
            self._block_relations[child].parents.remove(usage_key)

        # Remove block from its parents.
        for parent in parents:
            self._block_relations[parent].children.remove(usage_key)

        # Remove block.
        self._block_relations.pop(usage_key, None)
        self._block_data_map.pop(usage_key, None)

        # Recreate the graph connections if descendants are to be kept.
        if keep_descendants:
            for child in children:
                for parent in parents:
                    self._add_relation(parent, child)

    def remove_block_if(self, removal_condition, keep_descendants=False, **kwargs):
        """
        A higher-order function that traverses the block structure
        using topological sort and removes any blocks encountered that
        satisfy the removal_condition.

        Arguments:
            removal_condition ((usage_key)->bool) - A function that
                takes a block's usage key as input and returns whether
                or not to remove that block from the block structure.

            keep_descendants (bool) - See the description in
                remove_block.

            kwargs (dict) - Optional keyword arguments to be forwarded
                to topological_traversal.
        """
        def filter_func(block_key):
            """
            Filter function for removing blocks that satisfy the
            removal_condition.
            """
            if removal_condition(block_key):
                self.remove_block(block_key, keep_descendants)
                return False
            return True

        # Note: For optimization, we remove blocks using the filter
        # function, since the graph traversal method can skip over
        # descendants that are unyielded.  However, note that the
        # optimization is not currently present because of DAGs,
        # but it will be as soon as we remove support for DAGs.
        for _ in self.topological_traversal(filter_func=filter_func, **kwargs):
            pass

    def _get_transformer_data_version(self, transformer):
        """
        Returns the version number stored for the given transformer;
        returns 0 if no version has been stored for it.

        Arguments:
            transformer (BlockStructureTransformer) - The transformer
                whose stored version is requested.
        """
        return self.get_transformer_data(transformer, TRANSFORMER_VERSION_KEY, 0)

    def _add_transformer(self, transformer):
        """
        Adds the given transformer to the block structure by recording
        its current version number.

        Arguments:
            transformer (BlockStructureTransformer) - The transformer
                whose VERSION is to be recorded.

        Raises:
            TransformerException - If the transformer's VERSION is 0,
                which is also the value reported for a transformer
                with no stored version.
        """
        if transformer.VERSION == 0:
            # The name must be interpolated with .format(); passing it
            # as a second exception argument leaves '{0}' unreplaced.
            raise TransformerException(
                'VERSION attribute is not set on transformer {0}.'.format(transformer.name())
            )
        self.set_transformer_data(transformer, TRANSFORMER_VERSION_KEY, transformer.VERSION)
class BlockStructureModulestoreData(BlockStructureBlockData):
    """
    Subclass of BlockStructureBlockData that is responsible for managing
    xBlocks and corresponding functionality that should only be called
    during the Collect phase.

    Note: Although this class interface uses xBlock terminology, it is
    designed and implemented generically so it can work with any
    interface and implementation of an xBlock.
    """
    def __init__(self, root_block_usage_key):
        super(BlockStructureModulestoreData, self).__init__(root_block_usage_key)

        # Map of a block's usage key to its instantiated xBlock.
        # dict {UsageKey: XBlock}
        self._xblock_map = {}

        # Set of xBlock field names that have been requested for
        # collection.
        # set(string)
        self._requested_xblock_fields = set()

    def request_xblock_fields(self, *field_names):
        """
        Records request for collecting data for the given xBlock fields.

        A Transformer should call this method when it needs to collect
        data for a common xBlock field that may also be used by other
        transformers.  This minimizes storage usage across transformers.
        Contrast this with each transformer collecting the same xBlock
        data within its own transformer data storage.

        Arguments:
            *field_names (string) - One or more names of common xBlock
                fields whose values should be collected, each passed
                as a separate positional argument.
        """
        # set.update accepts any iterable; no intermediate set needed.
        self._requested_xblock_fields.update(field_names)

    def get_xblock(self, usage_key):
        """
        Returns the instantiated xBlock for the given usage key.

        Arguments:
            usage_key (UsageKey) - Usage key of the block whose
                xBlock object is to be returned.

        Raises:
            KeyError - If no xBlock was added for the given usage key.
        """
        return self._xblock_map[usage_key]

    #--- Internal methods ---#
    # To be used within the block_cache framework or by tests.

    def _add_xblock(self, usage_key, xblock):
        """
        Associates the given xBlock object with the given usage_key.

        Arguments:
            usage_key (UsageKey) - Usage key of the given xBlock.  This
                value is passed in separately as opposed to retrieving
                it from the given xBlock since this interface is
                agnostic to and decoupled from the xBlock interface.

            xblock (XBlock) - An instantiated XBlock object that is
                to be stored for later access.
        """
        self._xblock_map[usage_key] = xblock

    def _collect_requested_xblock_fields(self):
        """
        Iterates through all instantiated xBlocks that were added and
        collects all xBlock fields that were requested.
        """
        if not self._requested_xblock_fields:
            return

        # .items() works on both Python 2 and Python 3, unlike the
        # Python-2-only dict.iteritems().
        for xblock_usage_key, xblock in self._xblock_map.items():
            for field_name in self._requested_xblock_fields:
                self._set_xblock_field(xblock_usage_key, xblock, field_name)

    def _set_xblock_field(self, usage_key, xblock, field_name):
        """
        Updates the given block's xBlock fields data with the xBlock
        value for the given field name.  Nothing is stored if the
        xBlock does not have an attribute with that name.

        Arguments:
            usage_key (UsageKey) - Usage key of the given xBlock.  This
                value is passed in separately as opposed to retrieving
                it from the given xBlock since this interface is
                agnostic to and decoupled from the xBlock interface.

            xblock (XBlock) - An instantiated XBlock object whose
                field is being accessed and collected for later
                retrieval.

            field_name (string) - The name of the xBlock field that is
                being collected and stored.
        """
        if hasattr(xblock, field_name):
            self._block_data_map[usage_key].xblock_fields[field_name] = getattr(xblock, field_name)
class BlockStructureFactory(object):
    """
    Factory class for BlockStructure objects.
    """
    @classmethod
    def create_from_modulestore(cls, root_block_usage_key, modulestore):
        """
        Creates and returns a block structure from the modulestore
        starting at the given root_block_usage_key.

        Arguments:
            root_block_usage_key (UsageKey) - The usage_key for the root
                of the block structure that is to be created.

            modulestore (ModuleStoreRead) - The modulestore that
                contains the data for the xBlocks within the block
                structure starting at root_block_usage_key.

        Returns:
            BlockStructureModulestoreData - The created block structure
                with instantiated xBlocks from the given modulestore
                starting at root_block_usage_key.
        """
        # Create block structure.
        block_structure = BlockStructureModulestoreData(root_block_usage_key)

        # Create internal set of blocks visited to use when recursing.
        blocks_visited = set()

        def build_block_structure(xblock):
            """
            Recursively update the block structure with the given xBlock
            and its descendants.
            """
            # Check if the xblock was already visited (can happen in
            # DAGs).
            if xblock.location in blocks_visited:
                return

            # Add the xBlock.
            blocks_visited.add(xblock.location)
            block_structure._add_xblock(xblock.location, xblock)

            # Add relations with its children and recurse.  The edge is
            # recorded before recursing, so a child shared by several
            # parents gets one edge per parent but is descended into
            # only once.
            for child in xblock.get_children():
                block_structure._add_relation(xblock.location, child.location)
                build_block_structure(child)

        root_xblock = modulestore.get_item(root_block_usage_key, depth=None)
        build_block_structure(root_xblock)
        return block_structure

    @classmethod
    def serialize_to_cache(cls, block_structure, cache):
        """
        Store a compressed and pickled serialization of the given
        block structure into the given cache.

        The key in the cache is 'root.key.' followed by the text form
        of the structure's root_block_usage_key (see
        _encode_root_cache_key).
        The data stored in the cache includes the structure's
        block relations, transformer data, and block data.

        Arguments:
            block_structure (BlockStructure) - The block structure
                that is to be serialized to the given cache.

            cache (django.core.cache.backends.base.BaseCache) - The
                cache into which cacheable data of the block structure
                is to be serialized.
        """
        # The tuple order here must match the unpacking order in
        # create_from_cache.
        data_to_cache = (
            block_structure._block_relations,
            block_structure._transformer_data,
            block_structure._block_data_map
        )
        zp_data_to_cache = zpickle(data_to_cache)
        cache.set(
            cls._encode_root_cache_key(block_structure.root_block_usage_key),
            zp_data_to_cache
        )
        logger.debug(
            "Wrote BlockStructure %s to cache, size: %s",
            block_structure.root_block_usage_key,
            len(zp_data_to_cache),
        )

    @classmethod
    def create_from_cache(cls, root_block_usage_key, cache, transformers):
        """
        Deserializes and returns the block structure starting at
        root_block_usage_key from the given cache, if it's found in the cache.

        The given root_block_usage_key must equate the root_block_usage_key
        previously passed to serialize_to_cache.

        Arguments:
            root_block_usage_key (UsageKey) - The usage_key for the root
                of the block structure that is to be deserialized from
                the given cache.

            cache (django.core.cache.backends.base.BaseCache) - The
                cache from which the block structure is to be
                deserialized.

            transformers ([BlockStructureTransformer]) - A list of
                transformers for which the block structure will be
                transformed.

        Returns:
            BlockStructure - The deserialized block structure starting
                at root_block_usage_key, if found in the cache.

            NoneType - If the root_block_usage_key is not found in the cache
                or if the cached data is outdated for one or more of the
                given transformers.
        """

        # Find root_block_usage_key in the cache.
        zp_data_from_cache = cache.get(cls._encode_root_cache_key(root_block_usage_key))
        if not zp_data_from_cache:
            logger.debug(
                "BlockStructure %r not found in the cache.",
                root_block_usage_key,
            )
            return None

        logger.debug(
            "Read BlockStructure %r from cache, size: %s",
            root_block_usage_key,
            len(zp_data_from_cache),
        )

        # Deserialize and construct the block structure.
        # NOTE(review): zunpickle presumably unpickles the payload, so
        # the cache backend has to be trusted - confirm.
        block_relations, transformer_data, block_data_map = zunpickle(zp_data_from_cache)
        block_structure = BlockStructureBlockData(root_block_usage_key)
        block_structure._block_relations = block_relations
        block_structure._transformer_data = transformer_data
        block_structure._block_data_map = block_data_map

        # Verify that the cached data for all the given transformers are
        # for their latest versions.
        outdated_transformers = {}
        for transformer in transformers:
            cached_transformer_version = block_structure._get_transformer_data_version(transformer)
            if transformer.VERSION != cached_transformer_version:
                outdated_transformers[transformer.name()] = "version: {}, cached: {}".format(
                    transformer.VERSION,
                    cached_transformer_version,
                )
        if outdated_transformers:
            # .items() works on both Python 2 and Python 3, unlike the
            # Python-2-only dict.iteritems().
            logger.info(
                "Collected data for the following transformers are outdated:\n%s.",
                '\n'.join([t_name + ": " + t_value for t_name, t_value in outdated_transformers.items()]),
            )
            return None

        return block_structure

    @classmethod
    def remove_from_cache(cls, root_block_usage_key, cache):
        """
        Removes the block structure for the given root_block_usage_key
        from the given cache.

        Arguments:
            root_block_usage_key (UsageKey) - The usage_key for the root
                of the block structure that is to be removed from
                the given cache.

            cache (django.core.cache.backends.base.BaseCache) - The
                cache from which the block structure is to be
                removed.
        """
        cache.delete(cls._encode_root_cache_key(root_block_usage_key))
        # TODO also remove all block data?

    @classmethod
    def _encode_root_cache_key(cls, root_block_usage_key):
        """
        Returns the cache key to use for storing the block structure
        for the given root_block_usage_key.
        """
        # Formatting into a unicode literal yields the key's text form
        # on both Python 2 and Python 3 without the Python-2-only
        # unicode() builtin.
        return u"root.key.{}".format(root_block_usage_key)
+ """ + pass diff --git a/openedx/core/lib/block_cache/tests/__init__.py b/openedx/core/lib/block_cache/tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/openedx/core/lib/block_cache/tests/test_block_cache.py b/openedx/core/lib/block_cache/tests/test_block_cache.py new file mode 100644 index 0000000000..1c94e049a8 --- /dev/null +++ b/openedx/core/lib/block_cache/tests/test_block_cache.py @@ -0,0 +1,113 @@ +""" +Tests for block_cache.py +""" + +from django.core.cache import get_cache +from mock import patch +from unittest import TestCase + +from ..block_cache import get_blocks +from ..exceptions import TransformerException +from .test_utils import ( + MockModulestoreFactory, MockCache, MockTransformer, ChildrenMapTestMixin +) + + +@patch('openedx.core.lib.block_cache.transformer_registry.TransformerRegistry.get_available_plugins') +class TestBlockCache(TestCase, ChildrenMapTestMixin): + """ + Test class for block cache functionality. + """ + + class TestTransformer1(MockTransformer): + """ + Test Transformer class. + """ + @classmethod + def block_key(cls): + """ + Returns the dictionary key for transformer block data. + """ + return 't1.key1' + + @classmethod + def block_val(cls, block_key): + """ + Returns the dictionary value for transformer block data for + the block identified by the given block key. + """ + return 't1.val1.' + unicode(block_key) + + @classmethod + def collect(cls, block_structure): + """ + Sets transformer block data for each block in the structure + as it is visited using topological traversal. + """ + for block_key in block_structure.topological_traversal(): + block_structure.set_transformer_block_field( + block_key, cls, cls.block_key(), cls.block_val(block_key) + ) + + def transform(self, usage_info, block_structure): + """ + Verifies the transformer block data set for each block + in the structure. 
+ """ + def assert_collected_value(block_key): + """ + Verifies the transformer block data for the given + block equates the value stored in the collect method. + """ + assert ( + block_structure.get_transformer_block_field( + block_key, + self, + self.block_key() + ) == self.block_val(block_key) + ) + + for block_key in block_structure.topological_traversal(): + assert_collected_value(block_key) + + def setUp(self): + super(TestBlockCache, self).setUp() + self.children_map = self.SIMPLE_CHILDREN_MAP + self.usage_info = None + self.mock_cache = MockCache() + self.modulestore = MockModulestoreFactory.create(self.children_map) + self.transformers = [self.TestTransformer1()] + + def test_get_blocks(self, mock_available_transforms): + mock_available_transforms.return_value = {transformer.name(): transformer for transformer in self.transformers} + block_structure = get_blocks( + self.mock_cache, self.modulestore, self.usage_info, root_block_usage_key=0, transformers=self.transformers + ) + self.assert_block_structure(block_structure, self.children_map) + + def test_unregistered_transformers(self, mock_available_transforms): + mock_available_transforms.return_value = {} + with self.assertRaisesRegexp(TransformerException, "requested transformers are not registered"): + get_blocks( + self.mock_cache, + self.modulestore, + self.usage_info, + root_block_usage_key=0, + transformers=self.transformers, + ) + + def test_block_caching(self, mock_available_transforms): + mock_available_transforms.return_value = {transformer.name(): transformer for transformer in self.transformers} + + cache = get_cache('block_cache') + + for iteration in range(2): + self.modulestore.get_items_call_count = 0 + block_structure = get_blocks( + cache, self.modulestore, self.usage_info, root_block_usage_key=0, transformers=self.transformers + ) + self.assert_block_structure(block_structure, self.children_map) + if iteration == 0: + self.assertGreater(self.modulestore.get_items_call_count, 0) + else: + 
self.assertEquals(self.modulestore.get_items_call_count, 0) diff --git a/openedx/core/lib/block_cache/tests/test_block_structure.py b/openedx/core/lib/block_cache/tests/test_block_structure.py new file mode 100644 index 0000000000..994c758d87 --- /dev/null +++ b/openedx/core/lib/block_cache/tests/test_block_structure.py @@ -0,0 +1,218 @@ +""" +Tests for block_structure.py +""" +# pylint: disable=protected-access +from collections import namedtuple +from copy import deepcopy +import ddt +import itertools +from unittest import TestCase + +from openedx.core.lib.graph_traversals import traverse_post_order + +from ..block_structure import BlockStructure, BlockStructureModulestoreData, BlockStructureBlockData +from ..exceptions import TransformerException +from .test_utils import MockXBlock, MockTransformer, ChildrenMapTestMixin + + +@ddt.ddt +class TestBlockStructure(TestCase, ChildrenMapTestMixin): + """ + Tests for BlockStructure + """ + @ddt.data( + [], + ChildrenMapTestMixin.SIMPLE_CHILDREN_MAP, + ChildrenMapTestMixin.LINEAR_CHILDREN_MAP, + ChildrenMapTestMixin.DAG_CHILDREN_MAP, + ) + def test_relations(self, children_map): + block_structure = self.create_block_structure(BlockStructure, children_map) + + # get_children + for parent, children in enumerate(children_map): + self.assertSetEqual(set(block_structure.get_children(parent)), set(children)) + + # get_parents + for child, parents in enumerate(self.get_parents_map(children_map)): + self.assertSetEqual(set(block_structure.get_parents(child)), set(parents)) + + # has_block + for node in range(len(children_map)): + self.assertTrue(block_structure.has_block(node)) + self.assertFalse(block_structure.has_block(len(children_map) + 1)) + + +@ddt.ddt +class TestBlockStructureData(TestCase, ChildrenMapTestMixin): + """ + Tests for BlockStructureBlockData and BlockStructureModulestoreData + """ + def test_non_versioned_transformer(self): + class TestNonVersionedTransformer(MockTransformer): + """ + Test transformer with 
default version number (0). + """ + VERSION = 0 + + block_structure = BlockStructureModulestoreData(root_block_usage_key=0) + + with self.assertRaisesRegexp(TransformerException, "VERSION attribute is not set"): + block_structure._add_transformer(TestNonVersionedTransformer()) + + def test_transformer_data(self): + # transformer test cases + TransformerInfo = namedtuple("TransformerInfo", "transformer structure_wide_data block_specific_data") # pylint: disable=invalid-name + transformers_info = [ + TransformerInfo( + transformer=MockTransformer(), + structure_wide_data=[("t1.global1", "t1.g.val1"), ("t1.global2", "t1.g.val2")], + block_specific_data={ + "B1": [("t1.key1", "t1.b1.val1"), ("t1.key2", "t1.b1.val2")], + "B2": [("t1.key1", "t1.b2.val1"), ("t1.key2", "t1.b2.val2")], + "B3": [("t1.key1", True), ("t1.key2", False)], + "B4": [("t1.key1", None), ("t1.key2", False)], + }, + ), + TransformerInfo( + transformer=MockTransformer(), + structure_wide_data=[("t2.global1", "t2.g.val1"), ("t2.global2", "t2.g.val2")], + block_specific_data={ + "B1": [("t2.key1", "t2.b1.val1"), ("t2.key2", "t2.b1.val2")], + "B2": [("t2.key1", "t2.b2.val1"), ("t2.key2", "t2.b2.val2")], + }, + ), + ] + + # create block structure + block_structure = BlockStructureModulestoreData(root_block_usage_key=0) + + # set transformer data + for t_info in transformers_info: + block_structure._add_transformer(t_info.transformer) + for key, val in t_info.structure_wide_data: + block_structure.set_transformer_data(t_info.transformer, key, val) + for block, block_data in t_info.block_specific_data.iteritems(): + for key, val in block_data: + block_structure.set_transformer_block_field(block, t_info.transformer, key, val) + + # verify transformer data + for t_info in transformers_info: + self.assertEquals( + block_structure._get_transformer_data_version(t_info.transformer), + MockTransformer.VERSION + ) + for key, val in t_info.structure_wide_data: + self.assertEquals( + 
block_structure.get_transformer_data(t_info.transformer, key), + val, + ) + for block, block_data in t_info.block_specific_data.iteritems(): + for key, val in block_data: + self.assertEquals( + block_structure.get_transformer_block_field(block, t_info.transformer, key), + val, + ) + + def test_xblock_data(self): + # block test cases + blocks = [ + MockXBlock("A", {}), + MockXBlock("B", {"field1": "B.val1"}), + MockXBlock("C", {"field1": "C.val1", "field2": "C.val2"}), + MockXBlock("D", {"field1": True, "field2": False}), + MockXBlock("E", {"field1": None, "field2": False}), + ] + + # add each block + block_structure = BlockStructureModulestoreData(root_block_usage_key=0) + for block in blocks: + block_structure._add_xblock(block.location, block) + + # request fields + fields = ["field1", "field2", "field3"] + block_structure.request_xblock_fields(*fields) + + # verify fields have not been collected yet + for block in blocks: + for field in fields: + self.assertIsNone(block_structure.get_xblock_field(block.location, field)) + + # collect fields + block_structure._collect_requested_xblock_fields() + + # verify values of collected fields + for block in blocks: + for field in fields: + self.assertEquals( + block_structure.get_xblock_field(block.location, field), + block.field_map.get(field), + ) + + @ddt.data( + *itertools.product( + [True, False], + range(7), + [ + ChildrenMapTestMixin.SIMPLE_CHILDREN_MAP, + ChildrenMapTestMixin.LINEAR_CHILDREN_MAP, + ChildrenMapTestMixin.DAG_CHILDREN_MAP, + ], + ) + ) + @ddt.unpack + def test_remove_block(self, keep_descendants, block_to_remove, children_map): + ### skip test if invalid + if (block_to_remove >= len(children_map)) or (keep_descendants and block_to_remove == 0): + return + + ### create structure + block_structure = self.create_block_structure(BlockStructureBlockData, children_map) + parents_map = self.get_parents_map(children_map) + + ### verify blocks pre-exist + self.assert_block_structure(block_structure, 
children_map) + + ### remove block + block_structure.remove_block(block_to_remove, keep_descendants) + missing_blocks = [block_to_remove] + + ### compute and verify updated children_map + removed_children_map = deepcopy(children_map) + removed_children_map[block_to_remove] = [] + for parent in parents_map[block_to_remove]: + removed_children_map[parent].remove(block_to_remove) + + if keep_descendants: + # update the graph connecting the old parents to the old children + for child in children_map[block_to_remove]: + for parent in parents_map[block_to_remove]: + removed_children_map[parent].append(child) + + self.assert_block_structure(block_structure, removed_children_map, missing_blocks) + + ### prune the structure + block_structure._prune_unreachable() + + ### compute and verify updated children_map + pruned_children_map = deepcopy(removed_children_map) + + if not keep_descendants: + pruned_parents_map = self.get_parents_map(pruned_children_map) + # update all descendants + for child in children_map[block_to_remove]: + # if the child has another parent, continue + if pruned_parents_map[child]: + continue + for block in traverse_post_order(child, get_children=lambda block: pruned_children_map[block]): + # add descendant to missing blocks and empty its + # children + missing_blocks.append(block) + pruned_children_map[block] = [] + + self.assert_block_structure(block_structure, pruned_children_map, missing_blocks) + + def test_remove_block_if(self): + block_structure = self.create_block_structure(BlockStructureBlockData, ChildrenMapTestMixin.LINEAR_CHILDREN_MAP) + block_structure.remove_block_if(lambda block: block == 2) + self.assert_block_structure(block_structure, [[1], [], [], []], missing_blocks=[2]) diff --git a/openedx/core/lib/block_cache/tests/test_block_structure_factory.py b/openedx/core/lib/block_cache/tests/test_block_structure_factory.py new file mode 100644 index 0000000000..e7649bb93a --- /dev/null +++ 
b/openedx/core/lib/block_cache/tests/test_block_structure_factory.py @@ -0,0 +1,114 @@ +""" +Tests for block_structure_factory.py +""" +# pylint: disable=protected-access +from mock import patch +from unittest import TestCase + +from ..block_structure_factory import BlockStructureFactory +from .test_utils import ( + MockCache, MockModulestoreFactory, MockTransformer, ChildrenMapTestMixin +) + + +class TestBlockStructureFactory(TestCase, ChildrenMapTestMixin): + """ + Tests for BlockStructureFactory + """ + def setUp(self): + super(TestBlockStructureFactory, self).setUp() + self.children_map = self.SIMPLE_CHILDREN_MAP + self.modulestore = MockModulestoreFactory.create(self.children_map) + + self.block_structure = BlockStructureFactory.create_from_modulestore( + root_block_usage_key=0, modulestore=self.modulestore + ) + + self.transformers = [MockTransformer] + mock_registry = patch( + 'openedx.core.lib.block_cache.transformer_registry.TransformerRegistry.get_available_plugins' + ) + mock_registry.return_value = {transformer.name(): transformer for transformer in self.transformers} + self.addCleanup(mock_registry.stop) + mock_registry.start() + + def add_transformers(self): + """ + Add each registered transformer to the block structure. + Mimic collection by setting test transformer block data. 
+ """ + for transformer in self.transformers: + self.block_structure._add_transformer(transformer) + self.block_structure.set_transformer_block_field( + usage_key=0, transformer=transformer, key='test', value='{} val'.format(transformer.name()) + ) + + def test_create_from_modulestore(self): + self.assert_block_structure(self.block_structure, self.children_map) + + def test_not_in_cache(self): + cache = MockCache() + + self.assertIsNone( + BlockStructureFactory.create_from_cache( + root_block_usage_key=0, + cache=cache, + transformers=self.transformers, + ) + ) + + def test_uncollected_transformers(self): + cache = MockCache() + + # serialize the structure to cache, but without collecting any transformer data + BlockStructureFactory.serialize_to_cache(self.block_structure, cache) + + with patch('openedx.core.lib.block_cache.block_structure_factory.logger.info') as mock_logger: + # cached data does not have collected information for all registered transformers + self.assertIsNone( + BlockStructureFactory.create_from_cache( + root_block_usage_key=0, + cache=cache, + transformers=self.transformers, + ) + ) + self.assertTrue(mock_logger.called) + + def test_cache(self): + cache = MockCache() + + # collect transformer data + self.add_transformers() + + # serialize to cache + BlockStructureFactory.serialize_to_cache(self.block_structure, cache) + + # test re-create from cache + self.modulestore.get_items_call_count = 0 + from_cache_block_structure = BlockStructureFactory.create_from_cache( + root_block_usage_key=0, + cache=cache, + transformers=self.transformers, + ) + self.assertIsNotNone(from_cache_block_structure) + self.assert_block_structure(from_cache_block_structure, self.children_map) + self.assertEquals(self.modulestore.get_items_call_count, 0) + + def test_remove_from_cache(self): + cache = MockCache() + + # collect transformer data + self.add_transformers() + + # serialize to cache + BlockStructureFactory.serialize_to_cache(self.block_structure, cache) + + # 
remove from cache + BlockStructureFactory.remove_from_cache(root_block_usage_key=0, cache=cache) + self.assertIsNone( + BlockStructureFactory.create_from_cache( + root_block_usage_key=0, + cache=cache, + transformers=self.transformers + ) + ) diff --git a/openedx/core/lib/block_cache/tests/test_transformer_registry.py b/openedx/core/lib/block_cache/tests/test_transformer_registry.py new file mode 100644 index 0000000000..fc7beeb3dc --- /dev/null +++ b/openedx/core/lib/block_cache/tests/test_transformer_registry.py @@ -0,0 +1,68 @@ +""" +Tests for transformer_registry.py +""" + +import ddt +from mock import patch +from unittest import TestCase + +from ..transformer_registry import TransformerRegistry +from .test_utils import MockTransformer + + +class TestTransformer1(MockTransformer): + """ + 1st test instance of the MockTransformer that is registered. + """ + pass + + +class TestTransformer2(MockTransformer): + """ + 2nd test instance of the MockTransformer that is registered. + """ + pass + + +class UnregisteredTestTransformer3(MockTransformer): + """ + 3rd test instance of the MockTransformer that is not registered. + """ + pass + + +@ddt.ddt +class TransformerRegistryTestCase(TestCase): + """ + Test cases for TransformerRegistry. 
+ """ + @ddt.data( + # None case + ([], []), + + # 1 registered + ([TestTransformer1()], []), + + # 2 registered + ([TestTransformer1(), TestTransformer2()], []), + + # 1 unregistered + ([UnregisteredTestTransformer3()], [UnregisteredTestTransformer3.name()]), + + # 1 registered and 1 unregistered + ([TestTransformer1(), UnregisteredTestTransformer3()], [UnregisteredTestTransformer3.name()]), + ) + @ddt.unpack + def test_find_unregistered(self, transformers, expected_unregistered): + + with ( + patch('openedx.core.lib.block_cache.transformer_registry.TransformerRegistry.get_available_plugins') + ) as mock_registry: + mock_registry.return_value = { + transformer.name(): transformer + for transformer in [TestTransformer1, TestTransformer2] + } + + self.assertSetEqual( + TransformerRegistry.find_unregistered(transformers), set(expected_unregistered) + ) diff --git a/openedx/core/lib/block_cache/tests/test_utils.py b/openedx/core/lib/block_cache/tests/test_utils.py new file mode 100644 index 0000000000..114673a0fe --- /dev/null +++ b/openedx/core/lib/block_cache/tests/test_utils.py @@ -0,0 +1,234 @@ +""" +Common utilities for tests in block_cache module +""" +# pylint: disable=protected-access +from ..transformer import BlockStructureTransformer + + +class MockXBlock(object): + """ + A mock XBlock to be used in unit tests, thereby decoupling the + implementation of the block cache framework from the xBlock + implementation. This class provides only the minimum xBlock + capabilities needed by the block cache framework. + """ + def __init__(self, location, field_map=None, children=None, modulestore=None): + self.location = location + self.field_map = field_map or {} + + self.children = children or [] + self.modulestore = modulestore + + def __getattr__(self, attr): + try: + return self.field_map[attr] + except KeyError: + raise AttributeError + + def get_children(self): + """ + Returns the children of the mock XBlock. 
+ """ + return [self.modulestore.get_item(child) for child in self.children] + + +class MockModulestore(object): + """ + A mock Modulestore to be used in unit tests, providing only the + minimum methods needed by the block cache framework. + """ + def __init__(self): + self.get_items_call_count = 0 + self.blocks = None + + def set_blocks(self, blocks): + """ + Updates the mock modulestore with a dictionary of blocks. + + Arguments: + blocks ({block key, MockXBlock}) - A map of block_key + to its mock xBlock. + """ + self.blocks = blocks + + def get_item(self, block_key, depth=None): # pylint: disable=unused-argument + """ + Returns the mock XBlock (MockXBlock) associated with the + given block_key. + """ + self.get_items_call_count += 1 + return self.blocks.get(block_key) + + +class MockCache(object): + """ + A mock Cache object, providing only the minimum features needed + by the block cache framework. + """ + def __init__(self): + # An in-memory map of cache keys to cache values. + self.map = {} + + def set(self, key, val): + """ + Associates the given key with the given value in the cache. + """ + self.map[key] = val + + def get(self, key, default=None): + """ + Returns the value associated with the given key in the cache; + returns default if not found. + """ + return self.map.get(key, default) + + def set_many(self, map_): + """ + For each dictionary entry in the given map, updates the cache + with that entry. + """ + for key, val in map_.iteritems(): + self.set(key, val) + + def get_many(self, keys): + """ + Returns a dictionary of entries for each key found in the cache. + """ + return {key: self.map[key] for key in keys if key in self.map} + + def delete(self, key): + """ + Deletes the given key from the cache. + """ + del self.map[key] + + +class MockModulestoreFactory(object): + """ + A factory for creating MockModulestore objects. 
+ """ + @classmethod + def create(cls, children_map): + """ + Creates and returns a MockModulestore from the given + children_map. + + Arguments: + children_map ({block_key: [block_key]}) - A dictionary + mapping a block key to a list of block keys of the + block's corresponding children. + """ + modulestore = MockModulestore() + modulestore.set_blocks({ + block_key: MockXBlock(block_key, children=children, modulestore=modulestore) + for block_key, children in enumerate(children_map) + }) + return modulestore + + +class MockTransformer(BlockStructureTransformer): + """ + A mock BlockStructureTransformer class. + """ + VERSION = 1 + + @classmethod + def name(cls): + # Use the class' name for Mock transformers. + return cls.__name__ + + def transform(self, usage_info, block_structure): + pass + + +class ChildrenMapTestMixin(object): + """ + A Test Mixin with utility methods for testing with block structures + created and manipulated using children_map and parents_map. + """ + + # 0 + # / \ + # 1 2 + # / \ + # 3 4 + SIMPLE_CHILDREN_MAP = [[1, 2], [3, 4], [], [], []] + + # 0 + # / + # 1 + # / + # 2 + # / + # 3 + LINEAR_CHILDREN_MAP = [[1], [2], [3], []] + + # 0 + # / \ + # 1 2 + # \ / \ + # 3 4 + # / \ + # 5 6 + DAG_CHILDREN_MAP = [[1, 2], [3], [3, 4], [5, 6], [], [], []] + + def create_block_structure(self, block_structure_cls, children_map): + """ + Factory method for creating and returning a block structure + for the given children_map. + """ + # create empty block structure + block_structure = block_structure_cls(root_block_usage_key=0) + + # _add_relation + for parent, children in enumerate(children_map): + for child in children: + block_structure._add_relation(parent, child) + return block_structure + + def get_parents_map(self, children_map): + """ + Converts and returns the given children_map to a parents_map. 
+ """ + parent_map = [[] for _ in children_map] + for parent, children in enumerate(children_map): + for child in children: + parent_map[child].append(parent) + return parent_map + + def assert_block_structure(self, block_structure, children_map, missing_blocks=None): + """ + Verifies that the relations in the given block structure + equate the relations described in the children_map. Use the + missing_blocks parameter to pass in any blocks that were removed + from the block structure but still have a positional entry in + the children_map. + """ + if not missing_blocks: + missing_blocks = [] + + for block_key, children in enumerate(children_map): + # Verify presence + self.assertEquals( + block_structure.has_block(block_key), + block_key not in missing_blocks, + 'Expected presence in block_structure for block_key {} to match absence in missing_blocks.'.format( + unicode(block_key) + ), + ) + + # Verify children + if block_key not in missing_blocks: + self.assertEquals( + set(block_structure.get_children(block_key)), + set(children), + ) + + # Verify parents + parents_map = self.get_parents_map(children_map) + for block_key, parents in enumerate(parents_map): + if block_key not in missing_blocks: + self.assertEquals( + set(block_structure.get_parents(block_key)), + set(parents), + ) diff --git a/openedx/core/lib/block_cache/transformer.py b/openedx/core/lib/block_cache/transformer.py new file mode 100644 index 0000000000..1e175be17a --- /dev/null +++ b/openedx/core/lib/block_cache/transformer.py @@ -0,0 +1,128 @@ +""" +This module provides the abstract base class for all Block Structure +Transformers. +""" +from abc import abstractmethod + + +class BlockStructureTransformer(object): + """ + Abstract base class for all block structure transformers. + """ + + # All Transformers are expected to maintain a VERSION class + # attribute. While the value for the base class is set to 0, + # the value for each concrete transformer should be 1 or higher. 
+ # + # A transformer's version attribute is used by the block_cache + # framework in order to determine whether any collected data for a + # transformer is outdated. When a transformer's data is collected + # and cached, its version number at the time of collection is + # stored along with the data. That version number is then checked + # at the time of accessing the collected data (during the transform + # phase). + # + # The version number of a Transformer should be incremented each + # time the implementation of its collect method is updated such that + # its collected data is changed. + # + VERSION = 0 + + @classmethod + def name(cls): + """ + Unique identifier for the transformer's class. It is used to + identify the transformer's cached data. So it should be unique + and not conflict with other transformers. Consider using the + same name that is used in the Transformer Registry. For example, + for Stevedore, it is specified in the setup.py file. + + Once the transformer is in use and its data is cached, do not + modify this name value without consideration of backward + compatibility with previously collected data. + """ + raise NotImplementedError + + @classmethod + def collect(cls, block_structure): + """ + Collects and stores any xBlock and modulestore data into the + block_structure that's necessary for later execution of the + transformer's transform method. Transformers should store such + data in the block_structure using the following methods: + set_transformer_data + set_transformer_block_field + request_xblock_fields + + Transformers can call block_structure.request_xblock_fields for + any common xBlock fields that should be collected by the + framework. + + Any full block tree traversals should be implemented in this + collect phase, leaving the transform phase for fast and direct + access to a sub-block. If a block's transform output is + dependent on its ancestors' data, the ancestors' data should be + percolated down to the descendants.
So when a (non-root) block + is directly accessed in the transform, all of its relevant data + is readily available (without needing to access its ancestors). + + Traversals of the block_structure can be implemented using the + following methods: + topological_traversal + post_order_traversal + + Arguments: + block_structure (BlockStructureModulestoreData) - A mutable + block structure that is to be modified with collected + data to be cached for the transformer. + """ + pass + + @abstractmethod + def transform(self, usage_info, block_structure): + """ + Transforms the given block_structure for the given usage_info, + assuming the block_structure contains cached data from a prior + call to the collect method of the latest version of the + Transformer. + + No access to the modulestore nor instantiation of xBlocks should + be performed during the execution of this method. However, + access to user-specific data (outside of the modulestore and + not via xBlocks) is permitted in order to apply the transform + for the given usage_info. + + Note: The root of the given block_structure is not necessarily + the same as the root of the block_structure passed to the prior + collect method. The collect method is given the top-most root + of the structure, while the transform method may be called upon + any sub-structure or even a single block within the originally + collected structure. + + A Transformer may choose to remove entire sub-structures during + the transform method and may do so using the remove_block and + remove_block_if methods.
+ + Amongst the many methods available for a block_structure, the + following methods are commonly used during transforms: + get_xblock_field + get_transformer_data + get_transformer_block_field + remove_block + remove_block_if + topological_traversal + post_order_traversal + + Arguments: + usage_info (any negotiated type) - A usage-specific object + that is passed to the block_cache and forwarded to all + requested Transformers in order to apply a + usage-specific transform. For example, an instance of + usage_info would contain a user object for which the + transform should be applied. + + block_structure (BlockStructureBlockData) - A mutable + block structure, with already collected data for the + transformer, that is to be transformed in place. + """ + pass diff --git a/openedx/core/lib/block_cache/transformer_registry.py b/openedx/core/lib/block_cache/transformer_registry.py new file mode 100644 index 0000000000..a24ba72214 --- /dev/null +++ b/openedx/core/lib/block_cache/transformer_registry.py @@ -0,0 +1,46 @@ +""" +Block Structure Transformer Registry implemented using the platform's +PluginManager. +""" +from openedx.core.lib.api.plugins import PluginManager + + +class TransformerRegistry(PluginManager): + """ + Registry for all of the block structure transformers that have been + made available. + + All block structure transformers should implement + `BlockStructureTransformer`. + """ + NAMESPACE = 'openedx.block_structure_transformer' + + @classmethod + def get_registered_transformers(cls): + """ + Returns a set of all registered transformers. + + Returns: + {BlockStructureTransformer} - All transformers that are + registered with the platform's PluginManager. + """ + return set(cls.get_available_plugins().itervalues()) + + @classmethod + def find_unregistered(cls, transformers): + """ + Find and returns the names of all the transformers from the + given list that aren't registered with the platform's + PluginManager. 
+ + Arguments: + transformers ([BlockStructureTransformer]) - List of + transformers to check in the registry. + + Returns: + {string} - The names of a subset of the given + transformers that weren't found in the registry. + """ + registered_transformer_names = set(reg_trans.name() for reg_trans in cls.get_registered_transformers()) + requested_transformer_names = set(transformer.name() for transformer in transformers) + return requested_transformer_names - registered_transformer_names diff --git a/openedx/core/lib/cache_utils.py b/openedx/core/lib/cache_utils.py index 85fc56d638..a1b7388507 100644 --- a/openedx/core/lib/cache_utils.py +++ b/openedx/core/lib/cache_utils.py @@ -1,8 +1,9 @@ """ Utilities related to caching. """ - +import cPickle as pickle import functools +import zlib from xblock.core import XBlock @@ -47,3 +48,13 @@ def hashvalue(arg): return unicode(arg.location) else: return unicode(arg) + + +def zpickle(data): + """Given any data structure, returns a zlib compressed pickled serialization.""" + return zlib.compress(pickle.dumps(data, pickle.HIGHEST_PROTOCOL)) + + +def zunpickle(zdata): + """Given a zlib compressed pickled serialization, returns the deserialized data.""" + return pickle.loads(zlib.decompress(zdata))