From ca1265a679efe56e939e6da9cc15286c9e58fab6 Mon Sep 17 00:00:00 2001 From: Jeremy Bowman Date: Wed, 11 Dec 2019 14:37:07 -0500 Subject: [PATCH] Make XModuleDescriptor hashable again (#22501) --- common/lib/xmodule/xmodule/x_module.py | 11 +++++++++++ openedx/core/lib/graph_traversals.py | 21 +++++++++------------ 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/common/lib/xmodule/xmodule/x_module.py b/common/lib/xmodule/xmodule/x_module.py index 4ec74f62f5..051e9cd6e6 100644 --- a/common/lib/xmodule/xmodule/x_module.py +++ b/common/lib/xmodule/xmodule/x_module.py @@ -1263,6 +1263,17 @@ class XModuleDescriptor(XModuleDescriptorToXBlockMixin, HTMLSnippet, ResourceTem all(getattr(self, field.name) == getattr(other, field.name) for field in self.fields.values())) + def __hash__(self): # pylint: disable=useless-super-delegation + """ + This isn't technically appropriate since descriptors are actually mutable, + but in practice we rarely modify them after creation or instantiate two + equivalent descriptors in the same process. And we perform graph + operations on large collections of XBlocks that have simply unacceptable + performance if we have to rely on lists and equality rather than sets, + dictionaries, and identity-based hash functions. + """ + return super(XModuleDescriptor, self).__hash__() + def __repr__(self): return ( "{0.__class__.__name__}(" diff --git a/openedx/core/lib/graph_traversals.py b/openedx/core/lib/graph_traversals.py index 3f73802db5..d7810567de 100644 --- a/openedx/core/lib/graph_traversals.py +++ b/openedx/core/lib/graph_traversals.py @@ -127,7 +127,7 @@ def traverse_topologically( used to limit which nodes are actually yielded. Arguments: - start_node (any type) - The starting node for the + start_node (any hashable type) - The starting node for the traversal. get_parents (node->[node]) - Function that returns a list of @@ -204,7 +204,7 @@ def traverse_post_order(start_node, get_children, filter_func=None): stack = deque([_Node(start_node, get_children)]) # Keep track of which nodes have been visited. - visited = [] + visited = set() while stack: # Peek at the current node at the top of the stack. @@ -226,7 +226,7 @@ def traverse_post_order(start_node, get_children, filter_func=None): # Since there are no children left, visit the node and # remove it from the stack. yield current.node - visited.append(current.node) + visited.add(current.node) stack.pop() else: @@ -264,8 +264,7 @@ def _traverse_generic( # Keep track of which nodes have been visited and whether they # were in fact yielded. - visited = [] # nodes - yield_results = [] # booleans + yield_results = {} # dict(node:boolean) # While there are more nodes on the stack... while stack: @@ -281,17 +280,16 @@ def _traverse_generic( parents = get_parents(current_node) # If all of the parents have not yet been visited, continue. - if not all(parent in visited for parent in parents): + if not all(parent in yield_results for parent in parents): continue # If none of the parents have yielded, continue, unless # specified otherwise (via yield_descendants_of_unyielded). - elif not yield_descendants_of_unyielded and not any( - yield_results[visited.index(parent)] for parent in parents): + elif not yield_descendants_of_unyielded and not any(yield_results[parent] for parent in parents): continue # If the current node has already been visited, continue. - if current_node not in visited: + if current_node not in yield_results: # For a topological sort, it's important that we visit # the children even if the parent isn't yielded, in case @@ -318,7 +316,7 @@ def _traverse_generic( unvisited_children = list( child for child in get_children(current_node) - if child not in visited + if child not in yield_results ) # Add the node's unvisited children to the stack in reverse @@ -334,5 +332,4 @@ def _traverse_generic( # Keep track of whether or not the node was yielded so we # know whether or not to yield its children. - visited.append(current_node) - yield_results.append(should_yield_node) + yield_results[current_node] = should_yield_node