Files
edx-platform/scripts/structures_pruning/tests/test_splitmongo.py
Muhammad Farhan Khan 7808913916 chore: Moved structures.py from tubular repository (#34328)
* chore: Moved structures.py from tubular repository
2024-03-12 09:46:34 -04:00

503 lines
19 KiB
Python

"""
Test Structure pruning related Split Mongo code.
IMPORTANT: If you are making changes to this code, please re-enable the
TestSplitMongoBackend tests and run them locally against the MongoDB instance
in your Docker Devstack. See the TestSplitMongoBackend docstring for more info.
"""
import itertools
import sys
import textwrap
import unittest
from datetime import datetime
from io import StringIO
from os import path
from unittest.mock import patch
import ddt
from bson.objectid import ObjectId
from opaque_keys.edx.locator import CourseLocator, LibraryLocator
from pymongo import MongoClient
# Add top-level project path to sys.path before importing scripts code
sys.path.append(path.abspath(path.join(path.dirname(__file__), '../..')))
from scripts.structures_pruning.utils.splitmongo import (
ActiveVersionBranch, ChangePlan, Structure, SplitMongoBackend, StructuresGraph
)
def create_test_graph(*version_histories):
"""
Given any number of lists, where each list represents a history of Structure
IDs from oldest to newest, return a StructureGraph matching that
specification. Course names, branch names, and other attributes that exist
for debugging/reporting but do not change pruning behavior will be
automatically generated with plausible values.
"""
all_structures = {}
all_active_version_branches = []
active_id_pool = ("A{:023x}".format(i) for i in itertools.count(1))
course_key_pool = (
CourseLocator('edx', 'splitmongo', str(i)) for i in itertools.count(1)
)
branch_pool = itertools.cycle(['draft-branch', 'published-branch'])
for version_history in version_histories:
assert version_history # The history can't be empty
structure_ids = [str(version) for version in version_history]
# Create the Original
original_id = structure_ids[0]
history = [Structure(original_id, original_id, None)]
# Create all other Structures (if any)
for previous_id, current_id in zip(structure_ids, structure_ids[1:]):
history.append(Structure(current_id, original_id, previous_id))
# Add to our overall Structures dict (overwrites should be identical or
# our test data is bad).
for structure in history:
if structure.id in all_structures:
assert structure == all_structures[structure.id]
else:
all_structures[structure.id] = structure
active_version_id = structure_ids[-1]
all_active_version_branches.append(
ActiveVersionBranch(
id=next(active_id_pool),
branch=next(branch_pool),
structure_id=active_version_id,
key=next(course_key_pool),
edited_on=datetime(2012, 5, 2)
)
)
return StructuresGraph(all_active_version_branches, all_structures)
@ddt.ddt
class TestCourseChangePlan(unittest.TestCase):
"""
ChangePlans for single and multiple courses.
"""
def test_simple(self):
"""Simple happy path ChangePlans."""
graph = create_test_graph(["1", "2", "3", "4"])
# Preserve no intermediate structures -- prune the middle structures.
plan_no_intermediate = ChangePlan.create(graph, 0, False, False)
self.assertEqual(plan_no_intermediate.delete, ["2", "3"])
self.assertEqual(plan_no_intermediate.update_parents, [("4", "1")])
# Preserve one intermediate structure
plan_1_intermediate = ChangePlan.create(graph, 1, False, False)
self.assertEqual(plan_1_intermediate.delete, ["2"])
self.assertEqual(plan_1_intermediate.update_parents, [("3", "1")])
# Preserve two intermediate structures -- Do nothing
plan_2_intermediate = ChangePlan.create(graph, 2, False, False)
self.assertEqual(plan_2_intermediate.delete, [])
self.assertEqual(plan_2_intermediate.update_parents, [])
@ddt.data(
create_test_graph(["1"]), # Original (is also Active)
create_test_graph(["1", "2"]), # "1" = Original, "2" = Active
)
def test_no_changes(self, graph):
"""These scenarios should result in no Changes."""
plan_1 = ChangePlan.create(graph, 0, False, False)
plan_2 = ChangePlan.create(graph, 2, False, False)
self.assertEqual(plan_1, plan_2)
self.assertEqual(plan_1.delete, [])
self.assertEqual(plan_1.update_parents, [])
def test_overlapping_shared_history(self):
"""Test multiple branches that overlap in what history to preserve."""
graph = create_test_graph(
["1", "2", "3"],
["1", "2", "3", "4", "5"],
["1", "2", "3", "6"],
["1", "2", "7", "8", "9", "10"],
)
plan = ChangePlan.create(graph, 1, False, False)
# We specified only one intermediate structure in each branch should be
# preserved. So why do we only delete "7" and "8" here?
# "1" is the original structure, and will always be preserved.
# "2" is the intermediate structure preserved by the first branch. It
# won't be deleted, even if other branches might want to flag it for
# deletion.
# "3" would be deleted by the second branch, but it's Active in the
# first, and so is preserved. Active Structures are never deleted.
# "4" is preserved by the second branch.
# "5" is the Active Structure for the second branch.
# "6" is the Active Structure for the third branch.
# "7" is marked for deletion by the fourth branch.
# "8" is marked for deletion by the fourth branch.
# "9" is preserved by the fourth branch.
# "10" is the Active Structure for the fourth branch.
self.assertEqual(plan.delete, ["7", "8"])
self.assertEqual(plan.update_parents, [("9", "1")])
def test_non_overlapping_shared_history(self):
"""Test shared history, preserved intermediate set doesn't overlap."""
graph = create_test_graph(
["1", "2", "3"],
["1", "2", "3", "4", "5", "6"],
)
plan = ChangePlan.create(graph, 0, False, False)
self.assertEqual(plan.delete, ["2", "4", "5"])
self.assertEqual(plan.update_parents, [("3", "1"), ("6", "1")])
graph_save_1 = create_test_graph(
["1", "2", "3", "4"],
["1", "2", "3", "4", "5", "6", "7"],
)
plan_save_1 = ChangePlan.create(graph_save_1, 1, False, False)
self.assertEqual(plan_save_1.delete, ["2", "5"])
self.assertEqual(plan_save_1.update_parents, [("3", "1"), ("6", "1")])
def test_details_output(self):
"""Test our details file output."""
graph = create_test_graph(
["1"],
["2", "3"],
["4", "5", "6"]
)
buff = StringIO()
buff.name = "test_file.txt"
plan = ChangePlan.create(graph, 0, False, False, buff)
details_txt = buff.getvalue()
# pylint: disable=line-too-long
expected_output = textwrap.dedent(
"""
== Summary ==
Active Version Branches: 3
Total Structures: 6
Structures to Save: 5
Structures to Delete: 1
Structures to Rewrite Parent Link: 1
== Active Versions ==
Active Version A00000000000000000000001 [2012-05-02 00:00:00] draft-branch for course-v1:edx+splitmongo+1
+ 1 (active) (original)
Active Version A00000000000000000000002 [2012-05-02 00:00:00] published-branch for course-v1:edx+splitmongo+2
+ 3 (active)
+ 2 (original)
Active Version A00000000000000000000003 [2012-05-02 00:00:00] draft-branch for course-v1:edx+splitmongo+3
+ 6 (active) (re-link to original)
- 5
+ 4 (original)
"""
).lstrip()
# pylint: enable=line-too-long
self.assertEqual(expected_output, details_txt)
self.assertEqual(
plan,
ChangePlan(
delete=["5"],
update_parents=[("6", "4")]
)
)
class TestSplitMongoBackendHelpers(unittest.TestCase):
"""
Test the static helper methods of SplitMongoBackend.
Requires no actual database connection.
"""
def test_parse_structure_doc(self):
"""Test basic parsing of Structures."""
original_structure = SplitMongoBackend.parse_structure_doc(
{
'_id': obj_id(1),
'original_version': obj_id(1),
'previous_version': None,
'extra_data': "This is ignored"
}
)
self.assertEqual(
original_structure,
Structure(id=str_id(1), original_id=str_id(1), previous_id=None)
)
self.assertTrue(original_structure.is_original())
other_structure = SplitMongoBackend.parse_structure_doc(
{
'_id': obj_id(2),
'original_version': obj_id(1),
'previous_version': obj_id(1),
'extra_data': "This is ignored"
}
)
self.assertEqual(
other_structure,
Structure(id=str_id(2), original_id=str_id(1), previous_id=str_id(1))
)
self.assertFalse(other_structure.is_original())
def test_batch(self):
"""Test the batch helper that breaks up iterables for DB operations."""
self.assertEqual(
list(SplitMongoBackend.batch([], 1)),
[]
)
self.assertEqual(
list(SplitMongoBackend.batch([1, 2, 3], 1)),
[[1], [2], [3]]
)
self.assertEqual(
list(SplitMongoBackend.batch([1, 2, 3], 2)),
[[1, 2], [3]]
)
self.assertEqual(
list(SplitMongoBackend.batch([1, 2, 3, 4], 2)),
[[1, 2], [3, 4]]
)
def test_iter_from_start(self):
"""Test what we use to resume deletion from a given Structure ID."""
all_ids = [1, 2, 3]
self.assertEqual(
list(SplitMongoBackend.iter_from_start(all_ids, None)),
all_ids
)
self.assertEqual(
list(SplitMongoBackend.iter_from_start(all_ids, 1)),
all_ids
)
self.assertEqual(
list(SplitMongoBackend.iter_from_start(all_ids, 2)),
[2, 3]
)
self.assertEqual(
list(SplitMongoBackend.iter_from_start(all_ids, 3)),
[3]
)
self.assertEqual(
list(SplitMongoBackend.iter_from_start(all_ids, 4)),
[]
)
@unittest.skip("Requires local MongoDB instance (run manually).")
class TestSplitMongoBackend(unittest.TestCase):
"""
Tests the MongoDB-specific portions of the code.
These tests should be about simple read/write from the database. Complex
trees of Structures can be created and tested in TestSingleCourseChangePlan
without invoking the database.
These tests will be disabled by default because I didn't want to add MongoDB
as a test-time dependency for tubular, and the only decent looking MongoDB
mocking library I could find was no longer being maintained. Given how
isolated Split Mongo related code is in tubular (nothing else touches it),
the main danger of breakage comes from file format changes in edx-platform,
which automated testing at this level wouldn't catch anyway.
So basically, if you want to work on this code, please run these tests
locally by spinning up the MongoDB server used for Docker Devstack and
commenting out the unittest.skip decorator above.
"""
CONNECT_STR = "mongodb://localhost:27017"
DATABASE_NAME = "splitmongo_test"
def setUp(self):
"""Clear our test MongoDB instance of data."""
super().setUp()
self.client = MongoClient(self.CONNECT_STR)
database = self.client[self.DATABASE_NAME]
# Remove anything that might have been there from a previous test.
database.drop_collection('modulestore.active_versions')
database.drop_collection('modulestore.structures')
# Convenince pointers to our collections.
self.active_versions = database['modulestore.active_versions']
self.structures = database['modulestore.structures']
# The backend we should use in our tests for querying.
self.backend = SplitMongoBackend(self.CONNECT_STR, self.DATABASE_NAME)
self.seed_data()
def seed_data(self):
"""Create a Course and Library."""
structure_docs = [
# Branch 1
dict(_id=obj_id(1), original_version=obj_id(1), previous_version=None),
dict(_id=obj_id(2), original_version=obj_id(1), previous_version=obj_id(1)),
dict(_id=obj_id(3), original_version=obj_id(1), previous_version=obj_id(2)),
dict(_id=obj_id(4), original_version=obj_id(1), previous_version=obj_id(3)),
# Branch 2
dict(_id=obj_id(10), original_version=obj_id(10), previous_version=None),
dict(_id=obj_id(11), original_version=obj_id(10), previous_version=obj_id(10)),
# Branch 3
dict(_id=obj_id(20), original_version=obj_id(20), previous_version=None),
]
active_versions_docs = [
{
'_id': obj_id(100),
'edited_on': datetime(2012, 5, 2),
'org': 'edx',
'course': 'split_course',
'run': '2017',
'versions': {
'draft-branch': obj_id(4),
'published-branch': obj_id(11)
}
},
{
'_id': obj_id(101),
'edited_on': datetime(2012, 5, 3),
'org': 'edx',
'course': 'split_library',
'run': 'library',
'versions': {
'library': obj_id(20),
}
}
]
self.structures.insert_many(structure_docs)
self.active_versions.insert_many(active_versions_docs)
def test_structures_graph(self):
"""Test pulling a full graph out."""
graph = self.backend.structures_graph(0, 100)
self.assertEqual(
graph.branches,
[
ActiveVersionBranch(
id=str_id(100),
branch='draft-branch',
structure_id=str_id(4),
key=CourseLocator('edx', 'split_course', '2017'),
edited_on=datetime(2012, 5, 2),
),
ActiveVersionBranch(
id=str_id(100),
branch='published-branch',
structure_id=str_id(11),
key=CourseLocator('edx', 'split_course', '2017'),
edited_on=datetime(2012, 5, 2),
),
ActiveVersionBranch(
id=str_id(101),
branch='library',
structure_id=str_id(20),
key=LibraryLocator('edx', 'split_library'),
edited_on=datetime(2012, 5, 3),
),
]
)
self.assertEqual(
list(graph.structures.keys()),
[str_id(i) for i in [1, 2, 3, 4, 10, 11, 20]]
)
def test_update(self):
"""Execute a simple update."""
self.backend.update(
ChangePlan(
delete=[str_id(i) for i in [2, 3]],
update_parents=[(str_id(4), str_id(1))]
),
delay=0
)
graph = self.backend.structures_graph(0, 100)
self.assertEqual(
list(graph.structures.keys()),
[str_id(i) for i in [1, 4, 10, 11, 20]]
)
self.assertEqual(
graph.structures,
{
str_id(1): Structure(id=str_id(1), original_id=str_id(1), previous_id=None),
# This one got its previous_id rewritten from 3 -> 1
str_id(4): Structure(id=str_id(4), original_id=str_id(1), previous_id=str_id(1)),
str_id(10): Structure(id=str_id(10), original_id=str_id(10), previous_id=None),
str_id(11): Structure(id=str_id(11), original_id=str_id(10), previous_id=str_id(10)),
str_id(20): Structure(id=str_id(20), original_id=str_id(20), previous_id=None),
}
)
def test_race_condition(self):
"""Create new Structures are during ChangePlan creation."""
# Get the real method before we patch it...
real_all_structures_fn = SplitMongoBackend._all_structures # pylint: disable=protected-access
def add_structures(backend, delay, batch_size):
"""Do what _all_structures() would do, then add new Structures."""
structures = real_all_structures_fn(backend, delay, batch_size)
# Create new Structures
self.structures.insert_one(
dict(_id=obj_id(5), original_version=obj_id(1), previous_version=obj_id(4)),
)
self.structures.insert_one(
dict(_id=obj_id(6), original_version=obj_id(1), previous_version=obj_id(5)),
)
self.structures.insert_one(
dict(_id=obj_id(7), original_version=obj_id(1), previous_version=obj_id(6)),
)
# Update the Draft branch of course-v1:edx+split_course+2017 to
# point to one of the new Structures
self.active_versions.update_one(
{'_id': obj_id(100)},
{'$set': {'versions.draft-branch': obj_id(5)}}
)
# Create an entirely new ActiveVersion and point it to the newest
# Structure.
self.active_versions.insert_one(
{
'_id': obj_id(102),
'edited_on': datetime(2012, 5, 3),
'org': 'edx',
'course': 'split_library_race',
'run': 'library',
'versions': {
'library': obj_id(7),
}
}
)
return structures
with patch.object(SplitMongoBackend, '_all_structures', autospec=True) as all_structures_mock:
all_structures_mock.side_effect = add_structures
graph = self.backend.structures_graph(0, 100)
self.assertEqual(len(graph.structures), 10)
self.assertEqual(len(graph.branches), 4)
plan = ChangePlan.create(graph, 0, False, False)
self.assertNotIn(str_id(5), plan.delete) # Active updated to this for our course.
self.assertNotIn(str_id(7), plan.delete) # Active for our new Library
self.assertIn(str_id(4), plan.delete) # Was our Active before
self.assertIn(str_id(6), plan.delete) # Intermediate structure to new Library
def str_id(int_id):
"""Return the string version of Object IDs that PyMongo will accept."""
return "{:024}".format(int_id)
def obj_id(int_id):
"""Helper to create Object IDs that PyMongo will accept."""
return ObjectId(str_id(int_id))