feat: mgmt cmd to replace v1 libr refs in courses (#32904)

This PR adds a management command that, given a mapping of V1 content libraries to matching V2 content libraries, replaces references to V1 libs in courses (in library source xblocks) with V2 libraries. It does so by manipulating the mongo document directly.

It also offers some improvements to the management command which copies all V1 libraries into V2 libraries.
This commit is contained in:
connorhaugh
2023-08-11 09:48:34 -04:00
committed by GitHub
parent 1b35bf716e
commit 4b38b1f750
4 changed files with 290 additions and 27 deletions

View File

@@ -1,6 +1,7 @@
"""A Command to Copy or uncopy V1 Content Libraries entires to be stored as v2 content libraries."""
import logging
import csv
from textwrap import dedent
from django.core.management import BaseCommand, CommandError
@@ -28,15 +29,13 @@ class Command(BaseCommand):
and -- file followed by the path for a list of libraries from a file.
Example usage:
$ ./manage.py cms copy_libraries_from_v1_to_v2 'collection_uuid' --all
$ ./manage.py cms copy_libraries_from_v1_to_v2 'collection_uuid' --all --uncopy
$ ./manage.py cms copy_libraries_from_v1_to_v2 'collection_uuid 'library-v1:edX+DemoX+Better_Library'
$ ./manage.py cms copy_libraries_from_v1_to_v2 'collection_uuid 'library-v1:edX+DemoX+Better_Library' --uncopy
$ ./manage.py cms copy_libraries_from_v1_to_v2
library-v1:edX+DemoX+Demo_Library' 'library-v1:edX+DemoX+Better_Library' -c 'collection_uuid'
$ ./manage.py cms copy_libraries_from_v1_to_v2 --all --uncopy
$ ./manage.py cms copy_libraries_from_v1_to_v2 'library-v1:edX+DemoX+Better_Library' --uncopy
$ ./manage.py cms copy_libraries_from_v1_to_v2
'11111111-2111-4111-8111-111111111111'
'./list_of--library-locators- --file
'11111111-2111-4111-8111-111111111111'
'./list_of--library-locators.csv --all
Note:
This Command Also produces an "output file" which contains the mapping of locators and the status of the copy.
@@ -49,17 +48,18 @@ class Command(BaseCommand):
"""arguements for command"""
parser.add_argument(
'-collection_uuid',
'-c',
nargs=1,
'collection_uuid',
type=str,
help='the uuid for the collection to create the content library in.'
)
parser.add_argument(
'library_ids',
nargs='*',
help='a space-seperated list of v1 library ids to copy'
'output_csv',
type=str,
nargs='?',
default=None,
help='a file path to write the tasks output to. Without this the result is simply logged.'
)
parser.add_argument(
'--all',
action='store_true',
@@ -72,12 +72,11 @@ class Command(BaseCommand):
dest='uncopy',
help='Delete libraries specified'
)
parser.add_argument(
'output_csv',
nargs='?',
default=None,
help='a file path to write the tasks output to. Without this the result is simply logged.'
'library_ids',
nargs='*',
default=[],
help='a space-seperated list of v1 library ids to copy'
)
def _parse_library_key(self, raw_value):
@@ -90,10 +89,6 @@ class Command(BaseCommand):
def handle(self, *args, **options): # lint-amnesty, pylint: disable=unused-argument
"""Parse args and generate tasks for copying content."""
print(options)
if (not options['library_ids'] and not options['all']) or (options['library_ids'] and options['all']):
raise CommandError("copy_libraries_from_v1_to_v2 requires one or more <library_id>s or the --all flag.")
if (not options['library_ids'] and not options['all']) or (options['library_ids'] and options['all']):
raise CommandError("copy_libraries_from_v1_to_v2 requires one or more <library_id>s or the --all flag.")
@@ -110,16 +105,17 @@ class Command(BaseCommand):
v1_library_keys = list(map(self._parse_library_key, options['library_ids']))
create_library_task_group = group([
delete_v2_library_from_v1_library.s(str(v1_library_key), options['collection_uuid'][0])
delete_v2_library_from_v1_library.s(str(v1_library_key), options['collection_uuid'])
if options['uncopy']
else create_v2_library_from_v1_library.s(str(v1_library_key), options['collection_uuid'][0])
else create_v2_library_from_v1_library.s(str(v1_library_key), options['collection_uuid'])
for v1_library_key in v1_library_keys
])
group_result = create_library_task_group.apply_async().get()
if options['output_csv']:
with open(options['output_csv'][0], 'w', encoding='utf-8', newline='') as output_writer:
output_writer.writerow("v1_library_id", "v2_library_id", "status", "error_msg")
with open(options['output_csv'], 'w', encoding='utf-8', newline='') as file:
output_writer = csv.writer(file)
output_writer.writerow(["v1_library_id", "v2_library_id", "status", "error_msg"])
for result in group_result:
output_writer.write(result.keys())
output_writer.writerow(result.values())
log.info(group_result)

View File

@@ -0,0 +1,125 @@
"""
A Command which, given a mapping of V1 to V2 Libraries,
edits all xblocks in courses which refer to the v1 library to point to the v2 library.
"""
import logging
import csv
from django.core.management import BaseCommand, CommandError
from celery import group
from openedx.core.djangoapps.content.course_overviews.models import CourseOverview
from cms.djangoapps.contentstore.tasks import (
replace_all_library_source_blocks_ids_for_course,
validate_all_library_source_blocks_ids_for_course,
undo_all_library_source_blocks_ids_for_course
)
log = logging.getLogger(__name__)
class Command(BaseCommand):
    """
    Replace references to V1 libraries in courses with references to V2 libraries.

    Reads a CSV file whose rows map a V1 library id (column 1) to a V2 library
    id (column 2), then fans the work out to one celery task per course.

    Example usage:
    $ ./manage.py cms replace_v1_lib_refs_with_v2_in_courses '/path/to/library_mappings.csv'
    $ ./manage.py cms replace_v1_lib_refs_with_v2_in_courses '/path/to/library_mappings.csv' --validate
    $ ./manage.py cms replace_v1_lib_refs_with_v2_in_courses '/path/to/library_mappings.csv' --undo
    """

    def add_arguments(self, parser):
        """Register the CSV path and the mutually-exclusive mode flags."""
        parser.add_argument('file_path', type=str, help='Path to the CSV file.')
        parser.add_argument('--validate', action='store_true', help='Validate previous runs of the command')
        # NOTE: the original help text here was a copy-paste of --validate's.
        parser.add_argument('--undo', action='store_true', help='Undo previous runs of the command')

    def replace_all_library_source_blocks_ids(self, v1_to_v2_lib_map):
        """A method to replace 'source_library_id' in all relevant blocks."""
        courses = CourseOverview.get_all_courses()
        # Use Celery to distribute the workload
        tasks = group(
            replace_all_library_source_blocks_ids_for_course.s(
                course,
                v1_to_v2_lib_map
            )
            for course in courses
        )
        results = tasks.apply_async()
        for result in results.get():
            if isinstance(result, Exception):
                # Handle the task failure here
                log.error("Task failed with error: %s", str(result))
                continue
        log.info(
            "Completed replacing all v1 library source ids with v2 library source ids"
        )

    def validate(self, v1_to_v2_lib_map):
        """Validate that replace_all_library_source_blocks_ids was successful."""
        courses = CourseOverview.get_all_courses()
        tasks = group(
            validate_all_library_source_blocks_ids_for_course.s(course, v1_to_v2_lib_map)
            for course in courses
        )
        results = tasks.apply_async()
        validation = set()
        for result in results.get():
            if isinstance(result, Exception):
                # Handle the task failure here
                log.error("Task failed with error: %s", str(result))
                continue
            else:
                validation.update(result)
        if validation.issubset(v1_to_v2_lib_map.values()):
            log.info("Validation: All values in the input map are present in courses.")
        else:
            log.info(
                "Validation Failed: There are unmapped v1 libraries."
            )

    def undo(self, v1_to_v2_lib_map):
        """Undo the changes made by replace_all_library_source_blocks_ids."""
        courses = CourseOverview.get_all_courses()
        # Use Celery to distribute the workload
        tasks = group(undo_all_library_source_blocks_ids_for_course.s(course, v1_to_v2_lib_map) for course in courses)
        results = tasks.apply_async()
        for result in results.get():
            if isinstance(result, Exception):
                # Handle the task failure here
                log.error("Task failed with error: %s", str(result))
                continue
        log.info("Completed replacing all v2 library source ids with v1 library source ids. Undo Complete")

    def handle(self, *args, **kwargs):
        """Parse arguments, load the mapping CSV, and dispatch the requested action."""
        file_path = kwargs['file_path']
        # Reject non-CSV paths before touching the filesystem.
        if not file_path.endswith('.csv'):
            raise CommandError('Invalid file format. Only CSV files are supported.')

        v1_to_v2_lib_map = {}
        try:
            with open(file_path, 'r', encoding='utf-8') as csvfile:
                csv_reader = csv.reader(csvfile)
                for row in csv_reader:
                    # Rows shorter than two columns are silently skipped.
                    if len(row) >= 2:
                        key = row[0].strip()
                        value = row[1].strip()
                        v1_to_v2_lib_map[key] = value
            print("Data successfully imported as dictionary:")
        except FileNotFoundError:
            log.error("File not found at '%s'.", file_path)
            # Abort instead of proceeding with an empty map, which would
            # silently dispatch no-op (or destructive) tasks to every course.
            raise CommandError(f"File not found at '{file_path}'.")
        except Exception as e:  # lint-amnesty, pylint: disable=broad-except
            log.error("An error occurred: %s", str(e))
            raise CommandError(f'An error occurred reading {file_path}: {e}')

        # The three modes are mutually exclusive. Previously --validate also
        # triggered a replacement run because the flags were checked with
        # independent `if` statements.
        if kwargs['validate']:
            self.validate(v1_to_v2_lib_map)
        elif kwargs['undo']:
            self.undo(v1_to_v2_lib_map)
        else:
            self.replace_all_library_source_blocks_ids(v1_to_v2_lib_map)

View File

@@ -868,6 +868,8 @@ def _create_copy_content_task(v2_library_key, v1_library_key):
def _create_metadata(v1_library_key, collection_uuid):
"""instansiate an index for the V2 lib in the collection"""
print(collection_uuid)
store = modulestore()
v1_library = store.get_library(v1_library_key)
collection = get_collection(collection_uuid).uuid
@@ -1000,3 +1002,142 @@ def delete_v1_library(v1_library_key_string):
"status": "SUCCESS",
"msg": "SUCCESS"
}
@shared_task(time_limit=30)
@set_code_owner_attribute
def validate_all_library_source_blocks_ids_for_course(course, v1_to_v2_lib_map):
    """Validate that every library source block in `course` references a mapped V2 library.

    Queries mongo (draft and published branches) for blocks that define
    `source_library_id` and raises if any id is not one of the V2 values in
    `v1_to_v2_lib_map`. The original docstring wrongly said this task replaces
    ids; it only checks them.

    Returns the list of source_library_ids visited across both branches.
    """
    store = modulestore()
    with store.bulk_operations(course.id):
        visited = []
        for branch in [ModuleStoreEnum.BranchName.draft, ModuleStoreEnum.BranchName.published]:
            blocks = store.get_items(
                course.id.for_branch(branch),
                settings={'source_library_id': {'$exists': True}}
            )
            for xblock in blocks:
                if xblock.source_library_id not in v1_to_v2_lib_map.values():
                    raise Exception(  # lint-amnesty, pylint: disable=broad-exception-raised
                        f'{xblock.source_library_id} in {course.id} is not found in mapping. Validation failed'
                    )
                visited.append(xblock.source_library_id)
    # return success
    return visited
@shared_task(time_limit=30)
@set_code_owner_attribute
def replace_all_library_source_blocks_ids_for_course(course, v1_to_v2_lib_map):
    """Replace V1 `source_library_id`s with their V2 counterparts in one course.

    Searches the modulestore (draft and published branches) for all library
    source blocks by querying mongo, then rewrites each block's
    `source_library_id` using `v1_to_v2_lib_map`. Ids missing from the map are
    logged and skipped.

    The published branch is updated as well as the draft branch so that
    "discard changes" does not revert authors back to the V1 library; this
    triggers a publish for every published library source block, but draft-only
    changes are not published.
    """
    store = modulestore()
    with store.bulk_operations(course.id):
        draft_blocks, published_blocks = [
            store.get_items(
                course.id.for_branch(branch),
                settings={'source_library_id': {'$exists': True}}
            )
            for branch in [ModuleStoreEnum.BranchName.draft, ModuleStoreEnum.BranchName.published]
        ]
        published_dict = {block.location: block for block in published_blocks}
        for draft_library_source_block in draft_blocks:
            try:
                new_source_id = str(v1_to_v2_lib_map[draft_library_source_block.source_library_id])
            except KeyError:
                # Skip ids that have no V2 mapping.
                LOGGER.error(
                    'Key %s not found in mapping. Skipping block for course %s',
                    str(draft_library_source_block.source_library_id),
                    str(course.id)
                )
                continue
            published_block = published_dict.get(draft_library_source_block.location)
            if published_block is not None:
                # Update the published version and publish it, so the published
                # branch also points at the V2 library.
                published_block.source_library_id = new_source_id
                store.update_item(published_block, None)
                store.publish(published_block.location, None)
            else:
                # Warn, but still update the draft block when there is no
                # matching published block.
                LOGGER.warning(
                    'No matching published block for draft block %s',
                    str(draft_library_source_block.location)
                )
            # The draft block is updated in either case (previously this code
            # was duplicated in both the try and except paths).
            draft_library_source_block.source_library_id = new_source_id
            store.update_item(draft_library_source_block, None)
    # return success
    return
@shared_task(time_limit=30)
@set_code_owner_attribute
def undo_all_library_source_blocks_ids_for_course(course, v1_to_v2_lib_map):
    """Revert V2 `source_library_id`s back to their V1 values in one course.

    Inverts `v1_to_v2_lib_map` and rewrites every library source block found by
    querying mongo (draft and published branches). This exists to undo changes
    made previously by `replace_all_library_source_blocks_ids_for_course`.
    Ids missing from the inverted map are logged and skipped.
    """
    v2_to_v1_lib_map = {v: k for k, v in v1_to_v2_lib_map.items()}
    store = modulestore()
    # Wrapped in bulk_operations for consistency with the replace task
    # (the original undo task was the only one missing this wrapper).
    with store.bulk_operations(course.id):
        draft_blocks, published_blocks = [
            store.get_items(
                course.id.for_branch(branch),
                settings={'source_library_id': {'$exists': True}}
            )
            for branch in [ModuleStoreEnum.BranchName.draft, ModuleStoreEnum.BranchName.published]
        ]
        published_dict = {block.location: block for block in published_blocks}
        for draft_library_source_block in draft_blocks:
            try:
                new_source_id = str(v2_to_v1_lib_map[draft_library_source_block.source_library_id])
            except KeyError:
                # Skip ids that have no V1 mapping.
                LOGGER.error(
                    'Key %s not found in mapping. Skipping block for course %s',
                    str(draft_library_source_block.source_library_id),
                    str(course.id)
                )
                continue
            published_block = published_dict.get(draft_library_source_block.location)
            if published_block is not None:
                # Update the published version and publish it, so the published
                # branch is also reverted; otherwise "discard changes" would
                # leave it pointing at the V2 library.
                published_block.source_library_id = new_source_id
                store.update_item(published_block, None)
                store.publish(published_block.location, None)
            else:
                # Warn, but still update the draft block when there is no
                # matching published block.
                LOGGER.warning(
                    'No matching published block for draft block %s',
                    str(draft_library_source_block.location)
                )
            # The draft block is updated in either case.
            draft_library_source_block.source_library_id = new_source_id
            store.update_item(draft_library_source_block, None)
    # return success
    return

View File

@@ -1951,6 +1951,7 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
The implementation tries to detect which, if any changes, actually need to be saved and thus won't version
the definition, structure, nor course if they didn't change.
"""
partitioned_fields = self.partition_xblock_fields_by_scope(block)
definition_locator = getattr(block, "definition_locator", None)
if definition_locator is None and not allow_not_found: