1695 lines
64 KiB
Python
1695 lines
64 KiB
Python
"""
|
|
This file contains celery tasks for contentstore views
|
|
"""
|
|
|
|
import asyncio
|
|
import base64
|
|
import json
|
|
import os
|
|
import re
|
|
import shutil
|
|
import tarfile
|
|
from datetime import datetime, timezone
|
|
from importlib.metadata import entry_points
|
|
from tempfile import NamedTemporaryFile, mkdtemp
|
|
|
|
import aiohttp
|
|
import olxcleaner
|
|
from ccx_keys.locator import CCXLocator
|
|
from celery import shared_task
|
|
from celery.utils.log import get_task_logger
|
|
from django.conf import settings
|
|
from django.contrib.auth import get_user_model
|
|
from django.core.exceptions import SuspiciousOperation
|
|
from django.core.files import File
|
|
from django.test import RequestFactory
|
|
from django.utils.text import get_valid_filename
|
|
from edx_django_utils.monitoring import (
|
|
set_code_owner_attribute,
|
|
set_code_owner_attribute_from_module,
|
|
set_custom_attribute,
|
|
set_custom_attributes_for_course_key
|
|
)
|
|
from olxcleaner.exceptions import ErrorLevel
|
|
from olxcleaner.reporting import report_error_summary, report_errors
|
|
from opaque_keys import InvalidKeyError
|
|
from opaque_keys.edx.keys import CourseKey, UsageKey
|
|
from opaque_keys.edx.locator import LibraryContainerLocator, LibraryLocator
|
|
from organizations.api import add_organization_course, ensure_organization
|
|
from organizations.exceptions import InvalidOrganizationException
|
|
from organizations.models import Organization
|
|
from path import Path as path
|
|
from pytz import UTC
|
|
from user_tasks.models import UserTaskArtifact, UserTaskStatus
|
|
from user_tasks.tasks import UserTask
|
|
|
|
import cms.djangoapps.contentstore.errors as UserErrors
|
|
from cms.djangoapps.contentstore.courseware_index import (
|
|
CoursewareSearchIndexer,
|
|
LibrarySearchIndexer,
|
|
SearchIndexingError
|
|
)
|
|
from cms.djangoapps.contentstore.storage import course_import_export_storage
|
|
from cms.djangoapps.contentstore.toggles import enable_course_optimizer_check_prev_run_links
|
|
from cms.djangoapps.contentstore.utils import (
|
|
IMPORTABLE_FILE_TYPES,
|
|
contains_previous_course_reference,
|
|
get_previous_run_course_key,
|
|
create_or_update_xblock_upstream_link,
|
|
delete_course,
|
|
initialize_permissions,
|
|
reverse_usage_url,
|
|
translation_language
|
|
)
|
|
from cms.djangoapps.contentstore.xblock_storage_handlers.view_handlers import get_block_info
|
|
from cms.djangoapps.models.settings.course_metadata import CourseMetadata
|
|
from cms.djangoapps.contentstore.utils import create_course_info_usage_key
|
|
from common.djangoapps.course_action_state.models import CourseRerunState
|
|
from common.djangoapps.static_replace import replace_static_urls
|
|
from common.djangoapps.student.auth import has_course_author_access
|
|
from common.djangoapps.student.roles import CourseInstructorRole, CourseStaffRole, LibraryUserRole
|
|
from common.djangoapps.util.monitoring import monitor_import_failure
|
|
from openedx.core.djangoapps.content.learning_sequences.api import key_supports_outlines
|
|
from openedx.core.djangoapps.content_libraries import api as v2contentlib_api
|
|
from openedx.core.djangoapps.content_tagging.api import make_copied_tags_editable
|
|
from openedx.core.djangoapps.course_apps.toggles import exams_ida_enabled
|
|
from openedx.core.djangoapps.discussions.config.waffle import ENABLE_NEW_STRUCTURE_DISCUSSIONS
|
|
from openedx.core.djangoapps.discussions.models import DiscussionsConfiguration, Provider
|
|
from openedx.core.djangoapps.discussions.tasks import update_unit_discussion_state_from_discussion_blocks
|
|
from openedx.core.djangoapps.embargo.models import CountryAccessRule, RestrictedCourse
|
|
from openedx.core.lib import ensure_cms
|
|
from openedx.core.lib.extract_archive import safe_extractall
|
|
from openedx.core.lib.xblock_utils import get_course_update_items
|
|
from xmodule.contentstore.django import contentstore
|
|
from xmodule.course_block import CourseFields
|
|
from xmodule.exceptions import SerializationError
|
|
from xmodule.modulestore import COURSE_ROOT, LIBRARY_ROOT, ModuleStoreEnum
|
|
from xmodule.modulestore.django import modulestore
|
|
from xmodule.modulestore.exceptions import DuplicateCourseError, InvalidProctoringProvider, ItemNotFoundError
|
|
from xmodule.modulestore.xml_exporter import export_course_to_xml, export_library_to_xml
|
|
from xmodule.modulestore.xml_importer import CourseImportException, import_course_from_xml, import_library_from_xml
|
|
from xmodule.tabs import StaticTab
|
|
|
|
from .models import ComponentLink, ContainerLink, LearningContextLinksStatus, LearningContextLinksStatusChoices
|
|
from .outlines import update_outline_from_modulestore
|
|
from .outlines_regenerate import CourseOutlineRegenerate
|
|
from .toggles import bypass_olx_failure_enabled
|
|
from .utils import course_import_olx_validation_is_enabled
|
|
|
|
# The active Django user model, resolved once at import time.
User = get_user_model()

# Celery task logger shared by every task and helper in this module.
LOGGER = get_task_logger(__name__)
# Number of bytes read per iteration when import_olx copies an uploaded archive out of storage.
FILE_READ_CHUNK = 1024  # bytes
# NOTE(review): not referenced in the visible part of this module; meaning of the value 1
# (presumably a count that triggers a full reindex) should be confirmed against its users.
FULL_COURSE_REINDEX_THRESHOLD = 1
# Names of all entry points registered under the "xblock.v1" group; handed to
# olxcleaner.validate() as the set of allowed XBlocks during OLX validation.
ALL_ALLOWED_XBLOCKS = frozenset(
    [entry_point.name for entry_point in entry_points(group="xblock.v1")]
)
# Browser-like HTTP request headers.
# NOTE(review): presumably sent with outbound link-check requests; the consumer is not
# in the visible part of this module — confirm.
DEFAULT_HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/115.0.0.0 Safari/537.36"
    ),
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Connection": "keep-alive",
}
|
|
|
|
|
|
class LinkState:
    """
    Namespace of string constants identifying the possible link states.

    The attributes are plain strings, so they compare equal to (and serialize as)
    their literal values.
    """
    BROKEN = "broken"
    LOCKED = "locked"
    EXTERNAL_FORBIDDEN = "external-forbidden"
    PREVIOUS_RUN = "previous-run"
|
|
|
|
|
|
def clone_instance(instance, field_values):
    """
    Clone a Django model instance by re-saving it under a new primary key.

    The primary key of ``instance`` is cleared, the given field overrides are
    applied, and the object is saved. Note that the *same* Python object is
    mutated and returned; no separate copy of it is created in memory.

    Arguments:
        instance (Model): Instance of a Django model.
        field_values (dict): Map of field names to new values.

    Returns:
        Model: The passed-in instance, after it has been saved.
    """
    # Clearing the pk before save() is what makes the save store a new row.
    instance.pk = None

    for attribute_name, new_value in field_values.items():
        setattr(instance, attribute_name, new_value)

    instance.save()
    return instance
|
|
|
|
|
|
@shared_task
@set_code_owner_attribute
def rerun_course(source_course_key_string, destination_course_key_string, user_id, fields=None):
    """
    Reruns a course in a new celery task.

    Clones the source course into the destination key using the split modulestore,
    then updates discussion state, grants the user initial permissions, marks the
    rerun as succeeded, copies videos, copies any embargo restriction (with its
    country access rules) and links the new course to its organization.

    Arguments:
        source_course_key_string (str): serialized key of the course to copy.
        destination_course_key_string (str): serialized key of the course to create.
        user_id: id of the user performing the rerun.
        fields (str): optional JSON payload of course field overrides, decoded
            with ``deserialize_fields``.

    Returns:
        str: ``"succeeded"``, ``"duplicate course"``, or ``"exception: <message>"``.
    """
    # import here, at top level this import prevents the celery workers from starting up correctly
    from edxval.api import copy_course_videos

    source_course_key = CourseKey.from_string(source_course_key_string)
    destination_course_key = CourseKey.from_string(destination_course_key_string)
    try:
        # deserialize the payload
        fields = deserialize_fields(fields) if fields else None

        # use the split modulestore as the store for the rerun course,
        # as the Mongo modulestore doesn't support multiple runs of the same course.
        store = modulestore()
        with store.default_store('split'):
            store.clone_course(source_course_key, destination_course_key, user_id, fields=fields)

        update_unit_discussion_state_from_discussion_blocks(destination_course_key, user_id)

        # set initial permissions for the user to access the course.
        initialize_permissions(destination_course_key, User.objects.get(id=user_id))

        # update state: Succeeded
        # NOTE(review): the state is marked succeeded before the remaining copy steps run;
        # a failure below falls into the generic handler, which marks it failed again.
        CourseRerunState.objects.succeeded(course_key=destination_course_key)

        # call edxval to attach videos to the rerun
        copy_course_videos(source_course_key, destination_course_key)

        # Copy RestrictedCourse
        restricted_course = RestrictedCourse.objects.filter(course_key=source_course_key).first()

        if restricted_course:
            # The rules are looked up before cloning: clone_instance() re-saves the very same
            # object under a new primary key and returns it.
            country_access_rules = CountryAccessRule.objects.filter(restricted_course=restricted_course)
            new_restricted_course = clone_instance(restricted_course, {'course_key': destination_course_key})
            for country_access_rule in country_access_rules:
                clone_instance(country_access_rule, {'restricted_course': new_restricted_course})

        org_data = ensure_organization(destination_course_key.org)
        add_organization_course(org_data, destination_course_key)
        return "succeeded"

    except DuplicateCourseError:
        # do NOT delete the original course, only update the status
        CourseRerunState.objects.failed(course_key=destination_course_key)
        LOGGER.exception('Course Rerun Error')
        return "duplicate course"

    # catch all exceptions so we can update the state and properly cleanup the course.
    except Exception as exc:  # pylint: disable=broad-except
        # update state: Failed
        CourseRerunState.objects.failed(course_key=destination_course_key)
        LOGGER.exception('Course Rerun Error')

        try:
            # cleanup any remnants of the course
            modulestore().delete_course(destination_course_key, user_id)
        except ItemNotFoundError:
            # it's possible there was an error even before the course block was created
            pass

        return "exception: " + str(exc)
|
|
|
|
|
|
def deserialize_fields(json_fields):
    """
    Decode a JSON object of course field values into native Python values.

    Each key is treated as the name of an attribute on ``CourseFields``; the
    raw JSON value is converted with that field's ``from_json``.

    Arguments:
        json_fields (str): JSON-encoded mapping of field name to serialized value.

    Returns:
        dict: Map of field name to the deserialized value.
    """
    raw_values = json.loads(json_fields)
    return {
        name: getattr(CourseFields, name).from_json(raw_value)
        for name, raw_value in raw_values.items()
    }
|
|
|
|
|
|
def _parse_time(time_isoformat):
|
|
""" Parses time from iso format """
|
|
return datetime.strptime(
|
|
# remove the +00:00 from the end of the formats generated within the system
|
|
time_isoformat.split('+')[0],
|
|
"%Y-%m-%dT%H:%M:%S.%f"
|
|
).replace(tzinfo=UTC)
|
|
|
|
|
|
@shared_task
@set_code_owner_attribute
def update_search_index(course_id, triggered_time_isoformat):
    """
    Updates course search index.

    Arguments:
        course_id (str): serialized course key of the course to index.
        triggered_time_isoformat (str): timestamp string, parsed with
            ``_parse_time`` and passed to the indexer as ``triggered_at``.

    Indexing is skipped (with a warning) for CCX courses. A
    ``SearchIndexingError`` is logged rather than raised.
    """
    course_key = CourseKey.from_string(course_id)

    # We skip search indexing for CCX courses because there is currently
    # some issue around Modulestore caching that makes it prohibitively
    # expensive (sometimes hours-long for really complex courses).
    if isinstance(course_key, CCXLocator):
        LOGGER.warning(
            'Search indexing skipped for CCX Course %s (this is currently too slow to run in production)',
            course_id
        )
        return

    try:
        CoursewareSearchIndexer.index(
            modulestore(),
            course_key,
            triggered_at=_parse_time(triggered_time_isoformat),
        )
    except SearchIndexingError as indexing_error:
        LOGGER.error(
            "Search indexing error for complete course %s - %s - %s",
            course_id,
            str(indexing_error),
            indexing_error.error_list,
        )
        return

    LOGGER.debug('Search indexing successful for complete course %s', course_id)
|
|
|
|
|
|
@shared_task
@set_code_owner_attribute
def update_library_index(library_id, triggered_time_isoformat):
    """
    Updates library search index.

    Arguments:
        library_id (str): serialized key of the library to index.
        triggered_time_isoformat (str): timestamp string, parsed with
            ``_parse_time`` and passed to the indexer as ``triggered_at``.

    A ``SearchIndexingError`` is logged rather than raised.
    """
    library_key = CourseKey.from_string(library_id)
    try:
        LibrarySearchIndexer.index(
            modulestore(),
            library_key,
            triggered_at=_parse_time(triggered_time_isoformat),
        )
    except SearchIndexingError as indexing_error:
        LOGGER.error('Search indexing error for library %s - %s', library_id, str(indexing_error))
        return

    LOGGER.debug('Search indexing successful for library %s', library_id)
|
|
|
|
|
|
@shared_task
@set_code_owner_attribute
def update_special_exams_and_publish(course_key_str):
    """
    Registers special exams for a given course and calls publishing flow.

    on_course_publish expects that the edx-proctoring subsystem has been refreshed
    before being executed, so both functions are called here synchronously.

    Any exception raised while registering exams is logged and does not prevent
    the publish step from running.

    Arguments:
        course_key_str (str): serialized key of the course being published.
    """
    from cms.djangoapps.contentstore.exams import register_exams
    from cms.djangoapps.contentstore.proctoring import register_special_exams as register_exams_legacy
    from openedx.core.djangoapps.credit.signals.handlers import on_course_publish

    course_key = CourseKey.from_string(course_key_str)
    LOGGER.info('Attempting to register exams for course %s', course_key_str)

    # Call the appropriate handler for either the exams IDA or the edx-proctoring plugin
    if exams_ida_enabled(course_key):
        register_for_course = register_exams
    else:
        register_for_course = register_exams_legacy

    try:
        register_for_course(course_key)
        LOGGER.info('Successfully registered exams for course %s', course_key_str)
    except Exception as registration_error:  # pylint: disable=broad-except
        LOGGER.exception(registration_error)

    LOGGER.info('Publishing course %s', course_key_str)
    on_course_publish(course_key)
|
|
|
|
|
|
class CourseExportTask(UserTask):  # pylint: disable=abstract-method
    """
    Base class for course and library export tasks.
    """

    @staticmethod
    def calculate_total_steps(arguments_dict):
        """
        Get the number of in-progress steps in the export process, as shown in the UI.

        For reference, these are:

        1. Exporting
        2. Compressing

        Arguments:
            arguments_dict (dict): The arguments given to the task function (unused)

        Returns:
            int: Always 2
        """
        return 2

    @classmethod
    def generate_name(cls, arguments_dict):
        """
        Create a name for this particular export task instance.

        Arguments:
            arguments_dict (dict): The arguments given to the task function;
                its ``course_key_string`` entry is embedded in the name

        Returns:
            str: The generated name
        """
        return 'Export of {}'.format(arguments_dict['course_key_string'])
|
|
|
|
|
|
@shared_task(base=CourseExportTask, bind=True)
# Note: The decorator @set_code_owner_attribute cannot be used here because the UserTaskMixin
# does stack inspection and can't handle additional decorators.
def export_olx(self, user_id, course_key_string, language):
    """
    Export a course or library to an OLX .tar.gz archive and prepare it for download.

    The task fails its status (instead of raising) when the user does not exist,
    when the user lacks author access, or when the export itself errors. On
    success the archive is stored as a task artifact named ``Output``.

    Arguments:
        user_id: primary key of the user requesting the export.
        course_key_string (str): serialized key of the course or library.
        language: language activated while recording user-facing failure messages.
    """
    set_code_owner_attribute_from_module(__name__)
    courselike_key = CourseKey.from_string(course_key_string)

    try:
        requesting_user = User.objects.get(pk=user_id)
    except User.DoesNotExist:
        with translation_language(language):
            self.status.fail(UserErrors.UNKNOWN_USER_ID.format(user_id))
        return

    if not has_course_author_access(requesting_user, courselike_key):
        with translation_language(language):
            self.status.fail(UserErrors.PERMISSION_DENIED)
        return

    # Libraries and courses are fetched through different modulestore accessors.
    store = modulestore()
    fetch_block = store.get_library if isinstance(courselike_key, LibraryLocator) else store.get_course
    courselike_block = fetch_block(courselike_key)

    try:
        self.status.set_state('Exporting')
        archive = create_export_tarball(courselike_block, courselike_key, {}, self.status)
        output_artifact = UserTaskArtifact(status=self.status, name='Output')
        archive_basename = os.path.basename(archive.name)
        output_artifact.file.save(name=archive_basename, content=File(archive))
        output_artifact.save()
    # catch all exceptions so we can record useful error messages
    except Exception as error:  # pylint: disable=broad-except
        LOGGER.exception('Error exporting course %s', courselike_key, exc_info=True)
        # create_export_tarball may already have failed the status with richer details.
        if self.status.state != UserTaskStatus.FAILED:
            self.status.fail({'raw_error_msg': str(error)})
|
|
|
|
|
|
def create_export_tarball(course_block, course_key, context, status=None):
    """
    Export a course or library to OLX and pack it into a temporary ``.tar.gz`` file.

    On failure the error details are written into ``context`` (and into ``status``
    when one is given) and the original exception is re-raised; the function does
    not return None.

    Arguments:
        course_block: course or library block; its ``url_name`` names the export
            directory and prefixes the archive file name.
        course_key: key of the course or library; a ``LibraryLocator`` selects the
            library exporter, anything else the course exporter.
        context (dict): updated in place with ``in_err``, ``raw_err_msg`` and
            ``edit_unit_url`` when an error occurs.
        status: optional task status object; advanced to ``Compressing`` on
            progress and failed with a JSON message on error.

    Returns:
        The open ``NamedTemporaryFile`` holding the archive. The file is removed
        when it is closed, so the caller must consume it before closing.

    Raises:
        SerializationError: a block could not be serialized during export.
        Exception: any other error raised while exporting or compressing.
    """
    name = course_block.url_name
    export_file = NamedTemporaryFile(prefix=name + '.',
                                     suffix=".tar.gz")  # lint-amnesty, pylint: disable=consider-using-with
    root_dir = path(mkdtemp())

    try:
        if isinstance(course_key, LibraryLocator):
            export_library_to_xml(modulestore(), contentstore(), course_key, root_dir, name)
        else:
            export_course_to_xml(modulestore(), contentstore(), course_block.id, root_dir, name)

        if status:
            status.set_state('Compressing')
            status.increment_completed_steps()
        LOGGER.debug('tar file being generated at %s', export_file.name)
        with tarfile.open(name=export_file.name, mode='w:gz') as tar_file:
            tar_file.add(root_dir / name, arcname=name)

    except SerializationError as exc:
        LOGGER.exception('There was an error exporting %s', course_key, exc_info=True)
        parent = None
        try:
            failed_item = modulestore().get_item(exc.location)
            parent_loc = modulestore().get_parent_location(failed_item.location)

            if parent_loc is not None:
                parent = modulestore().get_item(parent_loc)
        except Exception:  # pylint: disable=broad-except
            # if we have a nested exception, then we'll show the more generic error message
            pass

        context.update({
            'in_err': True,
            'raw_err_msg': str(exc),
            'edit_unit_url': reverse_usage_url("container_handler", parent.location) if parent else "",
        })
        if status:
            status.fail(json.dumps({'raw_error_msg': context['raw_err_msg'],
                                    'edit_unit_url': context['edit_unit_url']}))
        # The archive is never handed to the caller on failure; closing it deletes the temp file.
        export_file.close()
        raise
    except Exception as exc:
        LOGGER.exception('There was an error exporting %s', course_key, exc_info=True)
        context.update({
            'in_err': True,
            'edit_unit_url': None,
            'raw_err_msg': str(exc)})
        if status:
            status.fail(json.dumps({'raw_error_msg': context['raw_err_msg']}))
        # The archive is never handed to the caller on failure; closing it deletes the temp file.
        export_file.close()
        raise
    finally:
        # mkdtemp() leaves deletion to its caller: remove the whole scratch directory,
        # not only the exported sub-directory, so no empty temp dirs accumulate.
        # The archive itself lives in a separate temporary file and is unaffected.
        if os.path.exists(root_dir):
            shutil.rmtree(root_dir)

    return export_file
|
|
|
|
|
|
class CourseImportTask(UserTask):  # pylint: disable=abstract-method
    """
    Base class for course and library import tasks.
    """

    @staticmethod
    def calculate_total_steps(arguments_dict):
        """
        Get the number of in-progress steps in the import process, as shown in the UI.

        For reference, these are:

        1. Unpacking
        2. Verifying
        3. Updating

        Arguments:
            arguments_dict (dict): The arguments given to the task function (unused)

        Returns:
            int: Always 3
        """
        return 3

    @classmethod
    def generate_name(cls, arguments_dict):
        """
        Create a name for this particular import task instance.

        Arguments:
            arguments_dict (dict): The arguments given to the task function;
                its ``course_key_string`` and ``archive_name`` entries are
                embedded in the name

        Returns:
            str: The generated name
        """
        return 'Import of {} from {}'.format(
            arguments_dict['course_key_string'],
            arguments_dict['archive_name'],
        )
|
|
|
|
|
|
def sync_discussion_settings(course_key, user):
    """
    Syncs the discussion settings for a course with the DiscussionsConfiguration model.

    Copies ``enable_graded_units`` and ``unit_level_visibility`` from the course's
    ``discussions_settings`` onto the course's ``DiscussionsConfiguration`` row.
    When the new discussions structure is enabled and the course is not already on
    the Open edX provider, the course settings and the configuration are first
    switched to that provider.

    This is best-effort: any failure is logged and swallowed.

    Arguments:
        course_key: key of the course whose settings are synced.
        user: user whose ``id`` is recorded when the course block is updated.
    """
    course = modulestore().get_course(course_key)
    if course is None:
        # Without this guard a missing course would make the failure log below raise
        # AttributeError on ``course.id``, turning a best-effort sync into a crash.
        LOGGER.info(f'Course import {course_key}: DiscussionsConfiguration sync failed: course not found')
        return

    try:
        discussion_config = DiscussionsConfiguration.objects.get(context_key=course_key)
        discussion_settings = course.discussions_settings

        if (
            ENABLE_NEW_STRUCTURE_DISCUSSIONS.is_enabled()
            and course.discussions_settings.get('provider_type', None) != Provider.OPEN_EDX
            and course.discussions_settings.get('provider', None) != Provider.OPEN_EDX
        ):
            LOGGER.info(f"New structure is enabled, also updating {course_key} to use new provider")
            course.discussions_settings['enable_graded_units'] = False
            course.discussions_settings['unit_level_visibility'] = True
            course.discussions_settings['provider_type'] = Provider.OPEN_EDX
            modulestore().update_item(course, user.id)

            discussion_config.provider_type = Provider.OPEN_EDX

        discussion_config.enable_graded_units = discussion_settings['enable_graded_units']
        discussion_config.unit_level_visibility = discussion_settings['unit_level_visibility']
        discussion_config.save()
        LOGGER.info(f'Course import {course.id}: DiscussionsConfiguration synced as per course')
    except Exception as exc:  # pylint: disable=broad-except
        LOGGER.info(f'Course import {course.id}: DiscussionsConfiguration sync failed: {exc}')
|
|
|
|
|
|
@shared_task(base=CourseImportTask, bind=True)
# Note: The decorator @set_code_owner_attribute cannot be used here because the UserTaskMixin
# does stack inspection and can't handle additional decorators.
# lint-amnesty, pylint: disable=too-many-statements
def import_olx(self, user_id, course_key_string, archive_path, archive_name, language):
    """
    Import a course or library from a provided OLX .tar.gz or .zip archive.

    The task moves through three states, 'Unpacking', 'Verifying' and 'Updating'.
    Every failure is reported by failing ``self.status`` and by calling
    ``monitor_import_failure``; the task returns instead of raising.

    Arguments:
        user_id: primary key of the user performing the import.
        course_key_string (str): serialized key of the target course or library.
        archive_path: location of the uploaded archive in ``course_import_export_storage``.
        archive_name (str): original file name of the archive; checked against
            ``IMPORTABLE_FILE_TYPES`` and used for the local temp copy.
        language: language activated while recording user-facing failure messages.
    """
    set_code_owner_attribute_from_module(__name__)
    current_step = 'Unpacking'
    courselike_key = CourseKey.from_string(course_key_string)
    set_custom_attributes_for_course_key(courselike_key)
    log_prefix = f'Course import {courselike_key}'
    self.status.set_state(current_step)

    # Per-import working directory under GITHUB_REPO_ROOT, named with the
    # URL-safe base64 encoding of the key's repr.
    data_root = path(settings.GITHUB_REPO_ROOT)
    subdir = base64.urlsafe_b64encode(repr(courselike_key).encode('utf-8')).decode('utf-8')
    course_dir = data_root / subdir

    def validate_user():
        """Return the user with pk ``user_id``; fail the task and return None if there is none."""
        try:
            return User.objects.get(pk=user_id)
        except User.DoesNotExist as exc:
            with translation_language(language):
                self.status.fail(UserErrors.USER_PERMISSION_DENIED)
            LOGGER.error(f'{log_prefix}: Unknown User: {user_id}')
            monitor_import_failure(courselike_key, current_step, exception=exc)
            return

    def user_has_access(user):
        """Return True if user has studio write access to the given course; otherwise fail the task."""
        has_access = has_course_author_access(user, courselike_key)
        if not has_access:
            message = f'User permission denied: {user.username}'
            with translation_language(language):
                self.status.fail(UserErrors.COURSE_PERMISSION_DENIED)
            LOGGER.error(f'{log_prefix}: {message}')
            monitor_import_failure(courselike_key, current_step, message=message)
        return has_access

    def file_is_supported():
        """Return True if the archive name ends with a supported file type; otherwise fail the task."""
        file_is_valid = archive_name.endswith(IMPORTABLE_FILE_TYPES)

        if not file_is_valid:
            message = f'Unsupported file {archive_name}'
            with translation_language(language):
                self.status.fail(UserErrors.INVALID_FILE_TYPE)
            LOGGER.error(f'{log_prefix}: {message}')
            monitor_import_failure(courselike_key, current_step, message=message)
        return file_is_valid

    def file_exists_in_storage():
        """Return True if the archive path exists in storage; otherwise fail the task."""
        archive_path_exists = course_import_export_storage.exists(archive_path)

        if not archive_path_exists:
            message = f'Uploaded file {archive_path} not found'
            with translation_language(language):
                self.status.fail(UserErrors.FILE_NOT_FOUND)
            LOGGER.error(f'{log_prefix}: {message}')
            monitor_import_failure(courselike_key, current_step, message=message)
        return archive_path_exists

    def verify_root_name_exists(course_dir, root_name):
        """
        Return the directory under ``course_dir`` containing ``root_name``.

        Fails the task and returns None when no such file exists.
        """

        def get_all_files(directory):
            """
            For each file in the directory, yield a 2-tuple of (file-name,
            directory-path)
            """
            for directory_path, _dirnames, filenames in os.walk(directory):
                for filename in filenames:
                    yield (filename, directory_path)

        def get_dir_for_filename(directory, filename):
            """
            Returns the directory path for the first file found in the directory
            with the given name.  If there is no file in the directory with
            the specified name, return None.
            """
            for name, directory_path in get_all_files(directory):
                if name == filename:
                    return directory_path
            return None

        dirpath = get_dir_for_filename(course_dir, root_name)
        if not dirpath:
            message = UserErrors.FILE_MISSING.format(root_name)
            with translation_language(language):
                self.status.fail(message)
            LOGGER.error(f'{log_prefix}: {message}')
            monitor_import_failure(courselike_key, current_step, message=message)
            return
        return dirpath

    # Pre-flight checks; each helper has already failed the task when it returns a falsy value.
    user = validate_user()
    if not user:
        return

    if not user_has_access(user):
        return

    if not file_is_supported():
        return

    # Libraries and courses differ in root file name, block lookup and import function.
    is_library = isinstance(courselike_key, LibraryLocator)
    is_course = not is_library
    if is_library:
        root_name = LIBRARY_ROOT
        courselike_block = modulestore().get_library(courselike_key)
        import_func = import_library_from_xml
    else:
        root_name = COURSE_ROOT
        courselike_block = modulestore().get_course(courselike_key)
        import_func = import_course_from_xml

    # Locate the uploaded OLX archive (and download it from S3 if necessary)
    # Do everything in a try-except block to make sure everything is properly cleaned up.
    try:
        LOGGER.info(f'{log_prefix}: unpacking step started')

        temp_filepath = course_dir / get_valid_filename(archive_name)
        if not course_dir.isdir():
            os.mkdir(course_dir)

        LOGGER.info(f'{log_prefix}: importing course to {temp_filepath}')

        # Copy the OLX archive from where it was uploaded to (S3, Swift, file system, etc.)
        # NOTE(review): this early return skips the cleanup in the except block below, so the
        # course_dir created just above is left on disk when the upload is missing.
        if not file_exists_in_storage():
            return

        with course_import_export_storage.open(archive_path, 'rb') as source:
            with open(temp_filepath, 'wb') as destination:
                def read_chunk():
                    """
                    Read and return a sequence of bytes from the source file.
                    """
                    return source.read(FILE_READ_CHUNK)

                # iter() with a sentinel stops at the first empty read, i.e. end of file.
                for chunk in iter(read_chunk, b''):
                    destination.write(chunk)

        LOGGER.info(f'{log_prefix}: Download from storage complete')
        # Delete from source location
        course_import_export_storage.delete(archive_path)

        # If the course has an entrance exam then remove it and its corresponding milestone.
        # current course state before import.
        if is_course:
            if courselike_block.entrance_exam_enabled:
                fake_request = RequestFactory().get('/')
                fake_request.user = user
                from .views.entrance_exam import remove_entrance_exam_milestone_reference

                # TODO: Is this really ok?  Seems dangerous for a live course
                remove_entrance_exam_milestone_reference(fake_request, courselike_key)
                LOGGER.info(f'{log_prefix}: entrance exam milestone content reference has been removed')
    # Send errors to client with stage at which error occurred.
    except Exception as exception:  # pylint: disable=broad-except
        if course_dir.isdir():
            shutil.rmtree(course_dir)
            LOGGER.info(f'{log_prefix}: Temp data cleared')

        # NOTE(review): unlike the other failure paths, this message is recorded without
        # activating translation_language(language).
        self.status.fail(UserErrors.UNKNOWN_ERROR_IN_UNPACKING)
        LOGGER.exception(f'{log_prefix}: Unknown error while unpacking', exc_info=True)
        monitor_import_failure(courselike_key, current_step, exception=exception)
        return

    # try-finally block for proper clean up after receiving file.
    try:
        try:
            safe_extractall(temp_filepath, course_dir)
        except SuspiciousOperation as exc:
            with translation_language(language):
                self.status.fail(UserErrors.UNSAFE_ARCHIVE_FILE)
            LOGGER.error(f'{log_prefix}: Unsafe archive file')
            monitor_import_failure(courselike_key, current_step, exception=exc)
            return

        current_step = 'Verifying'
        self.status.set_state(current_step)
        self.status.increment_completed_steps()
        LOGGER.info(f'{log_prefix}: Uploaded file extracted. Verification step started')

        dirpath = verify_root_name_exists(course_dir, root_name)
        if not dirpath:
            return

        if not validate_course_olx(courselike_key, dirpath, self.status):
            return

        # The import function receives the directory relative to the data root it is also given.
        dirpath = os.path.relpath(dirpath, data_root)

        current_step = 'Updating'
        self.status.set_state(current_step)
        self.status.increment_completed_steps()
        LOGGER.info(f'{log_prefix}: Extracted file verified. Updating course started')

        courselike_items = import_func(
            modulestore(), user.id,
            settings.GITHUB_REPO_ROOT, [dirpath],
            load_error_blocks=False,
            static_content_store=contentstore(),
            target_id=courselike_key,
            verbose=True,
        )

        new_location = courselike_items[0].location
        LOGGER.debug('new course at %s', new_location)

        LOGGER.info(f'{log_prefix}: Course import successful')
        set_custom_attribute('course_import_completed', True)
    except (CourseImportException, InvalidProctoringProvider, DuplicateCourseError) as known_exe:
        # Known failures surface their own message to the user.
        handle_course_import_exception(courselike_key, known_exe, self.status)
    except Exception as exception:  # pylint: disable=broad-except
        # Unknown failures are reported with a generic message.
        handle_course_import_exception(courselike_key, exception, self.status, known=False)
    finally:
        if course_dir.isdir():
            shutil.rmtree(course_dir)
            LOGGER.info(f'{log_prefix}: Temp data cleared')

        # The state is still 'Updating' only if nothing failed the status after the update began.
        if self.status.state == 'Updating' and is_course:
            # Reload the course so we have the latest state
            course = modulestore().get_course(courselike_key)
            if course.entrance_exam_enabled:
                entrance_exam_chapter = modulestore().get_items(
                    course.id,
                    qualifiers={'category': 'chapter'},
                    settings={'is_entrance_exam': True}
                )[0]

                metadata = {'entrance_exam_id': str(entrance_exam_chapter.location)}
                CourseMetadata.update_from_dict(metadata, course, user)
                from .views.entrance_exam import add_entrance_exam_milestone
                add_entrance_exam_milestone(course.id, entrance_exam_chapter)
                LOGGER.info(f'Course import {course.id}: Entrance exam imported')
        # NOTE(review): placement inside ``finally`` is reconstructed from statement adjacency;
        # confirm against the canonical file that this is not meant to follow the try statement.
        if is_course:
            sync_discussion_settings(courselike_key, user)
|
|
|
|
|
|
@shared_task
@set_code_owner_attribute
def update_all_outlines_from_modulestore_task():
    """
    Celery task that fans out one celery task per learning_sequence course outline
    to regenerate. The list of course keys to regenerate comes from the proxy model itself.

    Keys that do not support outlines are skipped with a warning. Any exception
    raised for a single key is logged and does not stop the loop.
    """
    # Materialize all keys up front, before any task is queued.
    serialized_keys = list(map(str, CourseOutlineRegenerate.get_course_outline_ids()))

    for course_key_str in serialized_keys:
        try:
            course_key = CourseKey.from_string(course_key_str)
            if key_supports_outlines(course_key):
                update_outline_from_modulestore_task.delay(course_key_str)
            else:
                LOGGER.warning(
                    (
                        "update_multiple_outlines_from_modulestore_task called for course key"
                        " %s, which does not support learning_sequence outlines."
                    ),
                    course_key_str
                )
        except Exception:  # pylint: disable=broad-except
            # Swallow the exception to continue the loop through course keys - but log it.
            LOGGER.exception("Could not create course outline for course %s", course_key_str)
|
|
|
|
|
|
@shared_task
@set_code_owner_attribute
def update_outline_from_modulestore_task(course_key_str: str):
    """
    Celery task that creates a learning_sequence course outline.

    Arguments:
        course_key_str (str): serialized key of the course whose outline is built.

    A key that does not support outlines is skipped with a warning. Any other
    failure is logged and re-raised.
    """
    try:
        course_key = CourseKey.from_string(course_key_str)
        if key_supports_outlines(course_key):
            update_outline_from_modulestore(course_key)
        else:
            LOGGER.warning(
                (
                    "update_outline_from_modulestore_task called for course key"
                    " %s, which does not support learning_sequence outlines."
                ),
                course_key_str
            )
    except Exception:  # pylint: disable=broad-except
        LOGGER.exception("Could not create course outline for course %s", course_key_str)
        raise  # Re-raise so that errors are noted in reporting.
|
|
|
|
|
|
def validate_course_olx(courselike_key, course_dir, status):
    """
    Validates course olx and records the errors as an artifact.

    Arguments:
        courselike_key: A locator identifies a course resource.
        course_dir: complete path to the course olx
        status: UserTaskStatus object.

    Returns:
        bool: False only when validation found errors and the bypass toggle is
        off, in which case ``status`` has been failed. True in every other case:
        libraries, validation disabled, the validator itself raising, no errors
        found, or errors found while the bypass toggle is on.
    """
    log_prefix = f'Course import {courselike_key}'
    validation_failed_mesg = 'CourseOlx validation failed.'

    # Libraries are never validated; courses only when the feature is switched on.
    if isinstance(courselike_key, LibraryLocator) or not course_import_olx_validation_is_enabled():
        return True

    try:
        _course, errorstore, _url_names = olxcleaner.validate(
            filename=course_dir,
            steps=settings.COURSE_OLX_VALIDATION_STAGE,
            ignore=settings.COURSE_OLX_VALIDATION_IGNORE_LIST,
            allowed_xblocks=ALL_ALLOWED_XBLOCKS
        )
    except Exception:  # pylint: disable=broad-except
        # A crash inside the validator must not block the import.
        LOGGER.exception(f'{log_prefix}: CourseOlx could not be validated')
        return True

    if not errorstore.return_error(ErrorLevel.ERROR.value):
        return True

    LOGGER.error(f'{log_prefix}: {validation_failed_mesg}')
    log_errors_to_artifact(errorstore, status)

    # With the bypass toggle on, errors are recorded but the import continues.
    if bypass_olx_failure_enabled():
        return True

    monitor_import_failure(courselike_key, status.state, message=validation_failed_mesg)
    status.fail(UserErrors.OLX_VALIDATION_FAILED)
    return False
|
|
|
|
|
|
def log_errors_to_artifact(errorstore, status):
    """
    Store OLX validation findings as an ``OLX_VALIDATION_ERROR`` task artifact.

    The artifact text is a JSON object holding the error summary plus the
    report lines split into ``errors`` and ``warnings`` by the level name each
    line starts with.
    """
    summary = report_error_summary(errorstore)
    report_lines = report_errors(errorstore)

    error_lines = [line for line in report_lines if line.startswith(ErrorLevel.ERROR.name)]
    warning_lines = [line for line in report_lines if line.startswith(ErrorLevel.WARNING.name)]

    payload = {
        'summary': summary,
        'errors': error_lines,
        'warnings': warning_lines,
    }
    UserTaskArtifact.objects.create(
        status=status,
        name='OLX_VALIDATION_ERROR',
        text=json.dumps(payload),
    )
|
|
|
|
|
|
def handle_course_import_exception(courselike_key, exception, status, known=True):
    """
    Log and monitor a course import exception, then fail the task status.

    Arguments:
        courselike_key: A locator identifies a course resource.
        exception: Exception object
        status: UserTaskStatus object.
        known: boolean indicating if this is a known failure or unknown. Known
            failures report the exception text; unknown ones report a generic
            import error message.
    """
    exception_message = str(exception)
    log_prefix = f"Course import {courselike_key}:"
    LOGGER.exception(f"{log_prefix} Error while importing course: {exception_message}")
    monitor_import_failure(courselike_key, status.state, exception=exception)

    # Do not fail a status that has already been failed.
    if status.state == UserTaskStatus.FAILED:
        return

    status.fail(exception_message if known else UserErrors.UNKNOWN_ERROR_IN_IMPORT)
|
|
|
|
|
|
def _parse_organization(org_name):
    """
    Return the Organization whose short name is ``org_name``.

    ``ensure_organization`` is called first; if it rejects the name with
    InvalidOrganizationException, the literal string ``'None'`` is returned to
    stand for the *unspecified* organization.
    """
    try:
        ensure_organization(org_name)
    except InvalidOrganizationException:
        return 'None'
    else:
        return Organization.objects.get(short_name=org_name)
|
|
|
|
|
|
def copy_v1_user_roles_into_v2_library(v2_library_key, v1_library_key):
    """
    Write the access and edit permissions of a v1 library into a v2 library.

    Every user holding a V1 role is granted the matching V2 permission level:
        V1 Library Role         -> V2 Permission Level
        LibraryUserRole         -> READ_LEVEL
        CourseStaffRole         -> AUTHOR_LEVEL
        CourseInstructorRole    -> ADMIN_LEVEL
    """
    levels = v2contentlib_api.AccessLevel
    role_by_level = (
        (levels.READ_LEVEL, LibraryUserRole),
        (levels.AUTHOR_LEVEL, CourseStaffRole),
        (levels.ADMIN_LEVEL, CourseInstructorRole),
    )

    # Collect every role's users before any permission is written.
    users_by_level = {
        level: list(role_class(v1_library_key).users_with_role())
        for level, role_class in role_by_level
    }

    for level, users in users_by_level.items():
        for user in users:
            v2contentlib_api.set_library_user_permissions(v2_library_key, user, level)
|
|
|
|
|
|
def _create_copy_content_task(v2_library_key, v1_library_key):
    """
    Spin up a celery task to import the V1 Library's content into the V2 library.

    This utilizes the fact that course and v1 library content is stored almost
    identically. The result of ``import_blocks_create_task`` is returned as is.
    """
    import_task = v2contentlib_api.import_blocks_create_task(
        v2_library_key,
        v1_library_key,
        use_course_key_as_block_id_suffix=False,
    )
    return import_task
|
|
|
|
|
|
@shared_task(time_limit=30)
@set_code_owner_attribute
def delete_v1_library(v1_library_key_string):
    """
    Delete the v1 library identified by the given key string.

    Raises:
        KeyError: if the modulestore has no library for the key.

    Returns:
        dict: ``v1_library_id``, ``status`` ("SUCCESS" or "FAILED") and ``msg``.
        A failure during deletion is reported through the dict, not raised.
    """
    v1_library_key = CourseKey.from_string(v1_library_key_string)
    if not modulestore().get_library(v1_library_key):
        raise KeyError(f"Library not found: {v1_library_key}")

    outcome = {
        "v1_library_id": v1_library_key_string,
        "status": "SUCCESS",
        "msg": "SUCCESS",
    }
    try:
        delete_course(v1_library_key, ModuleStoreEnum.UserID.mgmt_command, True)
        LOGGER.info(f"Deleted course {v1_library_key}")
    except Exception as error:  # lint-amnesty, pylint: disable=broad-except
        outcome["status"] = "FAILED"
        outcome["msg"] = f"Error occurred deleting library: {str(error)}"

    return outcome
|
|
|
|
|
|
@shared_task(time_limit=30)
@set_code_owner_attribute
def validate_all_library_source_blocks_ids_for_course(course_key_string, v1_to_v2_lib_map):
    """
    Check that every library source block in a course uses a library id from the map.

    Blocks that have a ``source_library_id`` setting are fetched from the draft
    and published branches. Each id must be one of the *values* of
    ``v1_to_v2_lib_map``.

    Returns:
        list: the ``source_library_id`` of every block inspected, in visit order.

    Raises:
        Exception: on the first block whose id is not among the map's values.
    """
    course_id = CourseKey.from_string(course_key_string)
    store = modulestore()
    branches = [ModuleStoreEnum.BranchName.draft, ModuleStoreEnum.BranchName.published]

    checked_ids = []
    with store.bulk_operations(course_id):
        for branch in branches:
            branch_blocks = store.get_items(
                course_id.for_branch(branch),
                settings={'source_library_id': {'$exists': True}}
            )
            for xblock in branch_blocks:
                library_id = xblock.source_library_id
                if library_id not in v1_to_v2_lib_map.values():
                    # lint-amnesty, pylint: disable=broad-except
                    raise Exception(
                        f'{library_id} in {course_id} is not found in mapping. Validation failed'
                    )
                checked_ids.append(library_id)

    return checked_ids
|
|
|
|
|
|
@shared_task(time_limit=30)
@set_code_owner_attribute
def replace_all_library_source_blocks_ids_for_course(course_key_string, v1_to_v2_lib_map):  # lint-amnesty, pylint: disable=useless-return
    """
    Replace the source_library_id of every library source block in a course with its v2 value.

    Blocks that have a ``source_library_id`` setting are fetched from the draft
    and published branches. For each draft block the current id is looked up in
    ``v1_to_v2_lib_map``; blocks whose id is not a key of the map are logged
    and skipped.

    This will trigger a publish on the course for every published library source block.

    Arguments:
        course_key_string (str): serialized key of the course to update.
        v1_to_v2_lib_map (dict): maps v1 ``source_library_id`` values to v2 library ids.
    """
    store = modulestore()
    course_id = CourseKey.from_string(course_key_string)

    with store.bulk_operations(course_id):
        draft_blocks, published_blocks = [
            store.get_items(
                course_id.for_branch(branch),
                settings={'source_library_id': {'$exists': True}}
            )
            for branch in [ModuleStoreEnum.BranchName.draft, ModuleStoreEnum.BranchName.published]
        ]

        published_dict = {block.location: block for block in published_blocks}

        for draft_library_source_block in draft_blocks:
            current_source_id = draft_library_source_block.source_library_id
            try:
                new_source_id = str(v1_to_v2_lib_map[current_source_id])
            except KeyError:
                # Skip blocks whose library id has no entry in the mapping.
                LOGGER.error(
                    'Key %s not found in mapping. Skipping block for course %s',
                    current_source_id,
                    course_id,
                )
                continue

            # The published branch should be updated as well as the draft branch.
            # This way, if authors "discard changes," they won't be reverted back to the V1 lib.
            # However, we also don't want to publish the draft branch.
            try:
                published_block = published_dict[draft_library_source_block.location]
                if published_block is not None:
                    # Update and publish the published version first, then update the draft.
                    published_block.source_library_id = new_source_id
                    store.update_item(published_block, None)
                    store.publish(published_block.location, None)
                    draft_library_source_block.source_library_id = new_source_id
                    store.update_item(draft_library_source_block, None)
            except KeyError:
                # Warn, but just update the draft block if no published block for draft block.
                LOGGER.warning(
                    'No matching published block for draft block %s',
                    str(draft_library_source_block.location)
                )
                draft_library_source_block.source_library_id = new_source_id
                store.update_item(draft_library_source_block, None)
    # return success
    return
|
|
|
|
|
|
@shared_task(time_limit=30)
@set_code_owner_attribute
def undo_all_library_source_blocks_ids_for_course(course_key_string, v1_to_v2_lib_map):  # lint-amnesty, pylint: disable=useless-return
    """
    Replace the source_library_id of every library source block in a course with its v1 value.

    This exists to undo changes made previously: ``v1_to_v2_lib_map`` is
    inverted, and each draft block's current id is looked up in the inverted
    map. Blocks whose id is not a key of the inverted map are logged and skipped.

    Arguments:
        course_key_string (str): serialized key of the course to update.
        v1_to_v2_lib_map (dict): maps v1 ``source_library_id`` values to v2 library ids.
    """
    course_id = CourseKey.from_string(course_key_string)

    v2_to_v1_lib_map = {v: k for k, v in v1_to_v2_lib_map.items()}

    # NOTE(review): unlike replace_all_library_source_blocks_ids_for_course, these updates
    # are not wrapped in store.bulk_operations(course_id) - confirm whether that is intentional.
    store = modulestore()
    draft_blocks, published_blocks = [
        store.get_items(
            course_id.for_branch(branch),
            settings={'source_library_id': {'$exists': True}}
        )
        for branch in [ModuleStoreEnum.BranchName.draft, ModuleStoreEnum.BranchName.published]
    ]

    published_dict = {block.location: block for block in published_blocks}

    for draft_library_source_block in draft_blocks:
        current_source_id = draft_library_source_block.source_library_id
        try:
            new_source_id = str(v2_to_v1_lib_map[current_source_id])
        except KeyError:
            # Skip blocks whose library id has no entry in the inverted mapping.
            LOGGER.error(
                'Key %s not found in mapping. Skipping block for course %s',
                current_source_id,
                course_id,
            )
            continue

        # The published branch should be updated as well as the draft branch.
        # This way, if authors "discard changes," they won't be reverted back to the V2 lib.
        # However, we also don't want to publish the draft branch.
        try:
            published_block = published_dict[draft_library_source_block.location]
            if published_block is not None:
                # Update and publish the published version first, then update the draft.
                published_block.source_library_id = new_source_id
                store.update_item(published_block, None)
                store.publish(published_block.location, None)
                draft_library_source_block.source_library_id = new_source_id
                store.update_item(draft_library_source_block, None)
        except KeyError:
            # Warn, but just update the draft block if no published block for draft block.
            LOGGER.warning(
                'No matching published block for draft block %s',
                str(draft_library_source_block.location)
            )
            draft_library_source_block.source_library_id = new_source_id
            store.update_item(draft_library_source_block, None)
    # return success
    return
|
|
|
|
|
|
class CourseLinkCheckTask(UserTask):  # pylint: disable=abstract-method
    """
    Base class for course link check tasks.
    """

    @staticmethod
    def calculate_total_steps(arguments_dict):
        """
        Return the number of in-progress steps in the link check process, as shown in the UI.

        There is a single step:
            1. Scanning
        """
        return 1

    @classmethod
    def generate_name(cls, arguments_dict):
        """
        Build the name of this particular task instance.

        Arguments:
            arguments_dict (dict): The arguments given to the task function

        Returns:
            str: The generated name
        """
        course_key_string = arguments_dict['course_key_string']
        return f'Broken link check of {course_key_string}'
|
|
|
|
|
|
# -------------- Course optimizer functions ------------------
|
|
|
|
|
|
@shared_task(base=CourseLinkCheckTask, bind=True)
# Note: The decorator @set_code_owner_attribute cannot be used here because the UserTaskMixin
#   does stack inspection and can't handle additional decorators.
def check_broken_links(self, user_id, course_key_string, language):
    """
    Celery entry point: check a course for broken links and store the results in a file.

    The code owner attribute is set from this module, then the work is
    delegated to ``_check_broken_links``.
    """
    set_code_owner_attribute_from_module(__name__)
    outcome = _check_broken_links(self, user_id, course_key_string, language)
    return outcome
|
|
|
|
|
|
def _check_broken_links(task_instance, user_id, course_key_string, language):
    """
    Checks for broken links in a course and stores the results in a file.
    Also checks for previous run links if the feature is enabled.

    The resulting list of ``[block_id, url, link_state]`` records is written as
    JSON to a temporary file and saved as a ``BrokenLinks`` task artifact.

    Arguments:
        task_instance: the bound UserTask whose ``status`` is updated.
        user_id: primary key of the user who requested the check.
        course_key_string (str): serialized key of the course to scan.
        language: language used for the error message when the user is unknown.
    """
    user = _validate_user(task_instance, user_id, language)
    if user is None:
        # _validate_user has already failed the task; do not reset it to IN_PROGRESS.
        return

    task_instance.status.set_state(UserTaskStatus.IN_PROGRESS)
    course_key = CourseKey.from_string(course_key_string)

    url_list = _scan_course_for_links(course_key)
    previous_run_links = []
    urls_to_validate = url_list

    if enable_course_optimizer_check_prev_run_links(course_key):
        previous_run_course_key = get_previous_run_course_key(course_key)
        if previous_run_course_key:
            # Separate previous run links from regular links BEFORE validation
            urls_to_validate = []
            for block_id, url in url_list:
                if contains_previous_course_reference(url, previous_run_course_key):
                    previous_run_links.append([block_id, url, LinkState.PREVIOUS_RUN])
                else:
                    urls_to_validate.append([block_id, url])

    validated_url_list = asyncio.run(_validate_urls_access_in_batches(urls_to_validate, course_key, batch_size=100))
    broken_or_locked_urls, retry_list = _filter_by_status(validated_url_list)

    if retry_list:
        retry_results = _retry_validation(retry_list, course_key, retry_count=3)
        broken_or_locked_urls.extend(retry_results)

    all_links = broken_or_locked_urls + previous_run_links
    try:
        task_instance.status.increment_completed_steps()

        file_name = str(course_key)
        # The context manager closes (and thereby removes) the temporary file once the
        # artifact has been saved from it.
        with NamedTemporaryFile(prefix=file_name + '.', suffix='.json') as broken_links_file:
            LOGGER.debug(f'[Link Check] json file being generated at {broken_links_file.name}')

            _write_broken_links_to_file(all_links, broken_links_file)

            artifact = UserTaskArtifact(status=task_instance.status, name='BrokenLinks')
            _save_broken_links_file(artifact, broken_links_file)

    # catch all exceptions so we can record useful error messages
    except Exception as e:  # pylint: disable=broad-except
        LOGGER.exception('Error checking links for course %s', course_key, exc_info=True)
        if task_instance.status.state != UserTaskStatus.FAILED:
            task_instance.status.fail({'raw_error_msg': str(e)})
|
|
|
|
|
|
def _validate_user(task, user_id, language):
    """
    Return the User with primary key ``user_id``.

    If no such user exists, the task status is failed with the unknown-user-id
    message (formatted inside the given translation language) and None is returned.
    """
    try:
        user = User.objects.get(pk=user_id)
    except User.DoesNotExist:
        with translation_language(language):
            task.status.fail(UserErrors.UNKNOWN_USER_ID.format(user_id))
        return None
    return user
|
|
|
|
|
|
def _scan_course_for_links(course_key):
    """
    Collect the links found in the data contents of blocks, course updates,
    handouts, and custom pages of a course.

    Only the children of published verticals are scanned for block links.

    Returns:
        list: block id and URL pairs

    Example return:
    [
        [block_id1, url1],
        [block_id2, url2],
        ...
    ]
    """
    verticals = modulestore().get_items(
        course_key,
        qualifiers={'category': 'vertical'},
        revision=ModuleStoreEnum.RevisionOption.published_only
    )
    course = modulestore().get_course(course_key)

    child_blocks = [child for vertical in verticals for child in vertical.get_children()]

    id_url_pairs = []
    for block in child_blocks:
        # Excluding 'drag-and-drop-v2' as it contains data of object type instead of string, causing errors,
        # and it doesn't contain user-facing links to scan.
        if block.category == 'drag-and-drop-v2':
            continue
        block_id = str(block.usage_key)
        block_data = get_block_info(block)['data']
        id_url_pairs.extend([block_id, url] for url in extract_content_URLs_from_course(block_data))

    # All three course-level areas are scanned before any of their links are appended.
    scanned_areas = (
        _scan_course_updates_for_links(course),
        _scan_course_handouts_for_links(course),
        _scan_custom_pages_for_links(course),
    )
    for area_entries in scanned_areas:
        for entry in area_entries:
            for url in entry['urls']:
                id_url_pairs.append([entry['block_id'], url])

    return id_url_pairs
|
|
|
|
|
|
def extract_content_URLs_from_course(content):
    """
    Finds and returns the URLs in the given content.

    Two kinds of occurrences are recognised, case-insensitively:
        - values of href and src attributes, except those that start with
          '#' or 'data:'
        - standalone URLs starting with http://, https:// or www.

    Arguments:
        content (str): entire content of a block

    Returns:
        set: the distinct URLs found; empty when content is empty or None
    """
    # Nothing to scan; avoids a TypeError from re.findall on None.
    if not content:
        return set()

    # Matches URLs in href and src attributes (group 1), or standalone URLs (group 2).
    url_pattern = (
        r'(?:href|src)=["\'](?!#|data:)([^"\']+)["\']'
        r'|(?:^|[\s\'"(<>])((?:https?://|www\.)[^\s\'")<>]+)(?=[\s\'")<>]|$)'
    )

    urls = set()
    for attribute_url, standalone_url in re.findall(url_pattern, content, re.IGNORECASE):
        # Exactly one of the two groups is filled for any given match.
        url = attribute_url or standalone_url
        if url:
            urls.add(url)

    return urls
|
|
|
|
|
|
def _scan_course_updates_for_links(course):
    """
    Scans course updates for links.

    Updates whose status is "deleted" are ignored. Any error while scanning is
    logged at debug level and whatever was collected so far is returned.

    Returns:
        list: course update data with links
    """
    collected_updates = []
    try:
        store = modulestore()
        usage_key = create_course_info_usage_key(course, "updates")
        updates_block = store.get_item(usage_key)

        if updates_block and hasattr(updates_block, "data"):
            for update in get_course_update_items(updates_block):
                if update.get("status") == "deleted":
                    continue
                found_urls = extract_content_URLs_from_course(update.get("content", ""))
                collected_updates.append(
                    {
                        "displayName": update.get("date", "Unknown"),
                        "block_id": str(usage_key),
                        "urls": found_urls,
                    }
                )
    except Exception as e:  # pylint: disable=broad-exception-caught
        LOGGER.debug(f"Error scanning course updates: {e}")

    return collected_updates
|
|
|
|
|
|
def _scan_course_handouts_for_links(course):
    """
    Scans course handouts for links.

    Any error while scanning is logged at debug level and whatever was
    collected so far is returned.

    Returns:
        list: handouts data with links
    """
    collected_handouts = []
    try:
        store = modulestore()
        usage_key = create_course_info_usage_key(course, "handouts")
        handouts_block = store.get_item(usage_key)

        has_content = handouts_block and hasattr(handouts_block, "data") and handouts_block.data
        if has_content:
            found_urls = extract_content_URLs_from_course(handouts_block.data)
            collected_handouts.append(
                {"name": "handouts", "block_id": str(usage_key), "urls": found_urls}
            )
    except Exception as e:  # pylint: disable=broad-exception-caught
        LOGGER.debug(f"Error scanning course handouts: {e}")

    return collected_handouts
|
|
|
|
|
|
def _scan_custom_pages_for_links(course):
    """
    Scans custom pages (static tabs) for links.

    A failure on one static tab is logged at debug level and that tab is
    skipped; a failure outside the per-tab handling is logged and ends the
    scan with whatever was collected so far.

    Returns:
        list: custom pages data with links
    """
    collected_pages = []
    try:
        store = modulestore()
        course_key = course.id

        for tab in course.tabs:
            if not isinstance(tab, StaticTab):
                continue
            try:
                # Get the static tab content
                tab_usage_key = course_key.make_usage_key("static_tab", tab.url_slug)
                tab_block = store.get_item(tab_usage_key)

                if not (tab_block and hasattr(tab_block, "data")):
                    continue

                found_urls = extract_content_URLs_from_course(tab_block.data)
                collected_pages.append(
                    {
                        "displayName": tab.name,
                        "block_id": str(tab_usage_key),
                        "urls": found_urls,
                    }
                )
            except Exception as e:  # pylint: disable=broad-exception-caught
                LOGGER.debug(f"Error scanning static tab {tab.name}: {e}")
                continue
    except Exception as e:  # pylint: disable=broad-exception-caught
        LOGGER.debug(f"Error scanning custom pages: {e}")

    return collected_pages
|
|
|
|
|
|
async def _validate_urls_access_in_batches(url_list, course_key, batch_size=100):
    """
    Returns the statuses of a list of URL requests.

    The URLs are requested one batch at a time; each batch is awaited before
    the next one starts.

    Arguments:
        url_list (list): block id and URL pairs
        course_key: locator of the course the URLs belong to, passed on to the batch validator
        batch_size (int): maximum number of URLs validated per batch

    Returns:
        list: dictionary containing URL, associated block id, and request status
    """
    responses = []
    url_count = len(url_list)
    # Ceiling division, so an exact multiple of batch_size is not reported as one batch too many.
    total_batches = (url_count + batch_size - 1) // batch_size

    for start in range(0, url_count, batch_size):
        batch = url_list[start:start + batch_size]
        batch_results = await _validate_batch(batch, course_key)
        responses.extend(batch_results)
        LOGGER.debug(f'[Link Check] request batch {start // batch_size + 1} of {total_batches}')

    return responses
|
|
|
|
|
|
async def _validate_batch(batch, course_key):
    """Validate a batch of URLs concurrently over one shared client session."""
    async with aiohttp.ClientSession(headers=DEFAULT_HEADERS) as session:
        pending_checks = [
            _validate_url_access(session, url_data, course_key)
            for url_data in batch
        ]
        return await asyncio.gather(*pending_checks)
|
|
|
|
|
|
async def _validate_url_access(session, url_data, course_key):
    """
    Validates a URL.

    The URL is stripped of surrounding whitespace and converted to its standard
    form before it is requested with a 5 second timeout. Any exception raised
    by the request is logged at debug level and reported as a ``None`` status.

    Arguments:
        session: client session used to issue the GET request
        url_data (list): block id and URL pairs
        course_key: locator id for a course

    Returns:
        dict: URL, associated block id, and request status

    Example return:
        {
            'block_id': block_id1,
            'url': url1,
            'status': status
        }
    """
    block_id, raw_url = url_data
    url = raw_url.strip()  # Trim leading/trailing whitespace
    request_url = _convert_to_standard_url(url, course_key)

    status = None
    try:
        async with session.get(request_url, timeout=5) as response:
            status = response.status
    except Exception as e:  # lint-amnesty, pylint: disable=broad-except
        status = None
        LOGGER.debug(f'[Link Check] Request error when validating {url}: {str(e)}')

    return {'block_id': block_id, 'url': url, 'status': status}
|
|
|
|
|
|
def _convert_to_standard_url(url, course_key):
    """
    Returns standard URLs when given studio URLs. Otherwise returns the URL as is.

    A URL without an http(s) scheme is turned into an absolute https URL:
    ``/static/`` paths go through ``replace_static_urls`` and the CMS base,
    ``/jump_to_id/`` paths go to the LMS course URL, other rooted paths are
    prefixed with the CMS base, and anything else is treated as a container id.

    Example URLs:
        /assets/courseware/v1/506da5d6f866e8f0be44c5df8b6e6b2a/...
            ...asset-v1:edX+DemoX+Demo_Course+type@asset+block/getting-started_x250.png
        /static/getting-started_x250.png
        /container/block-v1:edX+DemoX+Demo_Course+type@vertical+block@2152d4a4aadc4cb0af5256394a3d1fc7
        /jump_to_id/2152d4a4aadc4cb0af5256394a3d1fc7
    """
    if not _is_studio_url_without_base(url):
        return url

    if url.startswith('/static/'):
        # replace_static_urls is given the URL in quotes; the slice drops the first and last character of its result.
        processed_url = replace_static_urls(f'"{url}"', course_id=course_key)[1:-1]
        return 'https://' + settings.CMS_BASE + processed_url

    if url.startswith('/jump_to_id/'):
        return f'https://{settings.LMS_BASE}/courses/{course_key}{url}'

    if url.startswith('/'):
        return 'https://' + settings.CMS_BASE + url

    return 'https://' + settings.CMS_BASE + '/container/' + url
|
|
|
|
|
|
def _is_studio_url(url):
    """Returns True if url is a studio url, with or without the cms base."""
    if _is_studio_url_with_base(url):
        return True
    return _is_studio_url_without_base(url)
|
|
|
|
|
|
def _is_studio_url_with_base(url):
    """Returns True if url starts with the cms base under either the http or https scheme."""
    cms_base = settings.CMS_BASE
    return url.startswith(('http://' + cms_base, 'https://' + cms_base))
|
|
|
|
|
|
def _is_studio_url_without_base(url):
    """Returns True if url is a studio url without cms base, i.e. it has no http(s):// prefix."""
    return not url.startswith(('http://', 'https://'))
|
|
|
|
|
|
def _filter_by_status(results):
    """
    Filter results by status.

    Statuses:
        200: OK. No need to do more
        403: Forbidden. Record as locked link if it is studio link.
        403, 500 or None: Record as external-forbidden link if it is external link
        None: Error. Queue for retry if it is studio link.
        Other: Failure. Record as broken link.

    Arguments:
        results (list): URL, associated block id, and request status

    Returns:
        filtered_results (list): list of block id, URL and link state
        retry_list (list): block id and url pairs

    Example return:
        [
            [block_id1, filtered_results_url1, link_state],
            ...
        ],
        [
            [block_id1, retry_url1],
            ...
        ]
    """
    filtered_results = []
    retry_list = []

    for result in results:
        status = result['status']
        block_id = result['block_id']
        url = result['url']

        if status == 200:
            continue

        if _is_studio_url(url):
            if status is None:
                retry_list.append([block_id, url])
            elif status == 403:
                filtered_results.append([block_id, url, LinkState.LOCKED])
            else:
                filtered_results.append([block_id, url, LinkState.BROKEN])
        elif status in (403, 500, None):
            filtered_results.append([block_id, url, LinkState.EXTERNAL_FORBIDDEN])
        else:
            filtered_results.append([block_id, url, LinkState.BROKEN])

    return filtered_results, retry_list
|
|
|
|
|
|
def _retry_validation(url_list, course_key, retry_count=3):
    """
    Retry validation for URLs that failed due to connection error.

    Up to ``retry_count`` attempts are made. URLs still awaiting a retry after
    the last attempt are appended to the results as they are, i.e. as block id
    and URL pairs without a link state.

    Returns:
        list: URLs that could not be validated due to being locked or due to persistent connection problems
    """
    results = []
    pending = url_list

    for attempt in range(1, retry_count + 1):
        if not pending:
            break
        LOGGER.debug(f'[Link Check] retry attempt #{attempt}')
        pending = _retry_validation_and_filter_results(course_key, results, pending)

    results.extend(pending)
    return results
|
|
|
|
|
|
def _retry_validation_and_filter_results(course_key, results, retry_list):
    """
    Validates URLs and then filter them by status.

    Arguments:
        course_key: locator of the course the URLs belong to
        results (list): extended in place with the URLs that were classified this round
        retry_list: list of urls to retry

    Returns:
        list: URLs that did not pass validation and should be retried
    """
    revalidated = asyncio.run(
        _validate_urls_access_in_batches(retry_list, course_key, batch_size=100)
    )
    classified, still_pending = _filter_by_status(revalidated)
    results.extend(classified)
    return still_pending
|
|
|
|
|
|
def _save_broken_links_file(artifact, file_to_save):
    """
    Attach ``file_to_save`` to the artifact's file field under its base name and save the artifact.

    Returns:
        bool: always True
    """
    stored_name = os.path.basename(file_to_save.name)
    artifact.file.save(name=stored_name, content=File(file_to_save))
    artifact.save()
    return True
|
|
|
|
|
|
def _write_broken_links_to_file(broken_or_locked_urls, broken_links_file):
    """
    Write the link records as 4-space-indented JSON to the path ``broken_links_file.name``.

    The path is reopened in text write mode, so any previous content is replaced.
    """
    target_path = broken_links_file.name
    with open(target_path, 'w') as output:
        json.dump(broken_or_locked_urls, output, indent=4)
|
|
|
|
|
|
@shared_task
@set_code_owner_attribute
def handle_create_or_update_xblock_upstream_link(usage_key):
    """
    Create or update upstream link for a single xblock.

    Nothing is done when the usage key is invalid, when the item cannot be
    found, or when the xblock has no upstream or no upstream version.
    """
    ensure_cms("handle_create_or_update_xblock_upstream_link may only be executed in a CMS context")
    try:
        xblock = modulestore().get_item(UsageKey.from_string(usage_key))
    except (ItemNotFoundError, InvalidKeyError):
        LOGGER.exception(f'Could not find item for given usage_key: {usage_key}')
        return

    if xblock.upstream and xblock.upstream_version:
        create_or_update_xblock_upstream_link(xblock, xblock.course_id)
|
|
|
|
|
|
@shared_task
@set_code_owner_attribute
def create_or_update_upstream_links(
    course_key_str: str,
    force: bool = False,
    replace: bool = False,
    created: datetime | None = None,
):
    """
    A Celery task to create or update upstream downstream links in database from course xblock content.

    Arguments:
        course_key_str: serialized key of the course to process.
        force: process the course even if its link status is already completed or processing.
        replace: delete the course's existing component and container links first.
        created: timestamp recorded for the links and the status; defaults to the current UTC time.
    """
    ensure_cms("create_or_update_upstream_links may only be executed in a CMS context")

    created = created or datetime.now(timezone.utc)
    course_status = LearningContextLinksStatus.get_or_create(course_key_str, created)

    already_handled_states = [
        LearningContextLinksStatusChoices.COMPLETED,
        LearningContextLinksStatusChoices.PROCESSING,
    ]
    if course_status.status in already_handled_states and not force:
        return

    store = modulestore()
    course_key = CourseKey.from_string(course_key_str)
    course_status.update_status(
        LearningContextLinksStatusChoices.PROCESSING,
        updated=created,
    )

    if replace:
        ComponentLink.objects.filter(downstream_context_key=course_key).delete()
        ContainerLink.objects.filter(downstream_context_key=course_key).delete()

    try:
        # Only xblocks whose "upstream" setting is not None are of interest.
        xblocks = store.get_items(course_key, settings={"upstream": lambda x: x is not None})
    except ItemNotFoundError:
        LOGGER.exception(f'Could not find items for given course: {course_key}')
        course_status.update_status(LearningContextLinksStatusChoices.FAILED)
        return

    for xblock in xblocks:
        create_or_update_xblock_upstream_link(xblock, course_key, created)

    course_status.update_status(LearningContextLinksStatusChoices.COMPLETED)
|
|
|
|
|
|
@shared_task
@set_code_owner_attribute
def handle_unlink_upstream_block(upstream_usage_key_string: str) -> None:
    """
    Handle updates needed to downstream blocks when the upstream link is severed.

    The copied tags of every downstream block linked to the given upstream
    usage key are made editable. An invalid key string is logged and ignored.
    """
    ensure_cms("handle_unlink_upstream_block may only be executed in a CMS context")

    try:
        upstream_usage_key = UsageKey.from_string(upstream_usage_key_string)
    except InvalidKeyError:
        LOGGER.exception(f'Invalid upstream usage_key: {upstream_usage_key_string}')
        return

    downstream_links = ComponentLink.objects.filter(upstream_usage_key=upstream_usage_key)
    for link in downstream_links:
        make_copied_tags_editable(str(link.downstream_usage_key))
|
|
|
|
|
|
@shared_task
@set_code_owner_attribute
def handle_unlink_upstream_container(upstream_container_key_string: str) -> None:
    """
    Handle updates needed to downstream blocks when the upstream link is severed.

    The copied tags of every downstream block linked to the given upstream
    container key are made editable. An invalid key string is logged and ignored.
    """
    ensure_cms("handle_unlink_upstream_container may only be executed in a CMS context")

    try:
        upstream_container_key = LibraryContainerLocator.from_string(upstream_container_key_string)
    except InvalidKeyError:
        LOGGER.exception(f'Invalid upstream container_key: {upstream_container_key_string}')
        return

    downstream_links = ContainerLink.objects.filter(upstream_container_key=upstream_container_key)
    for link in downstream_links:
        make_copied_tags_editable(str(link.downstream_usage_key))
|