Add indexing support for blockstore content libraries

This commit is contained in:
Sid Verma
2020-07-17 18:50:30 +05:30
committed by Kyle McCormick
parent 34d1e46d25
commit 8d33a5a3e1
13 changed files with 415 additions and 6 deletions

View File

@@ -143,6 +143,7 @@ YOUTUBE['TEXT_API']['url'] = "{0}:{1}/test_transcripts_youtube/".format(YOUTUBE_
FEATURES['ENABLE_COURSEWARE_INDEX'] = True
FEATURES['ENABLE_LIBRARY_INDEX'] = True
FEATURES['ENABLE_CONTENT_LIBRARY_INDEX'] = False
FEATURES['ORGANIZATIONS_APP'] = True
SEARCH_ENGINE = "search.tests.mock_search_engine.MockSearchEngine"

View File

@@ -254,9 +254,12 @@ FEATURES = {
# Enable the courseware search functionality
'ENABLE_COURSEWARE_INDEX': False,
# Enable content libraries search functionality
# Enable content libraries (modulestore) search functionality
'ENABLE_LIBRARY_INDEX': False,
# Enable content libraries (blockstore) indexing
'ENABLE_CONTENT_LIBRARY_INDEX': False,
# Enable course reruns, which will always use the split modulestore
'ALLOW_COURSE_RERUNS': True,

View File

@@ -131,6 +131,7 @@ XBLOCK_SETTINGS.update({'VideoBlock': {'licensing_enabled': True}})
################################ SEARCH INDEX ################################
FEATURES['ENABLE_COURSEWARE_INDEX'] = False
FEATURES['ENABLE_LIBRARY_INDEX'] = False
FEATURES['ENABLE_CONTENT_LIBRARY_INDEX'] = False
SEARCH_ENGINE = "search.elastic.ElasticSearchEngine"
################################ COURSE DISCUSSIONS ###########################

View File

@@ -471,7 +471,7 @@ PARSE_KEYS = AUTH_TOKENS.get("PARSE_KEYS", {})
# Example: {'CN': 'http://api.xuetangx.com/edx/video?s3_url='}
VIDEO_CDN_URL = ENV_TOKENS.get('VIDEO_CDN_URL', {})
if FEATURES['ENABLE_COURSEWARE_INDEX'] or FEATURES['ENABLE_LIBRARY_INDEX']:
if FEATURES['ENABLE_COURSEWARE_INDEX'] or FEATURES['ENABLE_LIBRARY_INDEX'] or FEATURES['ENABLE_CONTENT_LIBRARY_INDEX']:
# Use ElasticSearch for the search engine
SEARCH_ENGINE = "search.elastic.ElasticSearchEngine"

View File

@@ -245,6 +245,7 @@ VIDEO_CDN_URL = {
# Courseware Search Index
FEATURES['ENABLE_COURSEWARE_INDEX'] = True
FEATURES['ENABLE_LIBRARY_INDEX'] = True
FEATURES['ENABLE_CONTENT_LIBRARY_INDEX'] = False
SEARCH_ENGINE = "search.tests.mock_search_engine.MockSearchEngine"
FEATURES['ENABLE_ENROLLMENT_TRACK_USER_PARTITION'] = True

View File

@@ -54,8 +54,17 @@ from xblock.core import XBlock
from xblock.exceptions import XBlockNotFoundError
from openedx.core.djangoapps.content_libraries import permissions
from openedx.core.djangoapps.content_libraries.constants import DRAFT_NAME
from openedx.core.djangoapps.content_libraries.library_bundle import LibraryBundle
from openedx.core.djangoapps.content_libraries.models import ContentLibrary, ContentLibraryPermission
from openedx.core.djangoapps.content_libraries.signals import (
CONTENT_LIBRARY_CREATED,
CONTENT_LIBRARY_UPDATED,
CONTENT_LIBRARY_DELETED,
LIBRARY_BLOCK_CREATED,
LIBRARY_BLOCK_UPDATED,
LIBRARY_BLOCK_DELETED,
)
from openedx.core.djangoapps.xblock.api import get_block_display_name, load_block
from openedx.core.djangoapps.xblock.learning_context.manager import get_learning_context_impl
from openedx.core.djangoapps.xblock.runtime.olx_parsing import XBlockInclude
@@ -77,10 +86,6 @@ from openedx.core.djangolib.blockstore_cache import BundleCache
log = logging.getLogger(__name__)
# This API is only used in Studio, so we always work with this draft of any
# content library bundle:
DRAFT_NAME = 'studio_draft'
# Exceptions:
ContentLibraryNotFound = ContentLibrary.DoesNotExist
@@ -302,6 +307,7 @@ def create_library(collection_uuid, org, slug, title, description, allow_public_
except IntegrityError:
delete_bundle(bundle.uuid)
raise LibraryAlreadyExists(slug)
CONTENT_LIBRARY_CREATED.send(sender=None, library_key=ref.library_key)
return ContentLibraryMetadata(
key=ref.library_key,
bundle_uuid=bundle.uuid,
@@ -396,6 +402,7 @@ def update_library(
assert isinstance(description, six.string_types)
fields["description"] = description
update_bundle(ref.bundle_uuid, **fields)
CONTENT_LIBRARY_UPDATED.send(sender=None, library_key=ref.library_key)
def delete_library(library_key):
@@ -410,6 +417,7 @@ def delete_library(library_key):
# system, which is a better state than having a reference to a library with
# no backing blockstore bundle.
ref.delete()
CONTENT_LIBRARY_DELETED.send(sender=None, library_key=ref.library_key)
try:
delete_bundle(bundle_uuid)
except:
@@ -508,6 +516,7 @@ def set_library_block_olx(usage_key, new_olx_str):
write_draft_file(draft.uuid, metadata.def_key.olx_path, new_olx_str.encode('utf-8'))
# Clear the bundle cache so everyone sees the new block immediately:
BundleCache(metadata.def_key.bundle_uuid, draft_name=DRAFT_NAME).clear()
LIBRARY_BLOCK_UPDATED.send(sender=None, library_key=usage_key.context_key)
def create_library_block(library_key, block_type, definition_id):
@@ -549,6 +558,7 @@ def create_library_block(library_key, block_type, definition_id):
# Clear the bundle cache so everyone sees the new block immediately:
BundleCache(ref.bundle_uuid, draft_name=DRAFT_NAME).clear()
# Now return the metadata about the new block:
LIBRARY_BLOCK_CREATED.send(sender=None, library_key=ref.library_key)
return get_library_block(usage_key)
@@ -601,6 +611,7 @@ def delete_library_block(usage_key, remove_from_parent=True):
pass
# Clear the bundle cache so everyone sees the deleted block immediately:
lib_bundle.cache.clear()
LIBRARY_BLOCK_DELETED.send(sender=None, library_key=lib_bundle.library_key)
def create_library_block_child(parent_usage_key, block_type, definition_id):
@@ -672,6 +683,7 @@ def add_library_block_static_asset_file(usage_key, file_name, file_content):
file_metadata = blockstore_cache.get_bundle_file_metadata_with_cache(
bundle_uuid=def_key.bundle_uuid, path=file_path, draft_name=DRAFT_NAME,
)
LIBRARY_BLOCK_UPDATED.send(sender=None, library_key=lib_bundle.library_key)
return LibraryXBlockStaticFile(path=file_metadata.path, url=file_metadata.url, size=file_metadata.size)
@@ -692,6 +704,7 @@ def delete_library_block_static_asset_file(usage_key, file_name):
write_draft_file(draft.uuid, file_path, contents=None)
# Clear the bundle cache so everyone sees the new file immediately:
lib_bundle.cache.clear()
LIBRARY_BLOCK_UPDATED.send(sender=None, library_key=lib_bundle.library_key)
def get_allowed_block_types(library_key): # pylint: disable=unused-argument
@@ -778,6 +791,7 @@ def create_bundle_link(library_key, link_id, target_opaque_key, version=None):
set_draft_link(draft.uuid, link_id, target_bundle_uuid, version)
# Clear the cache:
LibraryBundle(library_key, ref.bundle_uuid, draft_name=DRAFT_NAME).cache.clear()
CONTENT_LIBRARY_UPDATED.send(sender=None, library_key=library_key)
def update_bundle_link(library_key, link_id, version=None, delete=False):
@@ -801,6 +815,7 @@ def update_bundle_link(library_key, link_id, version=None, delete=False):
set_draft_link(draft.uuid, link_id, link.bundle_uuid, version)
# Clear the cache:
LibraryBundle(library_key, ref.bundle_uuid, draft_name=DRAFT_NAME).cache.clear()
CONTENT_LIBRARY_UPDATED.send(sender=None, library_key=library_key)
def publish_changes(library_key):
@@ -816,6 +831,7 @@ def publish_changes(library_key):
return # If there is no draft, no action is needed.
LibraryBundle(library_key, ref.bundle_uuid).cache.clear()
LibraryBundle(library_key, ref.bundle_uuid, draft_name=DRAFT_NAME).cache.clear()
CONTENT_LIBRARY_UPDATED.send(sender=None, library_key=library_key)
def revert_changes(library_key):
@@ -831,3 +847,4 @@ def revert_changes(library_key):
else:
return # If there is no draft, no action is needed.
LibraryBundle(library_key, ref.bundle_uuid, draft_name=DRAFT_NAME).cache.clear()
CONTENT_LIBRARY_UPDATED.send(sender=None, library_key=library_key)

View File

@@ -0,0 +1,5 @@
""" Constants used for the content libraries. """

# Name of the draft that the content libraries code passes as ``draft_name``
# when working with a library's bundle. The content libraries API is only used
# in Studio, so it always works with this draft of any content library bundle:
DRAFT_NAME = 'studio_draft'

View File

@@ -0,0 +1,152 @@
""" Code to allow indexing content libraries """
import logging
from django.conf import settings
from django.dispatch import receiver
from elasticsearch.exceptions import ConnectionError as ElasticConnectionError
from search.search_engine_base import SearchEngine
from openedx.core.djangoapps.content_libraries.constants import DRAFT_NAME
from openedx.core.djangoapps.content_libraries.signals import (
CONTENT_LIBRARY_CREATED,
CONTENT_LIBRARY_UPDATED,
CONTENT_LIBRARY_DELETED,
LIBRARY_BLOCK_CREATED,
LIBRARY_BLOCK_UPDATED,
LIBRARY_BLOCK_DELETED,
)
from openedx.core.djangoapps.content_libraries.library_bundle import LibraryBundle
from openedx.core.djangoapps.content_libraries.models import ContentLibrary
from openedx.core.lib.blockstore_api import get_bundle
log = logging.getLogger(__name__)

# Passed as ``size`` to the search calls in this module so that results are not
# cut off at the default of 10 records. 10000 is the maximum number of records
# Elasticsearch is able to return in a single result.
MAX_SIZE = 10000
class LibraryNotIndexedException(Exception):
    """
    Raised when a requested library has no document in the ElasticSearch index.
    """
class ContentLibraryIndexer:
    """
    Class to perform indexing for blockstore-based content libraries.

    Every method is a classmethod that works against the search index named by
    ``INDEX_NAME`` and stores/reads documents of type ``LIBRARY_DOCUMENT_TYPE``.
    """

    INDEX_NAME = "content_library_index"
    LIBRARY_DOCUMENT_TYPE = "content_library"

    @classmethod
    def index_libraries(cls, library_keys):
        """
        Index the specified libraries. If they already exist, replace them with new ones.

        Arguments:
            library_keys: iterable of keys of the libraries to (re)index.

        Returns:
            Whatever the search engine's ``index()`` call returns.
        """
        searcher = SearchEngine.get_search_engine(cls.INDEX_NAME)

        library_dicts = []
        for library_key in library_keys:
            ref = ContentLibrary.objects.get_by_key(library_key)
            # Index the state of the Studio draft, not the published version.
            lib_bundle = LibraryBundle(library_key, ref.bundle_uuid, draft_name=DRAFT_NAME)
            usages = lib_bundle.get_top_level_usages()
            (has_unpublished_changes, has_unpublished_deletes) = lib_bundle.has_changes()
            bundle_metadata = get_bundle(ref.bundle_uuid)

            library_dicts.append({
                # "id" is the field that get_libraries() filters on and that
                # remove_libraries() / remove_all_libraries() pass to remove().
                "id": str(library_key),
                "uuid": str(bundle_metadata.uuid),
                "title": bundle_metadata.title,
                "description": bundle_metadata.description,
                "version": bundle_metadata.latest_version,
                "num_blocks": len(usages),
                "has_unpublished_changes": has_unpublished_changes,
                "has_unpublished_deletes": has_unpublished_deletes,
            })

        return searcher.index(cls.LIBRARY_DOCUMENT_TYPE, library_dicts)

    @classmethod
    def get_libraries(cls, library_keys):
        """
        Retrieve a list of libraries from the index.

        Arguments:
            library_keys: iterable of library keys to look up.

        Returns:
            A list with one indexed document (dict) per entry of
            ``library_keys``, in the same order. A key that is passed more than
            once yields its document more than once.

        Raises:
            LibraryNotIndexedException: if at least one of the requested keys
                has no document in the index.
        """
        searcher = SearchEngine.get_search_engine(cls.INDEX_NAME)
        library_keys_str = [str(key) for key in library_keys]
        response = searcher.search(
            doc_type=cls.LIBRARY_DOCUMENT_TYPE,
            field_dictionary={"id": library_keys_str},
            size=MAX_SIZE,
        )

        # Search results may not retain the original order of keys - we use this
        # dict to construct a list in the original order of library_keys
        response_dict = {
            result["data"]["id"]: result["data"]
            for result in response["results"]
        }

        # Decide on the set of keys that are actually absent rather than on a
        # length comparison: comparing lengths reports a failure (with an empty
        # "missing" set) whenever the caller repeats a key or the engine returns
        # an unrequested document, even though every requested key is indexed.
        missing = set(library_keys_str) - set(response_dict)
        if missing:
            raise LibraryNotIndexedException("Keys not found in index: {}".format(missing))

        return [response_dict[key] for key in library_keys_str]

    @classmethod
    def remove_libraries(cls, library_keys):
        """
        Remove the provided library_keys from the index.

        Arguments:
            library_keys: iterable of library keys whose documents are removed.
        """
        searcher = SearchEngine.get_search_engine(cls.INDEX_NAME)
        ids_str = [str(key) for key in library_keys]
        searcher.remove(cls.LIBRARY_DOCUMENT_TYPE, ids_str)

    @classmethod
    def remove_all_libraries(cls):
        """
        Remove all libraries from the index.

        The ids to remove are collected with a single search limited to
        ``MAX_SIZE`` results, so at most ``MAX_SIZE`` documents are removed per call.
        """
        searcher = SearchEngine.get_search_engine(cls.INDEX_NAME)
        response = searcher.search(doc_type=cls.LIBRARY_DOCUMENT_TYPE, filter_dictionary={}, size=MAX_SIZE)
        ids = [result["data"]["id"] for result in response["results"]]
        searcher.remove(cls.LIBRARY_DOCUMENT_TYPE, ids)

    @classmethod
    def indexing_is_enabled(cls):
        """
        Checks to see if the indexing feature is enabled.

        Returns the value of the ``ENABLE_CONTENT_LIBRARY_INDEX`` feature flag,
        or ``False`` when the flag is not set.
        """
        return settings.FEATURES.get("ENABLE_CONTENT_LIBRARY_INDEX", False)
@receiver([
    CONTENT_LIBRARY_CREATED,
    CONTENT_LIBRARY_UPDATED,
    LIBRARY_BLOCK_CREATED,
    LIBRARY_BLOCK_UPDATED,
    LIBRARY_BLOCK_DELETED,
])
def index_library(sender, library_key, **kwargs):  # pylint: disable=unused-argument
    """
    Signal handler: (re)index a library after it was created or updated, or
    after one of its blocks was created, updated or deleted.

    Does nothing when content library indexing is disabled. A failure to
    connect to ElasticSearch is logged and not propagated to the signal sender.
    """
    if not ContentLibraryIndexer.indexing_is_enabled():
        return

    try:
        ContentLibraryIndexer.index_libraries([library_key])
    except ElasticConnectionError as err:
        log.exception(err)
@receiver(CONTENT_LIBRARY_DELETED)
def remove_library_index(sender, library_key, **kwargs):  # pylint: disable=unused-argument
    """
    Signal handler: drop a library's document from the index once the library
    has been deleted.

    Does nothing when content library indexing is disabled. A failure to
    connect to ElasticSearch is logged and not propagated to the signal sender.
    """
    if not ContentLibraryIndexer.indexing_is_enabled():
        return

    try:
        ContentLibraryIndexer.remove_libraries([library_key])
    except ElasticConnectionError as err:
        log.exception(err)

View File

@@ -0,0 +1,62 @@
""" Management command to update content libraries' search index """
from textwrap import dedent
from django.core.management import BaseCommand
from opaque_keys.edx.locator import LibraryLocatorV2
from openedx.core.djangoapps.content_libraries.libraries_index import ContentLibraryIndexer
from openedx.core.djangoapps.content_libraries.models import ContentLibrary
from cms.djangoapps.contentstore.management.commands.prompt import query_yes_no
class Command(BaseCommand):
    """
    Command to reindex blockstore-based content libraries (single, multiple or all available).
    This isn't needed on a regular basis as signals in various library APIs update the index when creating, updating or
    deleting libraries.
    This is usually required when the schema of the index changes, or if indexes are out of sync due to indexing
    being previously disabled or any other reason.
    Examples:
    ./manage.py reindex_content_library lib1 lib2 - reindexes libraries with keys lib1 and lib2
    ./manage.py reindex_content_library --all - reindexes all available libraries
    ./manage.py reindex_content_library --clear-all - clear all libraries indexes
    """
    # NOTE: the class docstring above doubles as the command's help text (see
    # ``help`` below), so edits to it change what ``--help`` prints.
    help = dedent(__doc__)

    CONFIRMATION_PROMPT_CLEAR = u"This will clear all indexed libraries from elasticsearch. Do you want to continue?"
    CONFIRMATION_PROMPT_ALL = u"Reindexing all libraries might be a time consuming operation. Do you want to continue?"

    def add_arguments(self, parser):
        """
        Register ``--clear-all``, ``--all`` and the positional ``library_ids``.
        """
        parser.add_argument(
            '--clear-all',
            action='store_true',
            dest='clear-all',
            help='Clear all library indexes'
        )
        parser.add_argument(
            '--all',
            action='store_true',
            dest='all',
            help='Reindex all libraries'
        )
        parser.add_argument('library_ids', nargs='*')

    def handle(self, *args, **options):
        """
        Clear the index, reindex every library, or reindex the given libraries.

        ``--clear-all`` takes precedence over everything else; ``--all`` takes
        precedence over explicitly listed library ids. Both ask for interactive
        confirmation and do nothing when the answer is "no".

        Raises:
            CommandError: if neither ``--clear-all``, ``--all`` nor any library
                id was supplied, instead of silently indexing nothing.
        """
        # Imported here so that this block does not rely on a change to the
        # module's import section.
        from django.core.management import CommandError  # pylint: disable=import-outside-toplevel

        if options['clear-all']:
            if query_yes_no(self.CONFIRMATION_PROMPT_CLEAR, default="no"):
                ContentLibraryIndexer.remove_all_libraries()
            return

        if options['all']:
            if not query_yes_no(self.CONFIRMATION_PROMPT_ALL, default="no"):
                return
            library_keys = [library.library_key for library in ContentLibrary.objects.all()]
        else:
            if not options['library_ids']:
                raise CommandError(
                    "Nothing to do: specify one or more library ids, --all or --clear-all."
                )
            library_keys = [
                LibraryLocatorV2.from_string(library_id)
                for library_id in options['library_ids']
            ]

        ContentLibraryIndexer.index_libraries(library_keys)

View File

@@ -0,0 +1,12 @@
"""
Content libraries related signals.
"""
from django.dispatch import Signal
# Every signal below is sent with a single keyword argument, ``library_key``:
# the key of the library that was created/updated/deleted, or of the library
# whose block was created/updated/deleted.
#
# ``providing_args`` is intentionally not passed to ``Signal``: it was purely
# documentational, is deprecated since Django 3.1 and was removed in Django 4.0,
# where passing it raises a TypeError as soon as this module is imported.
CONTENT_LIBRARY_CREATED = Signal()
CONTENT_LIBRARY_UPDATED = Signal()
CONTENT_LIBRARY_DELETED = Signal()
LIBRARY_BLOCK_CREATED = Signal()
LIBRARY_BLOCK_DELETED = Signal()
LIBRARY_BLOCK_UPDATED = Signal()

View File

@@ -0,0 +1,155 @@
"""
Testing indexing of blockstore based content libraries
"""
from django.conf import settings
from django.test.utils import override_settings
from search.search_engine_base import SearchEngine
from opaque_keys.edx.locator import LibraryLocatorV2
from openedx.core.djangoapps.content_libraries.libraries_index import ContentLibraryIndexer, LibraryNotIndexedException
from openedx.core.djangoapps.content_libraries.tests.base import ContentLibrariesRestApiTest
@override_settings(
    SEARCH_ENGINE="search.tests.mock_search_engine.MockSearchEngine",
    FEATURES={**settings.FEATURES, 'ENABLE_CONTENT_LIBRARY_INDEX': True},
)
class ContentLibraryIndexerIndexer(ContentLibrariesRestApiTest):
    """
    Exercises ContentLibraryIndexer through the content libraries REST API,
    with indexing switched on and the mock search engine as backend.
    """

    def setUp(self):
        super().setUp()
        # Start every test from an empty index.
        ContentLibraryIndexer.remove_all_libraries()
        self.searcher = SearchEngine.get_search_engine(ContentLibraryIndexer.INDEX_NAME)

    def _count_indexed_libraries(self):
        """
        Return the 'total' reported by an unfiltered search for library documents.
        """
        found = self.searcher.search(
            doc_type=ContentLibraryIndexer.LIBRARY_DOCUMENT_TYPE,
            filter_dictionary={},
        )
        return found['total']

    def test_index_libraries(self):
        """
        Newly created libraries show up in the index with the expected fields
        """
        created = [
            self._create_library(slug="test-lib-index-1", title="Title 1", description="Description"),
            self._create_library(slug="test-lib-index-2", title="Title 2", description="Description"),
        ]
        self.assertEqual(self._count_indexed_libraries(), 2)

        for lib in created:
            key = LibraryLocatorV2.from_string(lib['id'])
            indexed = ContentLibraryIndexer.get_libraries([key])[0]

            self.assertEqual(indexed['id'], lib['id'])
            self.assertEqual(indexed['title'], lib['title'])
            self.assertEqual(indexed['description'], lib['description'])
            self.assertEqual(indexed['uuid'], lib['bundle_uuid'])
            self.assertEqual(indexed['version'], lib['version'])
            self.assertEqual(indexed['num_blocks'], 0)
            self.assertEqual(indexed['has_unpublished_changes'], False)
            self.assertEqual(indexed['has_unpublished_deletes'], False)

    def test_remove_all_libraries(self):
        """
        remove_all_libraries() leaves no library documents in the index
        """
        self._create_library(slug="test-lib-rm-all-1", title="Title 1", description="Description")
        self._create_library(slug="test-lib-rm-all-2", title="Title 2", description="Description")
        self.assertEqual(self._count_indexed_libraries(), 2)

        ContentLibraryIndexer.remove_all_libraries()
        self.assertEqual(self._count_indexed_libraries(), 0)

    def test_update_libraries(self):
        """
        Updating a library refreshes its document; deleting it removes the document
        """
        lib = self._create_library(slug="test-lib-update", title="Title", description="Description")
        key = LibraryLocatorV2.from_string(lib['id'])

        self._update_library(lib['id'], title="New Title", description="New Title")
        indexed = ContentLibraryIndexer.get_libraries([key])[0]

        self.assertEqual(indexed['id'], lib['id'])
        self.assertEqual(indexed['title'], "New Title")
        self.assertEqual(indexed['description'], "New Title")
        self.assertEqual(indexed['uuid'], lib['bundle_uuid'])
        self.assertEqual(indexed['version'], lib['version'])
        self.assertEqual(indexed['num_blocks'], 0)
        self.assertEqual(indexed['has_unpublished_changes'], False)
        self.assertEqual(indexed['has_unpublished_deletes'], False)

        self._delete_library(lib['id'])
        with self.assertRaises(LibraryNotIndexedException):
            ContentLibraryIndexer.get_libraries([key])

    def test_update_library_blocks(self):
        """
        Block, asset, link, commit and revert operations keep the index in sync
        """
        lib = self._create_library(slug="test-lib-update-block", title="Title", description="Description")
        lib_id = lib['id']
        key = LibraryLocatorV2.from_string(lib_id)

        def assert_unpublished_state(changes, deletes):
            """
            Fetch the indexed document and check its unpublished changes/deletes flags
            """
            indexed = ContentLibraryIndexer.get_libraries([key])[0]
            self.assertEqual(indexed['has_unpublished_changes'], changes)
            self.assertEqual(indexed['has_unpublished_deletes'], deletes)
            return indexed

        def commit_and_assert_clean():
            """
            Commit the library and check that the index reports nothing unpublished
            """
            self._commit_library_changes(str(key))
            return assert_unpublished_state(False, False)

        # A new, uncommitted block
        block = self._add_block_to_library(lib_id, "problem", "problem1")
        self.assertEqual(assert_unpublished_state(True, False)['num_blocks'], 1)

        # The new block once committed
        self._commit_library_changes(lib_id)
        self.assertEqual(assert_unpublished_state(False, False)['num_blocks'], 1)

        # An uncommitted block deletion
        self._delete_library_block(block['id'])
        self.assertEqual(assert_unpublished_state(True, True)['num_blocks'], 0)

        # The deletion once committed
        self._commit_library_changes(lib_id)
        self.assertEqual(assert_unpublished_state(False, False)['num_blocks'], 0)

        # Fresh committed block to run the modification checks against
        block = self._add_block_to_library(lib_id, "problem", "problem1")
        self._commit_library_changes(lib_id)

        # OLX update on the block
        self._set_library_block_olx(block["id"], "<problem/>")
        assert_unpublished_state(True, False)
        commit_and_assert_clean()

        # Adding, then deleting, a static asset of the block
        self._set_library_block_asset(block["id"], "whatever.png", b"data")
        assert_unpublished_state(True, False)
        commit_and_assert_clean()
        self._delete_library_block_asset(block["id"], "whatever.png", expect_response=204)
        assert_unpublished_state(True, False)
        commit_and_assert_clean()

        # Second library to link to
        other_lib = self._create_library(slug="test-lib-update-block-2", title="Title 2", description="Description")
        self._add_block_to_library(other_lib["id"], "problem", "problem1")
        self._commit_library_changes(other_lib["id"])

        # A new link on the first library
        self._link_to_library(lib_id, "library_2", other_lib["id"])
        assert_unpublished_state(True, False)

        # Reverting the uncommitted link
        self._revert_library_changes(lib_id)
        assert_unpublished_state(False, False)