From 78045115ab51ea2e7b42512dbf7f9cd3d3d8b43d Mon Sep 17 00:00:00 2001 From: Sid Verma Date: Thu, 16 Jul 2020 12:21:56 +0530 Subject: [PATCH] Add filtering and search support to library APIs --- .../contentstore/tests/test_libraries.py | 16 ++++ cms/djangoapps/contentstore/views/library.py | 24 +++++- .../core/djangoapps/content_libraries/api.py | 29 +++++-- .../content_libraries/libraries_index.py | 78 ++++++++++++++++++- .../content_libraries/tests/base.py | 6 +- .../tests/test_content_libraries.py | 28 +++++++ .../tests/test_libraries_index.py | 2 +- .../djangoapps/content_libraries/views.py | 25 +++++- openedx/core/lib/blockstore_api/methods.py | 12 ++- 9 files changed, 197 insertions(+), 23 deletions(-) diff --git a/cms/djangoapps/contentstore/tests/test_libraries.py b/cms/djangoapps/contentstore/tests/test_libraries.py index ea3bf99806..14b4721ddb 100644 --- a/cms/djangoapps/contentstore/tests/test_libraries.py +++ b/cms/djangoapps/contentstore/tests/test_libraries.py @@ -478,6 +478,22 @@ class TestLibraries(LibraryTestCase): with self.assertRaises(ValueError): self._refresh_children(lc_block, status_code_expected=400) + def test_library_filters(self): + """ + Test the filters in the list libraries API + """ + self._create_library(library="test-lib1", display_name="Foo", org='org') + self._create_library(library="test-lib2", display_name="Library-Title-2", org='org-test1') + self._create_library(library="l3", display_name="Library-Title-3", org='org-test1') + self._create_library(library="l4", display_name="Library-Title-4", org='org-test2') + + self.assertEqual(len(self.client.get_json(LIBRARY_REST_URL).json()), 5) # 1 more from self.setUp() + self.assertEqual(len(self.client.get_json('{}?org=org-test1'.format(LIBRARY_REST_URL)).json()), 2) + self.assertEqual(len(self.client.get_json('{}?text_search=test-lib'.format(LIBRARY_REST_URL)).json()), 2) + self.assertEqual(len(self.client.get_json('{}?text_search=library-title'.format(LIBRARY_REST_URL)).json()), 3) + self.assertEqual(len(self.client.get_json('{}?text_search=library-'.format(LIBRARY_REST_URL)).json()), 3) + self.assertEqual(len(self.client.get_json('{}?text_search=org-test'.format(LIBRARY_REST_URL)).json()), 3) + @ddt.ddt @patch('django.conf.settings.SEARCH_ENGINE', None) diff --git a/cms/djangoapps/contentstore/views/library.py b/cms/djangoapps/contentstore/views/library.py index 926c49df87..e41f9dab77 100644 --- a/cms/djangoapps/contentstore/views/library.py +++ b/cms/djangoapps/contentstore/views/library.py @@ -144,15 +144,33 @@ def _display_library(library_key_string, request): def _list_libraries(request): """ - List all accessible libraries + List all accessible libraries, after applying filters in the request + Query params: + org - The organization used to filter libraries + text_search - The string used to filter libraries by searching in title, id or org """ + org = request.GET.get('org', '') + text_search = request.GET.get('text_search', '').lower() + + if org: + libraries = modulestore().get_libraries(org=org) + else: + libraries = modulestore().get_libraries() + lib_info = [ { "display_name": lib.display_name, "library_key": text_type(lib.location.library_key), } - for lib in modulestore().get_libraries() - if has_studio_read_access(request.user, lib.location.library_key) + for lib in libraries + if ( + ( + text_search in lib.display_name.lower() or + text_search in lib.location.library_key.org.lower() or + text_search in lib.location.library_key.library.lower() + ) and + has_studio_read_access(request.user, lib.location.library_key) + ) ] return JsonResponse(lib_info) diff --git a/openedx/core/djangoapps/content_libraries/api.py b/openedx/core/djangoapps/content_libraries/api.py index cb693ac4c4..7b1111dba7 100644 --- a/openedx/core/djangoapps/content_libraries/api.py +++ b/openedx/core/djangoapps/content_libraries/api.py @@ -220,15 +220,18 @@ class AccessLevel: NO_ACCESS = None -def get_libraries_for_user(user): +def get_libraries_for_user(user, org=None): """ Return content libraries that the user has permission to view. """ - qs = ContentLibrary.objects.all() + if org: + qs = ContentLibrary.objects.filter(org__short_name=org) + else: + qs = ContentLibrary.objects.all() return permissions.perms[permissions.CAN_VIEW_THIS_CONTENT_LIBRARY].filter(user, qs) -def get_metadata_from_index(queryset): +def get_metadata_from_index(queryset, text_search=None): """ Take a list of ContentLibrary objects and return metadata stored in ContentLibraryIndex. @@ -237,14 +240,22 @@ def get_metadata_from_index(queryset): if ContentLibraryIndexer.indexing_is_enabled(): try: library_keys = [lib.library_key for lib in queryset] - metadata = ContentLibraryIndexer.get_libraries(library_keys) + metadata = ContentLibraryIndexer.get_libraries(library_keys, text_search=text_search) except (LibraryNotIndexedException, KeyError, ElasticConnectionError) as e: log.exception(e) # If ContentLibraryIndex is not available, we query blockstore for a limited set of metadata if metadata is None: uuids = [lib.bundle_uuid for lib in queryset] - bundles = get_bundles(uuids) + bundles = get_bundles(uuids=uuids, text_search=text_search) + + if text_search: + # Bundle APIs can't apply text_search on a bundle's org, so including those results here + queryset_org_search = queryset.filter(org__short_name__icontains=text_search) + if len(queryset_org_search): + uuids_org_search = [lib.bundle_uuid for lib in queryset_org_search] + bundles += get_bundles(uuids=uuids_org_search) + bundle_dict = { bundle.uuid: { 'uuid': bundle.uuid, @@ -254,7 +265,12 @@ def get_metadata_from_index(queryset): } for bundle in bundles } - metadata = [bundle_dict[uuid] for uuid in uuids] + metadata = [ + bundle_dict[uuid] + if uuid in bundle_dict + else None + for uuid in uuids + ] libraries = [ ContentLibraryMetadata( @@ -271,6 +287,7 @@ def get_metadata_from_index(queryset): has_unpublished_deletes=metadata[i].get('has_unpublished_deletes'), ) for i, lib in enumerate(queryset) + if metadata[i] is not None ] return libraries diff --git a/openedx/core/djangoapps/content_libraries/libraries_index.py b/openedx/core/djangoapps/content_libraries/libraries_index.py index 8124e61b6e..9d9321a6a8 100644 --- a/openedx/core/djangoapps/content_libraries/libraries_index.py +++ b/openedx/core/djangoapps/content_libraries/libraries_index.py @@ -5,6 +5,7 @@ import logging from django.conf import settings from django.dispatch import receiver from elasticsearch.exceptions import ConnectionError as ElasticConnectionError +from search.elastic import ElasticSearchEngine, _translate_hits, _process_field_filters, RESERVED_CHARACTERS from search.search_engine_base import SearchEngine from openedx.core.djangoapps.content_libraries.constants import DRAFT_NAME @@ -75,18 +76,25 @@ class ContentLibraryIndexer: "last_published": last_published_str, "has_unpublished_changes": has_unpublished_changes, "has_unpublished_deletes": has_unpublished_deletes, + # only 'content' field is analyzed by elastisearch, and allows text-search + "content": { + "id": str(library_key), + "title": bundle_metadata.title, + "description": bundle_metadata.description, + }, } library_dicts.append(library_dict) return searcher.index(cls.LIBRARY_DOCUMENT_TYPE, library_dicts) @classmethod - def get_libraries(cls, library_keys): + def get_libraries(cls, library_keys, text_search=None): """ Retrieve a list of libraries from the index """ searcher = SearchEngine.get_search_engine(cls.INDEX_NAME) library_keys_str = [str(key) for key in library_keys] + response = searcher.search( doc_type=cls.LIBRARY_DOCUMENT_TYPE, field_dictionary={ @@ -95,6 +103,25 @@ class ContentLibraryIndexer: }, size=MAX_SIZE, ) + if len(response["results"]) != len(library_keys_str): + missing = set(library_keys_str) - set([result["data"]["id"] for result in response["results"]]) + raise LibraryNotIndexedException("Keys not found in index: {}".format(missing)) + + if text_search: + # Elastic is hit twice if text_search is valid + # Once above to identify unindexed libraries, and now to filter with text_search + if isinstance(searcher, ElasticSearchEngine): + response = _translate_hits(searcher._es.search( + doc_type=cls.LIBRARY_DOCUMENT_TYPE, + index=searcher.index_name, + body=build_elastic_query(library_keys_str, text_search), + )) + else: + response = searcher.search( + doc_type=cls.LIBRARY_DOCUMENT_TYPE, + field_dictionary={"id": library_keys_str}, + query_string=text_search + ) # Search results may not retain the original order of keys - we use this # dict to construct a list in the original order of library_keys @@ -102,11 +129,11 @@ class ContentLibraryIndexer: result["data"]["id"]: result["data"] for result in response["results"] } - if len(response_dict) != len(library_keys_str): - missing = set(library_keys_str) - set(response_dict.keys()) - raise LibraryNotIndexedException("Keys not found in index: {}".format(missing)) + return [ response_dict[key] + if key in response_dict + else None for key in library_keys_str ] @@ -163,3 +190,46 @@ def remove_library_index(sender, library_key, **kwargs): # pylint: disable=unus ContentLibraryIndexer.remove_libraries([library_key]) except ElasticConnectionError as e: log.exception(e) + + +def build_elastic_query(library_keys_str, text_search): + """ + Build and return an elastic query for doing text search on a library + """ + # Remove reserved characters (and ") from the text to prevent unexpected errors. + text_search_normalised = text_search.translate(text_search.maketrans('', '', RESERVED_CHARACTERS + '"')) + # Wrap with asterix to enable partial matches + text_search_normalised = "*{}*".format(text_search_normalised) + return { + 'query': { + 'filtered': { + 'query': { + 'bool': { + 'should': [ + { + 'query_string': { + 'query': text_search_normalised, + "fields": ["content.*"], + "minimum_should_match": "100%", + }, + }, + # Add a special wildcard search for id, as it contains a ":" character which is filtered out + # in query_string + { + 'wildcard': { + 'id': { + 'value': '*{}*'.format(text_search), + } + }, + }, + ], + }, + }, + 'filter': { + 'terms': { + 'id': library_keys_str + } + } + }, + }, + } diff --git a/openedx/core/djangoapps/content_libraries/tests/base.py b/openedx/core/djangoapps/content_libraries/tests/base.py index 4799028fda..2f0f908d73 100644 --- a/openedx/core/djangoapps/content_libraries/tests/base.py +++ b/openedx/core/djangoapps/content_libraries/tests/base.py @@ -124,10 +124,12 @@ class ContentLibrariesRestApiTest(APITestCase): yield self.client = old_client # pylint: disable=attribute-defined-outside-init - def _create_library(self, slug, title, description="", expect_response=200): + def _create_library(self, slug, title, description="", org=None, expect_response=200): """ Create a library """ + if org is None: + org = self.organization.short_name return self._api('post', URL_LIB_CREATE, { - "org": self.organization.short_name, + "org": org, "slug": slug, "title": title, "description": description, diff --git a/openedx/core/djangoapps/content_libraries/tests/test_content_libraries.py b/openedx/core/djangoapps/content_libraries/tests/test_content_libraries.py index 1140e5be7d..493ce4ad69 100644 --- a/openedx/core/djangoapps/content_libraries/tests/test_content_libraries.py +++ b/openedx/core/djangoapps/content_libraries/tests/test_content_libraries.py @@ -10,6 +10,7 @@ from django.conf import settings from django.contrib.auth.models import Group from django.test.utils import override_settings from mock import patch +from organizations.models import Organization from openedx.core.djangoapps.content_libraries.tests.base import ContentLibrariesRestApiTest from openedx.core.djangoapps.content_libraries.api import BlockLimitReachedError @@ -127,6 +128,33 @@ class ContentLibrariesTest(ContentLibrariesRestApiTest): self.assertEqual(len(result['results']), 1) self.assertEqual(result['next'], None) + @ddt.data(True, False) + def test_library_filters(self, is_indexing_enabled): + """ + Test the filters in the list libraries API + """ + features = settings.FEATURES + features['ENABLE_CONTENT_LIBRARY_INDEX'] = is_indexing_enabled + with override_settings(FEATURES=features): + self._create_library(slug="test-lib1", title="Foo", description="Bar") + self._create_library(slug="test-lib2", title="Library-Title-2", description="Bar2") + self._create_library(slug="l3", title="Library-Title-3", description="Description") + + Organization.objects.get_or_create( + short_name="org-test", + defaults={"name": "Content Libraries Tachyon Exploration & Survey Team"}, + ) + self._create_library(slug="l4", title="Library-Title-4", description="Library-Description", org='org-test') + self._create_library(slug="l5", title="Library-Title-5", description="Library-Description", org='org-test') + + self.assertEqual(len(self._list_libraries()), 5) + self.assertEqual(len(self._list_libraries({'org': 'org-test'})), 2) + self.assertEqual(len(self._list_libraries({'text_search': 'test-lib'})), 2) + self.assertEqual(len(self._list_libraries({'text_search': 'library-title'})), 4) + self.assertEqual(len(self._list_libraries({'text_search': 'bar'})), 2) + self.assertEqual(len(self._list_libraries({'text_search': 'org-tes'})), 2) + self.assertEqual(len(self._list_libraries({'org': 'org-test', 'text_search': 'library-title-4'})), 1) + # General Content Library XBlock tests: def test_library_blocks(self): diff --git a/openedx/core/djangoapps/content_libraries/tests/test_libraries_index.py b/openedx/core/djangoapps/content_libraries/tests/test_libraries_index.py index 876a3c2ca6..a8fb06d239 100644 --- a/openedx/core/djangoapps/content_libraries/tests/test_libraries_index.py +++ b/openedx/core/djangoapps/content_libraries/tests/test_libraries_index.py @@ -63,7 +63,7 @@ class ContentLibraryIndexerIndexer(ContentLibrariesRestApiTest): with self.assertRaises(LibraryNotIndexedException): ContentLibraryIndexer.get_libraries([library_key]) - call_command("reindex_content_library", all=True, quiet=True) + call_command("reindex_content_library", all=True, force=True) ContentLibraryIndexer.get_libraries([library_key]) diff --git a/openedx/core/djangoapps/content_libraries/views.py b/openedx/core/djangoapps/content_libraries/views.py index f3536fa747..18126d3098 100644 --- a/openedx/core/djangoapps/content_libraries/views.py +++ b/openedx/core/djangoapps/content_libraries/views.py @@ -97,16 +97,35 @@ class LibraryRootView(APIView): int, description="Page size of the result. Defaults to 50", ), + apidocs.query_parameter( + 'org', + str, + description="The organization short-name used to filter libraries", + ), + apidocs.query_parameter( + 'text_search', + str, + description="The string used to filter libraries by searching in title, id, org, or description", + ), ], ) def get(self, request): """ Return a list of all content libraries that the user has permission to view. """ + org = request.query_params.get('org', None) + text_search = request.query_params.get('text_search', None) + paginator = LibraryRootPagination() - queryset = api.get_libraries_for_user(request.user) - paginated_qs = paginator.paginate_queryset(queryset, request) - result = api.get_metadata_from_index(paginated_qs) + queryset = api.get_libraries_for_user(request.user, org=org) + if text_search: + result = api.get_metadata_from_index(queryset, text_search=text_search) + result = paginator.paginate_queryset(result, request) + else: + # We can paginate queryset early and prevent fetching unneeded metadata + paginated_qs = paginator.paginate_queryset(queryset, request) + result = api.get_metadata_from_index(paginated_qs) + serializer = ContentLibraryMetadataSerializer(result, many=True) # Verify `pagination` param to maintain compatibility with older # non pagination-aware clients diff --git a/openedx/core/lib/blockstore_api/methods.py b/openedx/core/lib/blockstore_api/methods.py index eec7c69003..1e39b80e8b 100644 --- a/openedx/core/lib/blockstore_api/methods.py +++ b/openedx/core/lib/blockstore_api/methods.py @@ -3,6 +3,7 @@ API Client methods for working with Blockstore bundles and drafts """ import base64 +from urllib.parse import urlencode from uuid import UUID import dateutil.parser @@ -144,13 +145,16 @@ def delete_collection(collection_uuid): api_request('delete', api_url('collections', str(collection_uuid))) -def get_bundles(uuids=None): +def get_bundles(uuids=None, text_search=None): """ Get the details of all bundles """ - if uuids is None: - uuids = [] - version_url = api_url('bundles') + '?uuid=' + ','.join(map(str, uuids)) + query_params = {} + if uuids: + query_params['uuid'] = ','.join(map(str, uuids)) + if text_search: + query_params['text_search'] = text_search + version_url = api_url('bundles') + '?' + urlencode(query_params) response = api_request('get', version_url) # build bundle from response, convert map object to list and return return [_bundle_from_response(item) for item in response]