Add filtering and search support to library APIs

This commit is contained in:
Sid Verma
2020-07-16 12:21:56 +05:30
committed by Kyle McCormick
parent 4bb1914ec6
commit 78045115ab
9 changed files with 197 additions and 23 deletions

View File

@@ -478,6 +478,22 @@ class TestLibraries(LibraryTestCase):
with self.assertRaises(ValueError):
self._refresh_children(lc_block, status_code_expected=400)
def test_library_filters(self):
"""
Test the filters in the list libraries API
"""
self._create_library(library="test-lib1", display_name="Foo", org='org')
self._create_library(library="test-lib2", display_name="Library-Title-2", org='org-test1')
self._create_library(library="l3", display_name="Library-Title-3", org='org-test1')
self._create_library(library="l4", display_name="Library-Title-4", org='org-test2')
self.assertEqual(len(self.client.get_json(LIBRARY_REST_URL).json()), 5) # 1 more from self.setUp()
self.assertEqual(len(self.client.get_json('{}?org=org-test1'.format(LIBRARY_REST_URL)).json()), 2)
self.assertEqual(len(self.client.get_json('{}?text_search=test-lib'.format(LIBRARY_REST_URL)).json()), 2)
self.assertEqual(len(self.client.get_json('{}?text_search=library-title'.format(LIBRARY_REST_URL)).json()), 3)
self.assertEqual(len(self.client.get_json('{}?text_search=library-'.format(LIBRARY_REST_URL)).json()), 3)
self.assertEqual(len(self.client.get_json('{}?text_search=org-test'.format(LIBRARY_REST_URL)).json()), 3)
@ddt.ddt
@patch('django.conf.settings.SEARCH_ENGINE', None)

View File

@@ -144,15 +144,33 @@ def _display_library(library_key_string, request):
def _list_libraries(request):
"""
List all accessible libraries
List all accessible libraries, after applying filters in the request
Query params:
org - The organization used to filter libraries
text_search - The string used to filter libraries by searching in title, id or org
"""
org = request.GET.get('org', '')
text_search = request.GET.get('text_search', '').lower()
if org:
libraries = modulestore().get_libraries(org=org)
else:
libraries = modulestore().get_libraries()
lib_info = [
{
"display_name": lib.display_name,
"library_key": text_type(lib.location.library_key),
}
for lib in modulestore().get_libraries()
if has_studio_read_access(request.user, lib.location.library_key)
for lib in libraries
if (
(
text_search in lib.display_name.lower() or
text_search in lib.location.library_key.org.lower() or
text_search in lib.location.library_key.library.lower()
) and
has_studio_read_access(request.user, lib.location.library_key)
)
]
return JsonResponse(lib_info)

View File

@@ -220,15 +220,18 @@ class AccessLevel:
NO_ACCESS = None
def get_libraries_for_user(user):
def get_libraries_for_user(user, org=None):
"""
Return content libraries that the user has permission to view.
"""
qs = ContentLibrary.objects.all()
if org:
qs = ContentLibrary.objects.filter(org__short_name=org)
else:
qs = ContentLibrary.objects.all()
return permissions.perms[permissions.CAN_VIEW_THIS_CONTENT_LIBRARY].filter(user, qs)
def get_metadata_from_index(queryset):
def get_metadata_from_index(queryset, text_search=None):
"""
Take a list of ContentLibrary objects and return metadata stored in
ContentLibraryIndex.
@@ -237,14 +240,22 @@ def get_metadata_from_index(queryset):
if ContentLibraryIndexer.indexing_is_enabled():
try:
library_keys = [lib.library_key for lib in queryset]
metadata = ContentLibraryIndexer.get_libraries(library_keys)
metadata = ContentLibraryIndexer.get_libraries(library_keys, text_search=text_search)
except (LibraryNotIndexedException, KeyError, ElasticConnectionError) as e:
log.exception(e)
# If ContentLibraryIndex is not available, we query blockstore for a limited set of metadata
if metadata is None:
uuids = [lib.bundle_uuid for lib in queryset]
bundles = get_bundles(uuids)
bundles = get_bundles(uuids=uuids, text_search=text_search)
if text_search:
# Bundle APIs can't apply text_search on a bundle's org, so including those results here
queryset_org_search = queryset.filter(org__short_name__icontains=text_search)
if len(queryset_org_search):
uuids_org_search = [lib.bundle_uuid for lib in queryset_org_search]
bundles += get_bundles(uuids=uuids_org_search)
bundle_dict = {
bundle.uuid: {
'uuid': bundle.uuid,
@@ -254,7 +265,12 @@ def get_metadata_from_index(queryset):
}
for bundle in bundles
}
metadata = [bundle_dict[uuid] for uuid in uuids]
metadata = [
bundle_dict[uuid]
if uuid in bundle_dict
else None
for uuid in uuids
]
libraries = [
ContentLibraryMetadata(
@@ -271,6 +287,7 @@ def get_metadata_from_index(queryset):
has_unpublished_deletes=metadata[i].get('has_unpublished_deletes'),
)
for i, lib in enumerate(queryset)
if metadata[i] is not None
]
return libraries

View File

@@ -5,6 +5,7 @@ import logging
from django.conf import settings
from django.dispatch import receiver
from elasticsearch.exceptions import ConnectionError as ElasticConnectionError
from search.elastic import ElasticSearchEngine, _translate_hits, _process_field_filters, RESERVED_CHARACTERS
from search.search_engine_base import SearchEngine
from openedx.core.djangoapps.content_libraries.constants import DRAFT_NAME
@@ -75,18 +76,25 @@ class ContentLibraryIndexer:
"last_published": last_published_str,
"has_unpublished_changes": has_unpublished_changes,
"has_unpublished_deletes": has_unpublished_deletes,
# only 'content' field is analyzed by elastisearch, and allows text-search
"content": {
"id": str(library_key),
"title": bundle_metadata.title,
"description": bundle_metadata.description,
},
}
library_dicts.append(library_dict)
return searcher.index(cls.LIBRARY_DOCUMENT_TYPE, library_dicts)
@classmethod
def get_libraries(cls, library_keys):
def get_libraries(cls, library_keys, text_search=None):
"""
Retrieve a list of libraries from the index
"""
searcher = SearchEngine.get_search_engine(cls.INDEX_NAME)
library_keys_str = [str(key) for key in library_keys]
response = searcher.search(
doc_type=cls.LIBRARY_DOCUMENT_TYPE,
field_dictionary={
@@ -95,6 +103,25 @@ class ContentLibraryIndexer:
},
size=MAX_SIZE,
)
if len(response["results"]) != len(library_keys_str):
missing = set(library_keys_str) - set([result["data"]["id"] for result in response["results"]])
raise LibraryNotIndexedException("Keys not found in index: {}".format(missing))
if text_search:
# Elastic is hit twice if text_search is valid
# Once above to identify unindexed libraries, and now to filter with text_search
if isinstance(searcher, ElasticSearchEngine):
response = _translate_hits(searcher._es.search(
doc_type=cls.LIBRARY_DOCUMENT_TYPE,
index=searcher.index_name,
body=build_elastic_query(library_keys_str, text_search),
))
else:
response = searcher.search(
doc_type=cls.LIBRARY_DOCUMENT_TYPE,
field_dictionary={"id": library_keys_str},
query_string=text_search
)
# Search results may not retain the original order of keys - we use this
# dict to construct a list in the original order of library_keys
@@ -102,11 +129,11 @@ class ContentLibraryIndexer:
result["data"]["id"]: result["data"]
for result in response["results"]
}
if len(response_dict) != len(library_keys_str):
missing = set(library_keys_str) - set(response_dict.keys())
raise LibraryNotIndexedException("Keys not found in index: {}".format(missing))
return [
response_dict[key]
if key in response_dict
else None
for key in library_keys_str
]
@@ -163,3 +190,46 @@ def remove_library_index(sender, library_key, **kwargs): # pylint: disable=unus
ContentLibraryIndexer.remove_libraries([library_key])
except ElasticConnectionError as e:
log.exception(e)
def build_elastic_query(library_keys_str, text_search):
"""
Build and return an elastic query for doing text search on a library
"""
# Remove reserved characters (and ") from the text to prevent unexpected errors.
text_search_normalised = text_search.translate(text_search.maketrans('', '', RESERVED_CHARACTERS + '"'))
# Wrap with asterix to enable partial matches
text_search_normalised = "*{}*".format(text_search_normalised)
return {
'query': {
'filtered': {
'query': {
'bool': {
'should': [
{
'query_string': {
'query': text_search_normalised,
"fields": ["content.*"],
"minimum_should_match": "100%",
},
},
# Add a special wildcard search for id, as it contains a ":" character which is filtered out
# in query_string
{
'wildcard': {
'id': {
'value': '*{}*'.format(text_search),
}
},
},
],
},
},
'filter': {
'terms': {
'id': library_keys_str
}
}
},
},
}

View File

@@ -124,10 +124,12 @@ class ContentLibrariesRestApiTest(APITestCase):
yield
self.client = old_client # pylint: disable=attribute-defined-outside-init
def _create_library(self, slug, title, description="", expect_response=200):
def _create_library(self, slug, title, description="", org=None, expect_response=200):
""" Create a library """
if org is None:
org = self.organization.short_name
return self._api('post', URL_LIB_CREATE, {
"org": self.organization.short_name,
"org": org,
"slug": slug,
"title": title,
"description": description,

View File

@@ -10,6 +10,7 @@ from django.conf import settings
from django.contrib.auth.models import Group
from django.test.utils import override_settings
from mock import patch
from organizations.models import Organization
from openedx.core.djangoapps.content_libraries.tests.base import ContentLibrariesRestApiTest
from openedx.core.djangoapps.content_libraries.api import BlockLimitReachedError
@@ -127,6 +128,33 @@ class ContentLibrariesTest(ContentLibrariesRestApiTest):
self.assertEqual(len(result['results']), 1)
self.assertEqual(result['next'], None)
@ddt.data(True, False)
def test_library_filters(self, is_indexing_enabled):
"""
Test the filters in the list libraries API
"""
features = settings.FEATURES
features['ENABLE_CONTENT_LIBRARY_INDEX'] = is_indexing_enabled
with override_settings(FEATURES=features):
self._create_library(slug="test-lib1", title="Foo", description="Bar")
self._create_library(slug="test-lib2", title="Library-Title-2", description="Bar2")
self._create_library(slug="l3", title="Library-Title-3", description="Description")
Organization.objects.get_or_create(
short_name="org-test",
defaults={"name": "Content Libraries Tachyon Exploration & Survey Team"},
)
self._create_library(slug="l4", title="Library-Title-4", description="Library-Description", org='org-test')
self._create_library(slug="l5", title="Library-Title-5", description="Library-Description", org='org-test')
self.assertEqual(len(self._list_libraries()), 5)
self.assertEqual(len(self._list_libraries({'org': 'org-test'})), 2)
self.assertEqual(len(self._list_libraries({'text_search': 'test-lib'})), 2)
self.assertEqual(len(self._list_libraries({'text_search': 'library-title'})), 4)
self.assertEqual(len(self._list_libraries({'text_search': 'bar'})), 2)
self.assertEqual(len(self._list_libraries({'text_search': 'org-tes'})), 2)
self.assertEqual(len(self._list_libraries({'org': 'org-test', 'text_search': 'library-title-4'})), 1)
# General Content Library XBlock tests:
def test_library_blocks(self):

View File

@@ -63,7 +63,7 @@ class ContentLibraryIndexerIndexer(ContentLibrariesRestApiTest):
with self.assertRaises(LibraryNotIndexedException):
ContentLibraryIndexer.get_libraries([library_key])
call_command("reindex_content_library", all=True, quiet=True)
call_command("reindex_content_library", all=True, force=True)
ContentLibraryIndexer.get_libraries([library_key])

View File

@@ -97,16 +97,35 @@ class LibraryRootView(APIView):
int,
description="Page size of the result. Defaults to 50",
),
apidocs.query_parameter(
'org',
str,
description="The organization short-name used to filter libraries",
),
apidocs.query_parameter(
'text_search',
str,
description="The string used to filter libraries by searching in title, id, org, or description",
),
],
)
def get(self, request):
"""
Return a list of all content libraries that the user has permission to view.
"""
org = request.query_params.get('org', None)
text_search = request.query_params.get('text_search', None)
paginator = LibraryRootPagination()
queryset = api.get_libraries_for_user(request.user)
paginated_qs = paginator.paginate_queryset(queryset, request)
result = api.get_metadata_from_index(paginated_qs)
queryset = api.get_libraries_for_user(request.user, org=org)
if text_search:
result = api.get_metadata_from_index(queryset, text_search=text_search)
result = paginator.paginate_queryset(result, request)
else:
# We can paginate queryset early and prevent fetching unneeded metadata
paginated_qs = paginator.paginate_queryset(queryset, request)
result = api.get_metadata_from_index(paginated_qs)
serializer = ContentLibraryMetadataSerializer(result, many=True)
# Verify `pagination` param to maintain compatibility with older
# non pagination-aware clients

View File

@@ -3,6 +3,7 @@ API Client methods for working with Blockstore bundles and drafts
"""
import base64
from urllib.parse import urlencode
from uuid import UUID
import dateutil.parser
@@ -144,13 +145,16 @@ def delete_collection(collection_uuid):
api_request('delete', api_url('collections', str(collection_uuid)))
def get_bundles(uuids=None):
def get_bundles(uuids=None, text_search=None):
"""
Get the details of all bundles
"""
if uuids is None:
uuids = []
version_url = api_url('bundles') + '?uuid=' + ','.join(map(str, uuids))
query_params = {}
if uuids:
query_params['uuid'] = ','.join(map(str, uuids))
if text_search:
query_params['text_search'] = text_search
version_url = api_url('bundles') + '?' + urlencode(query_params)
response = api_request('get', version_url)
# build bundle from response, convert map object to list and return
return [_bundle_from_response(item) for item in response]