Add filtering and search support to library APIs

2020-07-16 12:21:56 +05:30
parent 4bb1914ec6
commit 78045115ab
9 changed files with 197 additions and 23 deletions
--- a/cms/djangoapps/contentstore/tests/test_libraries.py
+++ b/cms/djangoapps/contentstore/tests/test_libraries.py
@@ -478,6 +478,22 @@ class TestLibraries(LibraryTestCase):
        with self.assertRaises(ValueError):
            self._refresh_children(lc_block, status_code_expected=400)

+    def test_library_filters(self):
+        """
+        Test the filters in the list libraries API
+        """
+        self._create_library(library="test-lib1", display_name="Foo", org='org')
+        self._create_library(library="test-lib2", display_name="Library-Title-2", org='org-test1')
+        self._create_library(library="l3", display_name="Library-Title-3", org='org-test1')
+        self._create_library(library="l4", display_name="Library-Title-4", org='org-test2')
+
+        self.assertEqual(len(self.client.get_json(LIBRARY_REST_URL).json()), 5)  # 1 more from self.setUp()
+        self.assertEqual(len(self.client.get_json('{}?org=org-test1'.format(LIBRARY_REST_URL)).json()), 2)
+        self.assertEqual(len(self.client.get_json('{}?text_search=test-lib'.format(LIBRARY_REST_URL)).json()), 2)
+        self.assertEqual(len(self.client.get_json('{}?text_search=library-title'.format(LIBRARY_REST_URL)).json()), 3)
+        self.assertEqual(len(self.client.get_json('{}?text_search=library-'.format(LIBRARY_REST_URL)).json()), 3)
+        self.assertEqual(len(self.client.get_json('{}?text_search=org-test'.format(LIBRARY_REST_URL)).json()), 3)
+

@ddt.ddt
@patch('django.conf.settings.SEARCH_ENGINE', None)
--- a/cms/djangoapps/contentstore/views/library.py
+++ b/cms/djangoapps/contentstore/views/library.py
@@ -144,15 +144,33 @@ def _display_library(library_key_string, request):

 def _list_libraries(request):
    """
-    List all accessible libraries
+    List all accessible libraries, after applying filters in the request
+    Query params:
+        org - The organization used to filter libraries
+        text_search - The string used to filter libraries by searching in title, id or org
    """
+    org = request.GET.get('org', '')
+    text_search = request.GET.get('text_search', '').lower()
+
+    if org:
+        libraries = modulestore().get_libraries(org=org)
+    else:
+        libraries = modulestore().get_libraries()
+
    lib_info = [
        {
            "display_name": lib.display_name,
            "library_key": text_type(lib.location.library_key),
        }
-        for lib in modulestore().get_libraries()
-        if has_studio_read_access(request.user, lib.location.library_key)
+        for lib in libraries
+        if (
+            (
+                text_search in lib.display_name.lower() or
+                text_search in lib.location.library_key.org.lower() or
+                text_search in lib.location.library_key.library.lower()
+            ) and
+            has_studio_read_access(request.user, lib.location.library_key)
+        )
    ]
    return JsonResponse(lib_info)

--- a/openedx/core/djangoapps/content_libraries/api.py
+++ b/openedx/core/djangoapps/content_libraries/api.py
@@ -220,15 +220,18 @@ class AccessLevel:
    NO_ACCESS = None


-def get_libraries_for_user(user):
+def get_libraries_for_user(user, org=None):
    """
    Return content libraries that the user has permission to view.
    """
-    qs = ContentLibrary.objects.all()
+    if org:
+        qs = ContentLibrary.objects.filter(org__short_name=org)
+    else:
+        qs = ContentLibrary.objects.all()
    return permissions.perms[permissions.CAN_VIEW_THIS_CONTENT_LIBRARY].filter(user, qs)


-def get_metadata_from_index(queryset):
+def get_metadata_from_index(queryset, text_search=None):
    """
    Take a list of ContentLibrary objects and return metadata stored in
    ContentLibraryIndex.
@@ -237,14 +240,22 @@ def get_metadata_from_index(queryset):
    if ContentLibraryIndexer.indexing_is_enabled():
        try:
            library_keys = [lib.library_key for lib in queryset]
-            metadata = ContentLibraryIndexer.get_libraries(library_keys)
+            metadata = ContentLibraryIndexer.get_libraries(library_keys, text_search=text_search)
        except (LibraryNotIndexedException, KeyError, ElasticConnectionError) as e:
            log.exception(e)

    # If ContentLibraryIndex is not available, we query blockstore for a limited set of metadata
    if metadata is None:
        uuids = [lib.bundle_uuid for lib in queryset]
-        bundles = get_bundles(uuids)
+        bundles = get_bundles(uuids=uuids, text_search=text_search)
+
+        if text_search:
+            # Bundle APIs can't apply text_search on a bundle's org, so including those results here
+            queryset_org_search = queryset.filter(org__short_name__icontains=text_search)
+            if len(queryset_org_search):
+                uuids_org_search = [lib.bundle_uuid for lib in queryset_org_search]
+                bundles += get_bundles(uuids=uuids_org_search)
+
        bundle_dict = {
            bundle.uuid: {
                'uuid': bundle.uuid,
@@ -254,7 +265,12 @@ def get_metadata_from_index(queryset):
            }
            for bundle in bundles
        }
-        metadata = [bundle_dict[uuid] for uuid in uuids]
+        metadata = [
+            bundle_dict[uuid]
+            if uuid in bundle_dict
+            else None
+            for uuid in uuids
+        ]

    libraries = [
        ContentLibraryMetadata(
@@ -271,6 +287,7 @@ def get_metadata_from_index(queryset):
            has_unpublished_deletes=metadata[i].get('has_unpublished_deletes'),
        )
        for i, lib in enumerate(queryset)
+        if metadata[i] is not None
    ]
    return libraries

--- a/openedx/core/djangoapps/content_libraries/libraries_index.py
+++ b/openedx/core/djangoapps/content_libraries/libraries_index.py
@@ -5,6 +5,7 @@ import logging
 from django.conf import settings
 from django.dispatch import receiver
 from elasticsearch.exceptions import ConnectionError as ElasticConnectionError
+from search.elastic import ElasticSearchEngine, _translate_hits, _process_field_filters, RESERVED_CHARACTERS
 from search.search_engine_base import SearchEngine

 from openedx.core.djangoapps.content_libraries.constants import DRAFT_NAME
@@ -75,18 +76,25 @@ class ContentLibraryIndexer:
                "last_published": last_published_str,
                "has_unpublished_changes": has_unpublished_changes,
                "has_unpublished_deletes": has_unpublished_deletes,
+                # only 'content' field is analyzed by elastisearch, and allows text-search
+                "content": {
+                    "id": str(library_key),
+                    "title": bundle_metadata.title,
+                    "description": bundle_metadata.description,
+                },
            }
            library_dicts.append(library_dict)

        return searcher.index(cls.LIBRARY_DOCUMENT_TYPE, library_dicts)

    @classmethod
-    def get_libraries(cls, library_keys):
+    def get_libraries(cls, library_keys, text_search=None):
        """
        Retrieve a list of libraries from the index
        """
        searcher = SearchEngine.get_search_engine(cls.INDEX_NAME)
        library_keys_str = [str(key) for key in library_keys]
+
        response = searcher.search(
            doc_type=cls.LIBRARY_DOCUMENT_TYPE,
            field_dictionary={
@@ -95,6 +103,25 @@ class ContentLibraryIndexer:
            },
            size=MAX_SIZE,
        )
+        if len(response["results"]) != len(library_keys_str):
+            missing = set(library_keys_str) - set([result["data"]["id"] for result in response["results"]])
+            raise LibraryNotIndexedException("Keys not found in index: {}".format(missing))
+
+        if text_search:
+            # Elastic is hit twice if text_search is valid
+            # Once above to identify unindexed libraries, and now to filter with text_search
+            if isinstance(searcher, ElasticSearchEngine):
+                response = _translate_hits(searcher._es.search(
+                    doc_type=cls.LIBRARY_DOCUMENT_TYPE,
+                    index=searcher.index_name,
+                    body=build_elastic_query(library_keys_str, text_search),
+                ))
+            else:
+                response = searcher.search(
+                    doc_type=cls.LIBRARY_DOCUMENT_TYPE,
+                    field_dictionary={"id": library_keys_str},
+                    query_string=text_search
+                )

        # Search results may not retain the original order of keys - we use this
        # dict to construct a list in the original order of library_keys
@@ -102,11 +129,11 @@ class ContentLibraryIndexer:
            result["data"]["id"]: result["data"]
            for result in response["results"]
        }
-        if len(response_dict) != len(library_keys_str):
-            missing = set(library_keys_str) - set(response_dict.keys())
-            raise LibraryNotIndexedException("Keys not found in index: {}".format(missing))
+
        return [
            response_dict[key]
+            if key in response_dict
+            else None
            for key in library_keys_str
        ]

@@ -163,3 +190,46 @@ def remove_library_index(sender, library_key, **kwargs):  # pylint: disable=unus
            ContentLibraryIndexer.remove_libraries([library_key])
        except ElasticConnectionError as e:
            log.exception(e)
+
+
+def build_elastic_query(library_keys_str, text_search):
+    """
+    Build and return an elastic query for doing text search on a library
+    """
+    # Remove reserved characters (and ") from the text to prevent unexpected errors.
+    text_search_normalised = text_search.translate(text_search.maketrans('', '', RESERVED_CHARACTERS + '"'))
+    # Wrap with asterix to enable partial matches
+    text_search_normalised = "*{}*".format(text_search_normalised)
+    return {
+        'query': {
+            'filtered': {
+                'query': {
+                    'bool': {
+                        'should': [
+                            {
+                                'query_string': {
+                                    'query': text_search_normalised,
+                                    "fields": ["content.*"],
+                                    "minimum_should_match": "100%",
+                                },
+                            },
+                            # Add a special wildcard search for id, as it contains a ":" character which is filtered out
+                            # in query_string
+                            {
+                                'wildcard': {
+                                    'id': {
+                                        'value': '*{}*'.format(text_search),
+                                    }
+                                },
+                            },
+                        ],
+                    },
+                },
+                'filter': {
+                    'terms': {
+                        'id': library_keys_str
+                    }
+                }
+            },
+        },
+    }
--- a/openedx/core/djangoapps/content_libraries/tests/base.py
+++ b/openedx/core/djangoapps/content_libraries/tests/base.py
@@ -124,10 +124,12 @@ class ContentLibrariesRestApiTest(APITestCase):
        yield
        self.client = old_client  # pylint: disable=attribute-defined-outside-init

-    def _create_library(self, slug, title, description="", expect_response=200):
+    def _create_library(self, slug, title, description="", org=None, expect_response=200):
        """ Create a library """
+        if org is None:
+            org = self.organization.short_name
        return self._api('post', URL_LIB_CREATE, {
-            "org": self.organization.short_name,
+            "org": org,
            "slug": slug,
            "title": title,
            "description": description,
--- a/openedx/core/djangoapps/content_libraries/tests/test_content_libraries.py
+++ b/openedx/core/djangoapps/content_libraries/tests/test_content_libraries.py
@@ -10,6 +10,7 @@ from django.conf import settings
 from django.contrib.auth.models import Group
 from django.test.utils import override_settings
 from mock import patch
+from organizations.models import Organization

 from openedx.core.djangoapps.content_libraries.tests.base import ContentLibrariesRestApiTest
 from openedx.core.djangoapps.content_libraries.api import BlockLimitReachedError
@@ -127,6 +128,33 @@ class ContentLibrariesTest(ContentLibrariesRestApiTest):
            self.assertEqual(len(result['results']), 1)
            self.assertEqual(result['next'], None)

+    @ddt.data(True, False)
+    def test_library_filters(self, is_indexing_enabled):
+        """
+        Test the filters in the list libraries API
+        """
+        features = settings.FEATURES
+        features['ENABLE_CONTENT_LIBRARY_INDEX'] = is_indexing_enabled
+        with override_settings(FEATURES=features):
+            self._create_library(slug="test-lib1", title="Foo", description="Bar")
+            self._create_library(slug="test-lib2", title="Library-Title-2", description="Bar2")
+            self._create_library(slug="l3", title="Library-Title-3", description="Description")
+
+            Organization.objects.get_or_create(
+                short_name="org-test",
+                defaults={"name": "Content Libraries Tachyon Exploration & Survey Team"},
+            )
+            self._create_library(slug="l4", title="Library-Title-4", description="Library-Description", org='org-test')
+            self._create_library(slug="l5", title="Library-Title-5", description="Library-Description", org='org-test')
+
+            self.assertEqual(len(self._list_libraries()), 5)
+            self.assertEqual(len(self._list_libraries({'org': 'org-test'})), 2)
+            self.assertEqual(len(self._list_libraries({'text_search': 'test-lib'})), 2)
+            self.assertEqual(len(self._list_libraries({'text_search': 'library-title'})), 4)
+            self.assertEqual(len(self._list_libraries({'text_search': 'bar'})), 2)
+            self.assertEqual(len(self._list_libraries({'text_search': 'org-tes'})), 2)
+            self.assertEqual(len(self._list_libraries({'org': 'org-test', 'text_search': 'library-title-4'})), 1)
+
    # General Content Library XBlock tests:

    def test_library_blocks(self):
--- a/openedx/core/djangoapps/content_libraries/tests/test_libraries_index.py
+++ b/openedx/core/djangoapps/content_libraries/tests/test_libraries_index.py
@@ -63,7 +63,7 @@ class ContentLibraryIndexerIndexer(ContentLibrariesRestApiTest):
            with self.assertRaises(LibraryNotIndexedException):
                ContentLibraryIndexer.get_libraries([library_key])

-            call_command("reindex_content_library", all=True, quiet=True)
+            call_command("reindex_content_library", all=True, force=True)

            ContentLibraryIndexer.get_libraries([library_key])

--- a/openedx/core/djangoapps/content_libraries/views.py
+++ b/openedx/core/djangoapps/content_libraries/views.py
@@ -97,16 +97,35 @@ class LibraryRootView(APIView):
                int,
                description="Page size of the result. Defaults to 50",
            ),
+            apidocs.query_parameter(
+                'org',
+                str,
+                description="The organization short-name used to filter libraries",
+            ),
+            apidocs.query_parameter(
+                'text_search',
+                str,
+                description="The string used to filter libraries by searching in title, id, org, or description",
+            ),
        ],
    )
    def get(self, request):
        """
        Return a list of all content libraries that the user has permission to view.
        """
+        org = request.query_params.get('org', None)
+        text_search = request.query_params.get('text_search', None)
+
        paginator = LibraryRootPagination()
-        queryset = api.get_libraries_for_user(request.user)
-        paginated_qs = paginator.paginate_queryset(queryset, request)
-        result = api.get_metadata_from_index(paginated_qs)
+        queryset = api.get_libraries_for_user(request.user, org=org)
+        if text_search:
+            result = api.get_metadata_from_index(queryset, text_search=text_search)
+            result = paginator.paginate_queryset(result, request)
+        else:
+            # We can paginate queryset early and prevent fetching unneeded metadata
+            paginated_qs = paginator.paginate_queryset(queryset, request)
+            result = api.get_metadata_from_index(paginated_qs)
+
        serializer = ContentLibraryMetadataSerializer(result, many=True)
        # Verify `pagination` param to maintain compatibility with older
        # non pagination-aware clients
--- a/openedx/core/lib/blockstore_api/methods.py
+++ b/openedx/core/lib/blockstore_api/methods.py
@@ -3,6 +3,7 @@ API Client methods for working with Blockstore bundles and drafts
 """

 import base64
+from urllib.parse import urlencode
 from uuid import UUID

 import dateutil.parser
@@ -144,13 +145,16 @@ def delete_collection(collection_uuid):
    api_request('delete', api_url('collections', str(collection_uuid)))


-def get_bundles(uuids=None):
+def get_bundles(uuids=None, text_search=None):
    """
    Get the details of all bundles
    """
-    if uuids is None:
-        uuids = []
-    version_url = api_url('bundles') + '?uuid=' + ','.join(map(str, uuids))
+    query_params = {}
+    if uuids:
+        query_params['uuid'] = ','.join(map(str, uuids))
+    if text_search:
+        query_params['text_search'] = text_search
+    version_url = api_url('bundles') + '?' + urlencode(query_params)
    response = api_request('get', version_url)
    # build bundle from response, convert map object to list and return
    return [_bundle_from_response(item) for item in response]