From a3541d6e46f118bd34c813560f7df78edfeb5ef3 Mon Sep 17 00:00:00 2001 From: Calen Pennington Date: Tue, 22 Jan 2019 14:50:45 -0500 Subject: [PATCH] Allow courses api to return data incrementally Prior to this commit, the course api (/api/courses/v1/courses/) performed all the work necessary to return all courses available to the user, and then only actually returned on page's worth of those courses. With this change, the api now does the work incrementally, computing only the data needed to fetch the courses up to and including the page being returned. This still increases approximately linearly as the page number accessed being increases, but should be more cache-friendly. One side effect of this is that the max_page reported by pagination will be an overestimate (it will include pages that are removed due to a users access restrictions). This change also changes the sort-order of courses being returned by the course_api. By sorting by course-id, rather than course-number, we can sort in the database, rather than in Python, and defer loading data from the end of the list until it is requested. REVMI-90 --- lms/djangoapps/branding/__init__.py | 11 ++- lms/djangoapps/course_api/api.py | 4 +- lms/djangoapps/course_api/tests/test_api.py | 4 +- lms/djangoapps/course_api/tests/test_views.py | 6 +- lms/djangoapps/course_api/views.py | 16 +-- lms/djangoapps/courseware/courses.py | 11 ++- .../courseware/tests/test_courses.py | 2 +- .../content/course_overviews/models.py | 5 +- openedx/core/lib/api/view_utils.py | 98 +++++++++++++++++++ 9 files changed, 132 insertions(+), 25 deletions(-) diff --git a/lms/djangoapps/branding/__init__.py b/lms/djangoapps/branding/__init__.py index 534c0c4faf..e467d96ef4 100644 --- a/lms/djangoapps/branding/__init__.py +++ b/lms/djangoapps/branding/__init__.py @@ -15,7 +15,7 @@ from openedx.core.djangoapps.site_configuration import helpers as configuration_ def get_visible_courses(org=None, filter_=None): """ - Return the set of CourseOverviews that should be visible in this branded + Yield the CourseOverviews that should be visible in this branded instance. Arguments: @@ -27,9 +27,10 @@ def get_visible_courses(org=None, filter_=None): # Import is placed here to avoid model import at project startup. from openedx.core.djangoapps.content.course_overviews.models import CourseOverview - courses = [] current_site_orgs = configuration_helpers.get_current_site_orgs() + courses = CourseOverview.objects.none() + if org: # Check the current site's orgs to make sure the org's courses should be displayed if not current_site_orgs or org in current_site_orgs: @@ -40,7 +41,7 @@ def get_visible_courses(org=None, filter_=None): else: courses = CourseOverview.get_all_courses(filter_=filter_) - courses = sorted(courses, key=lambda course: course.number) + courses = courses.order_by('id') # Filtering can stop here. if current_site_orgs: @@ -57,11 +58,11 @@ def get_visible_courses(org=None, filter_=None): ) if filtered_visible_ids: - return [course for course in courses if course.id in filtered_visible_ids] + return courses.filter(id__in=filtered_visible_ids) else: # Filter out any courses based on current org, to avoid leaking these. orgs = configuration_helpers.get_all_orgs() - return [course for course in courses if course.location.org not in orgs] + return courses.exclude(org__in=orgs) def get_university_for_request(): diff --git a/lms/djangoapps/course_api/api.py b/lms/djangoapps/course_api/api.py index 79a3b38796..ef25e61399 100644 --- a/lms/djangoapps/course_api/api.py +++ b/lms/djangoapps/course_api/api.py @@ -57,7 +57,7 @@ def course_detail(request, username, course_key): def list_courses(request, username, org=None, filter_=None): """ - Return a list of available courses. + Yield all available courses. The courses returned are all be visible to the user identified by `username` and the logged in user should have permission to view courses @@ -81,7 +81,7 @@ def list_courses(request, username, org=None, filter_=None): by the given key-value pairs. Return value: - List of `CourseOverview` objects representing the collection of courses. + Yield `CourseOverview` objects representing the collection of courses. """ user = get_effective_user(request.user, username) return get_courses(user, org=org, filter_=filter_) diff --git a/lms/djangoapps/course_api/tests/test_api.py b/lms/djangoapps/course_api/tests/test_api.py index cef5d4c739..7670acbfa1 100644 --- a/lms/djangoapps/course_api/tests/test_api.py +++ b/lms/djangoapps/course_api/tests/test_api.py @@ -231,12 +231,12 @@ class TestGetCourseListExtras(CourseListTestMixin, ModuleStoreTestCase): def test_no_courses(self): courses = self._make_api_call(self.honor_user, self.honor_user) - self.assertEqual(len(courses), 0) + self.assertEqual(len(list(courses)), 0) def test_hidden_course_for_honor(self): self.create_course(visible_to_staff_only=True) courses = self._make_api_call(self.honor_user, self.honor_user) - self.assertEqual(len(courses), 0) + self.assertEqual(len(list(courses)), 0) def test_hidden_course_for_staff(self): self.create_course(visible_to_staff_only=True) diff --git a/lms/djangoapps/course_api/tests/test_views.py b/lms/djangoapps/course_api/tests/test_views.py index 92a4fd9da9..5541e511ca 100644 --- a/lms/djangoapps/course_api/tests/test_views.py +++ b/lms/djangoapps/course_api/tests/test_views.py @@ -356,7 +356,9 @@ class CourseListSearchViewTest(CourseApiTestViewMixin, ModuleStoreTestCase, Sear res = self.verify_response(params={'search_term': 'unique search term'}) self.assertIn('results', res.data) self.assertNotEqual(res.data['results'], []) - self.assertEqual(res.data['pagination']['count'], 1) # Should list a single course + # Returns a count of 3 courses because that's the estimate before filtering + self.assertEqual(res.data['pagination']['count'], 3) + self.assertEqual(len(res.data['results']), 1) # Should return a single course def test_too_many_courses(self): """ @@ -390,7 +392,7 @@ class CourseListSearchViewTest(CourseApiTestViewMixin, ModuleStoreTestCase, Sear self.setup_user(self.audit_user) # These query counts were found empirically - query_counts = [1266, 349, 349, 349, 349, 349, 349, 349, 349, 349, 322] + query_counts = [174, 196, 226, 256, 286, 316, 346, 376, 406, 436, 331] ordered_course_ids = sorted([str(cid) for cid in (course_ids + [c.id for c in self.courses])]) self.clear_caches() diff --git a/lms/djangoapps/course_api/views.py b/lms/djangoapps/course_api/views.py index 66743ea079..26d7986342 100644 --- a/lms/djangoapps/course_api/views.py +++ b/lms/djangoapps/course_api/views.py @@ -10,6 +10,7 @@ from rest_framework.throttling import UserRateThrottle from edx_rest_framework_extensions.paginators import NamespacedPageNumberPagination from openedx.core.lib.api.view_utils import DeveloperErrorViewMixin, view_auth_classes +from openedx.core.lib.api.view_utils import LazySequence from . import USE_RATE_LIMIT_2_FOR_COURSE_LIST_API, USE_RATE_LIMIT_10_FOR_COURSE_LIST_API from .api import course_detail, list_courses @@ -243,7 +244,7 @@ class CourseListView(DeveloperErrorViewMixin, ListAPIView): def get_queryset(self): """ - Return a list of courses visible to the user. + Yield courses visible to the user. """ form = CourseListGetForm(self.request.query_params, initial={'requesting_user': self.request.user}) if not form.is_valid(): @@ -264,9 +265,12 @@ class CourseListView(DeveloperErrorViewMixin, ListAPIView): size=self.results_size_infinity, ) - search_courses_ids = {course['data']['id']: True for course in search_courses['results']} + search_courses_ids = {course['data']['id'] for course in search_courses['results']} - return [ - course for course in db_courses - if unicode(course.id) in search_courses_ids - ] + return LazySequence( + ( + course for course in db_courses + if unicode(course.id) in search_courses_ids + ), + est_len=len(db_courses) + ) diff --git a/lms/djangoapps/courseware/courses.py b/lms/djangoapps/courseware/courses.py index caccff1313..9fc73ec0a8 100644 --- a/lms/djangoapps/courseware/courses.py +++ b/lms/djangoapps/courseware/courses.py @@ -35,6 +35,7 @@ from lms.djangoapps.courseware.exceptions import CourseAccessRedirect from opaque_keys.edx.keys import UsageKey from openedx.core.djangoapps.content.course_overviews.models import CourseOverview from openedx.core.djangoapps.site_configuration import helpers as configuration_helpers +from openedx.core.lib.api.view_utils import LazySequence from path import Path as path from six import text_type from static_replace import replace_static_urls @@ -451,8 +452,7 @@ def get_course_syllabus_section(course, section_key): def get_courses(user, org=None, filter_=None): """ - Returns a list of courses available, sorted by course.number and optionally - filtered by org code (case-insensitive). + Return a LazySequence of courses available, optionally filtered by org code (case-insensitive). """ courses = branding.get_visible_courses(org=org, filter_=filter_) @@ -461,9 +461,10 @@ def get_courses(user, org=None, filter_=None): settings.COURSE_CATALOG_VISIBILITY_PERMISSION ) - courses = [c for c in courses if has_access(user, permission_name, c)] - - return courses + return LazySequence( + (c for c in courses if has_access(user, permission_name, c)), + est_len=courses.count() + ) def get_permission_for_course_about(): diff --git a/lms/djangoapps/courseware/tests/test_courses.py b/lms/djangoapps/courseware/tests/test_courses.py index c54d98e4e2..12d694524c 100644 --- a/lms/djangoapps/courseware/tests/test_courses.py +++ b/lms/djangoapps/courseware/tests/test_courses.py @@ -141,7 +141,7 @@ class CoursesTest(ModuleStoreTestCase): # Request filtering for an org distinct from the designated org. no_courses = get_courses(user, org=primary) - self.assertEqual(no_courses, []) + self.assertEqual(list(no_courses), []) # Request filtering for an org matching the designated org. site_courses = get_courses(user, org=alternate) diff --git a/openedx/core/djangoapps/content/course_overviews/models.py b/openedx/core/djangoapps/content/course_overviews/models.py index e852cce1b0..6d1998c92f 100644 --- a/openedx/core/djangoapps/content/course_overviews/models.py +++ b/openedx/core/djangoapps/content/course_overviews/models.py @@ -409,7 +409,8 @@ class CourseOverview(TimeStampedModel): migrate and test switching to display_name_with_default, which is no longer escaped. """ - return block_metadata_utils.display_name_with_default_escaped(self) + # pylint: disable=line-too-long + return block_metadata_utils.display_name_with_default_escaped(self) # xss-lint: disable=python-deprecated-display-name @property def dashboard_start_display(self): @@ -560,7 +561,7 @@ class CourseOverview(TimeStampedModel): @classmethod def get_all_courses(cls, orgs=None, filter_=None): """ - Returns all CourseOverview objects in the database. + Return a queryset containing all CourseOverview objects in the database. Arguments: orgs (list[string]): Optional parameter that allows case-insensitive diff --git a/openedx/core/lib/api/view_utils.py b/openedx/core/lib/api/view_utils.py index ef67c4eb82..61c8dbd319 100644 --- a/openedx/core/lib/api/view_utils.py +++ b/openedx/core/lib/api/view_utils.py @@ -1,6 +1,7 @@ """ Utilities related to API views """ +from collections import Sequence from django.core.exceptions import NON_FIELD_ERRORS, ObjectDoesNotExist, ValidationError from django.http import Http404 from django.utils.translation import ugettext as _ @@ -205,3 +206,100 @@ class RetrievePatchAPIView(RetrieveModelMixin, UpdateModelMixin, GenericAPIView) # PATCH requests where the object does not exist should still # return a 404 response. raise + + +class LazySequence(Sequence): + """ + This class provides an immutable Sequence interface on top of an existing + iterable. + + It is immutable, and accepts an estimated length in order to support __len__ + without exhausting the underlying sequence + """ + def __init__(self, iterable, est_len=None): # pylint: disable=super-init-not-called + self.iterable = iterable + self.est_len = est_len + self._data = [] + self._exhausted = False + + def __len__(self): + # Return the actual data length if we know it exactly (because + # the underlying sequence is exhausted), or it's greater than + # the initial estimated length + if len(self._data) > self.est_len or self._exhausted: + return len(self._data) + else: + return self.est_len + + def __iter__(self): + # Yield all the known data first + for item in self._data: + yield item + + # Capture and yield data from the underlying iterator + # until it is exhausted + while True: + try: + item = next(self.iterable) + self._data.append(item) + yield item + except StopIteration: + self._exhausted = True + return + + def __getitem__(self, index): + if isinstance(index, int): + # For a single index, if we haven't already loaded enough + # data, we can load data until we have enough, and then + # return the value from the loaded data + if index < 0: + raise IndexError("Negative indexes aren't supported") + + while len(self._data) <= index: + try: + self._data.append(next(self.iterable)) + except StopIteration: + self._exhausted = True + raise IndexError("Underlying sequence exhausted") + + return self._data[index] + elif isinstance(index, slice): + # For a slice, we can load data until we reach 'stop'. + # Once we have data including 'stop', then we can use + # the underlying list to actually understand the mechanics + # of the slicing operation. + if index.start is not None and index.start < 0: + raise IndexError("Negative indexes aren't supported") + if index.stop is not None and index.stop < 0: + raise IndexError("Negative indexes aren't supported") + + if index.step is not None and index.step < 0: + largest_value = index.start + 1 + else: + largest_value = index.stop + + if largest_value is not None: + while len(self._data) <= largest_value: + try: + self._data.append(next(self.iterable)) + except StopIteration: + self._exhausted = True + break + else: + self._data.extend(self.iterable) + return self._data[index] + else: + raise TypeError("Unsupported index type") + + def __repr__(self): + if self._exhausted: + return "LazySequence({!r}, {!r})".format( + self._data, + self.est_len, + ) + else: + return "LazySequence(itertools.chain({!r}, {!r}), {!r})".format( + self._data, + self.iterable, + self.est_len, + )