From 4e98578f11a050ee7264cad7a752daa8d224b011 Mon Sep 17 00:00:00 2001 From: Awais Jibran Date: Tue, 9 Oct 2018 18:57:13 +0500 Subject: [PATCH] Workaround of CS50 OOM error EDUCATOR-3483 --- .../instructor_task/tasks_helper/grades.py | 52 +++++++++++++++++-- .../tests/test_tasks_helper.py | 2 +- 2 files changed, 50 insertions(+), 4 deletions(-) diff --git a/lms/djangoapps/instructor_task/tasks_helper/grades.py b/lms/djangoapps/instructor_task/tasks_helper/grades.py index a8b85f1f88..01bd3830bd 100644 --- a/lms/djangoapps/instructor_task/tasks_helper/grades.py +++ b/lms/djangoapps/instructor_task/tasks_helper/grades.py @@ -29,6 +29,7 @@ from lms.djangoapps.verify_student.services import IDVerificationService from openedx.core.djangoapps.content.block_structure.api import get_course_in_cache from openedx.core.djangoapps.course_groups.cohorts import bulk_cache_cohorts, get_cohort, is_course_cohorted from openedx.core.djangoapps.user_api.course_tag.api import BulkCourseTags +from openedx.core.djangoapps.waffle_utils import WaffleSwitchNamespace from student.models import CourseEnrollment from student.roles import BulkRoleCache from xmodule.modulestore.django import modulestore @@ -38,6 +39,10 @@ from xmodule.split_test_module import get_split_user_partitions from .runner import TaskProgress from .utils import upload_csv_to_report_store +WAFFLE_NAMESPACE = 'instructor_task' +WAFFLE_SWITCHES = WaffleSwitchNamespace(name=WAFFLE_NAMESPACE) +OPTIMIZE_GET_LEARNERS_FOR_COURSE = 'optimize_get_learners_for_course' + TASK_LOG = logging.getLogger('edx.celery.task') ENROLLED_IN_COURSE = 'enrolled' @@ -297,9 +302,50 @@ class CourseGradeReport(object): args = [iter(iterable)] * chunk_size return izip_longest(*args, fillvalue=fillvalue) - users = CourseEnrollment.objects.users_enrolled_in(context.course_id, include_inactive=True) - users = users.select_related('profile') - return grouper(users) + def users_for_course(course_id): + """ + Get all the enrolled users in a course. 
+ + This method fetches & loads the enrolled user objects at once, which may cause + out-of-memory errors in large courses. This method will be removed when the + `OPTIMIZE_GET_LEARNERS_FOR_COURSE` waffle switch is removed. + """ + users = CourseEnrollment.objects.users_enrolled_in(course_id, include_inactive=True) + users = users.select_related('profile') + return grouper(users) + + def users_for_course_v2(course_id): + """ + Get all the enrolled users in a course chunk by chunk. + + This generator method fetches & loads the enrolled user objects on demand, in chunks + of the size defined by `grouper`. This method is a workaround to avoid out-of-memory errors. + """ + filter_kwargs = { + 'courseenrollment__course_id': course_id, + } + + user_ids_list = get_user_model().objects.filter(**filter_kwargs).values_list('id', flat=True).order_by('id') + user_chunks = grouper(user_ids_list) + for user_ids in user_chunks: + user_ids = [user_id for user_id in user_ids if user_id is not None] + min_id = min(user_ids) + max_id = max(user_ids) + users = get_user_model().objects.filter( + id__gte=min_id, + id__lte=max_id, + **filter_kwargs + ).select_related('profile') + yield users + + task_log_message = u'{}, Task type: {}'.format(context.task_info_string, context.action_name) + if WAFFLE_SWITCHES.is_enabled(OPTIMIZE_GET_LEARNERS_FOR_COURSE): + TASK_LOG.info(u'%s, Creating Course Grade with optimization', task_log_message) + return users_for_course_v2(context.course_id) + + TASK_LOG.info(u'%s, Creating Course Grade without optimization', task_log_message) + batch_users = users_for_course(context.course_id) + return batch_users def _user_grades(self, course_grade, context): """ diff --git a/lms/djangoapps/instructor_task/tests/test_tasks_helper.py b/lms/djangoapps/instructor_task/tests/test_tasks_helper.py index 63b6448761..4cf42490b5 100644 --- a/lms/djangoapps/instructor_task/tests/test_tasks_helper.py +++ b/lms/djangoapps/instructor_task/tests/test_tasks_helper.py @@ -413,7 +413,7 @@ class 
TestInstructorGradeReport(InstructorGradeReportTestCase): RequestCache.clear_all_namespaces() - expected_query_count = 47 + expected_query_count = 48 with patch('lms.djangoapps.instructor_task.tasks_helper.runner._get_current_task'): with check_mongo_calls(mongo_count): with self.assertNumQueries(expected_query_count):