fix analytics distribution with no data, add unit test, cleanup

2013-07-29 11:03:55 -04:00
parent f7989cb2ab
commit 2760938e15
6 changed files with 61 additions and 31 deletions
--- a/lms/djangoapps/analytics/basic.py
+++ b/lms/djangoapps/analytics/basic.py
@@ -36,7 +36,7 @@ def enrolled_students_features(course_id, features):
        student_dict = dict((feature, getattr(student, feature))
                            for feature in student_features)
        profile = student.profile
-        if not profile is None:
+        if profile is not None:
            profile_dict = dict((feature, getattr(profile, feature))
                                for feature in profile_features)
            student_dict.update(profile_dict)
--- a/lms/djangoapps/analytics/distributions.py
+++ b/lms/djangoapps/analytics/distributions.py
@@ -24,8 +24,11 @@ The distribution in a course for gender might look like:
 from django.db.models import Count
 from student.models import CourseEnrollment, UserProfile

+# choices with a restricted domain, e.g. level_of_education
 _EASY_CHOICE_FEATURES = ('gender', 'level_of_education')
+# choices with a larger domain e.g. year_of_birth
 _OPEN_CHOICE_FEATURES = ('year_of_birth',)
+
 AVAILABLE_PROFILE_FEATURES = _EASY_CHOICE_FEATURES + _OPEN_CHOICE_FEATURES
 DISPLAY_NAMES = {
    'gender': 'Gender',
@@ -51,7 +54,7 @@ class ProfileDistribution(object):

    def __init__(self, feature):
        self.feature = feature
-        self.feature_display_name = DISPLAY_NAMES[feature]
+        self.feature_display_name = DISPLAY_NAMES.get(feature, feature)

    def validate(self):
        """
@@ -64,6 +67,7 @@ class ProfileDistribution(object):
                raise ProfileDistribution.ValidationError()

        validation_assert(isinstance(self.feature, str))
+        validation_assert(self.feature in DISPLAY_NAMES)
        validation_assert(isinstance(self.feature_display_name, str))
        validation_assert(self.type in ['EASY_CHOICE', 'OPEN_CHOICE'])
        validation_assert(isinstance(self.data, dict))
@@ -78,10 +82,8 @@ def profile_distribution(course_id, feature):

    Returns a ProfileDistribution instance.

-    NOTE: no_data will appear as a key instead of None to be compatible with the json spec.
-    data types are
-        EASY_CHOICE - choices with a restricted domain, e.g. level_of_education
-        OPEN_CHOICE - choices with a larger domain e.g. year_of_birth
+    NOTE: no_data will appear as a key instead of None/null to adhere to the json spec.
+    data types are EASY_CHOICE or OPEN_CHOICE
    """

    if not feature in AVAILABLE_PROFILE_FEATURES:
@@ -100,28 +102,41 @@ def profile_distribution(course_id, feature):
        elif feature == 'level_of_education':
            raw_choices = UserProfile.LEVEL_OF_EDUCATION_CHOICES

-        # short name and display nae (full) of the choices.
+        # short name and display name (full) of the choices.
        choices = [(short, full)
                   for (short, full) in raw_choices] + [('no_data', 'No Data')]

+        def get_filter(feature, value):
+            """ Get the orm filter parameters for a feature. """
+            return {
+                'gender': {'user__profile__gender': value},
+                'level_of_education': {'user__profile__level_of_education': value},
+            }[feature]
+
+        def get_count(feature, value):
+            """ Get the count of enrolled students matching the feature value. """
+            return CourseEnrollment.objects.filter(
+                course_id=course_id,
+                **get_filter(feature, value)
+            ).count()
+
        distribution = {}
        for (short, full) in choices:
-            if feature == 'gender':
-                count = CourseEnrollment.objects.filter(
-                    course_id=course_id, user__profile__gender=short
-                ).count()
-            elif feature == 'level_of_education':
-                count = CourseEnrollment.objects.filter(
-                    course_id=course_id, user__profile__level_of_education=short
-                ).count()
-            distribution[short] = count
+            # handle no data case
+            if short == 'no_data':
+                distribution['no_data'] = 0
+                distribution['no_data'] += get_count(feature, None)
+                distribution['no_data'] += get_count(feature, '')
+            else:
+                distribution[short] = get_count(feature, short)

        prd.data = distribution
        prd.choices_display_names = dict(choices)
    elif feature in _OPEN_CHOICE_FEATURES:
        prd.type = 'OPEN_CHOICE'
        profiles = UserProfile.objects.filter(
-            user__courseenrollment__course_id=course_id)
+            user__courseenrollment__course_id=course_id
+        )
        query_distribution = profiles.values(
            feature).annotate(Count(feature)).order_by()
        # query_distribution is of the form [{'featureval': 'value1', 'featureval__count': 4},
@@ -133,9 +148,12 @@ def profile_distribution(course_id, feature):

        # change none to no_data for valid json key
        if None in distribution:
-            distribution['no_data'] = distribution.pop(None)
-            # django does not properly count NULL values, so the above will alwasy be 0.
-            # this correctly counts null values
+            # django does not properly count NULL values when using annotate Count
+            # so
+            #     distribution['no_data'] = distribution.pop(None)
+            # would always be 0.
+
+            # Correctly count null values
            distribution['no_data'] = profiles.filter(
                **{feature: None}
            ).count()
--- a/lms/djangoapps/analytics/tests/test_distributions.py
+++ b/lms/djangoapps/analytics/tests/test_distributions.py
@@ -19,7 +19,10 @@ class TestAnalyticsDistributions(TestCase):
            profile__year_of_birth=i + 1930
        ) for i in xrange(30)]

-        self.ces = [CourseEnrollment.objects.create(course_id=self.course_id, user=user) for user in self.users]
+        self.ces = [CourseEnrollment.objects.create(
+            course_id=self.course_id,
+            user=user
+        ) for user in self.users]

    @raises(ValueError)
    def test_profile_distribution_bad_feature(self):
@@ -59,12 +62,23 @@ class TestAnalyticsDistributionsNoData(TestCase):

        self.nodata_users = [UserFactory(
            profile__year_of_birth=None,
-        ) for _ in xrange(4)]
+            profile__gender=[None, ''][i % 2]
+        ) for i in xrange(4)]

        self.users += self.nodata_users

        self.ces = tuple(CourseEnrollment.objects.create(course_id=self.course_id, user=user) for user in self.users)

+    def test_profile_distribution_easy_choice_nodata(self):
+        feature = 'gender'
+        self.assertIn(feature, AVAILABLE_PROFILE_FEATURES)
+        distribution = profile_distribution(self.course_id, feature)
+        print distribution
+        self.assertEqual(distribution.type, 'EASY_CHOICE')
+        self.assertTrue(hasattr(distribution, 'choices_display_names'))
+        self.assertIn('no_data', distribution.data)
+        self.assertEqual(distribution.data['no_data'], len(self.nodata_users))
+
    def test_profile_distribution_open_choice_nodata(self):
        feature = 'year_of_birth'
        self.assertIn(feature, AVAILABLE_PROFILE_FEATURES)
--- a/lms/djangoapps/instructor/access.py
+++ b/lms/djangoapps/instructor/access.py
@@ -22,7 +22,7 @@ def list_with_level(course, level):
    """
    List users who have 'level' access.

-    level is in ['instructor', 'staff', 'beta'] for standard courses.
+    `level` is in ['instructor', 'staff', 'beta'] for standard courses.
    There could be other levels specific to the course.
    If there is no Group for that course-level, returns an empty list
    """
@@ -42,7 +42,7 @@ def allow_access(course, user, level):
    """
    Allow user access to course modification.

-    level is one of ['instructor', 'staff', 'beta']
+    `level` is one of ['instructor', 'staff', 'beta']
    """
    _change_access(course, user, level, 'allow')

@@ -51,7 +51,7 @@ def revoke_access(course, user, level):
    """
    Revoke access from user to course modification.

-    level is one of ['instructor', 'staff', 'beta']
+    `level` is one of ['instructor', 'staff', 'beta']
    """
    _change_access(course, user, level, 'revoke')

@@ -60,7 +60,7 @@ def _change_access(course, user, level, action):
    """
    Change access of user.

-    level is one of ['instructor', 'staff', 'beta']
+    `level` is one of ['instructor', 'staff', 'beta']
    action is one of ['allow', 'revoke']

    NOTE: will create a group if it does not yet exist.
--- a/lms/djangoapps/instructor/views/instructor_dashboard.py
+++ b/lms/djangoapps/instructor/views/instructor_dashboard.py
@@ -49,7 +49,6 @@ def instructor_dashboard_2(request, course_id):

    context = {
        'course': course,
-        # 'cohorts_ajax_url': reverse('cohorts', kwargs={'course_id': course_id}),
        'old_dashboard_url': reverse('instructor_dashboard', kwargs={'course_id': course_id}),
        'sections': sections,
    }
@@ -83,7 +82,9 @@ def _section_course_info(course_id):
    section_data['has_started'] = course.has_started()
    section_data['has_ended'] = course.has_ended()
    try:
-        section_data['grade_cutoffs'] = "" + reduce(lambda memo, (letter, score): "{}: {}, ".format(letter, score) + memo, course.grade_cutoffs.items(), "")[:-2] + ""
+        def next(memo, (letter, score)):
+            return "{}: {}, ".format(letter, score) + memo
+        section_data['grade_cutoffs'] = reduce(next, course.grade_cutoffs.items(), "")[:-2]
    except:
        section_data['grade_cutoffs'] = "Not Available"
    # section_data['offline_grades'] = offline_grades_available(course_id)
--- a/lms/envs/common.py
+++ b/lms/envs/common.py
@@ -156,9 +156,6 @@ MITX_FEATURES = {
    # Toggle to enable chat availability (configured on a per-course
    # basis in Studio)
    'ENABLE_CHAT': False,
-
-    # Enable instructor dash to submit background tasks
-    'ENABLE_INSTRUCTOR_BETA_DASHBOARD': False,
 }

 # Used for A/B testing