Files
edx-platform/lms/djangoapps/analytics/distributions.py
2013-08-02 09:52:18 -04:00

88 lines
3.2 KiB
Python

"""
Profile Distributions
"""
from django.db.models import Count
from student.models import CourseEnrollment, UserProfile
# Features whose values come from a restricted set of choices
# (reported by profile_distribution with type 'EASY_CHOICE').
_EASY_CHOICE_FEATURES = (
    'gender',
    'level_of_education',
)

# Features whose values come from a larger domain
# (reported by profile_distribution with type 'OPEN_CHOICE').
_OPEN_CHOICE_FEATURES = (
    'year_of_birth',
)

# Every feature name accepted by profile_distribution.
AVAILABLE_PROFILE_FEATURES = _EASY_CHOICE_FEATURES + _OPEN_CHOICE_FEATURES
def profile_distribution(course_id, feature):
    """
    Retrieve the distribution of a course's enrolled students over a
    given profile feature.

    Arguments:
        course_id: identifier of the course whose enrollments are counted.
        feature: one of AVAILABLE_PROFILE_FEATURES.

    Returns a dictionary {
        'type': 'SOME_TYPE',
        'data': {'key': 'val'},
        'display_names': {'key': 'displaynameval'}
    }

    'display_names' is only returned for EASY_CHOICE type features.

    Students with no value for the feature are reported under the key
    'no_data' (instead of None) to be compatible with the json spec.

    data types e.g.
        EASY_CHOICE - choices with a restricted domain, e.g. level_of_education
        OPEN_CHOICE - choices with a larger domain e.g. year_of_birth

    Raises:
        ValueError: if feature is not in AVAILABLE_PROFILE_FEATURES.
    """
    if feature not in AVAILABLE_PROFILE_FEATURES:
        raise ValueError(
            "unsupported feature requested for distribution '{}'".format(
                feature)
        )

    feature_results = {}

    if feature in _EASY_CHOICE_FEATURES:
        raw_choices = {
            'gender': UserProfile.GENDER_CHOICES,
            'level_of_education': UserProfile.LEVEL_OF_EDUCATION_CHOICES,
        }[feature]

        # ORM lookup that reaches the profile field from an enrollment row.
        lookup = 'user__profile__' + feature

        def get_count(value):
            """Count enrollments in the course whose profile feature equals value."""
            return CourseEnrollment.objects.filter(
                course_id=course_id, **{lookup: value}
            ).count()

        data = dict((short, get_count(short)) for (short, _full) in raw_choices)

        # 'no_data' is a synthetic bucket, not a stored value: filtering on
        # the literal string 'no_data' would (almost) always count zero.
        # Count the students whose value is missing instead. Both NULL and
        # the empty string are treated as missing, since a blank text field
        # may be stored either way (NOTE(review): confirm against the
        # UserProfile field definitions).
        data['no_data'] = get_count(None) + get_count('')

        choices = [(short, full)
                   for (short, full) in raw_choices] + [('no_data', 'No Data')]

        feature_results['data'] = data
        feature_results['type'] = 'EASY_CHOICE'
        feature_results['display_names'] = dict(choices)
    elif feature in _OPEN_CHOICE_FEATURES:
        profiles = UserProfile.objects.filter(
            user__courseenrollment__course_id=course_id)
        # The empty order_by() clears any default ordering so that the
        # grouping produced by values() is on the feature alone.
        query_distribution = profiles.values(
            feature).annotate(Count(feature)).order_by()
        # query_distribution is of the form [{'featureval': 'value1', 'featureval__count': 4},
        #    {'featureval': 'value2', 'featureval__count': 2}, ...]

        distribution = dict((vald[feature], vald[feature + '__count'])
                            for vald in query_distribution)
        # distribution is of the form {'value1': 4, 'value2': 2, ...}

        # None is not a valid json key, and Count() does not count NULL
        # values (the grouped count for the None row is always 0), so drop
        # that row and count the NULL profiles explicitly under 'no_data'.
        distribution.pop(None, None)
        distribution['no_data'] = profiles.filter(
            **{feature: None}
        ).count()

        feature_results['data'] = distribution
        feature_results['type'] = 'OPEN_CHOICE'

    return feature_results