From c89031b0772a0e25267f44015959b94b851cd861 Mon Sep 17 00:00:00 2001
From: Victor Shnayder <victor@mitx.mit.edu>
Date: Tue, 4 Sep 2012 15:39:10 -0400
Subject: [PATCH 1/3] Export of answer distributions to csv

- go through all students, all of their problems, save count for each answer
- return csv
- url exists, but no links to it yet
- Will need to integrate with Ike's new dashboard code
---
 lms/djangoapps/courseware/grades.py | 70 +++++++++++++++++++++++++++++
 lms/djangoapps/courseware/views.py  | 31 +++++++++++--
 lms/urls.py                         |  3 ++
 3 files changed, 100 insertions(+), 4 deletions(-)

diff --git a/lms/djangoapps/courseware/grades.py b/lms/djangoapps/courseware/grades.py
index f32da532df..eeaf15d093 100644
--- a/lms/djangoapps/courseware/grades.py
+++ b/lms/djangoapps/courseware/grades.py
@@ -4,11 +4,14 @@ from __future__ import division
 import random
 import logging
 
+from collections import defaultdict
 from django.conf import settings
+from django.contrib.auth.models import User
 
 from models import StudentModuleCache
 from module_render import get_module, get_instance_module
 from xmodule import graders
+from xmodule.capa_module import CapaModule
 from xmodule.course_module import CourseDescriptor
 from xmodule.graders import Score
 from models import StudentModule
@@ -24,6 +27,73 @@ def yield_module_descendents(module):
         stack.extend( next_module.get_display_items() )
         yield next_module
 
+def yield_problems(request, course, student):
+    """
+    Return an iterator over capa_modules that this student has
+    potentially answered.  (all that student has answered will definitely be in
+    the list, but there may be others as well).
+    """
+    grading_context = course.grading_context
+    student_module_cache = StudentModuleCache(course.id, student, grading_context['all_descriptors'])
+
+    for section_format, sections in grading_context['graded_sections'].iteritems():
+        for section in sections:
+
+            section_descriptor = section['section_descriptor']
+
+            # If the student hasn't seen a single problem in the section, skip it.
+            skip = True
+            for moduledescriptor in section['xmoduledescriptors']:
+                if student_module_cache.lookup(
+                        course.id, moduledescriptor.category, moduledescriptor.location.url()):
+                    skip = False
+                    break
+
+            if skip:
+                continue
+
+            section_module = get_module(student, request,
+                                        section_descriptor.location, student_module_cache,
+                                        course.id)
+            if section_module is None:
+                # student doesn't have access to this module, or something else
+                # went wrong.
+                log.debug("couldn't get module for student {0} for section location {1}"
+                          .format(student.username, section_descriptor.location))
+                continue
+
+            for problem in yield_module_descendents(section_module):
+                if isinstance(problem, CapaModule):
+                    yield problem
+
+def answer_distributions(request, course):
+    """
+    Given a course_descriptor, compute frequencies of answers for each problem:
+
+    Format is:
+
+    dict: (problem url_name, problem display_name, problem_id) -> (dict : answer ->  count)
+
+    TODO (vshnayder): this is currently doing a full linear pass through all
+    students and all problems.  This will be just a little slow.
+    """
+
+    counts = defaultdict(lambda: defaultdict(int))
+
+    enrolled_students = User.objects.filter(courseenrollment__course_id=course.id)
+
+    for student in enrolled_students:
+        for capa_module in yield_problems(request, course, student):
+            log.debug("looking at problem {0} for {1}. answers {2}".format(
+                capa_module.display_name, student.username, capa_module.lcp.student_answers))
+            for problem_id in capa_module.lcp.student_answers:
+                answer = capa_module.lcp.student_answers[problem_id]
+                key = (capa_module.url_name, capa_module.display_name, problem_id)
+                counts[key][answer] += 1
+
+    return counts
+
+
 def grade(student, request, course, student_module_cache=None, keep_raw_scores=False):
     """
     This grades a student as quickly as possible. It retuns the
diff --git a/lms/djangoapps/courseware/views.py b/lms/djangoapps/courseware/views.py
index bf361937e7..ba39d7d545 100644
--- a/lms/djangoapps/courseware/views.py
+++ b/lms/djangoapps/courseware/views.py
@@ -1,7 +1,9 @@
+import csv
 import json
 import logging
 import urllib
 import itertools
+import StringIO
 
 from functools import partial
 
@@ -219,9 +221,9 @@ def jump_to(request, course_id, location):
 
     # Rely on index to do all error handling and access control.
     return redirect('courseware_position',
-                    course_id=course_id, 
-                    chapter=chapter, 
-                    section=section, 
+                    course_id=course_id,
+                    chapter=chapter,
+                    section=section,
                     position=position)
 @ensure_csrf_cookie
 def course_info(request, course_id):
@@ -342,7 +344,7 @@ def progress(request, course_id, student_id=None):
     # NOTE: To make sure impersonation by instructor works, use
     # student instead of request.user in the rest of the function.
 
-    # The pre-fetching of groups is done to make auth checks not require an 
+    # The pre-fetching of groups is done to make auth checks not require an
     # additional DB lookup (this kills the Progress page in particular).
     student = User.objects.prefetch_related("groups").get(id=student.id)
 
@@ -370,3 +372,24 @@ def progress(request, course_id, student_id=None):
 
 
 
+@cache_control(no_cache=True, no_store=True, must_revalidate=True)
+def answers_export(request, course_id):
+    """
+    Export the distribution of student answers to all problems as a csv file.
+
+    - only displayed to course staff
+    """
+    course = get_course_with_access(request.user, course_id, 'staff')
+
+    dist = grades.answer_distributions(request, course)
+
+    response = HttpResponse(mimetype='text/csv')
+    response['Content-Disposition'] = 'attachment; filename=%s' % "answer_distribution.csv"
+
+    writer = csv.writer(response)
+    for (url_name, display_name, answer_id), answers in dist.items():
+        # HEADER? 
+        for a in answers:
+            writer.writerow([url_name, display_name, answer_id, a, answers[a]])
+
+    return response
diff --git a/lms/urls.py b/lms/urls.py
index 26aa10a3f4..c58b44d2ba 100644
--- a/lms/urls.py
+++ b/lms/urls.py
@@ -161,6 +161,9 @@ if settings.COURSEWARE_ENABLED:
             'instructor.views.grade_summary', name='grade_summary'),
         url(r'^courses/(?P<course_id>[^/]+/[^/]+/[^/]+)/enroll_students$',
             'instructor.views.enroll_students', name='enroll_students'),
+        url(r'^courses/(?P<course_id>[^/]+/[^/]+/[^/]+)/answers_export$',
+            'courseware.views.answers_export', name='answers_export'),
+
     )
 
     # discussion forums live within courseware, so courseware must be enabled first

From f04cd838e591ee3112816a1bdf32baf4d9c6567b Mon Sep 17 00:00:00 2001
From: Victor Shnayder <victor@mitx.mit.edu>
Date: Wed, 5 Sep 2012 13:26:06 -0400
Subject: [PATCH 2/3] Integrate csv export of answer distributions with Ike's
 sweet dashboard

---
 lms/djangoapps/courseware/views.py            | 23 -------------
 lms/djangoapps/instructor/views.py            | 34 ++++++++++++++++---
 .../courseware/instructor_dashboard.html      |  3 ++
 lms/urls.py                                   |  7 ++--
 4 files changed, 35 insertions(+), 32 deletions(-)

diff --git a/lms/djangoapps/courseware/views.py b/lms/djangoapps/courseware/views.py
index ba39d7d545..60279d34c9 100644
--- a/lms/djangoapps/courseware/views.py
+++ b/lms/djangoapps/courseware/views.py
@@ -370,26 +370,3 @@ def progress(request, course_id, student_id=None):
 
     return render_to_response('courseware/progress.html', context)
 
-
-
-@cache_control(no_cache=True, no_store=True, must_revalidate=True)
-def answers_export(request, course_id):
-    """
-    Export the distribution of student answers to all problems as a csv file.
-
-    - only displayed to course staff
-    """
-    course = get_course_with_access(request.user, course_id, 'staff')
-
-    dist = grades.answer_distributions(request, course)
-
-    response = HttpResponse(mimetype='text/csv')
-    response['Content-Disposition'] = 'attachment; filename=%s' % "answer_distribution.csv"
-
-    writer = csv.writer(response)
-    for (url_name, display_name, answer_id), answers in dist.items():
-        # HEADER? 
-        for a in answers:
-            writer.writerow([url_name, display_name, answer_id, a, answers[a]])
-
-    return response
diff --git a/lms/djangoapps/instructor/views.py b/lms/djangoapps/instructor/views.py
index 92b2401216..0c36236021 100644
--- a/lms/djangoapps/instructor/views.py
+++ b/lms/djangoapps/instructor/views.py
@@ -48,7 +48,7 @@ def instructor_dashboard(request, course_id):
     """Display the instructor dashboard for a course."""
     course = get_course_with_access(request.user, course_id, 'staff')
 
-    instructor_access = has_access(request.user, course, 'instructor')		# an instructor can manage staff lists
+    instructor_access = has_access(request.user, course, 'instructor')   # an instructor can manage staff lists
 
     msg = ''
     # msg += ('POST=%s' % dict(request.POST)).replace('<','&lt;')
@@ -99,7 +99,7 @@ def instructor_dashboard(request, course_id):
             msg += "git pull on %s:<p>" % data_dir
             msg += "<pre>%s</pre></p>" % escape(os.popen(cmd).read())
             track.views.server_track(request, 'git pull %s' % data_dir, {}, page='idashboard')
-            
+
     if 'Reload course' in action:
         log.debug('reloading %s (%s)' % (course_id, course))
         try:
@@ -144,6 +144,10 @@ def instructor_dashboard(request, course_id):
         return return_csv('grades_%s_raw.csv' % course_id,
                           get_student_grade_summary_data(request, course, course_id, get_raw_scores=True))
 
+    elif 'Download CSV of answer distributions' in action:
+        track.views.server_track(request, 'dump-answer-dist-csv', {}, page='idashboard')
+        return return_csv('answer_dist_%s.csv' % course_id, get_answers_distribution(request, course_id))
+
     elif 'List course staff' in action:
         group = get_staff_group(course)
         msg += 'Staff group = %s' % group.name
@@ -290,7 +294,7 @@ def grade_summary(request, course_id):
 @ensure_csrf_cookie
 @cache_control(no_cache=True, no_store=True, must_revalidate=True)
 def enroll_students(request, course_id):
-    ''' Allows a staff member to enroll students in a course.
+    """Allows a staff member to enroll students in a course.
 
     This is a short-term hack for Berkeley courses launching fall
     2012. In the long term, we would like functionality like this, but
@@ -300,7 +304,7 @@ def enroll_students(request, course_id):
 
     It is poorly written and poorly tested, but it's designed to be
     stripped out.
-    '''
+    """
 
     course = get_course_with_access(request.user, course_id, 'staff')
     existing_students = [ce.user.email for ce in CourseEnrollment.objects.filter(course_id=course_id)]
@@ -328,6 +332,28 @@ def enroll_students(request, course_id):
                                                        'rejected_students': rejected_students,
                                                        'debug': new_students})
 
+
+def get_answers_distribution(request, course_id):
+    """
+    Get the distribution of answers for all graded problems in the course.
+
+    Return a dict with two keys:
+    'header': a header row
+    'data': a list of rows
+    """
+    course = get_course_with_access(request.user, course_id, 'staff')
+
+    dist = grades.answer_distributions(request, course)
+
+    d = {}
+    d['header'] = ['url_name', 'display name', 'answer id', 'answer', 'count']
+
+    d['data'] = [[url_name, display_name, answer_id, a, answers[a]]
+                 for (url_name, display_name, answer_id), answers in dist.items()
+                 for a in answers]
+    return d
+
+
 #-----------------------------------------------------------------------------
 
 
diff --git a/lms/templates/courseware/instructor_dashboard.html b/lms/templates/courseware/instructor_dashboard.html
index 29397e5c41..930ec7ef88 100644
--- a/lms/templates/courseware/instructor_dashboard.html
+++ b/lms/templates/courseware/instructor_dashboard.html
@@ -58,6 +58,9 @@ table.stat_table td {
     <input type="submit" name="action" value="Dump all RAW grades for all students in this course">
     <input type="submit" name="action" value="Download CSV of all RAW grades">
 
+    <p>
+    <input type="submit" name="action" value="Download CSV of answer distributions">
+
 %if instructor_access:
     <hr width="40%" style="align:left">
     <p>
diff --git a/lms/urls.py b/lms/urls.py
index c58b44d2ba..21d434272e 100644
--- a/lms/urls.py
+++ b/lms/urls.py
@@ -161,9 +161,6 @@ if settings.COURSEWARE_ENABLED:
             'instructor.views.grade_summary', name='grade_summary'),
         url(r'^courses/(?P<course_id>[^/]+/[^/]+/[^/]+)/enroll_students$',
             'instructor.views.enroll_students', name='enroll_students'),
-        url(r'^courses/(?P<course_id>[^/]+/[^/]+/[^/]+)/answers_export$',
-            'courseware.views.answers_export', name='answers_export'),
-
     )
 
     # discussion forums live within courseware, so courseware must be enabled first
@@ -200,8 +197,8 @@ if settings.WIKI_ENABLED:
     )
 
 if settings.QUICKEDIT:
-	urlpatterns += (url(r'^quickedit/(?P<id>[^/]*)$', 'dogfood.views.quickedit'),)
-	urlpatterns += (url(r'^dogfood/(?P<id>[^/]*)$', 'dogfood.views.df_capa_problem'),)
+    urlpatterns += (url(r'^quickedit/(?P<id>[^/]*)$', 'dogfood.views.quickedit'),)
+    urlpatterns += (url(r'^dogfood/(?P<id>[^/]*)$', 'dogfood.views.df_capa_problem'),)
 
 if settings.ASKBOT_ENABLED:
     urlpatterns += (url(r'^%s' % settings.ASKBOT_URL, include('askbot.urls')), \

From a8cd4633c0dd381e35b354507640734287519ef8 Mon Sep 17 00:00:00 2001
From: Victor Shnayder <victor@mitx.mit.edu>
Date: Wed, 5 Sep 2012 13:46:15 -0400
Subject: [PATCH 3/3] remove debugging statements

---
 lms/djangoapps/courseware/grades.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/lms/djangoapps/courseware/grades.py b/lms/djangoapps/courseware/grades.py
index eeaf15d093..e7e5b0d9aa 100644
--- a/lms/djangoapps/courseware/grades.py
+++ b/lms/djangoapps/courseware/grades.py
@@ -58,8 +58,8 @@ def yield_problems(request, course, student):
             if section_module is None:
                 # student doesn't have access to this module, or something else
                 # went wrong.
-                log.debug("couldn't get module for student {0} for section location {1}"
-                          .format(student.username, section_descriptor.location))
+                # log.debug("couldn't get module for student {0} for section location {1}"
+                #           .format(student.username, section_descriptor.location))
                 continue
 
             for problem in yield_module_descendents(section_module):
@@ -84,8 +84,6 @@ def answer_distributions(request, course):
 
     for student in enrolled_students:
         for capa_module in yield_problems(request, course, student):
-            log.debug("looking at problem {0} for {1}. answers {2}".format(
-                capa_module.display_name, student.username, capa_module.lcp.student_answers))
             for problem_id in capa_module.lcp.student_answers:
                 answer = capa_module.lcp.student_answers[problem_id]
                 key = (capa_module.url_name, capa_module.display_name, problem_id)