From bffd9ac38d799e7adb447c018391c2734bbe9840 Mon Sep 17 00:00:00 2001
From: ichuang <ichuang@mit.edu>
Date: Mon, 10 Sep 2012 22:28:37 -0400
Subject: [PATCH] center histogram bars; pep8

---
 lms/djangoapps/psychometrics/psychoanalyze.py | 120 ++++++++++--------
 1 file changed, 66 insertions(+), 54 deletions(-)

diff --git a/lms/djangoapps/psychometrics/psychoanalyze.py b/lms/djangoapps/psychometrics/psychoanalyze.py
index bb2a6ba6a8..dd7d328278 100644
--- a/lms/djangoapps/psychometrics/psychoanalyze.py
+++ b/lms/djangoapps/psychometrics/psychoanalyze.py
@@ -1,7 +1,7 @@
 #
 # File:   psychometrics/psychoanalyze.py
 #
-# generate pyschometrics plots from PsychometricData 
+# generate psychometrics plots from PsychometricData
 
 from __future__ import division
 
@@ -19,98 +19,108 @@ from xmodule.modulestore import Location
 
 log = logging.getLogger("mitx.psychometrics")
 
-#db = "ocwtutor"	# for debugging
+#db = "ocwtutor"        # for debugging
 #db = "default"
 
-db = getattr(settings,'DATABASE_FOR_PSYCHOMETRICS','default')
+db = getattr(settings, 'DATABASE_FOR_PSYCHOMETRICS', 'default')
 
 #-----------------------------------------------------------------------------
 # fit functions
 
-def func_2pl(x,a,b):
+
+def func_2pl(x, a, b):
     """
     2-parameter logistic function
     """
     D = 1.7
-    edax = np.exp(D*a*(x-b))
-    return edax / (1+edax)
+    edax = np.exp(D * a * (x - b))
+    return edax / (1 + edax)
 
 #-----------------------------------------------------------------------------
 # statistics class
 
+
 class StatVar(object):
     """
     Simple statistics on floating point numbers: avg, sdv, var, min, max
     """
-    def __init__(self,unit=1):
+    def __init__(self, unit=1):
         self.sum = 0
         self.sum2 = 0
         self.cnt = 0
         self.unit = unit
         self.min = None
         self.max = None
-    def add(self,x):
+
+    def add(self, x):
         if x is None:
             return
         if self.min is None:
             self.min = x
         else:
-            if x<self.min:
+            if x < self.min:
                 self.min = x
         if self.max is None:
             self.max = x
         else:
-            if x>self.max:
+            if x > self.max:
                 self.max = x
         self.sum += x
         self.sum2 += x**2
         self.cnt += 1
+
     def avg(self):
         if self.cnt is None:
             return 0
         return self.sum / 1.0 / self.cnt / self.unit
+
     def var(self):
         if self.cnt is None:
             return 0
         return (self.sum2 / 1.0 / self.cnt / (self.unit**2)) - (self.avg()**2)
+
     def sdv(self):
         v = self.var()
         if v>0:
             return math.sqrt(v)
         else:
             return 0
+
     def __str__(self):
-        return 'cnt=%d, avg=%f, sdv=%f' % (self.cnt,self.avg(),self.sdv())
-    def __add__(self,x):
+        return 'cnt=%d, avg=%f, sdv=%f' % (self.cnt, self.avg(), self.sdv())
+
+    def __add__(self, x):
         self.add(x)
         return self
 
 #-----------------------------------------------------------------------------
 # histogram generator
 
-def make_histogram(ydata,bins=None):
+
+def make_histogram(ydata, bins=None):
     '''
     Generate histogram of ydata using bins provided, or by default bins
     from 0 to 100 by 10.  bins should be ordered in increasing order.
-    
+
     returns dict with keys being bins, and values being counts.
     special: hist['bins'] = bins
     '''
     if bins is None:
-        bins = range(0,100,10)
-        
+        bins = range(0, 100, 10)
+
     nbins = len(bins)
-    hist = dict(zip(bins,[0] * nbins))
+    hist = dict(zip(bins, [0] * nbins))
     for y in ydata:
-        for b in bins[::-1]:	# in reverse order
+        for b in bins[::-1]:    # in reverse order
             if y>b:
                 hist[b] += 1
                 break
     # hist['bins'] = bins
     return hist
-    
+
 #-----------------------------------------------------------------------------
 
+
 def problems_with_psychometric_data(course_id):
     '''
     Return dict of {problems (location urls): count} for which psychometric data is available.
@@ -118,36 +128,37 @@ def problems_with_psychometric_data(course_id):
     '''
     pmdset = PsychometricData.objects.using(db).filter(studentmodule__course_id=course_id)
     plist = [p['studentmodule__module_state_key'] for p in pmdset.values('studentmodule__module_state_key').distinct()]
-    problems = dict( (p,pmdset.filter(studentmodule__module_state_key=p).count()) for p in plist )
+    problems = dict( (p, pmdset.filter(studentmodule__module_state_key=p).count()) for p in plist )
 
     return problems
 
 #-----------------------------------------------------------------------------
 
+
 def generate_plots_for_problem(problem):
-    
+
     pmdset = PsychometricData.objects.using(db).filter(studentmodule__module_state_key=problem)
     nstudents = pmdset.count()
     msg = ""
     plots = []
 
     if nstudents < 2:
-        msg += "%s nstudents=%d --> skipping, too few" % (problem,nstudents)
+        msg += "%s nstudents=%d --> skipping, too few" % (problem, nstudents)
         return msg, plots
 
     max_grade = pmdset[0].studentmodule.max_grade
 
     agdat = pmdset.aggregate(Sum('attempts'), Max('attempts'))
     max_attempts = agdat['attempts__max']
-    total_attempts = agdat['attempts__sum']	# not used yet
+    total_attempts = agdat['attempts__sum']     # not used yet
 
     msg += "max attempts = %d" % max_attempts
 
-    xdat = range(1,max_attempts+1)
+    xdat = range(1, max_attempts + 1)
     dataset = {'xdat': xdat}
 
     # compute grade statistics
-    grades = [pmd.studentmodule.grade for pmd in pmdset]    
+    grades = [pmd.studentmodule.grade for pmd in pmdset]
     gsv = StatVar()
     for g in grades:
         gsv += g
@@ -171,14 +182,14 @@ def generate_plots_for_problem(problem):
         max_grade = gsv.max
 
     if max_grade > 1:
-        ghist = make_histogram(grades, np.linspace(0,max_grade,max_grade+1))
+        ghist = make_histogram(grades, np.linspace(0, max_grade, max_grade + 1))
         ghist_json = json.dumps(ghist.items())
 
         plot = {'title': "Grade histogram for %s" % problem,
                 'id': 'histogram',
                 'info': '',
                 'data': "var dhist = %s;\n" % ghist_json,
-                'cmd': "[ {data: dhist, bars: { show: true }} ], %s" % axisopts,
+                'cmd': '[ {data: dhist, bars: { show: true, align: "center" }} ], %s' % axisopts,
                 }
         plots.append(plot)
     else:
@@ -186,27 +197,27 @@ def generate_plots_for_problem(problem):
 
     # histogram of time differences between checks
     # Warning: this is inefficient - doesn't scale to large numbers of students
-    dtset = []	# time differences in minutes
+    dtset = []  # time differences in minutes
     dtsv = StatVar()
     for pmd in pmdset:
         try:
-            checktimes = eval(pmd.checktimes)		        # update log of attempt timestamps
+            checktimes = eval(pmd.checktimes)                   # parse stored log of attempt timestamps
         except:
             continue
-        if len(checktimes)<2:
+        if len(checktimes) < 2:
             continue
         ct0 = checktimes[0]
         for ct in checktimes[1:]:
-            dt = (ct-ct0).total_seconds()/60.0
-            if dt<20:			# ignore if dt too long
+            dt = (ct - ct0).total_seconds() / 60.0
+            if dt < 20:                   # ignore if dt too long
                 dtset.append(dt)
                 dtsv += dt
             ct0 = ct
     if dtsv.cnt > 2:
         msg += "<br/><p><font color='brown'>Time differences between checks: %s</font></p>" % dtsv
-        bins = np.linspace(0,1.5*dtsv.sdv(),30)
-        dbar = bins[1]-bins[0]
-        thist = make_histogram(dtset,bins)
+        bins = np.linspace(0, 1.5 * dtsv.sdv(), 30)
+        dbar = bins[1] - bins[0]
+        thist = make_histogram(dtset, bins)
         thist_json = json.dumps(sorted(thist.items(), key=lambda(x): x[0]))
 
         axisopts = """{ xaxes: [{ axisLabel: 'Time (min)'}], yaxes: [{position: 'left',axisLabel: 'Count'}]}"""
@@ -215,33 +226,33 @@ def generate_plots_for_problem(problem):
                 'id': 'thistogram',
                 'info': '',
                 'data': "var thist = %s;\n" % thist_json,
-                'cmd': "[ {data: thist, bars: { show: true, barWidth:%f }} ], %s" % (dbar, axisopts),
+                'cmd': '[ {data: thist, bars: { show: true, align: "center", barWidth:%f }} ], %s' % (dbar, axisopts),
                 }
         plots.append(plot)
 
     # one IRT plot curve for each grade received (TODO: this assumes integer grades)
-    for grade in range(1,int(max_grade)+1):
+    for grade in range(1, int(max_grade) + 1):
         yset = {}
         gset = pmdset.filter(studentmodule__grade=grade)
         ngset = gset.count()
-        if ngset==0:
+        if ngset == 0:
             continue
         ydat = []
         ylast = 0
         for x in xdat:
-            y = gset.filter(attempts=x).count()/ngset
+            y = gset.filter(attempts=x).count() / ngset
             ydat.append( y + ylast )
             ylast = y + ylast
         yset['ydat'] = ydat
 
-        if len(ydat)>3:		# try to fit to logistic function if enough data points
-            cfp = curve_fit(func_2pl, xdat, ydat, [1.0, max_attempts/2.0])
+        if len(ydat) > 3:         # try to fit to logistic function if enough data points
+            cfp = curve_fit(func_2pl, xdat, ydat, [1.0, max_attempts / 2.0])
             yset['fitparam'] = cfp
-            yset['fitpts'] = func_2pl(np.array(xdat),*cfp[0])
-            yset['fiterr'] = [yd-yf for (yd,yf) in zip(ydat,yset['fitpts'])]
-            fitx = np.linspace(xdat[0],xdat[-1],100)
+            yset['fitpts'] = func_2pl(np.array(xdat), *cfp[0])
+            yset['fiterr'] = [yd - yf for (yd, yf) in zip(ydat, yset['fitpts'])]
+            fitx = np.linspace(xdat[0], xdat[-1], 100)
             yset['fitx'] = fitx
-            yset['fity'] = func_2pl(np.array(fitx),*cfp[0])
+            yset['fity'] = func_2pl(np.array(fitx), *cfp[0])
 
         dataset['grade_%d' % grade] = yset
 
@@ -257,27 +268,27 @@ def generate_plots_for_problem(problem):
          }"""
 
     # generate points for flot plot
-    for grade in range(1,int(max_grade)+1):
+    for grade in range(1, int(max_grade) + 1):
         jsdata = ""
         jsplots = []
         gkey = 'grade_%d' % grade
         if gkey in dataset:
             yset = dataset[gkey]
-            jsdata += "var d%d = %s;\n" % (grade,json.dumps(zip(xdat,yset['ydat'])))
+            jsdata += "var d%d = %s;\n" % (grade, json.dumps(zip(xdat, yset['ydat'])))
             jsplots.append('{ data: d%d, lines: { show: false }, points: { show: true}, color: "red" }' % grade)
             if 'fitpts' in yset:
-                jsdata += 'var fit = %s;\n' % (json.dumps(zip(yset['fitx'],yset['fity'])))
+                jsdata += 'var fit = %s;\n' % (json.dumps(zip(yset['fitx'], yset['fity'])))
                 jsplots.append('{ data: fit,  lines: { show: true }, color: "blue" }')
-                (a,b) = yset['fitparam'][0]
-                irtinfo = "(2PL: D=1.7, a=%6.3f, b=%6.3f)" % (a,b)
+                (a, b) = yset['fitparam'][0]
+                irtinfo = "(2PL: D=1.7, a=%6.3f, b=%6.3f)" % (a, b)
             else:
                 irtinfo = ""
 
-            plots.append({'title': 'IRT Plot for grade=%s %s' % (grade,irtinfo),
+            plots.append({'title': 'IRT Plot for grade=%s %s' % (grade, irtinfo),
                           'id': "irt%s" % grade,
                           'info': '',
                           'data': jsdata,
-                          'cmd' : '[%s], %s' % (','.join(jsplots), axisopts),
+                          'cmd': '[%s], %s' % (','.join(jsplots), axisopts),
                 })
 
     #log.debug('plots = %s' % plots)
@@ -285,6 +296,7 @@ def generate_plots_for_problem(problem):
 
 #-----------------------------------------------------------------------------
 
+
 def make_psychometrics_data_update_handler(studentmodule):
     """
     Construct and return a procedure which may be called to update
@@ -307,13 +319,13 @@ def make_psychometrics_data_update_handler(studentmodule):
             state = json.loads(sm.state)
             done = state['done']
         except:
-            log.exception("Oops, failed to eval state for %s (state=%s)" % (sm,sm.state))
+            log.exception("Oops, failed to eval state for %s (state=%s)" % (sm, sm.state))
             return
 
         pmd.done = done
         pmd.attempts = state['attempts']
         try:
-            checktimes = eval(pmd.checktimes)		        # update log of attempt timestamps
+            checktimes = eval(pmd.checktimes)                   # update log of attempt timestamps
         except:
             checktimes = []
         checktimes.append(datetime.datetime.now())