From bffd9ac38d799e7adb447c018391c2734bbe9840 Mon Sep 17 00:00:00 2001 From: ichuang Date: Mon, 10 Sep 2012 22:28:37 -0400 Subject: [PATCH] center histogram bars; pep8 --- lms/djangoapps/psychometrics/psychoanalyze.py | 120 ++++++++++-------- 1 file changed, 66 insertions(+), 54 deletions(-) diff --git a/lms/djangoapps/psychometrics/psychoanalyze.py b/lms/djangoapps/psychometrics/psychoanalyze.py index bb2a6ba6a8..dd7d328278 100644 --- a/lms/djangoapps/psychometrics/psychoanalyze.py +++ b/lms/djangoapps/psychometrics/psychoanalyze.py @@ -1,7 +1,7 @@ # # File: psychometrics/psychoanalyze.py # -# generate pyschometrics plots from PsychometricData +# generate pyschometrics plots from PsychometricData from __future__ import division @@ -19,98 +19,108 @@ from xmodule.modulestore import Location log = logging.getLogger("mitx.psychometrics") -#db = "ocwtutor" # for debugging +#db = "ocwtutor" # for debugging #db = "default" -db = getattr(settings,'DATABASE_FOR_PSYCHOMETRICS','default') +db = getattr(settings, 'DATABASE_FOR_PSYCHOMETRICS', 'default') #----------------------------------------------------------------------------- # fit functions -def func_2pl(x,a,b): + +def func_2pl(x, a, b): """ 2-parameter logistic function """ D = 1.7 - edax = np.exp(D*a*(x-b)) - return edax / (1+edax) + edax = np.exp(D * a * (x - b)) + return edax / (1 + edax) #----------------------------------------------------------------------------- # statistics class + class StatVar(object): """ Simple statistics on floating point numbers: avg, sdv, var, min, max """ - def __init__(self,unit=1): + def __init__(self, unit=1): self.sum = 0 self.sum2 = 0 self.cnt = 0 self.unit = unit self.min = None self.max = None - def add(self,x): + + def add(self, x): if x is None: return if self.min is None: self.min = x else: - if xself.max: + if x > self.max: self.max = x self.sum += x self.sum2 += x**2 self.cnt += 1 + def avg(self): if self.cnt is None: return 0 return self.sum / 1.0 / self.cnt / self.unit + def var(self): if self.cnt is None: return 0 return (self.sum2 / 1.0 / self.cnt / (self.unit**2)) - (self.avg()**2) + def sdv(self): v = self.var() if v>0: return math.sqrt(v) else: return 0 + def __str__(self): - return 'cnt=%d, avg=%f, sdv=%f' % (self.cnt,self.avg(),self.sdv()) - def __add__(self,x): + return 'cnt=%d, avg=%f, sdv=%f' % (self.cnt, self.avg(), self.sdv()) + + def __add__(self, x): self.add(x) return self #----------------------------------------------------------------------------- # histogram generator -def make_histogram(ydata,bins=None): + +def make_histogram(ydata, bins=None): ''' Generate histogram of ydata using bins provided, or by default bins from 0 to 100 by 10. bins should be ordered in increasing order. - + returns dict with keys being bins, and values being counts. special: hist['bins'] = bins ''' if bins is None: - bins = range(0,100,10) - + bins = range(0, 100, 10) + nbins = len(bins) - hist = dict(zip(bins,[0] * nbins)) + hist = dict(zip(bins, [0] * nbins)) for y in ydata: - for b in bins[::-1]: # in reverse order + for b in bins[::-1]: # in reverse order if y>b: hist[b] += 1 break # hist['bins'] = bins return hist - + #----------------------------------------------------------------------------- + def problems_with_psychometric_data(course_id): ''' Return dict of {problems (location urls): count} for which psychometric data is available. @@ -118,36 +128,37 @@ def problems_with_psychometric_data(course_id): ''' pmdset = PsychometricData.objects.using(db).filter(studentmodule__course_id=course_id) plist = [p['studentmodule__module_state_key'] for p in pmdset.values('studentmodule__module_state_key').distinct()] - problems = dict( (p,pmdset.filter(studentmodule__module_state_key=p).count()) for p in plist ) + problems = dict( (p, pmdset.filter(studentmodule__module_state_key=p).count()) for p in plist ) return problems #----------------------------------------------------------------------------- + def generate_plots_for_problem(problem): - + pmdset = PsychometricData.objects.using(db).filter(studentmodule__module_state_key=problem) nstudents = pmdset.count() msg = "" plots = [] if nstudents < 2: - msg += "%s nstudents=%d --> skipping, too few" % (problem,nstudents) + msg += "%s nstudents=%d --> skipping, too few" % (problem, nstudents) return msg, plots max_grade = pmdset[0].studentmodule.max_grade agdat = pmdset.aggregate(Sum('attempts'), Max('attempts')) max_attempts = agdat['attempts__max'] - total_attempts = agdat['attempts__sum'] # not used yet + total_attempts = agdat['attempts__sum'] # not used yet msg += "max attempts = %d" % max_attempts - xdat = range(1,max_attempts+1) + xdat = range(1, max_attempts + 1) dataset = {'xdat': xdat} # compute grade statistics - grades = [pmd.studentmodule.grade for pmd in pmdset] + grades = [pmd.studentmodule.grade for pmd in pmdset] gsv = StatVar() for g in grades: gsv += g @@ -171,14 +182,14 @@ def generate_plots_for_problem(problem): max_grade = gsv.max if max_grade > 1: - ghist = make_histogram(grades, np.linspace(0,max_grade,max_grade+1)) + ghist = make_histogram(grades, np.linspace(0, max_grade, max_grade + 1)) ghist_json = json.dumps(ghist.items()) plot = {'title': "Grade histogram for %s" % problem, 'id': 'histogram', 'info': '', 'data': "var dhist = %s;\n" % ghist_json, - 'cmd': "[ {data: dhist, bars: { show: true }} ], %s" % axisopts, + 'cmd': '[ {data: dhist, bars: { show: true, align: "center" }} ], %s' % axisopts, } plots.append(plot) else: @@ -186,27 +197,27 @@ def generate_plots_for_problem(problem): # histogram of time differences between checks # Warning: this is inefficient - doesn't scale to large numbers of students - dtset = [] # time differences in minutes + dtset = [] # time differences in minutes dtsv = StatVar() for pmd in pmdset: try: - checktimes = eval(pmd.checktimes) # update log of attempt timestamps + checktimes = eval(pmd.checktimes) # update log of attempt timestamps except: continue - if len(checktimes)<2: + if len(checktimes) < 2: continue ct0 = checktimes[0] for ct in checktimes[1:]: - dt = (ct-ct0).total_seconds()/60.0 - if dt<20: # ignore if dt too long + dt = (ct - ct0).total_seconds() / 60.0 + if dt < 20: # ignore if dt too long dtset.append(dt) dtsv += dt ct0 = ct if dtsv.cnt > 2: msg += "

Time differences between checks: %s

" % dtsv - bins = np.linspace(0,1.5*dtsv.sdv(),30) - dbar = bins[1]-bins[0] - thist = make_histogram(dtset,bins) + bins = np.linspace(0, 1.5 * dtsv.sdv(), 30) + dbar = bins[1] - bins[0] + thist = make_histogram(dtset, bins) thist_json = json.dumps(sorted(thist.items(), key=lambda(x): x[0])) axisopts = """{ xaxes: [{ axisLabel: 'Time (min)'}], yaxes: [{position: 'left',axisLabel: 'Count'}]}""" @@ -215,33 +226,33 @@ def generate_plots_for_problem(problem): 'id': 'thistogram', 'info': '', 'data': "var thist = %s;\n" % thist_json, - 'cmd': "[ {data: thist, bars: { show: true, barWidth:%f }} ], %s" % (dbar, axisopts), + 'cmd': '[ {data: thist, bars: { show: true, align: "center", barWidth:%f }} ], %s' % (dbar, axisopts), } plots.append(plot) # one IRT plot curve for each grade received (TODO: this assumes integer grades) - for grade in range(1,int(max_grade)+1): + for grade in range(1, int(max_grade) + 1): yset = {} gset = pmdset.filter(studentmodule__grade=grade) ngset = gset.count() - if ngset==0: + if ngset == 0: continue ydat = [] ylast = 0 for x in xdat: - y = gset.filter(attempts=x).count()/ngset + y = gset.filter(attempts=x).count() / ngset ydat.append( y + ylast ) ylast = y + ylast yset['ydat'] = ydat - if len(ydat)>3: # try to fit to logistic function if enough data points - cfp = curve_fit(func_2pl, xdat, ydat, [1.0, max_attempts/2.0]) + if len(ydat) > 3: # try to fit to logistic function if enough data points + cfp = curve_fit(func_2pl, xdat, ydat, [1.0, max_attempts / 2.0]) yset['fitparam'] = cfp - yset['fitpts'] = func_2pl(np.array(xdat),*cfp[0]) - yset['fiterr'] = [yd-yf for (yd,yf) in zip(ydat,yset['fitpts'])] - fitx = np.linspace(xdat[0],xdat[-1],100) + yset['fitpts'] = func_2pl(np.array(xdat), *cfp[0]) + yset['fiterr'] = [yd - yf for (yd, yf) in zip(ydat, yset['fitpts'])] + fitx = np.linspace(xdat[0], xdat[-1], 100) yset['fitx'] = fitx - yset['fity'] = func_2pl(np.array(fitx),*cfp[0]) + yset['fity'] = func_2pl(np.array(fitx), *cfp[0]) dataset['grade_%d' % grade] = yset @@ -257,27 +268,27 @@ def generate_plots_for_problem(problem): }""" # generate points for flot plot - for grade in range(1,int(max_grade)+1): + for grade in range(1, int(max_grade) + 1): jsdata = "" jsplots = [] gkey = 'grade_%d' % grade if gkey in dataset: yset = dataset[gkey] - jsdata += "var d%d = %s;\n" % (grade,json.dumps(zip(xdat,yset['ydat']))) + jsdata += "var d%d = %s;\n" % (grade, json.dumps(zip(xdat, yset['ydat']))) jsplots.append('{ data: d%d, lines: { show: false }, points: { show: true}, color: "red" }' % grade) if 'fitpts' in yset: - jsdata += 'var fit = %s;\n' % (json.dumps(zip(yset['fitx'],yset['fity']))) + jsdata += 'var fit = %s;\n' % (json.dumps(zip(yset['fitx'], yset['fity']))) jsplots.append('{ data: fit, lines: { show: true }, color: "blue" }') - (a,b) = yset['fitparam'][0] - irtinfo = "(2PL: D=1.7, a=%6.3f, b=%6.3f)" % (a,b) + (a, b) = yset['fitparam'][0] + irtinfo = "(2PL: D=1.7, a=%6.3f, b=%6.3f)" % (a, b) else: irtinfo = "" - plots.append({'title': 'IRT Plot for grade=%s %s' % (grade,irtinfo), + plots.append({'title': 'IRT Plot for grade=%s %s' % (grade, irtinfo), 'id': "irt%s" % grade, 'info': '', 'data': jsdata, - 'cmd' : '[%s], %s' % (','.join(jsplots), axisopts), + 'cmd': '[%s], %s' % (','.join(jsplots), axisopts), }) #log.debug('plots = %s' % plots) @@ -285,6 +296,7 @@ def generate_plots_for_problem(problem): #----------------------------------------------------------------------------- + def make_psychometrics_data_update_handler(studentmodule): """ Construct and return a procedure which may be called to update @@ -307,13 +319,13 @@ def make_psychometrics_data_update_handler(studentmodule): state = json.loads(sm.state) done = state['done'] except: - log.exception("Oops, failed to eval state for %s (state=%s)" % (sm,sm.state)) + log.exception("Oops, failed to eval state for %s (state=%s)" % (sm, sm.state)) return pmd.done = done pmd.attempts = state['attempts'] try: - checktimes = eval(pmd.checktimes) # update log of attempt timestamps + checktimes = eval(pmd.checktimes) # update log of attempt timestamps except: checktimes = [] checktimes.append(datetime.datetime.now())