From 0bf85992da090b624fb915cb87a1323736b6a67c Mon Sep 17 00:00:00 2001 From: ichuang Date: Sat, 8 Sep 2012 22:32:28 -0400 Subject: [PATCH] psychometrics djangoapp --- lms/djangoapps/psychometrics/__init__.py | 0 lms/djangoapps/psychometrics/admin.py | 8 + .../psychometrics/management/__init__.py | 0 .../management/commands/__init__.py | 0 .../management/commands/init_psychometrics.py | 66 ++++ lms/djangoapps/psychometrics/models.py | 45 +++ lms/djangoapps/psychometrics/psychoanalyze.py | 312 ++++++++++++++++++ 7 files changed, 431 insertions(+) create mode 100644 lms/djangoapps/psychometrics/__init__.py create mode 100644 lms/djangoapps/psychometrics/admin.py create mode 100644 lms/djangoapps/psychometrics/management/__init__.py create mode 100644 lms/djangoapps/psychometrics/management/commands/__init__.py create mode 100644 lms/djangoapps/psychometrics/management/commands/init_psychometrics.py create mode 100644 lms/djangoapps/psychometrics/models.py create mode 100644 lms/djangoapps/psychometrics/psychoanalyze.py diff --git a/lms/djangoapps/psychometrics/__init__.py b/lms/djangoapps/psychometrics/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/lms/djangoapps/psychometrics/admin.py b/lms/djangoapps/psychometrics/admin.py new file mode 100644 index 0000000000..ff1a14d722 --- /dev/null +++ b/lms/djangoapps/psychometrics/admin.py @@ -0,0 +1,8 @@ +''' +django admin pages for courseware model +''' + +from psychometrics.models import * +from django.contrib import admin + +admin.site.register(PsychometricData) diff --git a/lms/djangoapps/psychometrics/management/__init__.py b/lms/djangoapps/psychometrics/management/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/lms/djangoapps/psychometrics/management/commands/__init__.py b/lms/djangoapps/psychometrics/management/commands/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/lms/djangoapps/psychometrics/management/commands/init_psychometrics.py 
class Command(BaseCommand):
    """
    Management command that (re)builds PsychometricData rows from existing
    StudentModule rows, adding attempt timestamps found in TrackingLog.

    Only StudentModule rows with a non-null max_grade, whose location
    category is "problem" and whose state has a truthy 'done' flag, are
    processed.  Rows are read from and written to the database alias `db`.
    """
    help = ("initialize PsychometricData tables from StudentModule instances (and tracking data, if in SQL). "
            "Note this is done for all courses for which StudentModule instances exist.")

    def handle(self, *args, **options):
        """
        Create or update one PsychometricData row per completed problem and
        print the resulting row count.  Positional args and options are unused.
        """
        smset = StudentModule.objects.using(db).exclude(max_grade=None)

        for sm in smset:
            url = sm.module_state_key
            if Location(url).category != "problem":
                continue

            try:
                state = json.loads(sm.state)
                done = state['done']
            except (TypeError, ValueError, KeyError):
                # TypeError: state is None or not a dict; ValueError: invalid
                # JSON; KeyError: no 'done' entry.  Skip the row, keep going.
                print("Oops, failed to eval state for %s (state=%s)" % (sm, sm.state))
                continue

            if not done:    # only keep if problem completed
                continue

            try:
                pmd = PsychometricData.objects.using(db).get(studentmodule=sm)
            except PsychometricData.DoesNotExist:
                pmd = PsychometricData(studentmodule=sm)

            pmd.done = done
            # A state without 'attempts' must not abort the whole run;
            # 0 is also the model field's default.
            pmd.attempts = state.get('attempts', 0)

            # get attempt times from tracking log
            uname = sm.student.username
            tset = TrackingLog.objects.using(db).filter(username=uname,
                                                        event_type__contains='save_problem_check')
            tset = tset.filter(event_source='server')
            tset = tset.filter(event__contains="'%s'" % url)
            # Chronological order matters: the analysis code takes the
            # difference between consecutive check times.
            checktimes = list(tset.order_by('dtcreated').values_list('dtcreated', flat=True))

            # Stored as the repr of a list of datetime objects, which is what
            # psychoanalyze.py eval()s back.  json.dumps() cannot serialize
            # datetime objects and raises TypeError on any non-empty list.
            pmd.checktimes = repr(checktimes)
            if len(checktimes) != pmd.attempts:
                print("Oops, mismatch in number of attempts and check times for %s" % pmd)

            pmd.save(using=db)

        print("%d PMD entries" % PsychometricData.objects.using(db).all().count())
def func_2pl(x, a, b):
    """
    2-parameter logistic function with scaling constant D = 1.7:

        exp(D*a*(x-b)) / (1 + exp(D*a*(x-b)))

    x may be a number or a numpy array (evaluated element-wise); a and b are
    the two fit parameters.  Returns a value (or array) in [0, 1].
    """
    D = 1.7
    z = D * a * (x - b)
    # e^z / (1 + e^z) == (1 + tanh(z/2)) / 2.  The tanh form saturates at
    # 0 and 1, whereas exp(z) overflows to inf for large z and inf/inf is NaN.
    return 0.5 * (1.0 + np.tanh(0.5 * z))


class StatVar(object):
    """
    Simple running statistics on numbers: cnt, avg, sdv, var, min, max.

    `unit` is a divisor applied when reporting avg() and var() (var() divides
    by unit**2); min and max are kept in the units of the values as added.
    """

    def __init__(self, unit=1):
        self.sum = 0
        self.sum2 = 0
        self.cnt = 0
        self.unit = unit
        self.min = None
        self.max = None

    def add(self, x):
        """Accumulate one value; None is ignored."""
        if x is None:
            return
        if self.min is None or x < self.min:
            self.min = x
        if self.max is None or x > self.max:
            self.max = x
        self.sum += x
        self.sum2 += x ** 2
        self.cnt += 1

    def avg(self):
        """Return the mean of the added values divided by unit; 0 if empty."""
        if not self.cnt:    # cnt starts at 0 (never None): avoid dividing by zero
            return 0
        return self.sum / 1.0 / self.cnt / self.unit

    def var(self):
        """Return the population variance, in unit**2; 0 if empty."""
        if not self.cnt:
            return 0
        return (self.sum2 / 1.0 / self.cnt / (self.unit ** 2)) - (self.avg() ** 2)

    def sdv(self):
        """Return the standard deviation; 0 when the variance is not positive."""
        v = self.var()
        # rounding can leave a tiny negative variance; report 0 for it
        return math.sqrt(v) if v > 0 else 0

    def __str__(self):
        return 'cnt=%d, avg=%f, sdv=%f' % (self.cnt, self.avg(), self.sdv())

    def __add__(self, x):
        """
        Support `sv += x` (and `sv + x`) as shorthand for sv.add(x).

        Note this mutates and returns self rather than building a new object.
        """
        self.add(x)
        return self


def make_histogram(ydata, bins=None):
    '''
    Generate histogram of ydata using the bins provided, or by default bins
    from 0 to 90 in steps of 10.  bins are lower edges and must be in
    increasing order.

    Each value is counted in the last bin whose edge is <= the value, so the
    top bin is open-ended.  Values below the first edge, and None values, are
    not counted.

    Returns dict with keys being the bin edges, and values being counts.
    '''
    if bins is None:
        bins = range(0, 100, 10)

    edges = list(bins)
    hist = dict((edge, 0) for edge in edges)
    for y in ydata:
        if y is None:
            continue
        for edge in reversed(edges):
            # >= so that a value sitting exactly on an edge (e.g. grade 0 with
            # a lowest edge of 0) lands in that edge's bin
            if y >= edge:
                hist[edge] += 1
                break
    return hist
def _parse_checktimes(raw):
    """
    Decode a PsychometricData.checktimes value into a list of datetimes.

    The value is stored as the repr of a list of datetime objects, so it has
    to be eval()ed.  Returns [] for empty, missing, or undecodable values.
    """
    if isinstance(raw, (list, tuple)):      # still the in-memory list, not yet text
        return list(raw)
    if not raw:
        return []
    try:
        # SECURITY NOTE: eval() on database content.  The namespace is limited
        # to the datetime module, but this is not a sandbox; a safer storage
        # format (e.g. ISO strings in JSON) should replace it.
        value = eval(raw, {'__builtins__': {}, 'datetime': datetime})
    except Exception:
        return []
    return list(value) if isinstance(value, (list, tuple)) else []


def generate_plots_for_problem(problem):
    """
    Build flot plot descriptions for one problem.

    problem = module_state_key (location url) of the problem.

    Returns (msg, plots): msg is a status string; plots is a list of dicts
    with keys 'title', 'id', 'info', 'data' (javascript defining the data
    variables) and 'cmd' (javascript plot arguments).  Up to three kinds of
    plot are produced: a grade histogram, a histogram of the time between
    consecutive checks, and one cumulative attempts curve per integer grade,
    with a 2PL fit when there are more than 5 points.
    """
    pmdset = PsychometricData.objects.using(db).filter(studentmodule__module_state_key=problem)
    nstudents = pmdset.count()
    msg = ""
    plots = []

    if nstudents < 2:
        msg += "%s nstudents=%d --> skipping, too few" % (problem, nstudents)
        return msg, plots

    max_grade = pmdset[0].studentmodule.max_grade
    # number of integer grade levels to plot; a missing max_grade means none
    ngrades = int(max_grade) if max_grade else 0

    max_attempts = pmdset.aggregate(Max('attempts'))['attempts__max']
    msg += "max attempts = %d" % max_attempts

    xdat = list(range(1, max_attempts + 1))
    dataset = {'xdat': xdat}

    # generate grade histogram
    axisopts = """{
        xaxes: [{
            axisLabel: 'Grade'
        }],
        yaxes: [{
            position: 'left',
            axisLabel: 'Count'
        }]
    }"""

    if max_grade is not None and max_grade > 1:
        # one query for all grades instead of one studentmodule fetch per row
        grades = pmdset.values_list('studentmodule__grade', flat=True)
        # linspace needs an integer number of points
        ghist = make_histogram(grades, np.linspace(0, max_grade, int(max_grade) + 1))
        ghist_json = json.dumps(sorted(ghist.items()))

        plot = {'title': "Grade histogram for %s" % problem,
                'id': 'histogram',
                'info': '',
                'data': "var dhist = %s;\n" % ghist_json,
                'cmd': "[ {data: dhist, bars: { show: true }} ], %s" % axisopts,
                }
        plots.append(plot)
    else:
        # NOTE(review): the "<br/>" separators in msg are reconstructed; the
        # literals in the reviewed text held a raw line break instead.
        # Confirm against the original commit.
        msg += "<br/>Not generating histogram: max_grade=%s" % max_grade

    # histogram of time differences between checks
    # Warning: this is inefficient - doesn't scale to large numbers of students
    dtset = []          # time differences in minutes
    dtsv = StatVar()
    for raw in pmdset.values_list('checktimes', flat=True):
        checktimes = _parse_checktimes(raw)
        for prev, cur in zip(checktimes, checktimes[1:]):
            dt = (cur - prev).total_seconds() / 60.0
            if dt < 20:     # ignore if dt too long
                dtset.append(dt)
                dtsv += dt
    if dtsv.cnt > 2:
        msg += "<br/>time differences between checks: %s" % dtsv
        bins = np.linspace(0, 1.5 * dtsv.sdv(), 30)
        dbar = bins[1] - bins[0]
        thist = make_histogram(dtset, bins)
        thist_json = json.dumps(sorted(thist.items(), key=lambda item: item[0]))

        axisopts = """{ xaxes: [{ axisLabel: 'Time (min)'}], yaxes: [{position: 'left',axisLabel: 'Count'}]}"""

        plot = {'title': "Histogram of time differences between checks",
                'id': 'thistogram',
                'info': '',
                'data': "var thist = %s;\n" % thist_json,
                'cmd': "[ {data: thist, bars: { show: true, barWidth:%f }} ], %s" % (dbar, axisopts),
                }
        plots.append(plot)

    # one IRT plot curve for each grade received (TODO: this assumes integer grades)
    for grade in range(1, ngrades + 1):
        gset = pmdset.filter(studentmodule__grade=grade)
        ngset = gset.count()
        if ngset == 0:
            continue
        yset = {}
        ydat = []
        cumulative = 0
        for x in xdat:
            # fraction of this grade's students who used exactly x attempts,
            # accumulated into a cumulative curve
            cumulative += gset.filter(attempts=x).count() / float(ngset)
            ydat.append(cumulative)
        yset['ydat'] = ydat

        if len(ydat) > 5:   # try to fit to logistic function if enough data points
            try:
                cfp = curve_fit(func_2pl, np.array(xdat, dtype=float), ydat,
                                [1.0, max_attempts / 2.0])
            except RuntimeError:
                # curve_fit raises RuntimeError when the fit does not
                # converge; plot the raw points without a fitted curve
                log.exception("2PL fit failed for %s grade=%s", problem, grade)
            else:
                yset['fitparam'] = cfp
                yset['fitpts'] = func_2pl(np.array(xdat), *cfp[0])
                yset['fiterr'] = [yd - yf for (yd, yf) in zip(ydat, yset['fitpts'])]
                fitx = np.linspace(xdat[0], xdat[-1], 100)
                yset['fitx'] = fitx
                yset['fity'] = func_2pl(np.array(fitx), *cfp[0])

        dataset['grade_%d' % grade] = yset

    axisopts = """{
        xaxes: [{
            axisLabel: 'Number of Attempts'
        }],
        yaxes: [{
            max:1.0,
            position: 'left',
            axisLabel: 'Probability of correctness'
        }]
    }"""

    # generate points for flot plot
    for grade in range(1, ngrades + 1):
        gkey = 'grade_%d' % grade
        if gkey not in dataset:
            continue
        yset = dataset[gkey]
        jsplots = []
        # list(...) so the pairs are JSON-serializable on Python 3 as well
        jsdata = "var d%d = %s;\n" % (grade, json.dumps(list(zip(xdat, yset['ydat']))))
        jsplots.append('{ data: d%d, lines: { show: false }, points: { show: true}, color: "red" }' % grade)
        if 'fitpts' in yset:
            jsdata += 'var fit = %s;\n' % (json.dumps(list(zip(yset['fitx'], yset['fity']))))
            jsplots.append('{ data: fit, lines: { show: true }, color: "blue" }')
            (a, b) = yset['fitparam'][0]
            irtinfo = "(2PL: D=1.7, a=%6.3f, b=%6.3f)" % (a, b)
        else:
            irtinfo = ""

        plots.append({'title': 'IRT Plot for grade=%s %s' % (grade, irtinfo),
                      'id': "irt%s" % grade,
                      'info': '',
                      'data': jsdata,
                      'cmd': '[%s], %s' % (','.join(jsplots), axisopts),
                      })

    return msg, plots


def make_psychometrics_data_update_handler(studentmodule):
    """
    Construct and return a procedure which may be called to update
    the PsychometricData instance for the given StudentModule instance.

    The PsychometricData row is looked up once, here, or created unsaved if
    none exists; the returned handler updates and saves that same object.
    """
    sm = studentmodule
    try:
        pmd = PsychometricData.objects.using(db).get(studentmodule=sm)
    except PsychometricData.DoesNotExist:
        pmd = PsychometricData(studentmodule=sm)

    def psychometrics_data_update_handler(state):
        """
        This function may be called each time a problem is successfully checked
        (eg on save_problem_check events in capa_module).

        state = instance state (a nice, uniform way to interface - for more
        future psychometric feature extraction).  NOTE(review): the argument
        is currently ignored; the state is re-read from the StudentModule.
        """
        try:
            state = json.loads(sm.state)
            done = state['done']
        except Exception:
            log.exception("Oops, failed to eval state for %s (state=%s)", sm, sm.state)
            return

        pmd.done = done
        # a state without 'attempts' must not raise inside the callback
        pmd.attempts = state.get('attempts', 0)

        checktimes = _parse_checktimes(pmd.checktimes)  # update log of attempt timestamps
        checktimes.append(datetime.datetime.now())
        # Keep the attribute in its stored (text) form, so that a second call
        # on this same handler can decode it again instead of losing history.
        pmd.checktimes = repr(checktimes)
        try:
            pmd.save(using=db)      # same database alias the row was read from
        except Exception:
            log.exception("Error in updating psychometrics data for %s", sm)

    return psychometrics_data_update_handler