psychometrics djangoapp
This commit is contained in:
0
lms/djangoapps/psychometrics/__init__.py
Normal file
0
lms/djangoapps/psychometrics/__init__.py
Normal file
8
lms/djangoapps/psychometrics/admin.py
Normal file
8
lms/djangoapps/psychometrics/admin.py
Normal file
@@ -0,0 +1,8 @@
|
||||
'''
django admin pages for the psychometrics model
'''

from django.contrib import admin

# Import the one model registered below explicitly instead of using a
# wildcard import, so it is clear where the name comes from.
from psychometrics.models import PsychometricData

# Expose PsychometricData in the Django admin site with the default ModelAdmin.
admin.site.register(PsychometricData)
|
||||
0
lms/djangoapps/psychometrics/management/__init__.py
Normal file
0
lms/djangoapps/psychometrics/management/__init__.py
Normal file
@@ -0,0 +1,66 @@
|
||||
#!/usr/bin/python
|
||||
#
|
||||
# generate psychometrics data from tracking logs and student module data
|
||||
|
||||
import os, sys, string
|
||||
import datetime
|
||||
import json
|
||||
|
||||
from courseware.models import *
|
||||
from track.models import *
|
||||
from psychometrics.models import *
|
||||
from xmodule.modulestore import Location
|
||||
|
||||
from django.core.management.base import BaseCommand
|
||||
|
||||
#db = "ocwtutor" # for debugging
|
||||
db = "default"  # database alias passed to .using(db) / save(using=db) below
|
||||
|
||||
class Command(BaseCommand):
    """
    Management command which fills in PsychometricData rows from existing
    StudentModule rows (and TrackingLog rows, when tracking data is in SQL).

    Existing PsychometricData rows are updated in place; nothing is deleted.
    """
    help = ("initialize PsychometricData tables from StudentModule instances (and tracking data, if in SQL). "
            "Note this is done for all courses for which StudentModule instances exist.")

    def handle(self, *args, **options):
        """
        Walk all StudentModule rows which have a max_grade, and for each
        completed capa problem create or update the matching PsychometricData.

        Progress and problems are reported with print; a final line gives the
        total number of PsychometricData rows.
        """
        smset = StudentModule.objects.using(db).exclude(max_grade=None)

        for sm in smset:
            url = sm.module_state_key
            location = Location(url)
            if location.category != "problem":
                continue

            try:
                state = json.loads(sm.state)
                done = state['done']
            except (TypeError, ValueError, KeyError):
                # state is missing, not valid JSON, or has no 'done' entry
                print("Oops, failed to eval state for %s (state=%s)" % (sm, sm.state))
                continue

            if not done:  # only keep if problem completed
                continue

            try:
                pmd = PsychometricData.objects.using(db).get(studentmodule=sm)
            except PsychometricData.DoesNotExist:
                pmd = PsychometricData(studentmodule=sm)

            pmd.done = done
            # fall back to the model default (0) when the state has no attempt count
            pmd.attempts = state.get('attempts', 0)

            # get attempt times from tracking log
            uname = sm.student.username
            tset = TrackingLog.objects.using(db).filter(username=uname,
                                                        event_type__contains='save_problem_check')
            tset = tset.filter(event_source='server')
            tset = tset.filter(event__contains="'%s'" % url)
            checktimes = [x.dtcreated for x in tset]

            # Store the python repr of the list, not JSON: datetime objects are
            # not JSON serializable (json.dumps raises TypeError), and the code
            # which reads this field back in psychoanalyze.py uses eval().
            # NOTE(review): repr() of timezone-aware datetimes may not be
            # eval-able - confirm dtcreated values are naive.
            pmd.checktimes = repr(checktimes)

            if len(checktimes) != pmd.attempts:
                print("Oops, mismatch in number of attempts and check times for %s" % pmd)

            pmd.save(using=db)

        print("%d PMD entries" % PsychometricData.objects.using(db).count())
|
||||
45
lms/djangoapps/psychometrics/models.py
Normal file
45
lms/djangoapps/psychometrics/models.py
Normal file
@@ -0,0 +1,45 @@
|
||||
#
|
||||
# db model for psychometrics data
|
||||
#
|
||||
# this data is collected in real time
|
||||
#
|
||||
|
||||
from django.db import models
|
||||
from courseware.models import StudentModule
|
||||
|
||||
class PsychometricData(models.Model):
    """
    One row per (student, capa problem): links a StudentModule to the
    performance data gathered for it - number of attempts, whether the
    problem is done, and the times at which it was checked.  Grade and
    max grade are read through the linked StudentModule.

    Only StudentModule instances for capa problems are linked.

    StudentModule.module_state_key is nominally a Location instance (url string),
    i.e. of the form {tag}://{org}/{course}/{category}/{name}[@{revision}],
    with category = "problem" for capa problems.

    checktimes is extracted from tracking logs, or added by capa module via psychometrics callback.
    """

    # contains student, module_state_key, course_id
    studentmodule = models.ForeignKey(StudentModule, db_index=True, unique=True)

    done = models.BooleanField(default=False)
    # extracted from studentmodule.state
    attempts = models.IntegerField(default=0)
    # internally stored as list of datetime objects
    checktimes = models.TextField(null=True, blank=True)

    # Available through the foreign key (not duplicated here):
    #   grade     = studentmodule.grade
    #   max_grade = studentmodule.max_grade
    #   student   = studentmodule.student
    #   course_id = studentmodule.course_id
    #   location  = studentmodule.module_state_key

    def __unicode__(self):
        """One-line summary of this record and its linked StudentModule."""
        module = self.studentmodule
        template = "[PsychometricData] %s url=%s, grade=%s, max=%s, attempts=%s, ct=%s"
        fields = (
            module.student,
            module.module_state_key,
            module.grade,
            module.max_grade,
            self.attempts,
            self.checktimes,
        )
        return template % fields
|
||||
|
||||
312
lms/djangoapps/psychometrics/psychoanalyze.py
Normal file
312
lms/djangoapps/psychometrics/psychoanalyze.py
Normal file
@@ -0,0 +1,312 @@
|
||||
#
|
||||
# File: psychometrics/psychoanalyze.py
|
||||
#
|
||||
# generate psychometrics plots from PsychometricData
|
||||
|
||||
from __future__ import division
|
||||
|
||||
import datetime
|
||||
import logging
|
||||
import json
|
||||
import math
|
||||
import numpy as np
|
||||
from scipy.optimize import curve_fit
|
||||
|
||||
from django.db.models import Sum, Max
|
||||
from psychometrics.models import *
|
||||
from xmodule.modulestore import Location
|
||||
|
||||
log = logging.getLogger("mitx.psychometrics")  # module-level logger for this file
|
||||
|
||||
#db = "ocwtutor" # for debugging
|
||||
db = "default"  # database alias passed to .using(db) in the queries below
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# fit functions
|
||||
|
||||
def func_2pl(x, a, b):
    """
    2-parameter logistic function

        f(x) = exp(D*a*(x-b)) / (1 + exp(D*a*(x-b))),   D = 1.7

    x may be a scalar or an array-like; a and b are the two fit parameters.

    The value is computed as 0.5*(1 + tanh(z/2)) with z = D*a*(x-b), which is
    mathematically the same function but cannot overflow: the direct form
    gives inf/inf = nan once exp(z) overflows (large z), which can happen
    while a fitting routine explores parameter space.
    """
    D = 1.7
    z = D * a * (np.asarray(x, dtype=float) - b)
    return 0.5 * (1.0 + np.tanh(0.5 * z))
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# statistics class
|
||||
|
||||
class StatVar(object):
    """
    Simple running statistics on floating point numbers: avg, sdv, var, min, max

    Values are accumulated with add(x) or, equivalently, ``stat += x``.
    avg() is divided by ``unit`` and var() by ``unit**2``; min and max hold
    the raw (unscaled) values, or None while nothing has been added.
    """
    def __init__(self, unit=1):
        self.sum = 0
        self.sum2 = 0
        self.cnt = 0
        self.unit = unit
        self.min = None
        self.max = None

    def add(self, x):
        """Accumulate one value; None is ignored."""
        if x is None:
            return
        if self.min is None or x < self.min:
            self.min = x
        if self.max is None or x > self.max:
            self.max = x
        self.sum += x
        self.sum2 += x ** 2
        self.cnt += 1

    def avg(self):
        """Mean of the added values in units of ``unit``; 0 if nothing was added."""
        # cnt starts at 0 (never None), so test for "no data" with a falsy check
        # to avoid dividing by zero on an empty accumulator.
        if not self.cnt:
            return 0
        return self.sum / 1.0 / self.cnt / self.unit

    def var(self):
        """Population variance in units of ``unit**2``; 0 if nothing was added."""
        if not self.cnt:
            return 0
        return (self.sum2 / 1.0 / self.cnt / (self.unit ** 2)) - (self.avg() ** 2)

    def sdv(self):
        """Standard deviation; 0 when the variance is not positive."""
        v = self.var()
        # rounding can make the computed variance slightly negative
        if v > 0:
            return math.sqrt(v)
        return 0

    def __str__(self):
        return 'cnt=%d, avg=%f, sdv=%f' % (self.cnt, self.avg(), self.sdv())

    def __add__(self, x):
        """Add x to this accumulator in place and return self (so ``s += x`` works)."""
        self.add(x)
        return self

    # make the in-place nature of "+=" explicit
    __iadd__ = __add__
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# histogram generator
|
||||
|
||||
def make_histogram(ydata, bins=None):
    '''
    Count the values of ydata into bins.

    bins is a sequence of bin values in increasing order; when omitted,
    0, 10, ..., 90 is used.  Each value of ydata is counted in the largest
    bin which is strictly less than the value; a value which is not greater
    than any bin is not counted at all.

    Returns a dict mapping each bin to its count.
    '''
    if bins is None:
        bins = range(0, 100, 10)

    counts = dict((edge, 0) for edge in bins)
    descending = bins[::-1]  # search from the highest bin downwards

    for value in ydata:
        hit = next((edge for edge in descending if value > edge), None)
        if hit is not None:
            counts[hit] += 1

    return counts
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
def problems_with_psychometric_data(course_id):
    '''
    Return dict of {problems (location urls): count} for which psychometric data is available.
    Does this for a given course_id.

    The counts are computed with a single grouped query (GROUP BY
    module_state_key) instead of one COUNT query per problem.
    '''
    # local import: Count is only needed here
    from django.db.models import Count

    key = 'studentmodule__module_state_key'
    pmdset = PsychometricData.objects.using(db).filter(studentmodule__course_id=course_id)

    # order_by() clears any ordering so that only `key` ends up in the GROUP BY
    rows = pmdset.order_by().values(key).annotate(nrecords=Count('id'))

    return dict((row[key], row['nrecords']) for row in rows)
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
def generate_plots_for_problem(problem):
    """
    Generate plot descriptions (for flot) from the PsychometricData of one problem.

    problem = module_state_key (location url) of the problem

    Returns (msg, plots):

      msg   = status / diagnostic text (may contain <br/> markup)
      plots = list of dicts with keys 'title', 'id', 'info',
              'data' (javascript "var ... = ...;" declarations) and
              'cmd' (javascript: list of data series, then axis options)

    Plots generated, when enough data is available:

      - histogram of grades (only if max_grade > 1)
      - histogram of time differences between successive checks
      - for each integer grade, cumulative fraction of students vs number of
        attempts, with a 2PL logistic fit when there are more than 5 points

    If there are fewer than 2 records for the problem, no plots are generated.
    """
    pmdset = PsychometricData.objects.using(db).filter(studentmodule__module_state_key=problem)
    nstudents = pmdset.count()
    msg = ""
    plots = []

    if nstudents < 2:
        msg += "%s nstudents=%d --> skipping, too few" % (problem, nstudents)
        return msg, plots

    max_grade = pmdset[0].studentmodule.max_grade

    agdat = pmdset.aggregate(Sum('attempts'), Max('attempts'))
    max_attempts = agdat['attempts__max']
    total_attempts = agdat['attempts__sum']  # not used yet

    msg += "max attempts = %d" % max_attempts

    xdat = list(range(1, max_attempts + 1))
    dataset = {'xdat': xdat}

    # generate grade histogram
    ghist = []

    axisopts = """{
xaxes: [{
axisLabel: 'Grade'
}],
yaxes: [{
position: 'left',
axisLabel: 'Count'
}]
}"""

    if max_grade > 1:
        # the number of bins must be an integer; max_grade itself may not be
        # (the IRT loops below also use int(max_grade))
        grade_bins = np.linspace(0, max_grade, int(max_grade) + 1)
        ghist = make_histogram([pmd.studentmodule.grade for pmd in pmdset], grade_bins)
        ghist_json = json.dumps(list(ghist.items()))

        plot = {'title': "Grade histogram for %s" % problem,
                'id': 'histogram',
                'info': '',
                'data': "var dhist = %s;\n" % ghist_json,
                'cmd': "[ {data: dhist, bars: { show: true }} ], %s" % axisopts,
                }
        plots.append(plot)
    else:
        msg += "<br/>Not generating histogram: max_grade=%s" % max_grade

    # histogram of time differences between checks
    # Warning: this is inefficient - doesn't scale to large numbers of students
    dtset = []  # time differences in minutes
    dtsv = StatVar()
    for pmd in pmdset:
        try:
            # SECURITY NOTE: eval() executes whatever text is stored in the
            # checktimes column (expected: repr of a list of datetime objects).
            # This is only acceptable while that column is written exclusively
            # by this application - flagged for replacement with a safe format.
            checktimes = eval(pmd.checktimes)
        except Exception:
            # empty (None) or unparsable checktimes: no timing data for this record
            continue
        if not isinstance(checktimes, (list, tuple)) or len(checktimes) < 2:
            continue
        ct0 = checktimes[0]
        for ct in checktimes[1:]:
            dt = (ct - ct0).total_seconds() / 60.0
            if dt < 20:  # ignore if dt too long
                dtset.append(dt)
                dtsv += dt
            ct0 = ct

    if dtsv.cnt > 2:
        msg += "<br/>time differences between checks: %s" % dtsv
        bins = np.linspace(0, 1.5 * dtsv.sdv(), 30)
        dbar = bins[1] - bins[0]
        thist = make_histogram(dtset, bins)
        thist_json = json.dumps(sorted(thist.items(), key=lambda item: item[0]))

        axisopts = """{ xaxes: [{ axisLabel: 'Time (min)'}], yaxes: [{position: 'left',axisLabel: 'Count'}]}"""

        plot = {'title': "Histogram of time differences between checks",
                'id': 'thistogram',
                'info': '',
                'data': "var thist = %s;\n" % thist_json,
                'cmd': "[ {data: thist, bars: { show: true, barWidth:%f }} ], %s" % (dbar, axisopts),
                }
        plots.append(plot)

    # one IRT plot curve for each grade received (TODO: this assumes integer grades)
    for grade in range(1, int(max_grade) + 1):
        yset = {}
        gset = pmdset.filter(studentmodule__grade=grade)
        ngset = gset.count()
        if ngset == 0:
            continue

        # cumulative fraction of the students with this grade, by number of attempts
        ydat = []
        ylast = 0
        for x in xdat:
            y = gset.filter(attempts=x).count() / ngset
            ylast = y + ylast
            ydat.append(ylast)
        yset['ydat'] = ydat

        if len(ydat) > 5:  # try to fit to logistic function if enough data points
            try:
                cfp = curve_fit(func_2pl, xdat, ydat, [1.0, max_attempts / 2.0])
            except (RuntimeError, ValueError) as err:
                # the least-squares fit did not converge (or rejected the data);
                # keep the raw points and just leave out the fitted curve
                log.debug("psychoanalyze: curve fit failed for %s grade=%s: %s" % (problem, grade, err))
            else:
                yset['fitparam'] = cfp
                yset['fitpts'] = func_2pl(np.array(xdat), *cfp[0])
                yset['fiterr'] = [yd - yf for (yd, yf) in zip(ydat, yset['fitpts'])]
                fitx = np.linspace(xdat[0], xdat[-1], 100)
                yset['fitx'] = fitx
                yset['fity'] = func_2pl(np.array(fitx), *cfp[0])

        dataset['grade_%d' % grade] = yset

    axisopts = """{
xaxes: [{
axisLabel: 'Number of Attempts'
}],
yaxes: [{
max:1.0,
position: 'left',
axisLabel: 'Probability of correctness'
}]
}"""

    # generate points for flot plot
    for grade in range(1, int(max_grade) + 1):
        jsdata = ""
        jsplots = []
        gkey = 'grade_%d' % grade
        if gkey in dataset:
            yset = dataset[gkey]
            jsdata += "var d%d = %s;\n" % (grade, json.dumps(list(zip(xdat, yset['ydat']))))
            jsplots.append('{ data: d%d, lines: { show: false }, points: { show: true}, color: "red" }' % grade)
            if 'fitpts' in yset:
                jsdata += 'var fit = %s;\n' % (json.dumps(list(zip(yset['fitx'], yset['fity']))))
                jsplots.append('{ data: fit, lines: { show: true }, color: "blue" }')
                (a, b) = yset['fitparam'][0]
                irtinfo = "(2PL: D=1.7, a=%6.3f, b=%6.3f)" % (a, b)
            else:
                irtinfo = ""

            plots.append({'title': 'IRT Plot for grade=%s %s' % (grade, irtinfo),
                          'id': "irt%s" % grade,
                          'info': '',
                          'data': jsdata,
                          'cmd': '[%s], %s' % (','.join(jsplots), axisopts),
                          })

    return msg, plots
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
def make_psychometrics_data_update_handler(studentmodule):
    """
    Construct and return a procedure which may be called to update
    the PsychometricsData instance for the given StudentModule instance.

    The PsychometricData record is looked up (or a new, unsaved one is
    constructed) once, here; the returned handler updates and saves it.
    """
    sm = studentmodule
    try:
        pmd = PsychometricData.objects.using(db).get(studentmodule=sm)
    except PsychometricData.DoesNotExist:
        pmd = PsychometricData(studentmodule=sm)

    def psychometrics_data_update_handler(state):
        """
        This function may be called each time a problem is successfully checked
        (eg on save_problem_check events in capa_module).

        state = instance state (a nice, uniform way to interface - for more future psychometric feature extraction)

        Errors are logged and never propagated to the caller.
        """
        # NOTE(review): the state argument is not used; the state is re-read
        # from the StudentModule captured above - confirm this is intended.
        try:
            state = json.loads(sm.state)
            done = state['done']
        except Exception:
            log.exception("Oops, failed to eval state for %s (state=%s)" % (sm, sm.state))
            return

        pmd.done = done
        # fall back to the model default (0) when the state has no attempt count
        pmd.attempts = state.get('attempts', 0)

        try:
            # SECURITY NOTE: eval() executes whatever text is stored in the
            # checktimes column (expected: repr of a list of datetime objects,
            # which is why this module imports datetime).  Only acceptable while
            # that column is written exclusively by this application - flagged
            # for replacement with a safe format.
            checktimes = list(eval(pmd.checktimes))  # update log of attempt timestamps
        except Exception:
            # nothing stored yet (None) or unparsable: start a new log
            checktimes = []
        checktimes.append(datetime.datetime.now())

        # Keep the field a string in memory as well as in the database, so that
        # a later call of this handler can parse it again; assigning the list
        # itself made eval() fail on the next call, discarding earlier timestamps.
        pmd.checktimes = repr(checktimes)

        try:
            # save to the same database the record was looked up in
            pmd.save(using=db)
        except Exception:
            log.exception("Error in updating psychometrics data for %s" % sm)

    return psychometrics_data_update_handler
|
||||
Reference in New Issue
Block a user