diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 84962963bd..0ee8e6175a 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -5,8 +5,11 @@ These are notable changes in edx-platform. This is a rolling list of changes, in roughly chronological order, most recent first. Add your entries at or near the top. Include a label indicating the component affected. +Common: Added configurable backends for tracking events. Tracking events using +the python logging module is the default backend. Support for MongoDB and a +Django database is also available. -Blades: Added Learning Tools Interoperability (LTI) blade. Now LTI components +Blades: Added Learning Tools Interoperability (LTI) blade. Now LTI components can be included to courses. LMS: Added alphabetical sorting of forum categories and subcategories. @@ -287,4 +290,3 @@ Common: Updated CodeJail. Common: Allow setting of authentication session cookie name. LMS: Option to email students when enroll/un-enroll them. - diff --git a/cms/envs/aws.py b/cms/envs/aws.py index c2ba51a5f8..1e35c70a0a 100644 --- a/cms/envs/aws.py +++ b/cms/envs/aws.py @@ -161,3 +161,6 @@ BROKER_URL = "{0}://{1}:{2}@{3}/{4}".format(CELERY_BROKER_TRANSPORT, CELERY_BROKER_PASSWORD, CELERY_BROKER_HOSTNAME, CELERY_BROKER_VHOST) + +# Event tracking +TRACKING_BACKENDS.update(AUTH_TOKENS.get("TRACKING_BACKENDS", {})) diff --git a/cms/envs/common.py b/cms/envs/common.py index 9b9f58a7b5..4e8c1c1812 100644 --- a/cms/envs/common.py +++ b/cms/envs/common.py @@ -218,9 +218,6 @@ USE_L10N = True # Localization strings (e.g. django.po) are under this directory LOCALE_PATHS = (REPO_ROOT + '/conf/locale',) # mitx/conf/locale/ -# Tracking -TRACK_MAX_EVENT = 10000 - # Messages MESSAGE_STORAGE = 'django.contrib.messages.storage.session.SessionStorage' @@ -391,3 +388,16 @@ MKTG_URL_LINK_MAP = { } COURSES_WITH_UNSAFE_CODE = [] + +############################## EVENT TRACKING ################################# + +TRACK_MAX_EVENT = 10000 + +TRACKING_BACKENDS = { + 'logger': { + 'ENGINE': 'track.backends.logger.LoggerBackend', + 'OPTIONS': { + 'name': 'tracking' + } + } +} diff --git a/common/djangoapps/track/backends/__init__.py b/common/djangoapps/track/backends/__init__.py new file mode 100644 index 0000000000..999a5c086c --- /dev/null +++ b/common/djangoapps/track/backends/__init__.py @@ -0,0 +1,28 @@ +""" +Event tracking backend module. + +Contains the base class for event trackers, and implementation of some +backends. + +""" + +from __future__ import absolute_import + +import abc + + +# pylint: disable=unused-argument +class BaseBackend(object): + """ + Abstract Base Class for event tracking backends. + + """ + __metaclass__ = abc.ABCMeta + + def __init__(self, **kwargs): + pass + + @abc.abstractmethod + def send(self, event): + """Send event to tracker.""" + pass diff --git a/common/djangoapps/track/backends/django.py b/common/djangoapps/track/backends/django.py new file mode 100644 index 0000000000..24669dfac2 --- /dev/null +++ b/common/djangoapps/track/backends/django.py @@ -0,0 +1,86 @@ +""" +Event tracker backend that saves events to a Django database. + +""" + +# TODO: this module is very specific to the event schema, and is only +# brought here for legacy support. It should be updated when the +# schema changes or eventually deprecated. + +from __future__ import absolute_import + +import logging + +import dateutil + +from django.db import models + +from track.backends import BaseBackend + + +log = logging.getLogger('track.backends.django') + + +LOGFIELDS = [ + 'username', + 'ip', + 'event_source', + 'event_type', + 'event', + 'agent', + 'page', + 'time', + 'host' +] + + +class TrackingLog(models.Model): + """Defines the fields that are stored in the tracking log database.""" + + dtcreated = models.DateTimeField('creation date', auto_now_add=True) + username = models.CharField(max_length=32, blank=True) + ip = models.CharField(max_length=32, blank=True) + event_source = models.CharField(max_length=32) + event_type = models.CharField(max_length=512, blank=True) + event = models.TextField(blank=True) + agent = models.CharField(max_length=256, blank=True) + page = models.CharField(max_length=512, blank=True, null=True) + time = models.DateTimeField('event time') + host = models.CharField(max_length=64, blank=True) + + class Meta: + app_label = 'track' + db_table = 'track_trackinglog' + + def __unicode__(self): + fmt = ( + u"[{self.time}] {self.username}@{self.ip}: " + u"{self.event_source}| {self.event_type} | " + u"{self.page} | {self.event}" + ) + return fmt.format(self=self) + + +class DjangoBackend(BaseBackend): + """Event tracker backend that saves to a Django database""" + def __init__(self, name='default', **options): + """ + Configure database used by the backend. + + :Parameters: + + - `name` is the name of the database as specified in the project + settings. + + """ + super(DjangoBackend, self).__init__(**options) + self.name = name + + def send(self, event): + event['time'] = dateutil.parser.parse(event['time']) + field_values = {x: event.get(x, '') for x in LOGFIELDS} + tldat = TrackingLog(**field_values) + try: + tldat.save(using=self.name) + except Exception as e: # pylint: disable=broad-except + log.exception(e) diff --git a/common/djangoapps/track/backends/logger.py b/common/djangoapps/track/backends/logger.py new file mode 100644 index 0000000000..56261b1567 --- /dev/null +++ b/common/djangoapps/track/backends/logger.py @@ -0,0 +1,43 @@ +"""Event tracker backend that saves events to a python logger.""" + +from __future__ import absolute_import + +import logging +import json + +from django.conf import settings + +from track.backends import BaseBackend + + +log = logging.getLogger('track.backends.logger') + + +class LoggerBackend(BaseBackend): + """Event tracker backend that uses a python logger. + + Events are logged to the INFO level as JSON strings. + + """ + + def __init__(self, name, **kwargs): + """Event tracker backend that uses a python logger. + + :Parameters: + - `name`: identifier of the logger, which should have + been configured using the default python mechanisms. + + """ + super(LoggerBackend, self).__init__(**kwargs) + + self.event_logger = logging.getLogger(name) + + def send(self, event): + event_str = json.dumps(event) + + # TODO: remove trucation of the serialized event, either at a + # higher level during the emittion of the event, or by + # providing warnings when the events exceed certain size. + event_str = event_str[:settings.TRACK_MAX_EVENT] + + self.event_logger.info(event_str) diff --git a/common/djangoapps/track/backends/mongodb.py b/common/djangoapps/track/backends/mongodb.py new file mode 100644 index 0000000000..79d0505edd --- /dev/null +++ b/common/djangoapps/track/backends/mongodb.py @@ -0,0 +1,90 @@ +"""MongoDB event tracker backend.""" + +from __future__ import absolute_import + +import logging + +import pymongo +from pymongo import MongoClient +from pymongo.errors import PyMongoError + +from track.backends import BaseBackend + + +log = logging.getLogger('track.backends.mongodb') + + +class MongoBackend(BaseBackend): + """Class for a MongoDB event tracker Backend""" + + def __init__(self, **kwargs): + """ + Connect to a MongoDB. + + :Parameters: + + - `host`: hostname + - `port`: port + - `user`: collection username + - `password`: collection user password + - `database`: name of the database + - `collection`: name of the collection + - `extra`: parameters to pymongo.MongoClient not listed above + + """ + + super(MongoBackend, self).__init__(**kwargs) + + # Extract connection parameters from kwargs + + host = kwargs.get('host', 'localhost') + port = kwargs.get('port', 27017) + + user = kwargs.get('user', '') + password = kwargs.get('password', '') + + db_name = kwargs.get('database', 'track') + collection_name = kwargs.get('collection', 'events') + + # Other mongo connection arguments + extra = kwargs.get('extra', {}) + + # By default disable write acknowledgments, reducing the time + # blocking during an insert + extra['w'] = extra.get('w', 0) + + # Make timezone aware by default + extra['tz_aware'] = extra.get('tz_aware', True) + + # Connect to database and get collection + + self.connection = MongoClient( + host=host, + port=port, + **extra + ) + + self.collection = self.connection[db_name][collection_name] + + if user or password: + self.collection.database.authenticate(user, password) + + self._create_indexes() + + def _create_indexes(self): + # WARNING: The collection will be locked during the index + # creation. If the collection has a large number of + # documents in it, the operation can take a long time. + + # TODO: The creation of indexes can be moved to a Django + # management command or equivalent. There is also an option to + # run the indexing on the background, without locking. + self.collection.ensure_index([('time', pymongo.DESCENDING)]) + self.collection.ensure_index('event_type') + + def send(self, event): + try: + self.collection.insert(event, manipulate=False) + except PyMongoError: + msg = 'Error inserting to MongoDB event tracker backend' + log.exception(msg) diff --git a/common/djangoapps/track/backends/tests/test_django.py b/common/djangoapps/track/backends/tests/test_django.py new file mode 100644 index 0000000000..6de202235f --- /dev/null +++ b/common/djangoapps/track/backends/tests/test_django.py @@ -0,0 +1,25 @@ +from __future__ import absolute_import + +from django.test import TestCase + +from track.backends.django import DjangoBackend, TrackingLog + + +class TestDjangoBackend(TestCase): + def setUp(self): + self.backend = DjangoBackend() + + def test_django_backend(self): + event = { + 'username': 'test', + 'time': '2013-01-01T12:01:00-05:00' + } + self.backend.send(event) + + results = list(TrackingLog.objects.all()) + + self.assertEqual(len(results), 1) + self.assertEqual(results[0].username, 'test') + + # Check if time is stored in UTC + self.assertEqual(str(results[0].time), '2013-01-01 17:01:00+00:00') diff --git a/common/djangoapps/track/backends/tests/test_logger.py b/common/djangoapps/track/backends/tests/test_logger.py new file mode 100644 index 0000000000..2091d81ba3 --- /dev/null +++ b/common/djangoapps/track/backends/tests/test_logger.py @@ -0,0 +1,66 @@ +from __future__ import absolute_import + +import json +import logging + +from django.test import TestCase + +from track.backends.logger import LoggerBackend + + +class TestLoggerBackend(TestCase): + def setUp(self): + + self.handler = MockLoggingHandler() + self.handler.setLevel(logging.INFO) + + logger_name = 'track.backends.logger.test' + logger = logging.getLogger(logger_name) + logger.addHandler(self.handler) + + self.backend = LoggerBackend(name=logger_name) + + def test_logger_backend(self): + self.handler.reset() + + # Send a couple of events and check if they were recorded + # by the logger. The events are serialized to JSON. + + event = {'test': True} + event_as_json = json.dumps(event) + + self.backend.send(event) + self.backend.send(event) + + self.assertEqual( + self.handler.messages['info'], + [event_as_json, event_as_json] + ) + + +class MockLoggingHandler(logging.Handler): + """ + Mock logging handler. + + Stores records in a dictionry of lists by level. + + """ + + def __init__(self, *args, **kwargs): + super(MockLoggingHandler, self).__init__(*args, **kwargs) + self.messages = None + self.reset() + + def emit(self, record): + level = record.levelname.lower() + message = record.getMessage() + self.messages[level].append(message) + + def reset(self): + self.messages = { + 'debug': [], + 'info': [], + 'warning': [], + 'error': [], + 'critical': [], + } diff --git a/common/djangoapps/track/backends/tests/test_mongodb.py b/common/djangoapps/track/backends/tests/test_mongodb.py new file mode 100644 index 0000000000..2ba18a22a7 --- /dev/null +++ b/common/djangoapps/track/backends/tests/test_mongodb.py @@ -0,0 +1,43 @@ +from __future__ import absolute_import + +from uuid import uuid4 + +import pymongo + +from django.test import TestCase + +from track.backends.mongodb import MongoBackend + + +class TestMongoBackend(TestCase): + def setUp(self): + # Use a random database name to prevent problems with tests running + # simultenousely against the same mongo instance + database = '_track_backends_mongodb_{0}'.format(uuid4().hex) + collection = '_test' + + self.connection = pymongo.MongoClient() + self.database = self.connection[database] + self.collection = self.database[collection] + + # During tests, wait until mongo acknowledged the write + write_concern = 1 + + self.backend = MongoBackend( + database=database, + collection=collection, + w=write_concern + ) + + def test_mongo_backend(self): + self.backend.send({'test': 1}) + self.backend.send({'test': 2}) + + # Get all the objects in the db ignoring _id + results = list(self.collection.find({}, {'_id': False})) + + self.assertEqual(len(results), 2) + self.assertEqual(results, [{'test': 1}, {'test': 2}]) + + def tearDown(self): + self.connection.drop_database(self.database) diff --git a/common/djangoapps/track/models.py b/common/djangoapps/track/models.py index 1ac7656244..228819b059 100644 --- a/common/djangoapps/track/models.py +++ b/common/djangoapps/track/models.py @@ -1,23 +1 @@ -from django.db import models - - -class TrackingLog(models.Model): - """Defines the fields that are stored in the tracking log database""" - dtcreated = models.DateTimeField('creation date', auto_now_add=True) - username = models.CharField(max_length=32, blank=True) - ip = models.CharField(max_length=32, blank=True) - event_source = models.CharField(max_length=32) - event_type = models.CharField(max_length=512, blank=True) - event = models.TextField(blank=True) - agent = models.CharField(max_length=256, blank=True) - page = models.CharField(max_length=512, blank=True, null=True) - time = models.DateTimeField('event time') - host = models.CharField(max_length=64, blank=True) - - def __unicode__(self): - fmt = ( - u"[{self.time}] {self.username}@{self.ip}: " - u"{self.event_source}| {self.event_type} | " - u"{self.page} | {self.event}" - ) - return fmt.format(self=self) +from track.backends.django import TrackingLog diff --git a/common/djangoapps/track/tests/__init__.py b/common/djangoapps/track/tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/common/djangoapps/track/tests/test_tracker.py b/common/djangoapps/track/tests/test_tracker.py new file mode 100644 index 0000000000..f8b11cb59e --- /dev/null +++ b/common/djangoapps/track/tests/test_tracker.py @@ -0,0 +1,117 @@ +from django.conf import settings +from django.test import TestCase +from django.test.utils import override_settings + +import track.tracker as tracker +from track.backends import BaseBackend + + +SIMPLE_SETTINGS = { + 'default': { + 'ENGINE': 'track.tests.test_tracker.DummyBackend', + 'OPTIONS': { + 'flag': True + } + } +} + +MULTI_SETTINGS = { + 'first': { + 'ENGINE': 'track.tests.test_tracker.DummyBackend', + }, + 'second': { + 'ENGINE': 'track.tests.test_tracker.DummyBackend', + } +} + + +class TestTrackerInstantiation(TestCase): + """Test that a helper function can instantiate backends from their name.""" + def setUp(self): + # pylint: disable=protected-access + self.get_backend = tracker._instantiate_backend_from_name + + def test_instatiate_backend(self): + name = 'track.tests.test_tracker.DummyBackend' + options = {'flag': True} + backend = self.get_backend(name, options) + + self.assertIsInstance(backend, DummyBackend) + self.assertTrue(backend.flag) + + def test_instatiate_backends_with_invalid_values(self): + def get_invalid_backend(name, parameters): + return self.get_backend(name, parameters) + + options = {} + name = 'track.backends.logger' + self.assertRaises(ValueError, get_invalid_backend, name, options) + + name = 'track.backends.logger.Foo' + self.assertRaises(ValueError, get_invalid_backend, name, options) + + name = 'this.package.does.not.exists' + self.assertRaises(ValueError, get_invalid_backend, name, options) + + name = 'unittest.TestCase' + self.assertRaises(ValueError, get_invalid_backend, name, options) + + +class TestTrackerDjangoInstantiation(TestCase): + """Test if backends are initialized properly from Django settings.""" + + @override_settings(TRACKING_BACKENDS=SIMPLE_SETTINGS) + def test_django_simple_settings(self): + """Test configuration of a simple backend""" + + backends = self._reload_backends() + + self.assertEqual(len(backends), 1) + + tracker.send({}) + + self.assertEqual(backends.values()[0].count, 1) + + @override_settings(TRACKING_BACKENDS=MULTI_SETTINGS) + def test_django_multi_settings(self): + """Test if multiple backends can be configured properly.""" + + backends = self._reload_backends().values() + + self.assertEqual(len(backends), 2) + + event_count = 10 + for _ in xrange(event_count): + tracker.send({}) + + self.assertEqual(backends[0].count, event_count) + self.assertEqual(backends[1].count, event_count) + + @override_settings(TRACKING_BACKENDS=MULTI_SETTINGS) + def test_django_remove_settings(self): + """Test if a backend can be remove by setting it to None.""" + + settings.TRACKING_BACKENDS.update({'second': None}) + + backends = self._reload_backends() + + self.assertEqual(len(backends), 1) + + def _reload_backends(self): + # pylint: disable=protected-access + + # Reset backends + tracker._initialize_backends_from_django_settings() + + return tracker.backends + + +class DummyBackend(BaseBackend): + def __init__(self, **options): + super(DummyBackend, self).__init__(**options) + self.flag = options.get('flag', False) + self.count = 0 + + # pylint: disable=unused-argument + def send(self, event): + self.count += 1 diff --git a/common/djangoapps/track/tracker.py b/common/djangoapps/track/tracker.py new file mode 100644 index 0000000000..e905f118c3 --- /dev/null +++ b/common/djangoapps/track/tracker.py @@ -0,0 +1,94 @@ +""" +Module that tracks analytics events by sending them to different +configurable backends. + +The backends can be configured using Django settings as the example +below:: + + TRACKING_BACKENDS = { + 'tracker_name': { + 'ENGINE': 'class.name.for.backend', + 'OPTIONS': { + 'host': ... , + 'port': ... , + ... + } + } + } + +""" + +import inspect +from importlib import import_module + +from django.conf import settings + +from track.backends import BaseBackend + + +__all__ = ['send'] + + +backends = {} + + +def _initialize_backends_from_django_settings(): + """ + Initialize the event tracking backends according to the + configuration in django settings + + """ + backends.clear() + + config = getattr(settings, 'TRACKING_BACKENDS', {}) + + for name, values in config.iteritems(): + # Ignore empty values to turn-off default tracker backends + if values: + engine = values['ENGINE'] + options = values.get('OPTIONS', {}) + backends[name] = _instantiate_backend_from_name(engine, options) + + +def _instantiate_backend_from_name(name, options): + """ + Instantiate an event tracker backend from the full module path to + the backend class. Useful when setting backends from configuration + files. + + """ + # Parse backend name + + try: + parts = name.split('.') + module_name = '.'.join(parts[:-1]) + class_name = parts[-1] + except IndexError: + raise ValueError('Invalid event track backend %s' % name) + + # Get and verify the backend class + + try: + module = import_module(module_name) + cls = getattr(module, class_name) + if not inspect.isclass(cls) or not issubclass(cls, BaseBackend): + raise TypeError + except (ValueError, AttributeError, TypeError, ImportError): + raise ValueError('Cannot find event track backend %s' % name) + + backend = cls(**options) + + return backend + + +def send(event): + """ + Send an event object to all the initialized backends. + + """ + for backend in backends.itervalues(): + backend.send(event) + + + +_initialize_backends_from_django_settings() diff --git a/common/djangoapps/track/views.py b/common/djangoapps/track/views.py index b65f9fa043..f979b65336 100644 --- a/common/djangoapps/track/views.py +++ b/common/djangoapps/track/views.py @@ -1,35 +1,27 @@ import json import logging -import pytz import datetime + import dateutil.parser +import pytz +from pytz import UTC from django.contrib.auth.decorators import login_required from django.http import HttpResponse from django.shortcuts import redirect from django.conf import settings -from mitxmako.shortcuts import render_to_response from django_future.csrf import ensure_csrf_cookie + +from mitxmako.shortcuts import render_to_response + +from track import tracker from track.models import TrackingLog -from pytz import UTC - -log = logging.getLogger("tracking") - -LOGFIELDS = ['username', 'ip', 'event_source', 'event_type', 'event', 'agent', 'page', 'time', 'host'] def log_event(event): - """Write tracking event to log file, and optionally to TrackingLog model.""" - event_str = json.dumps(event) - log.info(event_str[:settings.TRACK_MAX_EVENT]) - if settings.MITX_FEATURES.get('ENABLE_SQL_TRACKING_LOGS'): - event['time'] = dateutil.parser.parse(event['time']) - tldat = TrackingLog(**dict((x, event[x]) for x in LOGFIELDS)) - try: - tldat.save() - except Exception as err: - log.exception(err) + """Capture a event by sending it to the register trackers""" + tracker.send(event) def user_track(request): @@ -69,6 +61,7 @@ def user_track(request): } log_event(event) + return HttpResponse('success') @@ -96,8 +89,9 @@ def server_track(request, event_type, event, page=None): "host": request.META['SERVER_NAME'], } - if event_type.startswith("/event_logs") and request.user.is_staff: # don't log - return + if event_type.startswith("/event_logs") and request.user.is_staff: + return # don't log + log_event(event) diff --git a/lms/envs/aws.py b/lms/envs/aws.py index 35275036e5..57ac35aa99 100644 --- a/lms/envs/aws.py +++ b/lms/envs/aws.py @@ -254,3 +254,6 @@ BROKER_URL = "{0}://{1}:{2}@{3}/{4}".format(CELERY_BROKER_TRANSPORT, CELERY_BROKER_PASSWORD, CELERY_BROKER_HOSTNAME, CELERY_BROKER_VHOST) + +# Event tracking +TRACKING_BACKENDS.update(AUTH_TOKENS.get("TRACKING_BACKENDS", {})) diff --git a/lms/envs/common.py b/lms/envs/common.py index 739cb92c1e..7ae3d5ebb9 100644 --- a/lms/envs/common.py +++ b/lms/envs/common.py @@ -277,10 +277,6 @@ RSS_TIMEOUT = 600 STATIC_GRAB = False DEV_CONTENT = True -# FIXME: Should we be doing this truncation? -TRACK_MAX_EVENT = 10000 -DEBUG_TRACK_LOG = False - MITX_ROOT_URL = '' LOGIN_REDIRECT_URL = MITX_ROOT_URL + '/accounts/login' @@ -312,6 +308,32 @@ COURSE_SETTINGS = { # TODO (vshnayder): Will probably need to change as we get real access control in. LMS_MIGRATION_ALLOWED_IPS = [] + +############################## EVENT TRACKING ################################# + +# FIXME: Should we be doing this truncation? +TRACK_MAX_EVENT = 10000 + +DEBUG_TRACK_LOG = False + +TRACKING_BACKENDS = { + 'logger': { + 'ENGINE': 'track.backends.logger.LoggerBackend', + 'OPTIONS': { + 'name': 'tracking' + } + } +} + +# Backawrds compatibility with ENABLE_SQL_TRACKING_LOGS feature flag. +# In the future, adding the backend to TRACKING_BACKENDS enough. +if MITX_FEATURES.get('ENABLE_SQL_TRACKING_LOGS'): + TRACKING_BACKENDS.update({ + 'sql': { + 'ENGINE': 'track.backends.django.DjangoBackend' + } + }) + ######################## subdomain specific settings ########################### COURSE_LISTINGS = {} SUBDOMAIN_BRANDING = {}