diff --git a/common/djangoapps/track/shim.py b/common/djangoapps/track/shim.py index 4318f19069..dd0b885a7c 100644 --- a/common/djangoapps/track/shim.py +++ b/common/djangoapps/track/shim.py @@ -1,5 +1,14 @@ """Map new event context values to old top-level field values. Ensures events can be parsed by legacy parsers.""" +import json +import logging + +from opaque_keys import InvalidKeyError +from opaque_keys.edx.keys import UsageKey + + +log = logging.getLogger(__name__) + CONTEXT_FIELDS_TO_INCLUDE = [ 'username', 'session', @@ -13,33 +22,39 @@ class LegacyFieldMappingProcessor(object): """Ensures all required fields are included in emitted events""" def __call__(self, event): + context = event.get('context', {}) if 'context' in event: - context = event['context'] for field in CONTEXT_FIELDS_TO_INCLUDE: - if field in context: - event[field] = context[field] - else: - event[field] = '' + self.move_from_context(field, event) remove_shim_context(event) - if 'event_type' in event.get('context', {}): - event['event_type'] = event['context']['event_type'] - del event['context']['event_type'] - else: - event['event_type'] = event.get('name', '') - if 'data' in event: event['event'] = event['data'] del event['data'] else: event['event'] = {} - if 'timestamp' in event: + if 'timestamp' in context: + event['time'] = context['timestamp'] + del context['timestamp'] + elif 'timestamp' in event: event['time'] = event['timestamp'] + + if 'timestamp' in event: del event['timestamp'] - event['event_source'] = 'server' - event['page'] = None + self.move_from_context('event_type', event, event.get('name', '')) + self.move_from_context('event_source', event, 'server') + self.move_from_context('page', event, None) + + def move_from_context(self, field, event, default_value=''): + """Move a field from the context to the top level of the event.""" + context = event.get('context', {}) + if field in context: + event[field] = context[field] + del context[field] + else: + event[field] = default_value def remove_shim_context(event): @@ -52,3 +67,66 @@ def remove_shim_context(event): for field in context_fields_to_remove: if field in context: del context[field] + + +NAME_TO_EVENT_TYPE_MAP = { + 'edx.video.played': 'play_video', + 'edx.video.paused': 'pause_video', + 'edx.video.stopped': 'stop_video', + 'edx.video.loaded': 'load_video', + 'edx.video.transcript.shown': 'show_transcript', + 'edx.video.transcript.hidden': 'hide_transcript', +} + + +class VideoEventProcessor(object): + """ + Converts new format video events into the legacy video event format. + + Mobile devices cannot actually emit events that exactly match their counterparts emitted by the LMS javascript + video player. Instead of attempting to get them to do that, we instead insert a shim here that converts the events + they *can* easily emit and converts them into the legacy format. + + TODO: Remove this shim and perform the conversion as part of some batch canonicalization process. + + """ + + def __call__(self, event): + name = event.get('name') + if not name: + return + + if name not in NAME_TO_EVENT_TYPE_MAP: + return + + event['event_type'] = NAME_TO_EVENT_TYPE_MAP[name] + + if 'event' not in event: + return + + payload = event['event'] + + if 'module_id' in payload: + module_id = payload['module_id'] + try: + usage_key = UsageKey.from_string(module_id) + except InvalidKeyError: + log.warning('Unable to parse module_id "%s"', module_id, exc_info=True) + else: + payload['id'] = usage_key.html_id() + + del payload['module_id'] + + if 'current_time' in payload: + payload['currentTime'] = payload.pop('current_time') + + event['event'] = json.dumps(payload) + + if 'context' not in event: + return + + context = event['context'] + + if 'browser_page' in context: + page, _sep, _tail = context.pop('browser_page').rpartition('/') + event['page'] = page diff --git a/common/djangoapps/track/tests/__init__.py b/common/djangoapps/track/tests/__init__.py index e69de29bb2..3efbb4343c 100644 --- a/common/djangoapps/track/tests/__init__.py +++ b/common/djangoapps/track/tests/__init__.py @@ -0,0 +1,69 @@ +"""Helpers for tests related to emitting events to the tracking logs.""" + +from datetime import datetime + +from django.test import TestCase +from django.test.utils import override_settings +from freezegun import freeze_time +from pytz import UTC + +from eventtracking import tracker +from eventtracking.django import DjangoTracker + + +FROZEN_TIME = datetime(2013, 10, 3, 8, 24, 55, tzinfo=UTC) +IN_MEMORY_BACKEND_CONFIG = { + 'mem': { + 'ENGINE': 'track.tests.InMemoryBackend' + } +} + + +class InMemoryBackend(object): + """A backend that simply stores all events in memory""" + + def __init__(self): + super(InMemoryBackend, self).__init__() + self.events = [] + + def send(self, event): + """Store the event in a list""" + self.events.append(event) + + +@freeze_time(FROZEN_TIME) +@override_settings( + EVENT_TRACKING_BACKENDS=IN_MEMORY_BACKEND_CONFIG +) +class EventTrackingTestCase(TestCase): + """ + Supports capturing of emitted events in memory and inspecting them. + + Each test gets a "clean slate" and can retrieve any events emitted during their execution. + + """ + + # Make this more robust to the addition of new events that the test doesn't care about. + + def setUp(self): + super(EventTrackingTestCase, self).setUp() + + self.tracker = DjangoTracker() + tracker.register_tracker(self.tracker) + + @property + def backend(self): + """A reference to the in-memory backend that stores the events.""" + return self.tracker.backends['mem'] + + def get_event(self, idx=0): + """Retrieve an event emitted up to this point in the test.""" + return self.backend.events[idx] + + def assert_no_events_emitted(self): + """Ensure no events were emitted at this point in the test.""" + self.assertEquals(len(self.backend.events), 0) + + def assert_events_emitted(self): + """Ensure at least one event has been emitted at this point in the test.""" + self.assertGreaterEqual(len(self.backend.events), 1) diff --git a/common/djangoapps/track/tests/test_shim.py b/common/djangoapps/track/tests/test_shim.py index 12d8f21ae8..b30731fc72 100644 --- a/common/djangoapps/track/tests/test_shim.py +++ b/common/djangoapps/track/tests/test_shim.py @@ -1,42 +1,25 @@ """Ensure emitted events contain the fields legacy processors expect to find.""" -from datetime import datetime - -from freezegun import freeze_time from mock import sentinel -from django.test import TestCase from django.test.utils import override_settings -from pytz import UTC -from eventtracking.django import DjangoTracker +from track.tests import EventTrackingTestCase, FROZEN_TIME -IN_MEMORY_BACKEND = { - 'mem': { - 'ENGINE': 'track.tests.test_shim.InMemoryBackend' - } -} - LEGACY_SHIM_PROCESSOR = [ { 'ENGINE': 'track.shim.LegacyFieldMappingProcessor' } ] -FROZEN_TIME = datetime(2013, 10, 3, 8, 24, 55, tzinfo=UTC) - -@freeze_time(FROZEN_TIME) -class LegacyFieldMappingProcessorTestCase(TestCase): +class LegacyFieldMappingProcessorTestCase(EventTrackingTestCase): """Ensure emitted events contain the fields legacy processors expect to find.""" @override_settings( - EVENT_TRACKING_BACKENDS=IN_MEMORY_BACKEND, EVENT_TRACKING_PROCESSORS=LEGACY_SHIM_PROCESSOR, ) def test_event_field_mapping(self): - django_tracker = DjangoTracker() - data = {sentinel.key: sentinel.value} context = { @@ -49,16 +32,15 @@ class LegacyFieldMappingProcessorTestCase(TestCase): 'user_id': sentinel.user_id, 'course_id': sentinel.course_id, 'org_id': sentinel.org_id, - 'event_type': sentinel.event_type, 'client_id': sentinel.client_id, } - with django_tracker.context('test', context): - django_tracker.emit(sentinel.name, data) + with self.tracker.context('test', context): + self.tracker.emit(sentinel.name, data) - emitted_event = django_tracker.backends['mem'].get_event() + emitted_event = self.get_event() expected_event = { - 'event_type': sentinel.event_type, + 'event_type': sentinel.name, 'name': sentinel.name, 'context': { 'user_id': sentinel.user_id, @@ -79,15 +61,12 @@ class LegacyFieldMappingProcessorTestCase(TestCase): self.assertEqual(expected_event, emitted_event) @override_settings( - EVENT_TRACKING_BACKENDS=IN_MEMORY_BACKEND, EVENT_TRACKING_PROCESSORS=LEGACY_SHIM_PROCESSOR, ) def test_missing_fields(self): - django_tracker = DjangoTracker() + self.tracker.emit(sentinel.name) - django_tracker.emit(sentinel.name) - - emitted_event = django_tracker.backends['mem'].get_event() + emitted_event = self.get_event() expected_event = { 'event_type': sentinel.name, @@ -104,19 +83,3 @@ class LegacyFieldMappingProcessorTestCase(TestCase): 'session': '', } self.assertEqual(expected_event, emitted_event) - - -class InMemoryBackend(object): - """A backend that simply stores all events in memory""" - - def __init__(self): - super(InMemoryBackend, self).__init__() - self.events = [] - - def send(self, event): - """Store the event in a list""" - self.events.append(event) - - def get_event(self): - """Return the first event that was emitted.""" - return self.events[0] diff --git a/common/djangoapps/track/views/segmentio.py b/common/djangoapps/track/views/segmentio.py index d89d32ff88..b613a75289 100644 --- a/common/djangoapps/track/views/segmentio.py +++ b/common/djangoapps/track/views/segmentio.py @@ -6,27 +6,25 @@ import logging from django.conf import settings from django.contrib.auth.models import User +from django.http import HttpResponse from django.views.decorators.http import require_POST from django_future.csrf import csrf_exempt -from eventtracking import tracker as eventtracker +from eventtracking import tracker from opaque_keys.edx.keys import CourseKey from opaque_keys import InvalidKeyError from util.json_request import expect_json, JsonResponse -from track import tracker -from track import shim - log = logging.getLogger(__name__) ERROR_UNAUTHORIZED = 'Unauthorized' -WARNING_IGNORED_CHANNEL = 'Channel ignored' -WARNING_IGNORED_ACTION = 'Action ignored' +WARNING_IGNORED_SOURCE = 'Source ignored' +WARNING_IGNORED_TYPE = 'Type ignored' ERROR_MISSING_USER_ID = 'Required user_id missing from context' ERROR_USER_NOT_EXIST = 'Specified user does not exist' ERROR_INVALID_USER_ID = 'Unable to parse userId as an integer' -ERROR_MISSING_EVENT_TYPE = 'The event_type field must be specified in the properties dictionary' +ERROR_MISSING_NAME = 'The name field must be specified in the properties dictionary' ERROR_MISSING_TIMESTAMP = 'Required timestamp field not found' ERROR_MISSING_RECEIVED_AT = 'Required receivedAt field not found' @@ -34,7 +32,7 @@ ERROR_MISSING_RECEIVED_AT = 'Required receivedAt field not found' @require_POST @expect_json @csrf_exempt -def track_segmentio_event(request): +def segmentio_event(request): """ An endpoint for logging events using segment.io's webhook integration. @@ -50,34 +48,9 @@ def track_segmentio_event(request): Many of the root fields of a standard edX tracking event are read out of the "properties" dictionary provided by the segment.io event, which is, in turn, provided by the client that emitted the event. - In order for an event to be logged the following preconditions must be met: - - * The "key" query string parameter must exactly match the django setting TRACKING_SEGMENTIO_WEBHOOK_SECRET. While - the endpoint is public, we want to limit access to it to the segment.io servers only. - * The value of the "channel" field of the event must be included in the list specified by the django setting - TRACKING_SEGMENTIO_ALLOWED_CHANNELS. This is intended to restrict the set of events to specific channels. For - example: just mobile devices. - * The value of the "action" field of the event must be included in the list specified by the django setting - TRACKING_SEGMENTIO_ALLOWED_ACTIONS. In order to make use of *all* of the features segment.io offers we would have - to implement some sort of persistent storage of information contained in some actions (like identify). For now, - we defer support of those actions and just support a limited set that can be handled without storing information - in external state. - * The value of the standard "userId" field of the event must be an integer that can be used to look up the user - using the primary key of the User model. - * Include an "event_type" field in the properties dictionary that indicates the edX event type. Note this can differ - from the "event" field found in the root of a segment.io event. The "event" field at the root of the structure is - intended to be human readable, the "event_type" field is expected to conform to the standard for naming events - found in the edX data documentation. - - Additionally the event can optionally: - - * Provide a "context" dictionary in the properties dictionary. This dictionary will be applied to the - existing context on the server overriding any existing keys. This context dictionary should include a "course_id" - field when the event is scoped to a particular course. The value of this field should be a valid course key. The - context may contain other arbitrary data that will be logged with the event, for example: identification - information for the device that emitted the event. - * Provide a "page" parameter in the properties dictionary which indicates the page that was being displayed to the - user or the mobile application screen that was visible to the user at the time the event was emitted. + In order for an event to be accepted and logged the "key" query string parameter must exactly match the django + setting TRACKING_SEGMENTIO_WEBHOOK_SECRET. While the endpoint is public, we want to limit access to it to the + segment.io servers only. """ @@ -88,7 +61,59 @@ def track_segmentio_event(request): expected_secret = getattr(settings, 'TRACKING_SEGMENTIO_WEBHOOK_SECRET', None) provided_secret = request.GET.get('key') if not expected_secret or provided_secret != expected_secret: - return failure_response(ERROR_UNAUTHORIZED, status=401) + return HttpResponse(status=401) + + try: + track_segmentio_event(request) + except EventValidationError as err: + log.warning( + 'Unable to process event received from segment.io: message="%s" event="%s"', + str(err), + request.body + ) + # Do not let the requestor know why the event wasn't saved. If the secret key is compromised this diagnostic + # information could be used to scrape useful information from the system. + + return HttpResponse(status=200) + + +class EventValidationError(Exception): + """Raised when an invalid event is received.""" + pass + + +def track_segmentio_event(request): # pylint: disable=too-many-statements + """ + Record an event received from segment.io to the tracking logs. + + This method assumes that the event has come from a trusted source. + + The received event must meet the following conditions in order to be logged: + + * The value of the "type" field of the event must be included in the list specified by the django setting + TRACKING_SEGMENTIO_ALLOWED_TYPES. In order to make use of *all* of the features segment.io offers we would have + to implement some sort of persistent storage of information contained in some actions (like identify). For now, + we defer support of those actions and just support a limited set that can be handled without storing information + in external state. + * The value of the standard "userId" field of the event must be an integer that can be used to look up the user + using the primary key of the User model. + * Include a "name" field in the properties dictionary that indicates the edX event name. Note this can differ + from the "event" field found in the root of a segment.io event. The "event" field at the root of the structure is + intended to be human readable, the "name" field is expected to conform to the standard for naming events + found in the edX data documentation. + * Have originated from a known and trusted segment.io client library. The django setting + TRACKING_SEGMENTIO_SOURCE_MAP maps the known library names to internal "event_source" strings. In order to be + logged the event must have a library name that is a valid key in that map. + + Additionally the event can optionally: + + * Provide a "context" dictionary in the properties dictionary. This dictionary will be applied to the + existing context on the server overriding any existing keys. This context dictionary should include a "course_id" + field when the event is scoped to a particular course. The value of this field should be a valid course key. The + context may contain other arbitrary data that will be logged with the event, for example: identification + information for the device that emitted the event. + + """ # The POST body will contain the JSON encoded event full_segment_event = request.json @@ -96,52 +121,51 @@ def track_segmentio_event(request): # We mostly care about the properties segment_event = full_segment_event.get('properties', {}) - def logged_failure_response(*args, **kwargs): - """Indicate a failure and log information about the event that will aide debugging efforts""" - failed_response = failure_response(*args, **kwargs) - log.warning('Unable to process event received from segment.io: %s', json.dumps(full_segment_event)) - return failed_response - - # Selectively listen to particular channels, note that the client can set the "event_source" field in the - # "properties" dict to override the channel provided by segment.io. This is necessary because there is a bug in some - # segment.io client libraries that prevented them from sending correct channel fields. - channel = segment_event.get('event_source') - allowed_channels = [c.lower() for c in getattr(settings, 'TRACKING_SEGMENTIO_ALLOWED_CHANNELS', [])] - if not channel or channel.lower() not in allowed_channels: - return response(WARNING_IGNORED_CHANNEL, committed=False) - - # Ignore actions that are unsupported - action = full_segment_event.get('action') - allowed_actions = [a.lower() for a in getattr(settings, 'TRACKING_SEGMENTIO_ALLOWED_ACTIONS', [])] - if not action or action.lower() not in allowed_actions: - return response(WARNING_IGNORED_ACTION, committed=False) + # Start with the context provided by segment.io in the "client" field if it exists + # We should tightly control which fields actually get included in the event emitted. + segment_context = full_segment_event.get('context') + # Build up the event context by parsing fields out of the event received from segment.io context = {} - # Start with the context provided by segment.io in the "client" field if it exists - segment_context = full_segment_event.get('context') - if segment_context: - context['client'] = segment_context - user_agent = segment_context.get('userAgent', '') + library_name = segment_context.get('library', {}).get('name') + source_map = getattr(settings, 'TRACKING_SEGMENTIO_SOURCE_MAP', {}) + event_source = source_map.get(library_name) + if not event_source: + raise EventValidationError(WARNING_IGNORED_SOURCE) else: - user_agent = '' + context['event_source'] = event_source + + # Ignore types that are unsupported + segment_event_type = full_segment_event.get('type') + allowed_types = [a.lower() for a in getattr(settings, 'TRACKING_SEGMENTIO_ALLOWED_TYPES', [])] + if not segment_event_type or segment_event_type.lower() not in allowed_types: + raise EventValidationError(WARNING_IGNORED_TYPE) + + if segment_context: + context['client'] = dict(segment_context) + context['agent'] = segment_context.get('userAgent', '') + for field in ('traits', 'integrations', 'userAgent'): + if field in context['client']: + del context['client'][field] # Overlay any context provided in the properties context.update(segment_event.get('context', {})) user_id = full_segment_event.get('userId') if not user_id: - return logged_failure_response(ERROR_MISSING_USER_ID) + raise EventValidationError(ERROR_MISSING_USER_ID) # userId is assumed to be the primary key of the django User model try: user = User.objects.get(pk=user_id) except User.DoesNotExist: - return logged_failure_response(ERROR_USER_NOT_EXIST) + raise EventValidationError(ERROR_USER_NOT_EXIST) except ValueError: - return logged_failure_response(ERROR_INVALID_USER_ID) + raise EventValidationError(ERROR_INVALID_USER_ID) else: context['user_id'] = user.id + context['username'] = user.username # course_id is expected to be provided in the context when applicable course_id = context.get('course_id') @@ -159,71 +183,22 @@ def track_segmentio_event(request): ) if 'timestamp' in full_segment_event: - time = parse_iso8601_timestamp(full_segment_event['timestamp']) + context['timestamp'] = parse_iso8601_timestamp(full_segment_event['timestamp']) else: - return logged_failure_response(ERROR_MISSING_TIMESTAMP) + raise EventValidationError(ERROR_MISSING_TIMESTAMP) if 'receivedAt' in full_segment_event: context['received_at'] = parse_iso8601_timestamp(full_segment_event['receivedAt']) else: - return logged_failure_response(ERROR_MISSING_RECEIVED_AT) + raise EventValidationError(ERROR_MISSING_RECEIVED_AT) - if 'event_type' in segment_event: - event_type = segment_event['event_type'] - else: - return logged_failure_response(ERROR_MISSING_EVENT_TYPE) + if 'name' not in segment_event: + raise EventValidationError(ERROR_MISSING_NAME) - with eventtracker.get_tracker().context('edx.segmentio', context): - complete_context = eventtracker.get_tracker().resolve_context() - event = { - "username": user.username, - "event_type": event_type, - "name": segment_event.get('name', ''), - # Will be either "mobile", "browser" or "server". These names happen to be identical to the names we already - # use so no mapping is necessary. - "event_source": channel, - # This timestamp is reported by the local clock on the device so it may be wildly incorrect. - "time": time, - "context": complete_context, - "page": segment_event.get('page'), - "host": complete_context.get('host', ''), - "agent": user_agent, - "ip": segment_event.get('ip', ''), - "event": segment_event.get('event', {}), - } + context['ip'] = segment_event.get('context', {}).get('ip', '') - # Some duplicated fields are passed into event-tracking via the context by track.middleware. - # Remove them from the event here since they are captured elsewhere. - shim.remove_shim_context(event) - - tracker.send(event) - - return response() - - -def response(message=None, status=200, committed=True): - """ - Produce a response from the segment.io event handler. - - Returns: A JSON encoded string giving more information about what action was taken while processing the request. - """ - result = { - 'committed': committed - } - - if message: - result['message'] = message - - return JsonResponse(result, status=status) - - -def failure_response(message, status=400): - """ - Return a failure response when something goes wrong handling segment.io events. - - Returns: A JSON encoded string giving more information about what went wrong when processing the request. - """ - return response(message=message, status=status, committed=False) + with tracker.get_tracker().context('edx.segmentio', context): + tracker.emit(segment_event['name'], segment_event.get('data', {})) def parse_iso8601_timestamp(timestamp): diff --git a/common/djangoapps/track/views/tests/test_segmentio.py b/common/djangoapps/track/views/tests/test_segmentio.py index d3746067a8..5c61229faa 100644 --- a/common/djangoapps/track/views/tests/test_segmentio.py +++ b/common/djangoapps/track/views/tests/test_segmentio.py @@ -3,85 +3,99 @@ from datetime import datetime import json -from ddt import ddt, data -from freezegun import freeze_time -from mock import patch, sentinel +from ddt import ddt, data, unpack +from mock import sentinel from django.contrib.auth.models import User -from django.test import TestCase from django.test.client import RequestFactory from django.test.utils import override_settings from track.middleware import TrackMiddleware +from track.tests import EventTrackingTestCase from track.views import segmentio -EXPECTED_TIME = datetime(2013, 10, 3, 8, 24, 55) SECRET = 'anything' ENDPOINT = '/segmentio/test/event' USER_ID = 10 +MOBILE_SHIM_PROCESSOR = [ + { + 'ENGINE': 'track.shim.LegacyFieldMappingProcessor' + }, + { + 'ENGINE': 'track.shim.VideoEventProcessor' + } +] + + +def expect_failure_with_message(message): + """Ensure the test raises an exception and does not emit an event""" + def test_decorator(func): + def test_decorated(self, *args, **kwargs): + self.assertRaisesRegexp(segmentio.EventValidationError, message, func, self, *args, **kwargs) + self.assert_no_events_emitted() + return test_decorated + return test_decorator + @ddt @override_settings( TRACKING_SEGMENTIO_WEBHOOK_SECRET=SECRET, TRACKING_IGNORE_URL_PATTERNS=[ENDPOINT], - TRACKING_SEGMENTIO_ALLOWED_ACTIONS=['Track', 'Screen'], - TRACKING_SEGMENTIO_ALLOWED_CHANNELS=['mobile'] + TRACKING_SEGMENTIO_ALLOWED_TYPES=['track'], + TRACKING_SEGMENTIO_SOURCE_MAP={'test-app': 'mobile'}, + EVENT_TRACKING_PROCESSORS=MOBILE_SHIM_PROCESSOR, ) -@freeze_time(EXPECTED_TIME) -class SegmentIOTrackingTestCase(TestCase): +class SegmentIOTrackingTestCase(EventTrackingTestCase): """Test processing of segment.io events""" def setUp(self): + super(SegmentIOTrackingTestCase, self).setUp() self.request_factory = RequestFactory() - patcher = patch('track.views.segmentio.tracker') - self.mock_tracker = patcher.start() - self.addCleanup(patcher.stop) - def test_get_request(self): request = self.request_factory.get(ENDPOINT) - response = segmentio.track_segmentio_event(request) + response = segmentio.segmentio_event(request) self.assertEquals(response.status_code, 405) - self.assertFalse(self.mock_tracker.send.called) # pylint: disable=maybe-no-member + self.assert_no_events_emitted() @override_settings( TRACKING_SEGMENTIO_WEBHOOK_SECRET=None ) def test_no_secret_config(self): request = self.request_factory.post(ENDPOINT) - response = segmentio.track_segmentio_event(request) - self.assert_segmentio_uncommitted_response(response, segmentio.ERROR_UNAUTHORIZED, 401) - - def assert_segmentio_uncommitted_response(self, response, expected_message, expected_status=400): - """Assert that no event was emitted and an appropriate commit==false message was returned""" - self.assertEquals(response.status_code, expected_status) - parsed_content = json.loads(response.content) - self.assertEquals(parsed_content, {'committed': False, 'message': expected_message}) - self.assertFalse(self.mock_tracker.send.called) # pylint: disable=maybe-no-member + response = segmentio.segmentio_event(request) + self.assertEquals(response.status_code, 401) + self.assert_no_events_emitted() def test_no_secret_provided(self): request = self.request_factory.post(ENDPOINT) - response = segmentio.track_segmentio_event(request) - self.assert_segmentio_uncommitted_response(response, segmentio.ERROR_UNAUTHORIZED, 401) + response = segmentio.segmentio_event(request) + self.assertEquals(response.status_code, 401) + self.assert_no_events_emitted() def test_secret_mismatch(self): request = self.create_request(key='y') - response = segmentio.track_segmentio_event(request) - self.assert_segmentio_uncommitted_response(response, segmentio.ERROR_UNAUTHORIZED, 401) + response = segmentio.segmentio_event(request) + self.assertEquals(response.status_code, 401) + self.assert_no_events_emitted() def create_request(self, key=None, **kwargs): """Create a fake request that emulates a request from the segment.io servers to ours""" if key is None: key = SECRET - return self.request_factory.post(ENDPOINT + "?key=" + key, **kwargs) + request = self.request_factory.post(ENDPOINT + "?key=" + key, **kwargs) + if 'data' in kwargs: + request.json = json.loads(kwargs['data']) - @data('Identify', 'Group', 'Alias', 'Page', 'identify') + return request + + @data('identify', 'Group', 'Alias', 'Page', 'identify', 'screen') + @expect_failure_with_message(segmentio.WARNING_IGNORED_TYPE) def test_segmentio_ignore_actions(self, action): - response = self.post_segmentio_event(action=action) - self.assert_segmentio_uncommitted_response(response, segmentio.WARNING_IGNORED_ACTION, 200) + self.post_segmentio_event(action=action) def post_segmentio_event(self, **kwargs): """Post a fake segment.io event to the view that processes it""" @@ -89,12 +103,7 @@ class SegmentIOTrackingTestCase(TestCase): data=self.create_segmentio_event_json(**kwargs), content_type='application/json' ) - return segmentio.track_segmentio_event(request) - - @data('server', 'browser', 'Browser') - def test_segmentio_ignore_channels(self, channel): - response = self.post_segmentio_event(event_source=channel) - self.assert_segmentio_uncommitted_response(response, segmentio.WARNING_IGNORED_CHANNEL, 200) + segmentio.track_segmentio_event(request) def create_segmentio_event(self, **kwargs): """Populate a fake segment.io event with data of interest""" @@ -103,18 +112,16 @@ class SegmentIOTrackingTestCase(TestCase): "userId": kwargs.get('user_id', USER_ID), "event": "Did something", "properties": { - 'event_type': kwargs.get('event_type', ''), - 'event_source': kwargs.get('event_source', 'mobile'), - 'event': kwargs.get('event', {}), + 'name': kwargs.get('name', str(sentinel.name)), + 'data': kwargs.get('data', {}), 'context': { 'course_id': kwargs.get('course_id') or '', - }, - 'name': str(sentinel.name), + } }, - "channel": kwargs.get('channel', 'mobile'), + "channel": 'server', "context": { "library": { - "name": "unknown", + "name": kwargs.get('library_name', 'test-app'), "version": "unknown" }, 'userAgent': str(sentinel.user_agent), @@ -133,42 +140,50 @@ class SegmentIOTrackingTestCase(TestCase): "action": action } + if 'context' in kwargs: + sample_event['properties']['context'].update(kwargs['context']) + return sample_event def create_segmentio_event_json(self, **kwargs): """Return a json string containing a fake segment.io event""" return json.dumps(self.create_segmentio_event(**kwargs)) - def test_no_user_for_user_id(self): - response = self.post_segmentio_event(user_id=40) - self.assert_segmentio_uncommitted_response(response, segmentio.ERROR_USER_NOT_EXIST, 400) + @expect_failure_with_message(segmentio.WARNING_IGNORED_SOURCE) + def test_segmentio_ignore_unknown_libraries(self): + self.post_segmentio_event(library_name='foo') + @expect_failure_with_message(segmentio.ERROR_USER_NOT_EXIST) + def test_no_user_for_user_id(self): + self.post_segmentio_event(user_id=40) + + @expect_failure_with_message(segmentio.ERROR_INVALID_USER_ID) def test_invalid_user_id(self): - response = self.post_segmentio_event(user_id='foobar') - self.assert_segmentio_uncommitted_response(response, segmentio.ERROR_INVALID_USER_ID, 400) + self.post_segmentio_event(user_id='foobar') @data('foo/bar/baz', 'course-v1:foo+bar+baz') def test_success(self, course_id): middleware = TrackMiddleware() request = self.create_request( - data=self.create_segmentio_event_json(event_type=str(sentinel.event_type), event={'foo': 'bar'}, course_id=course_id), + data=self.create_segmentio_event_json(data={'foo': 'bar'}, course_id=course_id), content_type='application/json' ) User.objects.create(pk=USER_ID, username=str(sentinel.username)) middleware.process_request(request) # The middleware normally emits an event, make sure it doesn't in this case. - self.assertFalse(self.mock_tracker.send.called) # pylint: disable=maybe-no-member + self.assert_no_events_emitted() try: - response = segmentio.track_segmentio_event(request) + response = segmentio.segmentio_event(request) self.assertEquals(response.status_code, 200) expected_event = { 'username': str(sentinel.username), 'ip': '', + 'session': '', 'event_source': 'mobile', - 'event_type': str(sentinel.event_type), + 'event_type': str(sentinel.name), 'name': str(sentinel.name), 'event': {'foo': 'bar'}, 'agent': str(sentinel.user_agent), @@ -182,10 +197,9 @@ class SegmentIOTrackingTestCase(TestCase): 'path': ENDPOINT, 'client': { 'library': { - 'name': 'unknown', + 'name': 'test-app', 'version': 'unknown' - }, - 'userAgent': str(sentinel.user_agent) + } }, 'received_at': datetime.strptime("2014-08-27T16:33:39.100Z", "%Y-%m-%dT%H:%M:%S.%fZ"), }, @@ -193,7 +207,7 @@ class SegmentIOTrackingTestCase(TestCase): finally: middleware.process_response(request, None) - self.mock_tracker.send.assert_called_once_with(expected_event) # pylint: disable=maybe-no-member + self.assertEquals(self.get_event(), expected_event) def test_invalid_course_id(self): request = self.create_request( @@ -201,22 +215,22 @@ class SegmentIOTrackingTestCase(TestCase): content_type='application/json' ) User.objects.create(pk=USER_ID, username=str(sentinel.username)) - response = segmentio.track_segmentio_event(request) - self.assertEquals(response.status_code, 200) - self.assertTrue(self.mock_tracker.send.called) # pylint: disable=maybe-no-member + segmentio.track_segmentio_event(request) + self.assert_events_emitted() - def test_missing_event_type(self): + @expect_failure_with_message(segmentio.ERROR_MISSING_NAME) + def test_missing_name(self): sample_event_raw = self.create_segmentio_event() - del sample_event_raw['properties']['event_type'] + del sample_event_raw['properties']['name'] request = self.create_request( data=json.dumps(sample_event_raw), content_type='application/json' ) User.objects.create(pk=USER_ID, username=str(sentinel.username)) - response = segmentio.track_segmentio_event(request) - self.assert_segmentio_uncommitted_response(response, segmentio.ERROR_MISSING_EVENT_TYPE, 400) + segmentio.track_segmentio_event(request) + @expect_failure_with_message(segmentio.ERROR_MISSING_TIMESTAMP) def test_missing_timestamp(self): sample_event_raw = self.create_event_without_fields('timestamp') request = self.create_request( @@ -225,8 +239,18 @@ class SegmentIOTrackingTestCase(TestCase): ) User.objects.create(pk=USER_ID, username=str(sentinel.username)) - response = segmentio.track_segmentio_event(request) - self.assert_segmentio_uncommitted_response(response, segmentio.ERROR_MISSING_TIMESTAMP, 400) + segmentio.track_segmentio_event(request) + + @expect_failure_with_message(segmentio.ERROR_MISSING_RECEIVED_AT) + def test_missing_received_at(self): + sample_event_raw = self.create_event_without_fields('receivedAt') + request = self.create_request( + data=json.dumps(sample_event_raw), + content_type='application/json' + ) + User.objects.create(pk=USER_ID, username=str(sentinel.username)) + + segmentio.track_segmentio_event(request) def create_event_without_fields(self, *fields): """Create a fake event and remove some fields from it""" @@ -238,28 +262,108 @@ class SegmentIOTrackingTestCase(TestCase): return event - def test_missing_received_at(self): - sample_event_raw = self.create_event_without_fields('receivedAt') + def test_string_user_id(self): + User.objects.create(pk=USER_ID, username=str(sentinel.username)) + self.post_segmentio_event(user_id=str(USER_ID)) + self.assert_events_emitted() + + def test_hiding_failure(self): + sample_event_raw = self.create_event_without_fields('timestamp') request = self.create_request( data=json.dumps(sample_event_raw), content_type='application/json' ) User.objects.create(pk=USER_ID, username=str(sentinel.username)) - response = segmentio.track_segmentio_event(request) - self.assert_segmentio_uncommitted_response(response, segmentio.ERROR_MISSING_RECEIVED_AT, 400) - - def test_string_user_id(self): - User.objects.create(pk=USER_ID, username=str(sentinel.username)) - response = self.post_segmentio_event(user_id=str(USER_ID)) - result = self.assert_segmentio_committed_response(response) - self.assertEquals(result['context']['user_id'], USER_ID) - - def assert_segmentio_committed_response(self, response): - """Assert that an event was emitted""" + response = segmentio.segmentio_event(request) self.assertEquals(response.status_code, 200) - parsed_content = json.loads(response.content) - self.assertEquals(parsed_content, {'committed': True}) - self.assertTrue(self.mock_tracker.send.called) # pylint: disable=maybe-no-member - return self.mock_tracker.send.mock_calls[0][1][0] + self.assert_no_events_emitted() + @data( + ('edx.video.played', 'play_video'), + ('edx.video.paused', 'pause_video'), + ('edx.video.stopped', 'stop_video'), + ('edx.video.loaded', 'load_video'), + ('edx.video.transcript.shown', 'show_transcript'), + ('edx.video.transcript.hidden', 'hide_transcript'), + ) + @unpack + def test_video_event(self, name, event_type): + course_id = 'foo/bar/baz' + middleware = TrackMiddleware() + + input_payload = { + 'current_time': 132.134456, + 'module_id': 'i4x://foo/bar/baz/some_module', + 'code': 'mobile' + } + if name == 'edx.video.loaded': + del input_payload['current_time'] + + request = self.create_request( + data=self.create_segmentio_event_json( + name=name, + data=input_payload, + context={ + 'course_id': course_id, + 'browser_page': 'https://testserver/courses/foo/bar/baz/courseware/Week_1/Activity/2', + 'application': { + 'name': 'edx.mobileapp.android', + 'version': '29', + 'component': 'videoplayer' + } + }), + content_type='application/json' + ) + User.objects.create(pk=USER_ID, username=str(sentinel.username)) + + middleware.process_request(request) + try: + response = segmentio.segmentio_event(request) + self.assertEquals(response.status_code, 200) + + expected_event_without_payload = { + 'username': str(sentinel.username), + 'ip': '', + 'session': '', + 'event_source': 'mobile', + 'event_type': event_type, + 'name': name, + 'agent': str(sentinel.user_agent), + 'page': 'https://testserver/courses/foo/bar/baz/courseware/Week_1/Activity', + 'time': datetime.strptime("2014-08-27T16:33:39.215Z", "%Y-%m-%dT%H:%M:%S.%fZ"), + 'host': 'testserver', + 'context': { + 'user_id': USER_ID, + 'course_id': course_id, + 'org_id': 'foo', + 'path': ENDPOINT, + 'client': { + 'library': { + 'name': 'test-app', + 'version': 'unknown' + } + }, + 'received_at': datetime.strptime("2014-08-27T16:33:39.100Z", "%Y-%m-%dT%H:%M:%S.%fZ"), + 'application': { + 'name': 'edx.mobileapp.android', + 'version': '29', + 'component': 'videoplayer' + } + }, + } + expected_payload = { + 'currentTime': 132.134456, + 'id': 'i4x-foo-bar-baz-some_module', + 'code': 'mobile' + } + if name == 'edx.video.loaded': + del expected_payload['currentTime'] + finally: + middleware.process_response(request, None) + + actual_event = dict(self.get_event()) + payload = json.loads(actual_event.pop('event')) + + self.assertEquals(actual_event, expected_event_without_payload) + self.assertEquals(payload, expected_payload) diff --git a/lms/envs/aws.py b/lms/envs/aws.py index fc2a70db9f..c6eb470058 100644 --- a/lms/envs/aws.py +++ b/lms/envs/aws.py @@ -396,9 +396,8 @@ STUDENT_FILEUPLOAD_MAX_SIZE = ENV_TOKENS.get("STUDENT_FILEUPLOAD_MAX_SIZE", STUD TRACKING_BACKENDS.update(AUTH_TOKENS.get("TRACKING_BACKENDS", {})) EVENT_TRACKING_BACKENDS.update(AUTH_TOKENS.get("EVENT_TRACKING_BACKENDS", {})) TRACKING_SEGMENTIO_WEBHOOK_SECRET = AUTH_TOKENS.get("TRACKING_SEGMENTIO_WEBHOOK_SECRET", TRACKING_SEGMENTIO_WEBHOOK_SECRET) -TRACKING_SEGMENTIO_ALLOWED_ACTIONS = ENV_TOKENS.get("TRACKING_SEGMENTIO_ALLOWED_ACTIONS", TRACKING_SEGMENTIO_ALLOWED_ACTIONS) -TRACKING_SEGMENTIO_ALLOWED_CHANNELS = ENV_TOKENS.get("TRACKING_SEGMENTIO_ALLOWED_CHANNELS", TRACKING_SEGMENTIO_ALLOWED_CHANNELS) - +TRACKING_SEGMENTIO_ALLOWED_TYPES = ENV_TOKENS.get("TRACKING_SEGMENTIO_ALLOWED_TYPES", TRACKING_SEGMENTIO_ALLOWED_TYPES) +TRACKING_SEGMENTIO_SOURCE_MAP = ENV_TOKENS.get("TRACKING_SEGMENTIO_SOURCE_MAP", TRACKING_SEGMENTIO_SOURCE_MAP) # Student identity verification settings VERIFY_STUDENT = AUTH_TOKENS.get("VERIFY_STUDENT", VERIFY_STUDENT) diff --git a/lms/envs/common.py b/lms/envs/common.py index f3c427a937..d31ac8efe8 100644 --- a/lms/envs/common.py +++ b/lms/envs/common.py @@ -506,6 +506,9 @@ EVENT_TRACKING_BACKENDS = { EVENT_TRACKING_PROCESSORS = [ { 'ENGINE': 'track.shim.LegacyFieldMappingProcessor' + }, + { + 'ENGINE': 'track.shim.VideoEventProcessor' } ] @@ -524,8 +527,11 @@ if FEATURES.get('ENABLE_SQL_TRACKING_LOGS'): }) TRACKING_SEGMENTIO_WEBHOOK_SECRET = None -TRACKING_SEGMENTIO_ALLOWED_ACTIONS = ['Track', 'Screen'] -TRACKING_SEGMENTIO_ALLOWED_CHANNELS = ['mobile'] +TRACKING_SEGMENTIO_ALLOWED_TYPES = ['track'] +TRACKING_SEGMENTIO_SOURCE_MAP = { + 'analytics-android': 'mobile', + 'analytics-ios': 'mobile', +} ######################## GOOGLE ANALYTICS ########################### GOOGLE_ANALYTICS_ACCOUNT = None diff --git a/lms/urls.py b/lms/urls.py index 719ac4c07d..a306ae4771 100644 --- a/lms/urls.py +++ b/lms/urls.py @@ -28,7 +28,7 @@ urlpatterns = ('', # nopep8 url(r'^reject_name_change$', 'student.views.reject_name_change'), url(r'^pending_name_changes$', 'student.views.pending_name_changes'), url(r'^event$', 'track.views.user_track'), - url(r'^segmentio/event$', 'track.views.segmentio.track_segmentio_event'), + url(r'^segmentio/event$', 'track.views.segmentio.segmentio_event'), url(r'^t/(?P