From 821c97fbb42aacc7072950f00bf18222fcecb795 Mon Sep 17 00:00:00 2001 From: Gabe Mulley Date: Tue, 14 Oct 2014 17:09:47 -0400 Subject: [PATCH] Implement shim for mobile video events. This PR addresses the following issues: 1) All requests return a 200 OK unless there is an authorization failure. This is deliberate in case the secret key is compromised. 2) Push all of the nasty logic necessary to generate compatible video events into the LMS instead of trying to do that mapping on the mobile devices. 3) Stop using the deprecated "action" field in the segment.io event. According to their support team this field should not be used anymore and is just around for backwards compatibility reasons. Fixes: AN-3818 --- common/djangoapps/track/shim.py | 106 ++++++- common/djangoapps/track/tests/__init__.py | 69 +++++ common/djangoapps/track/tests/test_shim.py | 53 +--- common/djangoapps/track/views/segmentio.py | 219 +++++++------- .../track/views/tests/test_segmentio.py | 272 ++++++++++++------ lms/envs/aws.py | 5 +- lms/envs/common.py | 10 +- lms/urls.py | 2 +- 8 files changed, 465 insertions(+), 271 deletions(-) diff --git a/common/djangoapps/track/shim.py b/common/djangoapps/track/shim.py index 4318f19069..dd0b885a7c 100644 --- a/common/djangoapps/track/shim.py +++ b/common/djangoapps/track/shim.py @@ -1,5 +1,14 @@ """Map new event context values to old top-level field values. Ensures events can be parsed by legacy parsers.""" +import json +import logging + +from opaque_keys import InvalidKeyError +from opaque_keys.edx.keys import UsageKey + + +log = logging.getLogger(__name__) + CONTEXT_FIELDS_TO_INCLUDE = [ 'username', 'session', @@ -13,33 +22,39 @@ class LegacyFieldMappingProcessor(object): """Ensures all required fields are included in emitted events""" def __call__(self, event): + context = event.get('context', {}) if 'context' in event: - context = event['context'] for field in CONTEXT_FIELDS_TO_INCLUDE: - if field in context: - event[field] = context[field] - else: - event[field] = '' + self.move_from_context(field, event) remove_shim_context(event) - if 'event_type' in event.get('context', {}): - event['event_type'] = event['context']['event_type'] - del event['context']['event_type'] - else: - event['event_type'] = event.get('name', '') - if 'data' in event: event['event'] = event['data'] del event['data'] else: event['event'] = {} - if 'timestamp' in event: + if 'timestamp' in context: + event['time'] = context['timestamp'] + del context['timestamp'] + elif 'timestamp' in event: event['time'] = event['timestamp'] + + if 'timestamp' in event: del event['timestamp'] - event['event_source'] = 'server' - event['page'] = None + self.move_from_context('event_type', event, event.get('name', '')) + self.move_from_context('event_source', event, 'server') + self.move_from_context('page', event, None) + + def move_from_context(self, field, event, default_value=''): + """Move a field from the context to the top level of the event.""" + context = event.get('context', {}) + if field in context: + event[field] = context[field] + del context[field] + else: + event[field] = default_value def remove_shim_context(event): @@ -52,3 +67,66 @@ def remove_shim_context(event): for field in context_fields_to_remove: if field in context: del context[field] + + +NAME_TO_EVENT_TYPE_MAP = { + 'edx.video.played': 'play_video', + 'edx.video.paused': 'pause_video', + 'edx.video.stopped': 'stop_video', + 'edx.video.loaded': 'load_video', + 'edx.video.transcript.shown': 'show_transcript', + 'edx.video.transcript.hidden': 'hide_transcript', +} + + +class VideoEventProcessor(object): + """ + Converts new format video events into the legacy video event format. + + Mobile devices cannot actually emit events that exactly match their counterparts emitted by the LMS javascript + video player. Instead of attempting to get them to do that, we instead insert a shim here that converts the events + they *can* easily emit and converts them into the legacy format. + + TODO: Remove this shim and perform the conversion as part of some batch canonicalization process. + + """ + + def __call__(self, event): + name = event.get('name') + if not name: + return + + if name not in NAME_TO_EVENT_TYPE_MAP: + return + + event['event_type'] = NAME_TO_EVENT_TYPE_MAP[name] + + if 'event' not in event: + return + + payload = event['event'] + + if 'module_id' in payload: + module_id = payload['module_id'] + try: + usage_key = UsageKey.from_string(module_id) + except InvalidKeyError: + log.warning('Unable to parse module_id "%s"', module_id, exc_info=True) + else: + payload['id'] = usage_key.html_id() + + del payload['module_id'] + + if 'current_time' in payload: + payload['currentTime'] = payload.pop('current_time') + + event['event'] = json.dumps(payload) + + if 'context' not in event: + return + + context = event['context'] + + if 'browser_page' in context: + page, _sep, _tail = context.pop('browser_page').rpartition('/') + event['page'] = page diff --git a/common/djangoapps/track/tests/__init__.py b/common/djangoapps/track/tests/__init__.py index e69de29bb2..3efbb4343c 100644 --- a/common/djangoapps/track/tests/__init__.py +++ b/common/djangoapps/track/tests/__init__.py @@ -0,0 +1,69 @@ +"""Helpers for tests related to emitting events to the tracking logs.""" + +from datetime import datetime + +from django.test import TestCase +from django.test.utils import override_settings +from freezegun import freeze_time +from pytz import UTC + +from eventtracking import tracker +from eventtracking.django import DjangoTracker + + +FROZEN_TIME = datetime(2013, 10, 3, 8, 24, 55, tzinfo=UTC) +IN_MEMORY_BACKEND_CONFIG = { + 'mem': { + 'ENGINE': 'track.tests.InMemoryBackend' + } +} + + +class InMemoryBackend(object): + """A backend that simply stores all events in memory""" + + def __init__(self): + super(InMemoryBackend, self).__init__() + self.events = [] + + def send(self, event): + """Store the event in a list""" + self.events.append(event) + + +@freeze_time(FROZEN_TIME) +@override_settings( + EVENT_TRACKING_BACKENDS=IN_MEMORY_BACKEND_CONFIG +) +class EventTrackingTestCase(TestCase): + """ + Supports capturing of emitted events in memory and inspecting them. + + Each test gets a "clean slate" and can retrieve any events emitted during their execution. + + """ + + # Make this more robust to the addition of new events that the test doesn't care about. + + def setUp(self): + super(EventTrackingTestCase, self).setUp() + + self.tracker = DjangoTracker() + tracker.register_tracker(self.tracker) + + @property + def backend(self): + """A reference to the in-memory backend that stores the events.""" + return self.tracker.backends['mem'] + + def get_event(self, idx=0): + """Retrieve an event emitted up to this point in the test.""" + return self.backend.events[idx] + + def assert_no_events_emitted(self): + """Ensure no events were emitted at this point in the test.""" + self.assertEquals(len(self.backend.events), 0) + + def assert_events_emitted(self): + """Ensure at least one event has been emitted at this point in the test.""" + self.assertGreaterEqual(len(self.backend.events), 1) diff --git a/common/djangoapps/track/tests/test_shim.py b/common/djangoapps/track/tests/test_shim.py index 12d8f21ae8..b30731fc72 100644 --- a/common/djangoapps/track/tests/test_shim.py +++ b/common/djangoapps/track/tests/test_shim.py @@ -1,42 +1,25 @@ """Ensure emitted events contain the fields legacy processors expect to find.""" -from datetime import datetime - -from freezegun import freeze_time from mock import sentinel -from django.test import TestCase from django.test.utils import override_settings -from pytz import UTC -from eventtracking.django import DjangoTracker +from track.tests import EventTrackingTestCase, FROZEN_TIME -IN_MEMORY_BACKEND = { - 'mem': { - 'ENGINE': 'track.tests.test_shim.InMemoryBackend' - } -} - LEGACY_SHIM_PROCESSOR = [ { 'ENGINE': 'track.shim.LegacyFieldMappingProcessor' } ] -FROZEN_TIME = datetime(2013, 10, 3, 8, 24, 55, tzinfo=UTC) - -@freeze_time(FROZEN_TIME) -class LegacyFieldMappingProcessorTestCase(TestCase): +class LegacyFieldMappingProcessorTestCase(EventTrackingTestCase): """Ensure emitted events contain the fields legacy processors expect to find.""" @override_settings( - EVENT_TRACKING_BACKENDS=IN_MEMORY_BACKEND, EVENT_TRACKING_PROCESSORS=LEGACY_SHIM_PROCESSOR, ) def test_event_field_mapping(self): - django_tracker = DjangoTracker() - data = {sentinel.key: sentinel.value} context = { @@ -49,16 +32,15 @@ class LegacyFieldMappingProcessorTestCase(TestCase): 'user_id': sentinel.user_id, 'course_id': sentinel.course_id, 'org_id': sentinel.org_id, - 'event_type': sentinel.event_type, 'client_id': sentinel.client_id, } - with django_tracker.context('test', context): - django_tracker.emit(sentinel.name, data) + with self.tracker.context('test', context): + self.tracker.emit(sentinel.name, data) - emitted_event = django_tracker.backends['mem'].get_event() + emitted_event = self.get_event() expected_event = { - 'event_type': sentinel.event_type, + 'event_type': sentinel.name, 'name': sentinel.name, 'context': { 'user_id': sentinel.user_id, @@ -79,15 +61,12 @@ class LegacyFieldMappingProcessorTestCase(TestCase): self.assertEqual(expected_event, emitted_event) @override_settings( - EVENT_TRACKING_BACKENDS=IN_MEMORY_BACKEND, EVENT_TRACKING_PROCESSORS=LEGACY_SHIM_PROCESSOR, ) def test_missing_fields(self): - django_tracker = DjangoTracker() + self.tracker.emit(sentinel.name) - django_tracker.emit(sentinel.name) - - emitted_event = django_tracker.backends['mem'].get_event() + emitted_event = self.get_event() expected_event = { 'event_type': sentinel.name, @@ -104,19 +83,3 @@ class LegacyFieldMappingProcessorTestCase(TestCase): 'session': '', } self.assertEqual(expected_event, emitted_event) - - -class InMemoryBackend(object): - """A backend that simply stores all events in memory""" - - def __init__(self): - super(InMemoryBackend, self).__init__() - self.events = [] - - def send(self, event): - """Store the event in a list""" - self.events.append(event) - - def get_event(self): - """Return the first event that was emitted.""" - return self.events[0] diff --git a/common/djangoapps/track/views/segmentio.py b/common/djangoapps/track/views/segmentio.py index d89d32ff88..b613a75289 100644 --- a/common/djangoapps/track/views/segmentio.py +++ b/common/djangoapps/track/views/segmentio.py @@ -6,27 +6,25 @@ import logging from django.conf import settings from django.contrib.auth.models import User +from django.http import HttpResponse from django.views.decorators.http import require_POST from django_future.csrf import csrf_exempt -from eventtracking import tracker as eventtracker +from eventtracking import tracker from opaque_keys.edx.keys import CourseKey from opaque_keys import InvalidKeyError from util.json_request import expect_json, JsonResponse -from track import tracker -from track import shim - log = logging.getLogger(__name__) ERROR_UNAUTHORIZED = 'Unauthorized' -WARNING_IGNORED_CHANNEL = 'Channel ignored' -WARNING_IGNORED_ACTION = 'Action ignored' +WARNING_IGNORED_SOURCE = 'Source ignored' +WARNING_IGNORED_TYPE = 'Type ignored' ERROR_MISSING_USER_ID = 'Required user_id missing from context' ERROR_USER_NOT_EXIST = 'Specified user does not exist' ERROR_INVALID_USER_ID = 'Unable to parse userId as an integer' -ERROR_MISSING_EVENT_TYPE = 'The event_type field must be specified in the properties dictionary' +ERROR_MISSING_NAME = 'The name field must be specified in the properties dictionary' ERROR_MISSING_TIMESTAMP = 'Required timestamp field not found' ERROR_MISSING_RECEIVED_AT = 'Required receivedAt field not found' @@ -34,7 +32,7 @@ ERROR_MISSING_RECEIVED_AT = 'Required receivedAt field not found' @require_POST @expect_json @csrf_exempt -def track_segmentio_event(request): +def segmentio_event(request): """ An endpoint for logging events using segment.io's webhook integration. @@ -50,34 +48,9 @@ def track_segmentio_event(request): Many of the root fields of a standard edX tracking event are read out of the "properties" dictionary provided by the segment.io event, which is, in turn, provided by the client that emitted the event. - In order for an event to be logged the following preconditions must be met: - - * The "key" query string parameter must exactly match the django setting TRACKING_SEGMENTIO_WEBHOOK_SECRET. While - the endpoint is public, we want to limit access to it to the segment.io servers only. - * The value of the "channel" field of the event must be included in the list specified by the django setting - TRACKING_SEGMENTIO_ALLOWED_CHANNELS. This is intended to restrict the set of events to specific channels. For - example: just mobile devices. - * The value of the "action" field of the event must be included in the list specified by the django setting - TRACKING_SEGMENTIO_ALLOWED_ACTIONS. In order to make use of *all* of the features segment.io offers we would have - to implement some sort of persistent storage of information contained in some actions (like identify). For now, - we defer support of those actions and just support a limited set that can be handled without storing information - in external state. - * The value of the standard "userId" field of the event must be an integer that can be used to look up the user - using the primary key of the User model. - * Include an "event_type" field in the properties dictionary that indicates the edX event type. Note this can differ - from the "event" field found in the root of a segment.io event. The "event" field at the root of the structure is - intended to be human readable, the "event_type" field is expected to conform to the standard for naming events - found in the edX data documentation. - - Additionally the event can optionally: - - * Provide a "context" dictionary in the properties dictionary. This dictionary will be applied to the - existing context on the server overriding any existing keys. This context dictionary should include a "course_id" - field when the event is scoped to a particular course. The value of this field should be a valid course key. The - context may contain other arbitrary data that will be logged with the event, for example: identification - information for the device that emitted the event. - * Provide a "page" parameter in the properties dictionary which indicates the page that was being displayed to the - user or the mobile application screen that was visible to the user at the time the event was emitted. + In order for an event to be accepted and logged the "key" query string parameter must exactly match the django + setting TRACKING_SEGMENTIO_WEBHOOK_SECRET. While the endpoint is public, we want to limit access to it to the + segment.io servers only. """ @@ -88,7 +61,59 @@ def track_segmentio_event(request): expected_secret = getattr(settings, 'TRACKING_SEGMENTIO_WEBHOOK_SECRET', None) provided_secret = request.GET.get('key') if not expected_secret or provided_secret != expected_secret: - return failure_response(ERROR_UNAUTHORIZED, status=401) + return HttpResponse(status=401) + + try: + track_segmentio_event(request) + except EventValidationError as err: + log.warning( + 'Unable to process event received from segment.io: message="%s" event="%s"', + str(err), + request.body + ) + # Do not let the requestor know why the event wasn't saved. If the secret key is compromised this diagnostic + # information could be used to scrape useful information from the system. + + return HttpResponse(status=200) + + +class EventValidationError(Exception): + """Raised when an invalid event is received.""" + pass + + +def track_segmentio_event(request): # pylint: disable=too-many-statements + """ + Record an event received from segment.io to the tracking logs. + + This method assumes that the event has come from a trusted source. + + The received event must meet the following conditions in order to be logged: + + * The value of the "type" field of the event must be included in the list specified by the django setting + TRACKING_SEGMENTIO_ALLOWED_TYPES. In order to make use of *all* of the features segment.io offers we would have + to implement some sort of persistent storage of information contained in some actions (like identify). For now, + we defer support of those actions and just support a limited set that can be handled without storing information + in external state. + * The value of the standard "userId" field of the event must be an integer that can be used to look up the user + using the primary key of the User model. + * Include a "name" field in the properties dictionary that indicates the edX event name. Note this can differ + from the "event" field found in the root of a segment.io event. The "event" field at the root of the structure is + intended to be human readable, the "name" field is expected to conform to the standard for naming events + found in the edX data documentation. + * Have originated from a known and trusted segment.io client library. The django setting + TRACKING_SEGMENTIO_SOURCE_MAP maps the known library names to internal "event_source" strings. In order to be + logged the event must have a library name that is a valid key in that map. + + Additionally the event can optionally: + + * Provide a "context" dictionary in the properties dictionary. This dictionary will be applied to the + existing context on the server overriding any existing keys. This context dictionary should include a "course_id" + field when the event is scoped to a particular course. The value of this field should be a valid course key. The + context may contain other arbitrary data that will be logged with the event, for example: identification + information for the device that emitted the event. + + """ # The POST body will contain the JSON encoded event full_segment_event = request.json @@ -96,52 +121,51 @@ def track_segmentio_event(request): # We mostly care about the properties segment_event = full_segment_event.get('properties', {}) - def logged_failure_response(*args, **kwargs): - """Indicate a failure and log information about the event that will aide debugging efforts""" - failed_response = failure_response(*args, **kwargs) - log.warning('Unable to process event received from segment.io: %s', json.dumps(full_segment_event)) - return failed_response - - # Selectively listen to particular channels, note that the client can set the "event_source" field in the - # "properties" dict to override the channel provided by segment.io. This is necessary because there is a bug in some - # segment.io client libraries that prevented them from sending correct channel fields. - channel = segment_event.get('event_source') - allowed_channels = [c.lower() for c in getattr(settings, 'TRACKING_SEGMENTIO_ALLOWED_CHANNELS', [])] - if not channel or channel.lower() not in allowed_channels: - return response(WARNING_IGNORED_CHANNEL, committed=False) - - # Ignore actions that are unsupported - action = full_segment_event.get('action') - allowed_actions = [a.lower() for a in getattr(settings, 'TRACKING_SEGMENTIO_ALLOWED_ACTIONS', [])] - if not action or action.lower() not in allowed_actions: - return response(WARNING_IGNORED_ACTION, committed=False) + # Start with the context provided by segment.io in the "client" field if it exists + # We should tightly control which fields actually get included in the event emitted. + segment_context = full_segment_event.get('context') + # Build up the event context by parsing fields out of the event received from segment.io context = {} - # Start with the context provided by segment.io in the "client" field if it exists - segment_context = full_segment_event.get('context') - if segment_context: - context['client'] = segment_context - user_agent = segment_context.get('userAgent', '') + library_name = segment_context.get('library', {}).get('name') + source_map = getattr(settings, 'TRACKING_SEGMENTIO_SOURCE_MAP', {}) + event_source = source_map.get(library_name) + if not event_source: + raise EventValidationError(WARNING_IGNORED_SOURCE) else: - user_agent = '' + context['event_source'] = event_source + + # Ignore types that are unsupported + segment_event_type = full_segment_event.get('type') + allowed_types = [a.lower() for a in getattr(settings, 'TRACKING_SEGMENTIO_ALLOWED_TYPES', [])] + if not segment_event_type or segment_event_type.lower() not in allowed_types: + raise EventValidationError(WARNING_IGNORED_TYPE) + + if segment_context: + context['client'] = dict(segment_context) + context['agent'] = segment_context.get('userAgent', '') + for field in ('traits', 'integrations', 'userAgent'): + if field in context['client']: + del context['client'][field] # Overlay any context provided in the properties context.update(segment_event.get('context', {})) user_id = full_segment_event.get('userId') if not user_id: - return logged_failure_response(ERROR_MISSING_USER_ID) + raise EventValidationError(ERROR_MISSING_USER_ID) # userId is assumed to be the primary key of the django User model try: user = User.objects.get(pk=user_id) except User.DoesNotExist: - return logged_failure_response(ERROR_USER_NOT_EXIST) + raise EventValidationError(ERROR_USER_NOT_EXIST) except ValueError: - return logged_failure_response(ERROR_INVALID_USER_ID) + raise EventValidationError(ERROR_INVALID_USER_ID) else: context['user_id'] = user.id + context['username'] = user.username # course_id is expected to be provided in the context when applicable course_id = context.get('course_id') @@ -159,71 +183,22 @@ def track_segmentio_event(request): ) if 'timestamp' in full_segment_event: - time = parse_iso8601_timestamp(full_segment_event['timestamp']) + context['timestamp'] = parse_iso8601_timestamp(full_segment_event['timestamp']) else: - return logged_failure_response(ERROR_MISSING_TIMESTAMP) + raise EventValidationError(ERROR_MISSING_TIMESTAMP) if 'receivedAt' in full_segment_event: context['received_at'] = parse_iso8601_timestamp(full_segment_event['receivedAt']) else: - return logged_failure_response(ERROR_MISSING_RECEIVED_AT) + raise EventValidationError(ERROR_MISSING_RECEIVED_AT) - if 'event_type' in segment_event: - event_type = segment_event['event_type'] - else: - return logged_failure_response(ERROR_MISSING_EVENT_TYPE) + if 'name' not in segment_event: + raise EventValidationError(ERROR_MISSING_NAME) - with eventtracker.get_tracker().context('edx.segmentio', context): - complete_context = eventtracker.get_tracker().resolve_context() - event = { - "username": user.username, - "event_type": event_type, - "name": segment_event.get('name', ''), - # Will be either "mobile", "browser" or "server". These names happen to be identical to the names we already - # use so no mapping is necessary. - "event_source": channel, - # This timestamp is reported by the local clock on the device so it may be wildly incorrect. - "time": time, - "context": complete_context, - "page": segment_event.get('page'), - "host": complete_context.get('host', ''), - "agent": user_agent, - "ip": segment_event.get('ip', ''), - "event": segment_event.get('event', {}), - } + context['ip'] = segment_event.get('context', {}).get('ip', '') - # Some duplicated fields are passed into event-tracking via the context by track.middleware. - # Remove them from the event here since they are captured elsewhere. - shim.remove_shim_context(event) - - tracker.send(event) - - return response() - - -def response(message=None, status=200, committed=True): - """ - Produce a response from the segment.io event handler. - - Returns: A JSON encoded string giving more information about what action was taken while processing the request. - """ - result = { - 'committed': committed - } - - if message: - result['message'] = message - - return JsonResponse(result, status=status) - - -def failure_response(message, status=400): - """ - Return a failure response when something goes wrong handling segment.io events. - - Returns: A JSON encoded string giving more information about what went wrong when processing the request. - """ - return response(message=message, status=status, committed=False) + with tracker.get_tracker().context('edx.segmentio', context): + tracker.emit(segment_event['name'], segment_event.get('data', {})) def parse_iso8601_timestamp(timestamp): diff --git a/common/djangoapps/track/views/tests/test_segmentio.py b/common/djangoapps/track/views/tests/test_segmentio.py index d3746067a8..5c61229faa 100644 --- a/common/djangoapps/track/views/tests/test_segmentio.py +++ b/common/djangoapps/track/views/tests/test_segmentio.py @@ -3,85 +3,99 @@ from datetime import datetime import json -from ddt import ddt, data -from freezegun import freeze_time -from mock import patch, sentinel +from ddt import ddt, data, unpack +from mock import sentinel from django.contrib.auth.models import User -from django.test import TestCase from django.test.client import RequestFactory from django.test.utils import override_settings from track.middleware import TrackMiddleware +from track.tests import EventTrackingTestCase from track.views import segmentio -EXPECTED_TIME = datetime(2013, 10, 3, 8, 24, 55) SECRET = 'anything' ENDPOINT = '/segmentio/test/event' USER_ID = 10 +MOBILE_SHIM_PROCESSOR = [ + { + 'ENGINE': 'track.shim.LegacyFieldMappingProcessor' + }, + { + 'ENGINE': 'track.shim.VideoEventProcessor' + } +] + + +def expect_failure_with_message(message): + """Ensure the test raises an exception and does not emit an event""" + def test_decorator(func): + def test_decorated(self, *args, **kwargs): + self.assertRaisesRegexp(segmentio.EventValidationError, message, func, self, *args, **kwargs) + self.assert_no_events_emitted() + return test_decorated + return test_decorator + @ddt @override_settings( TRACKING_SEGMENTIO_WEBHOOK_SECRET=SECRET, TRACKING_IGNORE_URL_PATTERNS=[ENDPOINT], - TRACKING_SEGMENTIO_ALLOWED_ACTIONS=['Track', 'Screen'], - TRACKING_SEGMENTIO_ALLOWED_CHANNELS=['mobile'] + TRACKING_SEGMENTIO_ALLOWED_TYPES=['track'], + TRACKING_SEGMENTIO_SOURCE_MAP={'test-app': 'mobile'}, + EVENT_TRACKING_PROCESSORS=MOBILE_SHIM_PROCESSOR, ) -@freeze_time(EXPECTED_TIME) -class SegmentIOTrackingTestCase(TestCase): +class SegmentIOTrackingTestCase(EventTrackingTestCase): """Test processing of segment.io events""" def setUp(self): + super(SegmentIOTrackingTestCase, self).setUp() self.request_factory = RequestFactory() - patcher = patch('track.views.segmentio.tracker') - self.mock_tracker = patcher.start() - self.addCleanup(patcher.stop) - def test_get_request(self): request = self.request_factory.get(ENDPOINT) - response = segmentio.track_segmentio_event(request) + response = segmentio.segmentio_event(request) self.assertEquals(response.status_code, 405) - self.assertFalse(self.mock_tracker.send.called) # pylint: disable=maybe-no-member + self.assert_no_events_emitted() @override_settings( TRACKING_SEGMENTIO_WEBHOOK_SECRET=None ) def test_no_secret_config(self): request = self.request_factory.post(ENDPOINT) - response = segmentio.track_segmentio_event(request) - self.assert_segmentio_uncommitted_response(response, segmentio.ERROR_UNAUTHORIZED, 401) - - def assert_segmentio_uncommitted_response(self, response, expected_message, expected_status=400): - """Assert that no event was emitted and an appropriate commit==false message was returned""" - self.assertEquals(response.status_code, expected_status) - parsed_content = json.loads(response.content) - self.assertEquals(parsed_content, {'committed': False, 'message': expected_message}) - self.assertFalse(self.mock_tracker.send.called) # pylint: disable=maybe-no-member + response = segmentio.segmentio_event(request) + self.assertEquals(response.status_code, 401) + self.assert_no_events_emitted() def test_no_secret_provided(self): request = self.request_factory.post(ENDPOINT) - response = segmentio.track_segmentio_event(request) - self.assert_segmentio_uncommitted_response(response, segmentio.ERROR_UNAUTHORIZED, 401) + response = segmentio.segmentio_event(request) + self.assertEquals(response.status_code, 401) + self.assert_no_events_emitted() def test_secret_mismatch(self): request = self.create_request(key='y') - response = segmentio.track_segmentio_event(request) - self.assert_segmentio_uncommitted_response(response, segmentio.ERROR_UNAUTHORIZED, 401) + response = segmentio.segmentio_event(request) + self.assertEquals(response.status_code, 401) + self.assert_no_events_emitted() def create_request(self, key=None, **kwargs): """Create a fake request that emulates a request from the segment.io servers to ours""" if key is None: key = SECRET - return self.request_factory.post(ENDPOINT + "?key=" + key, **kwargs) + request = self.request_factory.post(ENDPOINT + "?key=" + key, **kwargs) + if 'data' in kwargs: + request.json = json.loads(kwargs['data']) - @data('Identify', 'Group', 'Alias', 'Page', 'identify') + return request + + @data('identify', 'Group', 'Alias', 'Page', 'identify', 'screen') + @expect_failure_with_message(segmentio.WARNING_IGNORED_TYPE) def test_segmentio_ignore_actions(self, action): - response = self.post_segmentio_event(action=action) - self.assert_segmentio_uncommitted_response(response, segmentio.WARNING_IGNORED_ACTION, 200) + self.post_segmentio_event(action=action) def post_segmentio_event(self, **kwargs): """Post a fake segment.io event to the view that processes it""" @@ -89,12 +103,7 @@ class SegmentIOTrackingTestCase(TestCase): data=self.create_segmentio_event_json(**kwargs), content_type='application/json' ) - return segmentio.track_segmentio_event(request) - - @data('server', 'browser', 'Browser') - def test_segmentio_ignore_channels(self, channel): - response = self.post_segmentio_event(event_source=channel) - self.assert_segmentio_uncommitted_response(response, segmentio.WARNING_IGNORED_CHANNEL, 200) + segmentio.track_segmentio_event(request) def create_segmentio_event(self, **kwargs): """Populate a fake segment.io event with data of interest""" @@ -103,18 +112,16 @@ class SegmentIOTrackingTestCase(TestCase): "userId": kwargs.get('user_id', USER_ID), "event": "Did something", "properties": { - 'event_type': kwargs.get('event_type', ''), - 'event_source': kwargs.get('event_source', 'mobile'), - 'event': kwargs.get('event', {}), + 'name': kwargs.get('name', str(sentinel.name)), + 'data': kwargs.get('data', {}), 'context': { 'course_id': kwargs.get('course_id') or '', - }, - 'name': str(sentinel.name), + } }, - "channel": kwargs.get('channel', 'mobile'), + "channel": 'server', "context": { "library": { - "name": "unknown", + "name": kwargs.get('library_name', 'test-app'), "version": "unknown" }, 'userAgent': str(sentinel.user_agent), @@ -133,42 +140,50 @@ class SegmentIOTrackingTestCase(TestCase): "action": action } + if 'context' in kwargs: + sample_event['properties']['context'].update(kwargs['context']) + return sample_event def create_segmentio_event_json(self, **kwargs): """Return a json string containing a fake segment.io event""" return json.dumps(self.create_segmentio_event(**kwargs)) - def test_no_user_for_user_id(self): - response = self.post_segmentio_event(user_id=40) - self.assert_segmentio_uncommitted_response(response, segmentio.ERROR_USER_NOT_EXIST, 400) + @expect_failure_with_message(segmentio.WARNING_IGNORED_SOURCE) + def test_segmentio_ignore_unknown_libraries(self): + self.post_segmentio_event(library_name='foo') + @expect_failure_with_message(segmentio.ERROR_USER_NOT_EXIST) + def test_no_user_for_user_id(self): + self.post_segmentio_event(user_id=40) + + @expect_failure_with_message(segmentio.ERROR_INVALID_USER_ID) def test_invalid_user_id(self): - response = self.post_segmentio_event(user_id='foobar') - self.assert_segmentio_uncommitted_response(response, segmentio.ERROR_INVALID_USER_ID, 400) + self.post_segmentio_event(user_id='foobar') @data('foo/bar/baz', 'course-v1:foo+bar+baz') def test_success(self, course_id): middleware = TrackMiddleware() request = self.create_request( - data=self.create_segmentio_event_json(event_type=str(sentinel.event_type), event={'foo': 'bar'}, course_id=course_id), + data=self.create_segmentio_event_json(data={'foo': 'bar'}, course_id=course_id), content_type='application/json' ) User.objects.create(pk=USER_ID, username=str(sentinel.username)) middleware.process_request(request) # The middleware normally emits an event, make sure it doesn't in this case. - self.assertFalse(self.mock_tracker.send.called) # pylint: disable=maybe-no-member + self.assert_no_events_emitted() try: - response = segmentio.track_segmentio_event(request) + response = segmentio.segmentio_event(request) self.assertEquals(response.status_code, 200) expected_event = { 'username': str(sentinel.username), 'ip': '', + 'session': '', 'event_source': 'mobile', - 'event_type': str(sentinel.event_type), + 'event_type': str(sentinel.name), 'name': str(sentinel.name), 'event': {'foo': 'bar'}, 'agent': str(sentinel.user_agent), @@ -182,10 +197,9 @@ class SegmentIOTrackingTestCase(TestCase): 'path': ENDPOINT, 'client': { 'library': { - 'name': 'unknown', + 'name': 'test-app', 'version': 'unknown' - }, - 'userAgent': str(sentinel.user_agent) + } }, 'received_at': datetime.strptime("2014-08-27T16:33:39.100Z", "%Y-%m-%dT%H:%M:%S.%fZ"), }, @@ -193,7 +207,7 @@ class SegmentIOTrackingTestCase(TestCase): finally: middleware.process_response(request, None) - self.mock_tracker.send.assert_called_once_with(expected_event) # pylint: disable=maybe-no-member + self.assertEquals(self.get_event(), expected_event) def test_invalid_course_id(self): request = self.create_request( @@ -201,22 +215,22 @@ class SegmentIOTrackingTestCase(TestCase): content_type='application/json' ) User.objects.create(pk=USER_ID, username=str(sentinel.username)) - response = segmentio.track_segmentio_event(request) - self.assertEquals(response.status_code, 200) - self.assertTrue(self.mock_tracker.send.called) # pylint: disable=maybe-no-member + segmentio.track_segmentio_event(request) + self.assert_events_emitted() - def test_missing_event_type(self): + @expect_failure_with_message(segmentio.ERROR_MISSING_NAME) + def test_missing_name(self): sample_event_raw = self.create_segmentio_event() - del sample_event_raw['properties']['event_type'] + del sample_event_raw['properties']['name'] request = self.create_request( data=json.dumps(sample_event_raw), content_type='application/json' ) User.objects.create(pk=USER_ID, username=str(sentinel.username)) - response = segmentio.track_segmentio_event(request) - self.assert_segmentio_uncommitted_response(response, segmentio.ERROR_MISSING_EVENT_TYPE, 400) + segmentio.track_segmentio_event(request) + @expect_failure_with_message(segmentio.ERROR_MISSING_TIMESTAMP) def test_missing_timestamp(self): sample_event_raw = self.create_event_without_fields('timestamp') request = self.create_request( @@ -225,8 +239,18 @@ class SegmentIOTrackingTestCase(TestCase): ) User.objects.create(pk=USER_ID, username=str(sentinel.username)) - response = segmentio.track_segmentio_event(request) - self.assert_segmentio_uncommitted_response(response, segmentio.ERROR_MISSING_TIMESTAMP, 400) + segmentio.track_segmentio_event(request) + + @expect_failure_with_message(segmentio.ERROR_MISSING_RECEIVED_AT) + def test_missing_received_at(self): + sample_event_raw = self.create_event_without_fields('receivedAt') + request = self.create_request( + data=json.dumps(sample_event_raw), + content_type='application/json' + ) + User.objects.create(pk=USER_ID, username=str(sentinel.username)) + + segmentio.track_segmentio_event(request) def create_event_without_fields(self, *fields): """Create a fake event and remove some fields from it""" @@ -238,28 +262,108 @@ class SegmentIOTrackingTestCase(TestCase): return event - def test_missing_received_at(self): - sample_event_raw = self.create_event_without_fields('receivedAt') + def test_string_user_id(self): + User.objects.create(pk=USER_ID, username=str(sentinel.username)) + self.post_segmentio_event(user_id=str(USER_ID)) + self.assert_events_emitted() + + def test_hiding_failure(self): + sample_event_raw = self.create_event_without_fields('timestamp') request = self.create_request( data=json.dumps(sample_event_raw), content_type='application/json' ) User.objects.create(pk=USER_ID, username=str(sentinel.username)) - response = segmentio.track_segmentio_event(request) - self.assert_segmentio_uncommitted_response(response, segmentio.ERROR_MISSING_RECEIVED_AT, 400) - - def test_string_user_id(self): - User.objects.create(pk=USER_ID, username=str(sentinel.username)) - response = self.post_segmentio_event(user_id=str(USER_ID)) - result = self.assert_segmentio_committed_response(response) - self.assertEquals(result['context']['user_id'], USER_ID) - - def assert_segmentio_committed_response(self, response): - """Assert that an event was emitted""" + response = segmentio.segmentio_event(request) self.assertEquals(response.status_code, 200) - parsed_content = json.loads(response.content) - self.assertEquals(parsed_content, {'committed': True}) - self.assertTrue(self.mock_tracker.send.called) # pylint: disable=maybe-no-member - return self.mock_tracker.send.mock_calls[0][1][0] + self.assert_no_events_emitted() + @data( + ('edx.video.played', 'play_video'), + ('edx.video.paused', 'pause_video'), + ('edx.video.stopped', 'stop_video'), + ('edx.video.loaded', 'load_video'), + ('edx.video.transcript.shown', 'show_transcript'), + ('edx.video.transcript.hidden', 'hide_transcript'), + ) + @unpack + def test_video_event(self, name, event_type): + course_id = 'foo/bar/baz' + middleware = TrackMiddleware() + + input_payload = { + 'current_time': 132.134456, + 'module_id': 'i4x://foo/bar/baz/some_module', + 'code': 'mobile' + } + if name == 'edx.video.loaded': + del input_payload['current_time'] + + request = self.create_request( + data=self.create_segmentio_event_json( + name=name, + data=input_payload, + context={ + 'course_id': course_id, + 'browser_page': 'https://testserver/courses/foo/bar/baz/courseware/Week_1/Activity/2', + 'application': { + 'name': 'edx.mobileapp.android', + 'version': '29', + 'component': 'videoplayer' + } + }), + content_type='application/json' + ) + User.objects.create(pk=USER_ID, username=str(sentinel.username)) + + middleware.process_request(request) + try: + response = segmentio.segmentio_event(request) + self.assertEquals(response.status_code, 200) + + expected_event_without_payload = { + 'username': str(sentinel.username), + 'ip': '', + 'session': '', + 'event_source': 'mobile', + 'event_type': event_type, + 'name': name, + 'agent': str(sentinel.user_agent), + 'page': 'https://testserver/courses/foo/bar/baz/courseware/Week_1/Activity', + 'time': datetime.strptime("2014-08-27T16:33:39.215Z", "%Y-%m-%dT%H:%M:%S.%fZ"), + 'host': 'testserver', + 'context': { + 'user_id': USER_ID, + 'course_id': course_id, + 'org_id': 'foo', + 'path': ENDPOINT, + 'client': { + 'library': { + 'name': 'test-app', + 'version': 'unknown' + } + }, + 'received_at': datetime.strptime("2014-08-27T16:33:39.100Z", "%Y-%m-%dT%H:%M:%S.%fZ"), + 'application': { + 'name': 'edx.mobileapp.android', + 'version': '29', + 'component': 'videoplayer' + } + }, + } + expected_payload = { + 'currentTime': 132.134456, + 'id': 'i4x-foo-bar-baz-some_module', + 'code': 'mobile' + } + if name == 'edx.video.loaded': + del expected_payload['currentTime'] + finally: + middleware.process_response(request, None) + + actual_event = dict(self.get_event()) + payload = json.loads(actual_event.pop('event')) + + self.assertEquals(actual_event, expected_event_without_payload) + self.assertEquals(payload, expected_payload) diff --git a/lms/envs/aws.py b/lms/envs/aws.py index fc2a70db9f..c6eb470058 100644 --- a/lms/envs/aws.py +++ b/lms/envs/aws.py @@ -396,9 +396,8 @@ STUDENT_FILEUPLOAD_MAX_SIZE = ENV_TOKENS.get("STUDENT_FILEUPLOAD_MAX_SIZE", STUD TRACKING_BACKENDS.update(AUTH_TOKENS.get("TRACKING_BACKENDS", {})) EVENT_TRACKING_BACKENDS.update(AUTH_TOKENS.get("EVENT_TRACKING_BACKENDS", {})) TRACKING_SEGMENTIO_WEBHOOK_SECRET = AUTH_TOKENS.get("TRACKING_SEGMENTIO_WEBHOOK_SECRET", TRACKING_SEGMENTIO_WEBHOOK_SECRET) -TRACKING_SEGMENTIO_ALLOWED_ACTIONS = ENV_TOKENS.get("TRACKING_SEGMENTIO_ALLOWED_ACTIONS", TRACKING_SEGMENTIO_ALLOWED_ACTIONS) -TRACKING_SEGMENTIO_ALLOWED_CHANNELS = ENV_TOKENS.get("TRACKING_SEGMENTIO_ALLOWED_CHANNELS", TRACKING_SEGMENTIO_ALLOWED_CHANNELS) - +TRACKING_SEGMENTIO_ALLOWED_TYPES = ENV_TOKENS.get("TRACKING_SEGMENTIO_ALLOWED_TYPES", TRACKING_SEGMENTIO_ALLOWED_TYPES) +TRACKING_SEGMENTIO_SOURCE_MAP = ENV_TOKENS.get("TRACKING_SEGMENTIO_SOURCE_MAP", TRACKING_SEGMENTIO_SOURCE_MAP) # Student identity verification settings VERIFY_STUDENT = AUTH_TOKENS.get("VERIFY_STUDENT", VERIFY_STUDENT) diff --git a/lms/envs/common.py b/lms/envs/common.py index f3c427a937..d31ac8efe8 100644 --- a/lms/envs/common.py +++ b/lms/envs/common.py @@ -506,6 +506,9 @@ EVENT_TRACKING_BACKENDS = { EVENT_TRACKING_PROCESSORS = [ { 'ENGINE': 'track.shim.LegacyFieldMappingProcessor' + }, + { + 'ENGINE': 'track.shim.VideoEventProcessor' } ] @@ -524,8 +527,11 @@ if FEATURES.get('ENABLE_SQL_TRACKING_LOGS'): }) TRACKING_SEGMENTIO_WEBHOOK_SECRET = None -TRACKING_SEGMENTIO_ALLOWED_ACTIONS = ['Track', 'Screen'] -TRACKING_SEGMENTIO_ALLOWED_CHANNELS = ['mobile'] +TRACKING_SEGMENTIO_ALLOWED_TYPES = ['track'] +TRACKING_SEGMENTIO_SOURCE_MAP = { + 'analytics-android': 'mobile', + 'analytics-ios': 'mobile', +} ######################## GOOGLE ANALYTICS ########################### GOOGLE_ANALYTICS_ACCOUNT = None diff --git a/lms/urls.py b/lms/urls.py index 719ac4c07d..a306ae4771 100644 --- a/lms/urls.py +++ b/lms/urls.py @@ -28,7 +28,7 @@ urlpatterns = ('', # nopep8 url(r'^reject_name_change$', 'student.views.reject_name_change'), url(r'^pending_name_changes$', 'student.views.pending_name_changes'), url(r'^event$', 'track.views.user_track'), - url(r'^segmentio/event$', 'track.views.segmentio.track_segmentio_event'), + url(r'^segmentio/event$', 'track.views.segmentio.segmentio_event'), url(r'^t/(?P