""" Segment API call wrappers """ import logging import sys import traceback import backoff import requests from simplejson.errors import JSONDecodeError from six import text_type # Maximum number of tries on Segment API calls MAX_TRIES = 4 # These are the required/optional keys in the learner dict that contain IDs we need to retire from Segment. REQUIRED_IDENTIFYING_KEYS = [('user', 'id'), 'original_username'] OPTIONAL_IDENTIFYING_KEYS = ['ecommerce_segment_id'] # The Segment Config API for bulk deleting users for a particular workspace BULK_REGULATE_URL = 'v1beta/workspaces/{}/regulations' # The Segment Config API for querying the status of a bulk user deletion request for a particular workspace BULK_REGULATE_STATUS_URL = 'v1beta/workspaces/{}/regulations/{}' # According to Segment this represents the maximum limits of the bulk regulation call. # https://reference.segmentapis.com/?version=latest#57a69434-76cc-43cc-a547-98c319182247 MAXIMUM_USERS_IN_REGULATION_REQUEST = 5000 LOG = logging.getLogger(__name__) def _backoff_handler(details): """ Simple logging handler for when timeout backoff occurs. """ LOG.error('Trying again in {wait:0.1f} seconds after {tries} tries calling {target}'.format(**details)) # Log the text response from any HTTPErrors, if possible try: LOG.error(traceback.format_exc()) exc = sys.exc_info()[1] LOG.error("HTTPError code {}: {}".format(exc.response.status_code, exc.response.text)) except Exception: # pylint: disable=broad-except pass def _wait_30_seconds(): """ Backoff generator that waits for 30 seconds. """ return backoff.constant(interval=30) def _http_status_giveup(exc): """ Giveup method that gives up backoff upon any non-5xx and 504 server errors. """ return not 429 == exc.response.status_code and not 500 <= exc.response.status_code < 600 def _retry_segment_api(): """ Decorator which enables retries with sane backoff defaults """ def inner(func): # pylint: disable=missing-docstring func_with_decode_backoff = backoff.on_exception( backoff.expo, JSONDecodeError, max_tries=MAX_TRIES, on_backoff=lambda details: _backoff_handler(details) # pylint: disable=unnecessary-lambda ) func_with_backoff = backoff.on_exception( backoff.expo, requests.exceptions.HTTPError, max_tries=MAX_TRIES, giveup=_http_status_giveup, on_backoff=lambda details: _backoff_handler(details) # pylint: disable=unnecessary-lambda ) func_with_timeout_backoff = backoff.on_exception( _wait_30_seconds, requests.exceptions.Timeout, max_tries=MAX_TRIES, on_backoff=lambda details: _backoff_handler(details) # pylint: disable=unnecessary-lambda ) return func_with_decode_backoff(func_with_backoff(func_with_timeout_backoff(func))) return inner class SegmentApi: """ Segment API client with convenience methods """ def __init__(self, base_url, auth_token, workspace_slug): self.base_url = base_url self.auth_token = auth_token self.workspace_slug = workspace_slug @_retry_segment_api() def _call_segment_post(self, url, params): """ Actually makes the Segment REST POST call. 5xx errors and timeouts will be retried via _retry_segment_api, all others will bubble up. """ headers = { "Authorization": "Bearer {}".format(self.auth_token), "Content-Type": "application/json" } resp = requests.post(self.base_url + url, json=params, headers=headers) resp.raise_for_status() return resp @_retry_segment_api() def _call_segment_get(self, url): """ Actually makes the Segment REST GET call. 5xx errors and timeouts will be retried via _retry_segment_api, all others will bubble up. """ headers = { "Authorization": "Bearer {}".format(self.auth_token) } resp = requests.get(self.base_url + url, headers=headers) resp.raise_for_status() return resp def _get_value_from_learner(self, learner, key): """ Return the value from a learner dict for the given key or 2-tuple of keys. Allows us to map things like learner['user']['id'] in a single entry in REQUIRED_IDENTIFYING_KEYS. """ if isinstance(key, tuple): val = learner[key[0]][key[1]] else: val = learner[key] return text_type(val) def _send_regulation_request(self, params): """ Make the call to the Segment Regulate API, cleanly report any errors """ resp_json = "" try: resp = self._call_segment_post(BULK_REGULATE_URL.format(self.workspace_slug), params) try: resp_json = resp.json() bulk_user_delete_id = resp_json['regulate_id'] LOG.info('Bulk user regulation queued. Id: {}'.format(bulk_user_delete_id)) except JSONDecodeError: resp_json = resp.text raise # If we get here we got some kind of JSON response from Segment, we'll try to get # the data we need. If it doesn't exist we'll bubble up the error from Segment and # eat the TypeError / KeyError since they won't be relevant. except (TypeError, KeyError, requests.exceptions.HTTPError, JSONDecodeError) as exc: LOG.exception(exc) err = u'Error was encountered for params: {} \n\n Response: {}'.format( params, text_type(resp_json) ).encode('utf-8') LOG.error(err) raise Exception(err) def delete_and_suppress_learner(self, learner): """ Delete AND suppress a single Segment user using the bulk user deletion REST API. :param learner: Single user retirement status row with its fields. """ # Send a list of one learner to be deleted by the multiple learner deletion call. return self.delete_and_suppress_learners([learner], 1) def unsuppress_learners_by_key(self, key, learners, chunk_size, beginning_idx=0): """ Sets up the Segment REST API calls to UNSUPPRESS users in chunks. :param key: Key in the learner dict to pull the ID we care about from. :param learners: List of learner dicts to be worked on. We only use the key passed in. :param chunk_size: How many learners should be retired in this batch. :param beginning_idx: Index into learners where this batch should start. """ curr_idx = beginning_idx while curr_idx < len(learners): start_idx = curr_idx end_idx = min(start_idx + chunk_size - 1, len(learners) - 1) LOG.info( "Attempting unsuppress for key '%s', start index %s, end index %s for learners '%s' through '%s'", key, start_idx, end_idx, learners[start_idx]['original_username'], learners[end_idx]['original_username'] ) learner_vals = [] for idx in range(start_idx, end_idx + 1): learner_vals.append(self._get_value_from_learner(learners[idx], key)) if len(learner_vals) >= MAXIMUM_USERS_IN_REGULATION_REQUEST: LOG.error( 'Attempting to UNSUPPRESS too many user values (%s) at once in bulk request - decrease chunk_size.', len(learner_vals) ) return params = { "regulation_type": "Unsuppress", "attributes": { "name": "userId", "values": learner_vals } } self._send_regulation_request(params) curr_idx += chunk_size def delete_and_suppress_learners(self, learners, chunk_size, beginning_idx=0): """ Sets up the Segment REST API calls to GDPR-delete users in chunks. :param learners: List of learner dicts returned from LMS, should contain all we need to retire this learner. :param chunk_size: How many learners should be retired in this batch. :param beginning_idx: Index into learners where this batch should start. """ curr_idx = beginning_idx while curr_idx < len(learners): start_idx = curr_idx end_idx = min(start_idx + chunk_size - 1, len(learners) - 1) LOG.info( "Attempting Segment deletion with start index %s, end index %s for learners (%s, %s) through (%s, %s)", start_idx, end_idx, learners[start_idx]['user']['id'], learners[start_idx]['original_username'], learners[end_idx]['user']['id'], learners[end_idx]['original_username'] ) learner_vals = [] for idx in range(start_idx, end_idx + 1): for id_key in REQUIRED_IDENTIFYING_KEYS: learner_vals.append(self._get_value_from_learner(learners[idx], id_key)) for id_key in OPTIONAL_IDENTIFYING_KEYS: if id_key in learners[idx]: learner_vals.append(self._get_value_from_learner(learners[idx], id_key)) if len(learner_vals) >= MAXIMUM_USERS_IN_REGULATION_REQUEST: LOG.error( 'Attempting to delete too many user values (%s) at once in bulk request - decrease chunk_size.', len(learner_vals) ) return params = { "regulation_type": "Suppress_With_Delete", "attributes": { "name": "userId", "values": learner_vals } } self._send_regulation_request(params) curr_idx += chunk_size def get_bulk_delete_status(self, bulk_delete_id): """ Queries the status of a previously submitted bulk delete request. :param bulk_delete_id: ID returned from a previously-submitted bulk delete request. """ resp = self._call_segment_get(BULK_REGULATE_STATUS_URL.format(self.workspace_slug, bulk_delete_id)) resp_json = resp.json() LOG.info(text_type(resp_json))