Files
edx-platform/scripts/user_retirement/utils/thirdparty_apis/segment_api.py
Muhammad Farhan Khan 65ea55c8aa Move user retirement scripts code from the tubular repo (#34063)
* refactor: Migrate user retirement scripts code from the tubular repo
2024-02-22 11:09:00 -05:00

284 lines
10 KiB
Python

"""
Segment API call wrappers
"""
import logging
import sys
import traceback
import backoff
import requests
from simplejson.errors import JSONDecodeError
from six import text_type
# Maximum number of tries on Segment API calls (passed as max_tries to every backoff decorator in this module).
MAX_TRIES = 4
# These are the required/optional keys in the learner dict that contain IDs we need to retire from Segment.
# A tuple entry is a nested lookup (('user', 'id') -> learner['user']['id']); a plain string is a top-level key.
REQUIRED_IDENTIFYING_KEYS = [('user', 'id'), 'original_username']
# Optional keys are only sent when they are present in the learner dict.
OPTIONAL_IDENTIFYING_KEYS = ['ecommerce_segment_id']
# The Segment Config API path for bulk deleting users for a particular workspace.
# It is appended to the client's base URL; the placeholder is the workspace slug.
BULK_REGULATE_URL = 'v1beta/workspaces/{}/regulations'
# The Segment Config API path for querying the status of a bulk user deletion request for a particular workspace.
# Placeholders, in order: the workspace slug, then the ID of the regulation request.
BULK_REGULATE_STATUS_URL = 'v1beta/workspaces/{}/regulations/{}'
# According to Segment this represents the maximum limits of the bulk regulation call.
# https://reference.segmentapis.com/?version=latest#57a69434-76cc-43cc-a547-98c319182247
# The limit is compared against the number of ID values in one request, not the number of learners.
MAXIMUM_USERS_IN_REGULATION_REQUEST = 5000
# Module-level logger shared by the retry helpers and the API client.
LOG = logging.getLogger(__name__)
def _backoff_handler(details):
    """
    Simple logging handler for when a backoff (retry wait) occurs.

    Used as the ``on_backoff`` callback of the retry decorators, so it is expected to run
    while the exception that triggered the retry is still being handled.

    :param details: Mapping supplied by backoff; must contain ``wait``, ``tries`` and ``target``.
    """
    LOG.error('Trying again in {wait:0.1f} seconds after {tries} tries calling {target}'.format(**details))
    LOG.error(traceback.format_exc())

    # Log the response status and body when the in-flight exception carries one (HTTPError does).
    # Exceptions without a usable response (typically timeouts and JSON decode errors) are
    # skipped explicitly rather than by swallowing every possible exception.
    exc = sys.exc_info()[1]
    response = getattr(exc, 'response', None)
    if response is not None:
        LOG.error("HTTPError code {}: {}".format(response.status_code, response.text))
def _wait_30_seconds():
    """
    Wait generator factory for backoff: every wait is a constant 30 seconds.
    """
    fixed_interval_waits = backoff.constant(interval=30)
    return fixed_interval_waits
def _http_status_giveup(exc):
"""
Giveup method that gives up backoff upon any non-5xx and 504 server errors.
"""
return not 429 == exc.response.status_code and not 500 <= exc.response.status_code < 600
def _retry_segment_api():
    """
    Decorator factory that layers the standard Segment retry policies onto a function.

    Three backoff policies are stacked, innermost first:
    timeouts (constant 30 second wait), HTTP errors (exponential wait, subject to
    _http_status_giveup), and JSON decode errors (exponential wait).
    Each policy allows at most MAX_TRIES tries and logs through _backoff_handler.
    """
    def inner(func):
        """
        Wrap ``func`` with the timeout, HTTP error and JSON decode retry policies.
        """
        def log_retry(details):
            # Looked up at call time so the module-level handler is always the one invoked.
            _backoff_handler(details)

        retry_on_timeout = backoff.on_exception(
            _wait_30_seconds,
            requests.exceptions.Timeout,
            max_tries=MAX_TRIES,
            on_backoff=log_retry
        )
        retry_on_http_error = backoff.on_exception(
            backoff.expo,
            requests.exceptions.HTTPError,
            max_tries=MAX_TRIES,
            giveup=_http_status_giveup,
            on_backoff=log_retry
        )
        retry_on_decode_error = backoff.on_exception(
            backoff.expo,
            JSONDecodeError,
            max_tries=MAX_TRIES,
            on_backoff=log_retry
        )

        # Apply from the inside out: timeout handling sits closest to the wrapped call.
        wrapped = retry_on_timeout(func)
        wrapped = retry_on_http_error(wrapped)
        return retry_on_decode_error(wrapped)

    return inner
class SegmentApi:
    """
    Segment API client with convenience methods.

    Wraps the Segment Config API bulk regulation endpoints used to delete/suppress
    and unsuppress users, and to query the status of a submitted regulation.
    """

    def __init__(self, base_url, auth_token, workspace_slug):
        """
        :param base_url: Base URL of the Segment API; endpoint paths are appended to it as-is.
        :param auth_token: Token sent as a Bearer token in the Authorization header.
        :param workspace_slug: Segment workspace slug substituted into the endpoint paths.
        """
        self.base_url = base_url
        self.auth_token = auth_token
        self.workspace_slug = workspace_slug

    @_retry_segment_api()
    def _call_segment_post(self, url, params):
        """
        Actually makes the Segment REST POST call.

        5xx errors and timeouts will be retried via _retry_segment_api,
        all others will bubble up.

        :param url: Endpoint path appended to the base URL.
        :param params: JSON-serializable request body.
        :return: The response object returned by requests.
        :raises requests.exceptions.HTTPError: On an error status, via raise_for_status.
        """
        headers = {
            "Authorization": "Bearer {}".format(self.auth_token),
            "Content-Type": "application/json"
        }
        # NOTE(review): no timeout= is passed, and requests does not time out on its own by default,
        # so the Timeout retry policy may never trigger - confirm whether a timeout should be set.
        resp = requests.post(self.base_url + url, json=params, headers=headers)
        resp.raise_for_status()
        return resp

    @_retry_segment_api()
    def _call_segment_get(self, url):
        """
        Actually makes the Segment REST GET call.

        5xx errors and timeouts will be retried via _retry_segment_api,
        all others will bubble up.

        :param url: Endpoint path appended to the base URL.
        :return: The response object returned by requests.
        :raises requests.exceptions.HTTPError: On an error status, via raise_for_status.
        """
        headers = {
            "Authorization": "Bearer {}".format(self.auth_token)
        }
        # NOTE(review): no timeout= is passed here either - see _call_segment_post.
        resp = requests.get(self.base_url + url, headers=headers)
        resp.raise_for_status()
        return resp

    def _get_value_from_learner(self, learner, key):
        """
        Return the value from a learner dict for the given key or 2-tuple of keys.

        Allows us to map things like learner['user']['id'] in a single entry in REQUIRED_IDENTIFYING_KEYS.

        :param learner: Learner dict to read from.
        :param key: Either a top-level key, or a 2-tuple (outer_key, inner_key) for a nested lookup.
        :return: The value converted to text.
        :raises KeyError: If a requested key is missing from the learner dict.
        """
        if isinstance(key, tuple):
            val = learner[key[0]][key[1]]
        else:
            val = learner[key]

        return text_type(val)

    @staticmethod
    def _validate_chunk_size(chunk_size):
        """
        Reject chunk sizes that would keep the chunking loops from ever advancing.

        :raises ValueError: If chunk_size is less than 1.
        """
        if chunk_size < 1:
            raise ValueError('chunk_size must be at least 1, got {}'.format(chunk_size))

    def _send_regulation_request(self, params):
        """
        Make the call to the Segment Regulate API, cleanly report any errors.

        :param params: Regulation request body to POST.
        :raises Exception: If the call fails or the response does not contain a 'regulate_id';
            the message includes the params and whatever response content was received.
        """
        # Holds the best available view of the response for error reporting:
        # the parsed JSON if decoding worked, the raw text if it did not.
        resp_json = ""

        try:
            resp = self._call_segment_post(BULK_REGULATE_URL.format(self.workspace_slug), params)
            try:
                resp_json = resp.json()
                bulk_user_delete_id = resp_json['regulate_id']
                LOG.info('Bulk user regulation queued. Id: {}'.format(bulk_user_delete_id))
            except JSONDecodeError:
                resp_json = resp.text
                raise

        # If we get here we got some kind of JSON response from Segment, we'll try to get
        # the data we need. If it doesn't exist we'll bubble up the error from Segment and
        # eat the TypeError / KeyError since they won't be relevant.
        except (TypeError, KeyError, requests.exceptions.HTTPError, JSONDecodeError) as exc:
            LOG.exception(exc)
            # Keep the message as text: encoding it to bytes would make the log line and the
            # exception message render as a b'...' repr on Python 3.
            err = 'Error was encountered for params: {} \n\n Response: {}'.format(
                params,
                text_type(resp_json)
            )
            LOG.error(err)
            raise Exception(err) from exc

    def delete_and_suppress_learner(self, learner):
        """
        Delete AND suppress a single Segment user using the bulk user deletion REST API.

        :param learner: Single user retirement status row with its fields.
        :return: Whatever delete_and_suppress_learners returns for the one-learner batch.
        """
        # Send a list of one learner to be deleted by the multiple learner deletion call.
        return self.delete_and_suppress_learners([learner], 1)

    def unsuppress_learners_by_key(self, key, learners, chunk_size, beginning_idx=0):
        """
        Sets up the Segment REST API calls to UNSUPPRESS users in chunks.

        If a chunk would carry MAXIMUM_USERS_IN_REGULATION_REQUEST values or more, an error is
        logged and processing stops without sending that chunk or any later one.

        :param key: Key in the learner dict to pull the ID we care about from.
        :param learners: List of learner dicts to be worked on. We only use the key passed in.
        :param chunk_size: How many learners should be retired in this batch. Must be at least 1.
        :param beginning_idx: Index into learners where this batch should start.
        :raises ValueError: If chunk_size is less than 1.
        """
        self._validate_chunk_size(chunk_size)

        last_idx = len(learners) - 1
        for start_idx in range(beginning_idx, len(learners), chunk_size):
            # end_idx is inclusive and clamped to the final learner.
            end_idx = min(start_idx + chunk_size - 1, last_idx)

            LOG.info(
                "Attempting unsuppress for key '%s', start index %s, end index %s for learners '%s' through '%s'",
                key,
                start_idx, end_idx,
                learners[start_idx]['original_username'],
                learners[end_idx]['original_username']
            )

            learner_vals = [
                self._get_value_from_learner(learners[idx], key)
                for idx in range(start_idx, end_idx + 1)
            ]

            if len(learner_vals) >= MAXIMUM_USERS_IN_REGULATION_REQUEST:
                LOG.error(
                    'Attempting to UNSUPPRESS too many user values (%s) at once in bulk request - decrease chunk_size.',
                    len(learner_vals)
                )
                return

            params = {
                "regulation_type": "Unsuppress",
                "attributes": {
                    "name": "userId",
                    "values": learner_vals
                }
            }

            self._send_regulation_request(params)

    def delete_and_suppress_learners(self, learners, chunk_size, beginning_idx=0):
        """
        Sets up the Segment REST API calls to GDPR-delete users in chunks.

        Each learner contributes every REQUIRED_IDENTIFYING_KEYS value plus any
        OPTIONAL_IDENTIFYING_KEYS value that is present, so a chunk can carry several values
        per learner. If a chunk would carry MAXIMUM_USERS_IN_REGULATION_REQUEST values or more,
        an error is logged and processing stops without sending that chunk or any later one.

        :param learners: List of learner dicts returned from LMS, should contain all we need to retire this learner.
        :param chunk_size: How many learners should be retired in this batch. Must be at least 1.
        :param beginning_idx: Index into learners where this batch should start.
        :raises ValueError: If chunk_size is less than 1.
        """
        self._validate_chunk_size(chunk_size)

        last_idx = len(learners) - 1
        for start_idx in range(beginning_idx, len(learners), chunk_size):
            # end_idx is inclusive and clamped to the final learner.
            end_idx = min(start_idx + chunk_size - 1, last_idx)

            LOG.info(
                "Attempting Segment deletion with start index %s, end index %s for learners (%s, %s) through (%s, %s)",
                start_idx, end_idx,
                learners[start_idx]['user']['id'], learners[start_idx]['original_username'],
                learners[end_idx]['user']['id'], learners[end_idx]['original_username']
            )

            learner_vals = []
            for idx in range(start_idx, end_idx + 1):
                learner = learners[idx]
                for id_key in REQUIRED_IDENTIFYING_KEYS:
                    learner_vals.append(self._get_value_from_learner(learner, id_key))
                for id_key in OPTIONAL_IDENTIFYING_KEYS:
                    if id_key in learner:
                        learner_vals.append(self._get_value_from_learner(learner, id_key))

            if len(learner_vals) >= MAXIMUM_USERS_IN_REGULATION_REQUEST:
                LOG.error(
                    'Attempting to delete too many user values (%s) at once in bulk request - decrease chunk_size.',
                    len(learner_vals)
                )
                return

            params = {
                "regulation_type": "Suppress_With_Delete",
                "attributes": {
                    "name": "userId",
                    "values": learner_vals
                }
            }

            self._send_regulation_request(params)

    def get_bulk_delete_status(self, bulk_delete_id):
        """
        Queries the status of a previously submitted bulk delete request.

        The decoded status is logged at INFO level and also returned so callers can inspect it.

        :param bulk_delete_id: ID returned from a previously-submitted bulk delete request.
        :return: The decoded JSON body of the status response.
        """
        resp = self._call_segment_get(BULK_REGULATE_STATUS_URL.format(self.workspace_slug, bulk_delete_id))
        resp_json = resp.json()
        LOG.info(text_type(resp_json))
        return resp_json