Files
edx-platform/lms/djangoapps/mailing/management/commands/mailchimp_sync_course.py
usamasadiq dc36d0bfe8 Ran pyupgrade on lms/djangoapps
Ran pyupgrade on lms/djangoapps/mailing
Ran pyupgrade on lms/djangoapps/mobile_api
Ran pyupgrade on lms/djangoapps/monitoring
2021-02-19 17:47:37 +05:00

362 lines
12 KiB
Python

"""
Synchronizes a mailchimp list with the students of a course.
"""
import itertools
import logging
import math
import random
from collections import namedtuple
from itertools import chain
from django.core.management.base import BaseCommand
from mailsnake import MailSnake
from opaque_keys.edx.keys import CourseKey
from common.djangoapps.student.models import UserProfile, unique_id_for_user
# Page size passed as `limit` when listing list members, and the batch size
# used when adding members to a static segment.
BATCH_SIZE = 15000
# If you try to subscribe with too many users at once
# the transaction times out on the mailchimp side.
SUBSCRIBE_BATCH_SIZE = 1000
# Module-wide logger, registered under the 'edx.mailchimp' name.
log = logging.getLogger('edx.mailchimp')
# Mailchimp field type to use for a merge var, keyed by merge var name.
# Names missing from this mapping are created as 'number' fields by
# update_merge_tags.
FIELD_TYPES = {'EDX_ID': 'text'}
class Command(BaseCommand):
    """
    Management command that brings a mailchimp list in line with the
    active enrollments of one course.
    """
    help = 'Synchronizes a mailchimp list with the students of a course.'

    def add_arguments(self, parser):
        """Register the command-line options on `parser`."""
        option_specs = (
            ('--key', {
                'required': True,
                'help': 'mailchimp api key',
            }),
            ('--list', {
                'dest': 'list_id',
                'required': True,
                'help': 'mailchimp list id',
            }),
            ('--course', {
                'dest': 'course_id',
                'required': True,
                'help': 'edx course_id',
            }),
            ('--segments', {
                'dest': 'num_segments',
                'type': int,
                'default': 0,
                'help': 'number of static random segments to create',
            }),
        )
        for flag, spec in option_specs:
            parser.add_argument(flag, **spec)

    def handle(self, *args, **options):
        """
        Run the sync.

        Enrolled students who are not on the list yet are subscribed, list
        subscribers who are not enrolled are unsubscribed, and finally
        `make_segments` is invoked with the requested segment count.
        """
        api_key = options['key']
        list_id = options['list_id']
        course_id = options['course_id']
        segment_count = options['num_segments']

        log.info('Syncronizing email list for %s', course_id)

        mailchimp = connect_mailchimp(api_key)

        # Snapshot of the list as mailchimp currently sees it.
        subscribed = get_subscribed(mailchimp, list_id)
        unsubscribed = get_unsubscribed(mailchimp, list_id)
        cleaned = get_cleaned(mailchimp, list_id)
        non_subscribed = unsubscribed | cleaned

        enrolled = get_enrolled_students(course_id)

        # Anyone mailchimp already knows about (in any of the three states)
        # is left out of the subscribe batch.
        already_known = subscribed | non_subscribed
        to_subscribe = get_student_data(enrolled, exclude=already_known)

        # Every key that appears in at least one entry must exist as a merge
        # var on the list before the entries are uploaded.
        tag_names = {key for entry in to_subscribe for key in entry}
        update_merge_tags(mailchimp, list_id, tag_names)

        subscribe_with_data(mailchimp, list_id, to_subscribe)

        # Subscribers whose address is not among the enrolled students.
        enrolled_emails = set(enrolled.values_list('user__email', flat=True))
        non_enrolled_emails = list(subscribed - enrolled_emails)
        unsubscribe(mailchimp, list_id, non_enrolled_emails)

        # Segment over the previous subscribers plus the ones just added.
        newly_added = {entry['EMAIL'] for entry in to_subscribe}
        subscribed = subscribed | newly_added
        make_segments(mailchimp, list_id, segment_count, subscribed)
def connect_mailchimp(api_key):
    """
    Build a MailSnake client for `api_key`, ping the api once and log the
    ping response at debug level.

    Returns the client.
    """
    client = MailSnake(api_key)
    log.debug(client.ping())
    return client
def verify_list(mailchimp, list_id, course_id):
    """
    Check that the mailchimp list `list_id` plausibly belongs to `course_id`.

    The course id is split on `_`, `/` and whitespace; the list is accepted
    when at least three of the resulting pieces occur in the list's name.

    Returns True on a match, False otherwise (including when the list id
    does not resolve to exactly one list).
    """
    matching_lists = mailchimp.lists(filters={'list_id': list_id})['data']
    if len(matching_lists) != 1:
        log.error('incorrect list id')
        return False

    list_name = matching_lists[0]['name']
    log.debug('list name: %s', list_name)

    # check that we are connecting to the correct list
    tokens = course_id.replace('_', ' ').replace('/', ' ').split()
    hits = len([token for token in tokens if token in list_name])
    if hits >= 3:
        return True

    log.info(course_id)
    log.info(list_name)
    log.error('course_id does not match list name')
    return False
def get_student_data(students, exclude=None):
    """
    Build the per-student records to upload to mailchimp.

    `students` is a QuerySet from which the email, name, user id and
    username columns are read. Rows whose email is in the optional
    `exclude` collection are skipped.

    Returns a list of dictionaries with keys 'EMAIL', 'FULLNAME' and
    'EDX_ID', one per remaining row.
    """
    # Only a few columns are fetched instead of full User objects, so this
    # namedtuple stands in for the User that `unique_id_for_user` is given.
    FakeUser = namedtuple('Fake', 'id username is_anonymous')
    excluded_emails = exclude or set()

    def build_entry(row):
        """
        Turn one row of column values into a dictionary with keys
        'EMAIL', 'FULLNAME' (title-cased name) and 'EDX_ID' (the value
        `unique_id_for_user` returns for the stand-in user).
        """
        stand_in = FakeUser(
            row['user_id'],
            row['user__username'],
            lambda: True,
        )
        return {
            'EMAIL': row['user__email'],
            'FULLNAME': row['name'].title(),
            'EDX_ID': unique_id_for_user(stand_in),
        }

    rows = students.values('user__email', 'name', 'user_id', 'user__username')
    # TODO: Since `students` is a QuerySet, can we chain a filter here that
    # would be more performant than testing every row in Python?
    return [
        build_entry(row)
        for row in rows
        if row['user__email'] not in excluded_emails
    ]
def get_enrolled_students(course_id):
    """
    Return a QuerySet of the UserProfile rows whose user has an active
    enrollment in the course identified by the string `course_id`.
    """
    active_enrollment = {
        'user__courseenrollment__course_id': CourseKey.from_string(course_id),
        'user__courseenrollment__is_active': True,
    }
    # Both conditions go into a single filter() call, as a pair.
    return UserProfile.objects.filter(**active_enrollment)
def get_subscribed(mailchimp, list_id):
    """Return the set of email addresses with status 'subscribed' on `list_id`."""
    status = 'subscribed'
    return get_members(mailchimp, list_id, status)
def get_unsubscribed(mailchimp, list_id):
    """Return the set of email addresses with status 'unsubscribed' on `list_id`."""
    status = 'unsubscribed'
    return get_members(mailchimp, list_id, status)
def get_cleaned(mailchimp, list_id):
    """
    Return the set of email addresses with status 'cleaned' on `list_id`.

    Cleaned addresses may be invalid or may have bounced, so they should
    not be added back to the list.
    """
    status = 'cleaned'
    return get_members(mailchimp, list_id, status)
def get_members(mailchimp, list_id, status):
    """
    Collect the email address of every member of `list_id` whose status
    equals `status`.

    Pages of up to BATCH_SIZE members are requested one after another,
    starting at page 0, until a response carries no 'data' entries.

    Returns a set of email addresses.
    """
    fetch_page = mailchimp.listMembers
    emails = set()
    page = 0
    while True:
        response = fetch_page(
            id=list_id,
            status=status,
            start=page,
            limit=BATCH_SIZE,
        )
        rows = response.get('data', [])
        if not rows:
            # A missing or empty 'data' entry marks the end of the listing.
            return emails
        emails.update(row['email'] for row in rows)
        page += 1
def unsubscribe(mailchimp, list_id, emails):
    """
    Batch unsubscribe the given email addresses from the list represented
    by `list_id`.

    The request is sent with `send_goodbye=False` and
    `delete_member=False`, and the api response is logged at debug level.
    When `emails` is empty no request is made at all.

    Returns None.
    """
    # The sync command calls this on every run, and in the common case there
    # is nobody to remove: skip the remote round-trip instead of submitting
    # an empty batch.
    if not emails:
        return

    result = mailchimp.listBatchUnsubscribe(
        id=list_id,
        emails=emails,
        send_goodbye=False,
        delete_member=False,
    )
    log.debug(result)
def update_merge_tags(mailchimp, list_id, tag_names):
    """
    Create on the mailchimp list any merge vars needed for `tag_names`.

    The list's current merge vars are fetched once. Then, for each name in
    `tag_names`:

    * if no 'FULLNAME' tag is known yet, a non-public text merge var
      tagged 'FULLNAME' and named 'Full Name' is created;
    * if the name is not among the fetched merge var names, and its tag
      (see `name_to_tag`) is neither 'EMAIL' nor 'FULLNAME', a non-public
      merge var is created for it, typed per FIELD_TYPES with 'number' as
      the fallback.

    Each api response is logged at debug level. Returns None.
    """
    existing_vars = mailchimp.listMergeVars(id=list_id)
    known_names = {merge_var['name'] for merge_var in existing_vars}
    add_merge_var = mailchimp.listMergeVarAdd
    known_tags = [merge_var['tag'] for merge_var in existing_vars]

    def create(tag, label, field_type):
        """Create one non-public merge var and record its tag as known."""
        response = add_merge_var(
            id=list_id,
            tag=tag,
            name=label,
            options={'field_type': field_type, 'public': False},
        )
        known_tags.append(tag)
        log.debug(response)

    for name in tag_names:
        tag = name_to_tag(name)

        # verify FULLNAME is present
        # TODO: This check does not use the loop variable; it only lives in
        # the loop so that nothing is created when `tag_names` is empty.
        if 'FULLNAME' not in known_tags:
            create('FULLNAME', 'Full Name', 'text')

        # add extra tags if not present
        if name in known_names or tag in ['EMAIL', 'FULLNAME']:
            continue
        create(tag, name, FIELD_TYPES.get(name, 'number'))
def subscribe_with_data(mailchimp, list_id, user_data):
    """
    Batch subscribe users to the mailchimp list `list_id`.

    `user_data` is a list of dictionaries, one per user, with keys such as
    'EMAIL', 'FULLNAME' and 'EDX_ID'. Every key is passed through
    `name_to_tag` before upload, and the entries are sent in batches of
    SUBSCRIBE_BATCH_SIZE with `double_optin=False` and
    `update_existing=True`.

    Returns None
    """
    tagged_entries = [
        {name_to_tag(key): value for key, value in entry.items()}
        for entry in user_data
    ]

    # send the updates in batches of a fixed size
    for batch in chunk(tagged_entries, SUBSCRIBE_BATCH_SIZE):
        outcome = mailchimp.listBatchSubscribe(
            id=list_id,
            batch=batch,
            double_optin=False,
            update_existing=True,
        )
        log.debug(
            "Added: %s Error on: %s", outcome['add_count'], outcome['error_count']
        )
def make_segments(mailchimp, list_id, count, emails):
    """
    Split `emails` into `count` random static segments on the mailchimp
    list `list_id`. Does nothing unless `count` is greater than zero.

    First, existing static segments whose name starts with 'random' are
    deleted. Then the whole list of emails is shuffled (the reason for the
    random order is not recorded). Finally, the shuffled emails are cut into
    chunks of ceil(len(emails) / count) addresses and uploaded as segments
    named 'random_00', 'random_01', ...

    Exactly `count` segments are always created. When the emails run out
    before the last segment (for example 5 emails over 4 segments gives
    chunks of 2, 2 and 1), the remaining segments are created empty.

    Returns None.
    """
    if count <= 0:
        return

    # reset segments
    for existing in mailchimp.listStaticSegments(id=list_id):
        if existing['name'].startswith('random'):
            mailchimp.listStaticSegmentDel(id=list_id, seg_id=existing['id'])

    # shuffle and split emails
    emails = list(emails)
    random.shuffle(emails)  # Why do we do this?
    # Clamp to 1: with no emails the ceiling is 0, and a zero chunk size
    # would make `chunk` raise ValueError (range() step must not be zero).
    chunk_size = max(1, math.ceil(len(emails) / count))
    chunks = list(chunk(emails, chunk_size))

    # create segments and add emails
    for seg in range(count):
        name = f'random_{seg:02}'
        seg_id = mailchimp.listStaticSegmentAdd(id=list_id, name=name)
        # Ceil-sized chunks can number fewer than `count`; indexing past the
        # end used to raise IndexError after the old segments were deleted.
        members = chunks[seg] if seg < len(chunks) else []
        for batch in chunk(members, BATCH_SIZE):
            mailchimp.listStaticSegmentMembersAdd(
                id=list_id,
                seg_id=seg_id,
                batch=batch
            )
def name_to_tag(name):
    """
    Convert the str `name` into a merge tag: keep at most the first 10
    characters, turn every space into `_`, then strip any remaining
    leading or trailing whitespace.
    """
    # Slicing is a no-op for names of 10 characters or fewer.
    truncated = name[:10]
    underscored = truncated.replace(' ', '_')
    return underscored.strip()
def chunk(elist, size):
    """
    Generator. Walks `elist` in steps of `size` and yields each slice in
    turn; the final slice is shorter when the length of `elist` is not a
    multiple of `size`.
    """
    starts = range(0, len(elist), size)
    yield from (elist[begin:begin + size] for begin in starts)