Ran pyupgrade on lms/djangoapps/mailing Ran pyupgrade on lms/djangoapps/mobile_api Ran pyupgrade on lms/djangoapps/monitoring
362 lines
12 KiB
Python
362 lines
12 KiB
Python
"""
|
|
Synchronizes a mailchimp list with the students of a course.
|
|
"""
|
|
|
|
|
|
import itertools
|
|
import logging
|
|
import math
|
|
import random
|
|
from collections import namedtuple
|
|
from itertools import chain
|
|
|
|
from django.core.management.base import BaseCommand
|
|
from mailsnake import MailSnake
|
|
from opaque_keys.edx.keys import CourseKey
|
|
|
|
from common.djangoapps.student.models import UserProfile, unique_id_for_user
|
|
|
|
# Page size requested from mailchimp when listing members (get_members) and
# the batch size used when adding members to a static segment (make_segments).
BATCH_SIZE = 15000
# If you try to subscribe with too many users at once
# the transaction times out on the mailchimp side.
SUBSCRIBE_BATCH_SIZE = 1000

# Logger shared by every function in this module.
log = logging.getLogger('edx.mailchimp')

# Mailchimp field type to use for a merge var, keyed by merge var name.
# update_merge_tags falls back to 'number' for names that are not listed here.
FIELD_TYPES = {'EDX_ID': 'text'}
|
|
|
|
|
|
class Command(BaseCommand):
    """
    Management command that brings a mailchimp list in line with the
    active enrollment of a single course.
    """
    help = 'Synchronizes a mailchimp list with the students of a course.'

    def add_arguments(self, parser):
        """Register the --key, --list, --course and --segments options."""
        option_specs = (
            ('--key', {
                'required': True,
                'help': 'mailchimp api key',
            }),
            ('--list', {
                'dest': 'list_id',
                'required': True,
                'help': 'mailchimp list id',
            }),
            ('--course', {
                'dest': 'course_id',
                'required': True,
                'help': 'edx course_id',
            }),
            ('--segments', {
                'dest': 'num_segments',
                'type': int,
                'default': 0,
                'help': 'number of static random segments to create',
            }),
        )
        for flag, spec in option_specs:
            parser.add_argument(flag, **spec)

    def handle(self, *args, **options):
        """
        Run the synchronization.

        Steps, in order: read the current list membership from mailchimp,
        subscribe enrolled students mailchimp does not know about yet,
        unsubscribe subscribers who are not enrolled, then rebuild the
        random static segments (only when --segments is greater than zero).
        """
        api_key = options['key']
        list_id = options['list_id']
        course_id = options['course_id']
        segment_count = options['num_segments']

        log.info('Syncronizing email list for %s', course_id)

        mailchimp = connect_mailchimp(api_key)

        # Current state of the list, by member status.
        subscribed = get_subscribed(mailchimp, list_id)
        unsubscribed = get_unsubscribed(mailchimp, list_id)
        cleaned = get_cleaned(mailchimp, list_id)

        enrolled = get_enrolled_students(course_id)

        # Anyone mailchimp already knows about, in any status, is not
        # subscribed again.
        already_known = subscribed.union(unsubscribed, cleaned)
        to_subscribe = get_student_data(enrolled, exclude=already_known)

        # Every key used by the outgoing entries must exist as a merge var.
        tag_names = {key for entry in to_subscribe for key in entry}
        update_merge_tags(mailchimp, list_id, tag_names)

        subscribe_with_data(mailchimp, list_id, to_subscribe)

        # Subscribers that are not (or no longer) enrolled get unsubscribed.
        enrolled_emails = set(enrolled.values_list('user__email', flat=True))
        non_enrolled_emails = list(subscribed - enrolled_emails)
        unsubscribe(mailchimp, list_id, non_enrolled_emails)

        # Segment over the previously subscribed addresses plus the new ones.
        newly_subscribed = {entry['EMAIL'] for entry in to_subscribe}
        subscribed = subscribed.union(newly_subscribed)
        make_segments(mailchimp, list_id, segment_count, subscribed)
|
|
|
|
|
|
def connect_mailchimp(api_key):
    """
    Build a MailSnake client for `api_key` and ping the API once.

    The ping response is logged at debug level. Returns the client.
    """
    client = MailSnake(api_key)
    log.debug(client.ping())
    return client
|
|
|
|
|
|
def verify_list(mailchimp, list_id, course_id):
    """
    Check that the mailchimp list `list_id` plausibly belongs to `course_id`.

    The course id is split on '_', '/' and whitespace; the list is accepted
    when at least three of the resulting parts occur in the list's name.

    Returns True on a match. Returns False (after logging an error) when the
    list lookup does not yield exactly one list or the name does not match.
    """
    found = mailchimp.lists(filters={'list_id': list_id})['data']
    if len(found) != 1:
        log.error('incorrect list id')
        return False

    list_name = found[0]['name']
    log.debug('list name: %s', list_name)

    # check that we are connecting to the correct list
    tokens = course_id.replace('_', ' ').replace('/', ' ').split()
    hits = len([token for token in tokens if token in list_name])
    if hits >= 3:
        return True

    log.info(course_id)
    log.info(list_name)
    log.error('course_id does not match list name')
    return False
|
|
|
|
|
|
def get_student_data(students, exclude=None):
    """
    Build the mailchimp payload entries for `students`.

    `students` must expose `.values(*fields)` yielding dicts with the keys
    'user__email', 'name', 'user_id' and 'user__username' (in this module it
    is the QuerySet returned by get_enrolled_students).

    Rows whose email is in the optional `exclude` collection are skipped.

    Returns a list of dictionaries, one per remaining row, with keys
    'EMAIL', 'FULLNAME', and 'EDX_ID'.
    """
    # Only a few columns are fetched rather than full User objects, so a
    # namedtuple stands in for the User passed to unique_id_for_user.
    FakeUser = namedtuple('Fake', 'id username is_anonymous')

    skipped_emails = exclude or set()
    rows = students.values('user__email', 'name', 'user_id', 'user__username')

    # TODO: Since `students` is a QuerySet, can we chain a filter here that would be more
    # performant than testing every row in Python?
    entries = []
    for row in rows:
        if row['user__email'] in skipped_emails:
            continue

        stand_in = FakeUser(row['user_id'], row['user__username'], lambda: True)
        entries.append({
            'EMAIL': row['user__email'],
            'FULLNAME': row['name'].title(),
            'EDX_ID': unique_id_for_user(stand_in),
        })

    return entries
|
|
|
|
|
|
def get_enrolled_students(course_id):
    """
    Look up the students actively enrolled in a course.

    `course_id` is a course id string; it is parsed with CourseKey.from_string.

    Returns a UserProfile QuerySet restricted to users that have an active
    enrollment in that course.
    """
    profiles = UserProfile.objects
    active_enrollment = {
        'user__courseenrollment__course_id': CourseKey.from_string(course_id),
        'user__courseenrollment__is_active': True,
    }
    return profiles.filter(**active_enrollment)
|
|
|
|
|
|
def get_subscribed(mailchimp, list_id):
    """Fetch the set of email addresses with status 'subscribed' on `list_id`"""
    subscribed_emails = get_members(mailchimp, list_id, 'subscribed')
    return subscribed_emails
|
|
|
|
|
|
def get_unsubscribed(mailchimp, list_id):
    """Fetch the set of email addresses with status 'unsubscribed' on `list_id`"""
    unsubscribed_emails = get_members(mailchimp, list_id, 'unsubscribed')
    return unsubscribed_emails
|
|
|
|
|
|
def get_cleaned(mailchimp, list_id):
    """
    Fetch the set of email addresses with status 'cleaned' on `list_id`

    Cleaned addresses may be invalid or may have bounced, so callers should
    not add them back to the list.
    """
    cleaned_emails = get_members(mailchimp, list_id, 'cleaned')
    return cleaned_emails
|
|
|
|
|
|
def get_members(mailchimp, list_id, status):
    """
    Collect every member of the mailchimp list `list_id` that has the given
    `status`.

    Pages of up to BATCH_SIZE members are requested, starting at page 0,
    until a response carries no 'data'.

    Returns a set of email addresses.
    """
    fetch_page = mailchimp.listMembers
    emails = set()
    page = 0

    while True:
        response = fetch_page(id=list_id, status=status, start=page, limit=BATCH_SIZE)
        rows = response.get('data', [])
        if not rows:
            # An empty (or missing) page marks the end of the listing.
            return emails

        emails.update(row['email'] for row in rows)
        page += 1
|
|
|
|
|
|
def unsubscribe(mailchimp, list_id, emails):
    """
    Unsubscribe all of `emails` from the list `list_id` in one batch call.

    No goodbye email is sent and the members are not deleted from the list.
    The API response is logged at debug level.
    """
    request = {
        'id': list_id,
        'emails': emails,
        'send_goodbye': False,
        'delete_member': False,
    }
    response = mailchimp.listBatchUnsubscribe(**request)
    log.debug(response)
|
|
|
|
|
|
def update_merge_tags(mailchimp, list_id, tag_names):
    """
    Make sure the list `list_id` has a merge var for every name in `tag_names`.

    The list's current merge vars are read once. Then, for each name:

    * a 'FULLNAME' text merge var is added if the list has no such tag yet;
    * a merge var for the name itself is added, unless a merge var with that
      name already exists or its tag is 'EMAIL' or 'FULLNAME'. The field type
      comes from FIELD_TYPES, defaulting to 'number'.

    All merge vars are created non-public. Nothing is added when `tag_names`
    is empty. Returns None.
    """
    existing_vars = mailchimp.listMergeVars(id=list_id)
    existing_names = {var['name'] for var in existing_vars}

    add_merge_var = mailchimp.listMergeVarAdd

    known_tags = [var['tag'] for var in existing_vars]

    for name in tag_names:
        tag = name_to_tag(name)

        # verify FULLNAME is present
        # TODO: This does not use the loop variable; it only lives in the
        # loop so that nothing is added when `tag_names` is empty.
        if 'FULLNAME' not in known_tags:
            fullname_options = {'field_type': 'text', 'public': False}
            result = add_merge_var(id=list_id,
                                   tag='FULLNAME',
                                   name='Full Name',
                                   options=fullname_options)
            known_tags.append('FULLNAME')
            log.debug(result)

        # add extra tags if not present
        if name in existing_names or tag in ['EMAIL', 'FULLNAME']:
            continue

        field_type = FIELD_TYPES.get(name, 'number')
        result = add_merge_var(id=list_id,
                               tag=tag,
                               name=name,
                               options={'field_type': field_type, 'public': False})
        known_tags.append(tag)
        log.debug(result)
|
|
|
|
|
|
def subscribe_with_data(mailchimp, list_id, user_data):
    """
    Batch subscribe users to the list `list_id`.

    `user_data` is a list of dictionaries, one per user, with keys 'EMAIL',
    'FULLNAME', and 'EDX_ID'. Every key is converted with name_to_tag before
    upload. Entries are sent in batches of SUBSCRIBE_BATCH_SIZE, without
    double opt-in and updating members that already exist.

    Returns None
    """
    tagged_entries = [
        {name_to_tag(field): value for field, value in entry.items()}
        for entry in user_data
    ]

    # send the updates in batches of a fixed size
    for batch in chunk(tagged_entries, SUBSCRIBE_BATCH_SIZE):
        outcome = mailchimp.listBatchSubscribe(
            id=list_id,
            batch=batch,
            double_optin=False,
            update_existing=True,
        )
        added = outcome['add_count']
        failed = outcome['error_count']
        log.debug("Added: %s Error on: %s", added, failed)
|
|
|
|
|
|
def make_segments(mailchimp, list_id, count, emails):
    """
    Rebuild the random static segments of the mailchimp list `list_id`.

    Nothing happens unless `count` is greater than zero. Otherwise:

    First, every existing static segment whose name starts with 'random'
    is deleted.

    Then, `emails` is copied into a list and shuffled. (Why the order is
    randomized is not recorded anywhere.)

    Finally, the shuffled emails are split into chunks of
    ceil(len(emails) / count) addresses, `count` segments named
    'random_00', 'random_01', ... are created, and each chunk is uploaded
    to its segment in batches of BATCH_SIZE.

    Rounding the chunk size up can produce fewer than `count` chunks (for
    example 5 emails in 4 segments gives chunks of 2, 2 and 1). The
    remaining segments are still created but stay empty; the same holds for
    every segment when `emails` is empty.

    Arguments:
        mailchimp: mailchimp API client
        list_id: id of the mailchimp list
        count (int): number of segments to create
        emails: iterable of email addresses to distribute

    Returns None
    """
    if count <= 0:
        return

    # reset segments
    for seg in mailchimp.listStaticSegments(id=list_id):
        if seg['name'].startswith('random'):
            mailchimp.listStaticSegmentDel(id=list_id, seg_id=seg['id'])

    # shuffle and split emails
    emails = list(emails)
    random.shuffle(emails)  # Why do we do this?

    # chunk() steps with range(), which rejects a step of 0, so an empty
    # email list still needs a chunk size of at least 1.
    chunk_size = max(1, int(math.ceil(float(len(emails)) / count)))
    chunks = list(chunk(emails, chunk_size))

    # create segments and add emails
    for seg in range(count):
        name = f'random_{seg:002}'
        seg_id = mailchimp.listStaticSegmentAdd(id=list_id, name=name)

        # There may be fewer chunks than segments; see the docstring.
        members = chunks[seg] if seg < len(chunks) else []
        for batch in chunk(members, BATCH_SIZE):
            mailchimp.listStaticSegmentMembersAdd(
                id=list_id,
                seg_id=seg_id,
                batch=batch
            )
|
|
|
|
|
|
def name_to_tag(name):
    """
    Turn the str `name` into a merge tag.

    Keeps at most the first 10 characters, replaces every space with `_`,
    then strips any leading or trailing whitespace that is left.
    """
    truncated = name[:10]
    underscored = truncated.replace(' ', '_')
    return underscored.strip()
|
|
|
|
|
|
def chunk(elist, size):
    """
    Generator. Walks `elist` from the start and yields consecutive slices
    of length `size`; the last slice is shorter when the input runs out.
    """
    starts = range(0, len(elist), size)
    for begin in starts:
        end = begin + size
        yield elist[begin:end]
|