Merge pull request #7672 from edx/feanil/new_mailchimp_sync
Update Mailchimp sync scripts to be within platform
This commit is contained in:
0
lms/djangoapps/mailing/__init__.py
Normal file
0
lms/djangoapps/mailing/__init__.py
Normal file
0
lms/djangoapps/mailing/management/__init__.py
Normal file
0
lms/djangoapps/mailing/management/__init__.py
Normal file
55
lms/djangoapps/mailing/management/commands/mailchimp_id.py
Normal file
55
lms/djangoapps/mailing/management/commands/mailchimp_id.py
Normal file
@@ -0,0 +1,55 @@
|
||||
"""
|
||||
mailchimp_id: Returns whether or not a given mailchimp key represents
|
||||
a valid list.
|
||||
"""
|
||||
import sys
|
||||
from optparse import make_option
|
||||
|
||||
from django.core.management.base import BaseCommand, CommandError
|
||||
|
||||
from mailsnake import MailSnake
|
||||
|
||||
|
||||
class Command(BaseCommand):
|
||||
"""
|
||||
Given a mailchimp key, validates that a list with that key
|
||||
exists in mailchimp.
|
||||
"""
|
||||
args = '<mailchimp_key web_id>'
|
||||
help = 'Get the list id from a web_id'
|
||||
|
||||
option_list = BaseCommand.option_list + (
|
||||
make_option('--key', action='store', help='mailchimp api key'),
|
||||
make_option('--webid', action='store', dest='web_id', type=int,
|
||||
help='mailchimp list web id'),
|
||||
)
|
||||
|
||||
def parse_options(self, options):
|
||||
"""Parses `options` of the command."""
|
||||
if not options['key']:
|
||||
raise CommandError('missing key')
|
||||
|
||||
if not options['web_id']:
|
||||
raise CommandError('missing list web id')
|
||||
|
||||
return options['key'], options['web_id']
|
||||
|
||||
def handle(self, *args, **options):
|
||||
"""
|
||||
Validates that the id passed in exists in mailchimp.
|
||||
"""
|
||||
key, web_id = self.parse_options(options)
|
||||
|
||||
mailchimp = MailSnake(key)
|
||||
|
||||
lists = mailchimp.lists()['data']
|
||||
by_web_id = {l['web_id']: l for l in lists}
|
||||
|
||||
list_with_id = by_web_id.get(web_id, None)
|
||||
|
||||
if list_with_id:
|
||||
print "id: {} for web_id: {}".format(list_with_id['id'], web_id)
|
||||
print "list name: {}".format(list_with_id['name'])
|
||||
else:
|
||||
print "list with web_id: {} not found.".format(web_id)
|
||||
sys.exit(1)
|
||||
@@ -0,0 +1,79 @@
|
||||
"""
|
||||
Synchronizes the announcement list with all active students.
|
||||
"""
|
||||
import logging
|
||||
from optparse import make_option
|
||||
|
||||
from django.core.management.base import BaseCommand, CommandError
|
||||
|
||||
from django.contrib.auth.models import User
|
||||
|
||||
from .mailchimp_sync_course import (
|
||||
connect_mailchimp, get_cleaned,
|
||||
get_subscribed, get_unsubscribed,
|
||||
subscribe_with_data
|
||||
)
|
||||
|
||||
log = logging.getLogger('edx.mailchimp')
|
||||
|
||||
|
||||
class Command(BaseCommand):
|
||||
"""
|
||||
Synchronizes the announcement list with all active students.
|
||||
"""
|
||||
args = '<mailchimp_key mailchimp_list course_id>'
|
||||
help = 'Synchronizes the announcement list with all active students.'
|
||||
|
||||
option_list = BaseCommand.option_list + (
|
||||
make_option('--key', action='store', help='mailchimp api key'),
|
||||
make_option('--list', action='store', dest='list_id',
|
||||
help='mailchimp list id'),
|
||||
)
|
||||
|
||||
def parse_options(self, options):
|
||||
"""Parses `options` of the command."""
|
||||
if not options['key']:
|
||||
raise CommandError('missing key')
|
||||
|
||||
if not options['list_id']:
|
||||
raise CommandError('missing list id')
|
||||
|
||||
return (options['key'], options['list_id'])
|
||||
|
||||
def handle(self, *args, **options):
|
||||
key, list_id = self.parse_options(options)
|
||||
|
||||
log.info('Syncronizing announcement mailing list')
|
||||
|
||||
mailchimp = connect_mailchimp(key)
|
||||
|
||||
subscribed = get_subscribed(mailchimp, list_id)
|
||||
unsubscribed = get_unsubscribed(mailchimp, list_id)
|
||||
cleaned = get_cleaned(mailchimp, list_id)
|
||||
non_subscribed = unsubscribed.union(cleaned)
|
||||
|
||||
enrolled = get_enrolled()
|
||||
exclude = subscribed.union(non_subscribed)
|
||||
to_subscribe = get_data(enrolled, exclude=exclude)
|
||||
|
||||
subscribe_with_data(mailchimp, list_id, to_subscribe)
|
||||
|
||||
|
||||
def get_enrolled():
|
||||
"""
|
||||
Filter out all users who signed up via a Microsite, which UserSignupSource tracks
|
||||
"""
|
||||
## TODO (Feanil) This grabs all inactive students and MUST be changed (or, could exclude inactive users in get_data)
|
||||
return User.objects.raw('SELECT * FROM auth_user where id not in (SELECT user_id from student_usersignupsource)')
|
||||
|
||||
|
||||
def get_data(users, exclude=None):
|
||||
"""
|
||||
users: set of Django users
|
||||
exclude [optional]: set of Django users to exclude
|
||||
|
||||
returns: {'EMAIL': u.email} for all users in users less those in `exclude`
|
||||
"""
|
||||
exclude = exclude if exclude else set()
|
||||
emails = (u.email for u in users)
|
||||
return ({'EMAIL': e} for e in emails if e not in exclude)
|
||||
@@ -0,0 +1,368 @@
|
||||
"""
|
||||
Synchronizes a mailchimp list with the students of a course.
|
||||
"""
|
||||
import logging
|
||||
import math
|
||||
import random
|
||||
import itertools
|
||||
from itertools import chain
|
||||
from optparse import make_option
|
||||
from collections import namedtuple
|
||||
|
||||
from django.core.management.base import BaseCommand, CommandError
|
||||
|
||||
from mailsnake import MailSnake
|
||||
|
||||
from student.models import UserProfile, unique_id_for_user
|
||||
from opaque_keys.edx.keys import CourseKey
|
||||
|
||||
|
||||
BATCH_SIZE = 15000
|
||||
# If you try to subscribe with too many users at once
|
||||
# the transaction times out on the mailchimp side.
|
||||
SUBSCRIBE_BATCH_SIZE = 1000
|
||||
|
||||
log = logging.getLogger('edx.mailchimp')
|
||||
|
||||
FIELD_TYPES = {'EDX_ID': 'text'}
|
||||
|
||||
|
||||
class Command(BaseCommand):
|
||||
"""
|
||||
Synchronizes a mailchimp list with the students of a course.
|
||||
"""
|
||||
args = '<mailchimp_key mailchimp_list course_id>'
|
||||
help = 'Synchronizes a mailchimp list with the students of a course.'
|
||||
|
||||
option_list = BaseCommand.option_list + (
|
||||
make_option('--key', action='store', help='mailchimp api key'),
|
||||
make_option('--list', action='store', dest='list_id',
|
||||
help='mailchimp list id'),
|
||||
make_option('--course', action='store', dest='course_id',
|
||||
help='xmodule course_id'),
|
||||
|
||||
make_option('--segments', action='store', dest='segments',
|
||||
default=0, type=int,
|
||||
help='number of static random segments to create'),
|
||||
)
|
||||
|
||||
def parse_options(self, options):
|
||||
"""Parses `options` of the command."""
|
||||
if not options['key']:
|
||||
raise CommandError('missing key')
|
||||
|
||||
if not options['list_id']:
|
||||
raise CommandError('missing list id')
|
||||
|
||||
if not options['course_id']:
|
||||
raise CommandError('missing course id')
|
||||
|
||||
return (options['key'], options['list_id'],
|
||||
options['course_id'], options['segments'])
|
||||
|
||||
def handle(self, *args, **options):
|
||||
"""Synchronizes a mailchimp list with the students of a course."""
|
||||
key, list_id, course_id, nsegments = self.parse_options(options)
|
||||
|
||||
log.info('Syncronizing email list for %s', course_id)
|
||||
|
||||
mailchimp = connect_mailchimp(key)
|
||||
|
||||
subscribed = get_subscribed(mailchimp, list_id)
|
||||
unsubscribed = get_unsubscribed(mailchimp, list_id)
|
||||
cleaned = get_cleaned(mailchimp, list_id)
|
||||
non_subscribed = unsubscribed.union(cleaned)
|
||||
|
||||
enrolled = get_enrolled_students(course_id)
|
||||
|
||||
exclude = subscribed.union(non_subscribed)
|
||||
to_subscribe = get_student_data(enrolled, exclude=exclude)
|
||||
|
||||
tag_names = set(chain.from_iterable(d.keys() for d in to_subscribe))
|
||||
update_merge_tags(mailchimp, list_id, tag_names)
|
||||
|
||||
subscribe_with_data(mailchimp, list_id, to_subscribe)
|
||||
|
||||
enrolled_emails = set(enrolled.values_list('user__email', flat=True))
|
||||
non_enrolled_emails = list(subscribed.difference(enrolled_emails))
|
||||
|
||||
unsubscribe(mailchimp, list_id, non_enrolled_emails)
|
||||
|
||||
subscribed = subscribed.union(set(d['EMAIL'] for d in to_subscribe))
|
||||
make_segments(mailchimp, list_id, nsegments, subscribed)
|
||||
|
||||
|
||||
def connect_mailchimp(api_key):
|
||||
"""
|
||||
Initializes connection to the mailchimp api
|
||||
"""
|
||||
mailchimp = MailSnake(api_key)
|
||||
result = mailchimp.ping()
|
||||
log.debug(result)
|
||||
|
||||
return mailchimp
|
||||
|
||||
|
||||
def verify_list(mailchimp, list_id, course_id):
|
||||
"""
|
||||
Verifies that the given list_id corresponds to the course_id
|
||||
Returns boolean: whether or not course_id matches list_id
|
||||
"""
|
||||
lists = mailchimp.lists(filters={'list_id': list_id})['data']
|
||||
|
||||
if len(lists) != 1:
|
||||
log.error('incorrect list id')
|
||||
return False
|
||||
|
||||
list_name = lists[0]['name']
|
||||
|
||||
log.debug('list name: %s', list_name)
|
||||
|
||||
# check that we are connecting to the correct list
|
||||
parts = course_id.replace('_', ' ').replace('/', ' ').split()
|
||||
count = sum(1 for p in parts if p in list_name)
|
||||
if count < 3:
|
||||
log.info(course_id)
|
||||
log.info(list_name)
|
||||
log.error('course_id does not match list name')
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
|
||||
def get_student_data(students, exclude=None):
|
||||
"""
|
||||
Given a QuerySet of Django users, extracts id, username, and is_anonymous data.
|
||||
Excludes any users provided in the optional `exclude` set.
|
||||
|
||||
Returns a list of dictionaries for each user, where the dictionary has keys
|
||||
'EMAIL', 'FULLNAME', and 'EDX_ID'.
|
||||
"""
|
||||
# To speed the query, we won't retrieve the full User object, only
|
||||
# two of its values. The namedtuple simulates the User object.
|
||||
FakeUser = namedtuple('Fake', 'id username is_anonymous') # pylint: disable=invalid-name
|
||||
|
||||
exclude = exclude if exclude else set()
|
||||
|
||||
def make(svalue):
|
||||
"""
|
||||
Given a User value entry `svalue`, extracts the student's email and fullname,
|
||||
and provides a unique id for the user.
|
||||
|
||||
Returns a dictionary with keys 'EMAIL', 'FULLNAME', and 'EDX_ID'.
|
||||
"""
|
||||
fake_user = FakeUser(svalue['user_id'], svalue['user__username'], lambda: True)
|
||||
|
||||
entry = {
|
||||
'EMAIL': svalue['user__email'],
|
||||
'FULLNAME': svalue['name'].title(),
|
||||
'EDX_ID': unique_id_for_user(fake_user)
|
||||
}
|
||||
|
||||
return entry
|
||||
|
||||
fields = 'user__email', 'name', 'user_id', 'user__username'
|
||||
values = students.values(*fields)
|
||||
|
||||
# TODO: Since `students` is a QuerySet, can we chain a filter here that would be more
|
||||
# performant than calling a lambda for every user?
|
||||
exclude_func = lambda s: s['user__email'] in exclude
|
||||
return [make(s) for s in values if not exclude_func(s)]
|
||||
|
||||
|
||||
def get_enrolled_students(course_id):
|
||||
"""
|
||||
Given a course_id, returns a QuerySet of all the active students
|
||||
in the course.
|
||||
"""
|
||||
objects = UserProfile.objects
|
||||
course_key = CourseKey.from_string(course_id)
|
||||
students = objects.filter(user__courseenrollment__course_id=course_key,
|
||||
user__courseenrollment__is_active=True)
|
||||
return students
|
||||
|
||||
|
||||
def get_subscribed(mailchimp, list_id):
|
||||
"""Returns a set of email addresses subscribed to `list_id`"""
|
||||
return get_members(mailchimp, list_id, 'subscribed')
|
||||
|
||||
|
||||
def get_unsubscribed(mailchimp, list_id):
|
||||
"""Returns a set of email addresses that have unsubscribed from `list_id`"""
|
||||
return get_members(mailchimp, list_id, 'unsubscribed')
|
||||
|
||||
|
||||
def get_cleaned(mailchimp, list_id):
|
||||
"""
|
||||
Returns a set of email addresses that have been cleaned from `list_id`
|
||||
|
||||
These email addresses may be invalid or have caused bounces, so you don't want
|
||||
to re-add them back to the list.
|
||||
"""
|
||||
return get_members(mailchimp, list_id, 'cleaned')
|
||||
|
||||
|
||||
def get_members(mailchimp, list_id, status):
|
||||
"""
|
||||
Given a mailchimp list id and a user status to filter on, returns all
|
||||
members of the mailchimp list with that status.
|
||||
|
||||
Returns a set of email addresses.
|
||||
"""
|
||||
mc_get_members = mailchimp.listMembers
|
||||
members = set()
|
||||
|
||||
for page in itertools.count():
|
||||
response = mc_get_members(id=list_id,
|
||||
status=status,
|
||||
start=page,
|
||||
limit=BATCH_SIZE)
|
||||
data = response.get('data', [])
|
||||
|
||||
if not data:
|
||||
break
|
||||
|
||||
members.update(d['email'] for d in data)
|
||||
|
||||
return members
|
||||
|
||||
|
||||
def unsubscribe(mailchimp, list_id, emails):
|
||||
"""
|
||||
Batch unsubscribe the given email addresses from the list represented
|
||||
by `list_id`
|
||||
"""
|
||||
batch_unsubscribe = mailchimp.listBatchUnsubscribe
|
||||
result = batch_unsubscribe(id=list_id,
|
||||
emails=emails,
|
||||
send_goodbye=False,
|
||||
delete_member=False)
|
||||
log.debug(result)
|
||||
|
||||
|
||||
def update_merge_tags(mailchimp, list_id, tag_names):
|
||||
"""
|
||||
This function is rather inscrutable. Given tag_names, which
|
||||
in this code seems to be a list of ['FULLNAME', 'EMAIL', 'EDX_ID'],
|
||||
we grab tags from the mailchimp list, then we verify tag_names has
|
||||
'FULLNAME' and 'EMAIL' present, we get more data from mailchimp, then
|
||||
sync the variables up to mailchimp using `listMergeVarAdd`.
|
||||
|
||||
The purpose of this function is unclear.
|
||||
"""
|
||||
mc_vars = mailchimp.listMergeVars(id=list_id)
|
||||
mc_names = set(v['name'] for v in mc_vars)
|
||||
|
||||
mc_merge = mailchimp.listMergeVarAdd
|
||||
|
||||
tags = [v['tag'] for v in mc_vars]
|
||||
|
||||
for name in tag_names:
|
||||
tag = name_to_tag(name)
|
||||
|
||||
# verify FULLNAME is present
|
||||
# TODO: Why is this under the for loop? It does nothing with the loop
|
||||
# variable and seems like things would work if this was executed before or
|
||||
# after the loop.
|
||||
if 'FULLNAME' not in tags:
|
||||
result = mc_merge(id=list_id,
|
||||
tag='FULLNAME',
|
||||
name='Full Name',
|
||||
options={'field_type': 'text',
|
||||
'public': False})
|
||||
tags.append('FULLNAME')
|
||||
log.debug(result)
|
||||
|
||||
# add extra tags if not present
|
||||
if name not in mc_names and tag not in ['EMAIL', 'FULLNAME']:
|
||||
ftype = FIELD_TYPES.get(name, 'number')
|
||||
result = mc_merge(id=list_id,
|
||||
tag=tag,
|
||||
name=name,
|
||||
options={'field_type': ftype,
|
||||
'public': False})
|
||||
tags.append(tag)
|
||||
log.debug(result)
|
||||
|
||||
|
||||
def subscribe_with_data(mailchimp, list_id, user_data):
|
||||
"""
|
||||
Given user_data in the form of a list of dictionaries for each user,
|
||||
where the dictionary has keys 'EMAIL', 'FULLNAME', and 'EDX_ID', batch
|
||||
subscribe the users to the given `list_id` via a Mailchimp api method.
|
||||
|
||||
Returns None
|
||||
"""
|
||||
format_entry = lambda e: {name_to_tag(k): v for k, v in e.iteritems()}
|
||||
formated_data = list(format_entry(e) for e in user_data)
|
||||
|
||||
# send the updates in batches of a fixed size
|
||||
for batch in chunk(formated_data, SUBSCRIBE_BATCH_SIZE):
|
||||
result = mailchimp.listBatchSubscribe(id=list_id,
|
||||
batch=batch,
|
||||
double_optin=False,
|
||||
update_existing=True)
|
||||
|
||||
log.debug(
|
||||
"Added: %s Error on: %s", result['add_count'], result['error_count']
|
||||
)
|
||||
|
||||
|
||||
def make_segments(mailchimp, list_id, count, emails):
|
||||
"""
|
||||
Segments the list of email addresses `emails` into `count` segments,
|
||||
if count is nonzero.
|
||||
|
||||
For unknown historical reasons, lost to the winds of time, this is done with
|
||||
a random order to the email addresses.
|
||||
|
||||
First, existing 'random_' mailchimp segments are deleted.
|
||||
|
||||
Then, the list of emails (the whole, large list) is shuffled.
|
||||
|
||||
Finally, the shuffled emails are chunked into `count` segments and re-uploaded
|
||||
to mailchimp as 'random_'-prefixed segments.
|
||||
"""
|
||||
if count > 0:
|
||||
# reset segments
|
||||
segments = mailchimp.listStaticSegments(id=list_id)
|
||||
for seg in segments:
|
||||
if seg['name'].startswith('random'):
|
||||
mailchimp.listStaticSegmentDel(id=list_id, seg_id=seg['id'])
|
||||
|
||||
# shuffle and split emails
|
||||
emails = list(emails)
|
||||
random.shuffle(emails) # Why do we do this?
|
||||
|
||||
chunk_size = int(math.ceil(float(len(emails)) / count))
|
||||
chunks = list(chunk(emails, chunk_size))
|
||||
|
||||
# create segments and add emails
|
||||
for seg in xrange(count):
|
||||
name = 'random_{0:002}'.format(seg)
|
||||
seg_id = mailchimp.listStaticSegmentAdd(id=list_id, name=name)
|
||||
for batch in chunk(chunks[seg], BATCH_SIZE):
|
||||
mailchimp.listStaticSegmentMembersAdd(
|
||||
id=list_id,
|
||||
seg_id=seg_id,
|
||||
batch=batch
|
||||
)
|
||||
|
||||
|
||||
def name_to_tag(name):
|
||||
"""
|
||||
Returns sanitized str `name`: no more than 10 characters,
|
||||
with spaces replaced with `_`
|
||||
"""
|
||||
if len(name) > 10:
|
||||
name = name[:10]
|
||||
return name.replace(' ', '_').strip()
|
||||
|
||||
|
||||
def chunk(elist, size):
|
||||
"""
|
||||
Generator. Yields a list of size `size` of the given list `elist`,
|
||||
or a shorter list if at the end of the input.
|
||||
"""
|
||||
for i in xrange(0, len(elist), size):
|
||||
yield elist[i:i + size]
|
||||
@@ -1722,6 +1722,9 @@ INSTALLED_APPS = (
|
||||
'openedx.core.djangoapps.content.course_structures',
|
||||
'course_structure_api',
|
||||
|
||||
# Mailchimp Syncing
|
||||
'mailing',
|
||||
|
||||
# CORS and cross-domain CSRF
|
||||
'corsheaders',
|
||||
'cors_csrf',
|
||||
|
||||
@@ -149,3 +149,6 @@ testtools==0.9.34
|
||||
|
||||
# Used for Segment.io analytics
|
||||
analytics-python==0.4.4
|
||||
|
||||
# Needed for mailchimp(mailing djangoapp)
|
||||
mailsnake==1.6.2
|
||||
|
||||
Reference in New Issue
Block a user