diff --git a/lms/djangoapps/mailing/__init__.py b/lms/djangoapps/mailing/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/lms/djangoapps/mailing/management/__init__.py b/lms/djangoapps/mailing/management/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/lms/djangoapps/mailing/management/commands/__init__.py b/lms/djangoapps/mailing/management/commands/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/lms/djangoapps/mailing/management/commands/mailchimp_id.py b/lms/djangoapps/mailing/management/commands/mailchimp_id.py new file mode 100644 index 0000000000..922d10b80d --- /dev/null +++ b/lms/djangoapps/mailing/management/commands/mailchimp_id.py @@ -0,0 +1,55 @@ +""" +mailchimp_id: Returns whether or not a given mailchimp key represents +a valid list. +""" +import sys +from optparse import make_option + +from django.core.management.base import BaseCommand, CommandError + +from mailsnake import MailSnake + + +class Command(BaseCommand): + """ + Given a mailchimp key, validates that a list with that key + exists in mailchimp. + """ + args = '' + help = 'Get the list id from a web_id' + + option_list = BaseCommand.option_list + ( + make_option('--key', action='store', help='mailchimp api key'), + make_option('--webid', action='store', dest='web_id', type=int, + help='mailchimp list web id'), + ) + + def parse_options(self, options): + """Parses `options` of the command.""" + if not options['key']: + raise CommandError('missing key') + + if not options['web_id']: + raise CommandError('missing list web id') + + return options['key'], options['web_id'] + + def handle(self, *args, **options): + """ + Validates that the id passed in exists in mailchimp. + """ + key, web_id = self.parse_options(options) + + mailchimp = MailSnake(key) + + lists = mailchimp.lists()['data'] + by_web_id = {l['web_id']: l for l in lists} + + list_with_id = by_web_id.get(web_id, None) + + if list_with_id: + print "id: {} for web_id: {}".format(list_with_id['id'], web_id) + print "list name: {}".format(list_with_id['name']) + else: + print "list with web_id: {} not found.".format(web_id) + sys.exit(1) diff --git a/lms/djangoapps/mailing/management/commands/mailchimp_sync_announcements.py b/lms/djangoapps/mailing/management/commands/mailchimp_sync_announcements.py new file mode 100644 index 0000000000..718f25b34d --- /dev/null +++ b/lms/djangoapps/mailing/management/commands/mailchimp_sync_announcements.py @@ -0,0 +1,79 @@ +""" +Synchronizes the announcement list with all active students. +""" +import logging +from optparse import make_option + +from django.core.management.base import BaseCommand, CommandError + +from django.contrib.auth.models import User + +from .mailchimp_sync_course import ( + connect_mailchimp, get_cleaned, + get_subscribed, get_unsubscribed, + subscribe_with_data +) + +log = logging.getLogger('edx.mailchimp') + + +class Command(BaseCommand): + """ + Synchronizes the announcement list with all active students. + """ + args = '' + help = 'Synchronizes the announcement list with all active students.' + + option_list = BaseCommand.option_list + ( + make_option('--key', action='store', help='mailchimp api key'), + make_option('--list', action='store', dest='list_id', + help='mailchimp list id'), + ) + + def parse_options(self, options): + """Parses `options` of the command.""" + if not options['key']: + raise CommandError('missing key') + + if not options['list_id']: + raise CommandError('missing list id') + + return (options['key'], options['list_id']) + + def handle(self, *args, **options): + key, list_id = self.parse_options(options) + + log.info('Syncronizing announcement mailing list') + + mailchimp = connect_mailchimp(key) + + subscribed = get_subscribed(mailchimp, list_id) + unsubscribed = get_unsubscribed(mailchimp, list_id) + cleaned = get_cleaned(mailchimp, list_id) + non_subscribed = unsubscribed.union(cleaned) + + enrolled = get_enrolled() + exclude = subscribed.union(non_subscribed) + to_subscribe = get_data(enrolled, exclude=exclude) + + subscribe_with_data(mailchimp, list_id, to_subscribe) + + +def get_enrolled(): + """ + Filter out all users who signed up via a Microsite, which UserSignupSource tracks + """ + ## TODO (Feanil) This grabs all inactive students and MUST be changed (or, could exclude inactive users in get_data) + return User.objects.raw('SELECT * FROM auth_user where id not in (SELECT user_id from student_usersignupsource)') + + +def get_data(users, exclude=None): + """ + users: set of Django users + exclude [optional]: set of Django users to exclude + + returns: {'EMAIL': u.email} for all users in users less those in `exclude` + """ + exclude = exclude if exclude else set() + emails = (u.email for u in users) + return ({'EMAIL': e} for e in emails if e not in exclude) diff --git a/lms/djangoapps/mailing/management/commands/mailchimp_sync_course.py b/lms/djangoapps/mailing/management/commands/mailchimp_sync_course.py new file mode 100644 index 0000000000..3a1919fbbe --- /dev/null +++ b/lms/djangoapps/mailing/management/commands/mailchimp_sync_course.py @@ -0,0 +1,368 @@ +""" +Synchronizes a mailchimp list with the students of a course. +""" +import logging +import math +import random +import itertools +from itertools import chain +from optparse import make_option +from collections import namedtuple + +from django.core.management.base import BaseCommand, CommandError + +from mailsnake import MailSnake + +from student.models import UserProfile, unique_id_for_user +from opaque_keys.edx.keys import CourseKey + + +BATCH_SIZE = 15000 +# If you try to subscribe with too many users at once +# the transaction times out on the mailchimp side. +SUBSCRIBE_BATCH_SIZE = 1000 + +log = logging.getLogger('edx.mailchimp') + +FIELD_TYPES = {'EDX_ID': 'text'} + + +class Command(BaseCommand): + """ + Synchronizes a mailchimp list with the students of a course. + """ + args = '' + help = 'Synchronizes a mailchimp list with the students of a course.' + + option_list = BaseCommand.option_list + ( + make_option('--key', action='store', help='mailchimp api key'), + make_option('--list', action='store', dest='list_id', + help='mailchimp list id'), + make_option('--course', action='store', dest='course_id', + help='xmodule course_id'), + + make_option('--segments', action='store', dest='segments', + default=0, type=int, + help='number of static random segments to create'), + ) + + def parse_options(self, options): + """Parses `options` of the command.""" + if not options['key']: + raise CommandError('missing key') + + if not options['list_id']: + raise CommandError('missing list id') + + if not options['course_id']: + raise CommandError('missing course id') + + return (options['key'], options['list_id'], + options['course_id'], options['segments']) + + def handle(self, *args, **options): + """Synchronizes a mailchimp list with the students of a course.""" + key, list_id, course_id, nsegments = self.parse_options(options) + + log.info('Syncronizing email list for %s', course_id) + + mailchimp = connect_mailchimp(key) + + subscribed = get_subscribed(mailchimp, list_id) + unsubscribed = get_unsubscribed(mailchimp, list_id) + cleaned = get_cleaned(mailchimp, list_id) + non_subscribed = unsubscribed.union(cleaned) + + enrolled = get_enrolled_students(course_id) + + exclude = subscribed.union(non_subscribed) + to_subscribe = get_student_data(enrolled, exclude=exclude) + + tag_names = set(chain.from_iterable(d.keys() for d in to_subscribe)) + update_merge_tags(mailchimp, list_id, tag_names) + + subscribe_with_data(mailchimp, list_id, to_subscribe) + + enrolled_emails = set(enrolled.values_list('user__email', flat=True)) + non_enrolled_emails = list(subscribed.difference(enrolled_emails)) + + unsubscribe(mailchimp, list_id, non_enrolled_emails) + + subscribed = subscribed.union(set(d['EMAIL'] for d in to_subscribe)) + make_segments(mailchimp, list_id, nsegments, subscribed) + + +def connect_mailchimp(api_key): + """ + Initializes connection to the mailchimp api + """ + mailchimp = MailSnake(api_key) + result = mailchimp.ping() + log.debug(result) + + return mailchimp + + +def verify_list(mailchimp, list_id, course_id): + """ + Verifies that the given list_id corresponds to the course_id + Returns boolean: whether or not course_id matches list_id + """ + lists = mailchimp.lists(filters={'list_id': list_id})['data'] + + if len(lists) != 1: + log.error('incorrect list id') + return False + + list_name = lists[0]['name'] + + log.debug('list name: %s', list_name) + + # check that we are connecting to the correct list + parts = course_id.replace('_', ' ').replace('/', ' ').split() + count = sum(1 for p in parts if p in list_name) + if count < 3: + log.info(course_id) + log.info(list_name) + log.error('course_id does not match list name') + return False + + return True + + +def get_student_data(students, exclude=None): + """ + Given a QuerySet of Django users, extracts id, username, and is_anonymous data. + Excludes any users provided in the optional `exclude` set. + + Returns a list of dictionaries for each user, where the dictionary has keys + 'EMAIL', 'FULLNAME', and 'EDX_ID'. + """ + # To speed the query, we won't retrieve the full User object, only + # two of its values. The namedtuple simulates the User object. + FakeUser = namedtuple('Fake', 'id username is_anonymous') # pylint: disable=invalid-name + + exclude = exclude if exclude else set() + + def make(svalue): + """ + Given a User value entry `svalue`, extracts the student's email and fullname, + and provides a unique id for the user. + + Returns a dictionary with keys 'EMAIL', 'FULLNAME', and 'EDX_ID'. + """ + fake_user = FakeUser(svalue['user_id'], svalue['user__username'], lambda: True) + + entry = { + 'EMAIL': svalue['user__email'], + 'FULLNAME': svalue['name'].title(), + 'EDX_ID': unique_id_for_user(fake_user) + } + + return entry + + fields = 'user__email', 'name', 'user_id', 'user__username' + values = students.values(*fields) + + # TODO: Since `students` is a QuerySet, can we chain a filter here that would be more + # performant than calling a lambda for every user? + exclude_func = lambda s: s['user__email'] in exclude + return [make(s) for s in values if not exclude_func(s)] + + +def get_enrolled_students(course_id): + """ + Given a course_id, returns a QuerySet of all the active students + in the course. + """ + objects = UserProfile.objects + course_key = CourseKey.from_string(course_id) + students = objects.filter(user__courseenrollment__course_id=course_key, + user__courseenrollment__is_active=True) + return students + + +def get_subscribed(mailchimp, list_id): + """Returns a set of email addresses subscribed to `list_id`""" + return get_members(mailchimp, list_id, 'subscribed') + + +def get_unsubscribed(mailchimp, list_id): + """Returns a set of email addresses that have unsubscribed from `list_id`""" + return get_members(mailchimp, list_id, 'unsubscribed') + + +def get_cleaned(mailchimp, list_id): + """ + Returns a set of email addresses that have been cleaned from `list_id` + + These email addresses may be invalid or have caused bounces, so you don't want + to re-add them back to the list. + """ + return get_members(mailchimp, list_id, 'cleaned') + + +def get_members(mailchimp, list_id, status): + """ + Given a mailchimp list id and a user status to filter on, returns all + members of the mailchimp list with that status. + + Returns a set of email addresses. + """ + mc_get_members = mailchimp.listMembers + members = set() + + for page in itertools.count(): + response = mc_get_members(id=list_id, + status=status, + start=page, + limit=BATCH_SIZE) + data = response.get('data', []) + + if not data: + break + + members.update(d['email'] for d in data) + + return members + + +def unsubscribe(mailchimp, list_id, emails): + """ + Batch unsubscribe the given email addresses from the list represented + by `list_id` + """ + batch_unsubscribe = mailchimp.listBatchUnsubscribe + result = batch_unsubscribe(id=list_id, + emails=emails, + send_goodbye=False, + delete_member=False) + log.debug(result) + + +def update_merge_tags(mailchimp, list_id, tag_names): + """ + This function is rather inscrutable. Given tag_names, which + in this code seems to be a list of ['FULLNAME', 'EMAIL', 'EDX_ID'], + we grab tags from the mailchimp list, then we verify tag_names has + 'FULLNAME' and 'EMAIL' present, we get more data from mailchimp, then + sync the variables up to mailchimp using `listMergeVarAdd`. + + The purpose of this function is unclear. + """ + mc_vars = mailchimp.listMergeVars(id=list_id) + mc_names = set(v['name'] for v in mc_vars) + + mc_merge = mailchimp.listMergeVarAdd + + tags = [v['tag'] for v in mc_vars] + + for name in tag_names: + tag = name_to_tag(name) + + # verify FULLNAME is present + # TODO: Why is this under the for loop? It does nothing with the loop + # variable and seems like things would work if this was executed before or + # after the loop. + if 'FULLNAME' not in tags: + result = mc_merge(id=list_id, + tag='FULLNAME', + name='Full Name', + options={'field_type': 'text', + 'public': False}) + tags.append('FULLNAME') + log.debug(result) + + # add extra tags if not present + if name not in mc_names and tag not in ['EMAIL', 'FULLNAME']: + ftype = FIELD_TYPES.get(name, 'number') + result = mc_merge(id=list_id, + tag=tag, + name=name, + options={'field_type': ftype, + 'public': False}) + tags.append(tag) + log.debug(result) + + +def subscribe_with_data(mailchimp, list_id, user_data): + """ + Given user_data in the form of a list of dictionaries for each user, + where the dictionary has keys 'EMAIL', 'FULLNAME', and 'EDX_ID', batch + subscribe the users to the given `list_id` via a Mailchimp api method. + + Returns None + """ + format_entry = lambda e: {name_to_tag(k): v for k, v in e.iteritems()} + formated_data = list(format_entry(e) for e in user_data) + + # send the updates in batches of a fixed size + for batch in chunk(formated_data, SUBSCRIBE_BATCH_SIZE): + result = mailchimp.listBatchSubscribe(id=list_id, + batch=batch, + double_optin=False, + update_existing=True) + + log.debug( + "Added: %s Error on: %s", result['add_count'], result['error_count'] + ) + + +def make_segments(mailchimp, list_id, count, emails): + """ + Segments the list of email addresses `emails` into `count` segments, + if count is nonzero. + + For unknown historical reasons, lost to the winds of time, this is done with + a random order to the email addresses. + + First, existing 'random_' mailchimp segments are deleted. + + Then, the list of emails (the whole, large list) is shuffled. + + Finally, the shuffled emails are chunked into `count` segments and re-uploaded + to mailchimp as 'random_'-prefixed segments. + """ + if count > 0: + # reset segments + segments = mailchimp.listStaticSegments(id=list_id) + for seg in segments: + if seg['name'].startswith('random'): + mailchimp.listStaticSegmentDel(id=list_id, seg_id=seg['id']) + + # shuffle and split emails + emails = list(emails) + random.shuffle(emails) # Why do we do this? + + chunk_size = int(math.ceil(float(len(emails)) / count)) + chunks = list(chunk(emails, chunk_size)) + + # create segments and add emails + for seg in xrange(count): + name = 'random_{0:002}'.format(seg) + seg_id = mailchimp.listStaticSegmentAdd(id=list_id, name=name) + for batch in chunk(chunks[seg], BATCH_SIZE): + mailchimp.listStaticSegmentMembersAdd( + id=list_id, + seg_id=seg_id, + batch=batch + ) + + +def name_to_tag(name): + """ + Returns sanitized str `name`: no more than 10 characters, + with spaces replaced with `_` + """ + if len(name) > 10: + name = name[:10] + return name.replace(' ', '_').strip() + + +def chunk(elist, size): + """ + Generator. Yields a list of size `size` of the given list `elist`, + or a shorter list if at the end of the input. + """ + for i in xrange(0, len(elist), size): + yield elist[i:i + size] diff --git a/lms/envs/common.py b/lms/envs/common.py index d349d691e9..ac4e49cb7a 100644 --- a/lms/envs/common.py +++ b/lms/envs/common.py @@ -1722,6 +1722,9 @@ INSTALLED_APPS = ( 'openedx.core.djangoapps.content.course_structures', 'course_structure_api', + # Mailchimp Syncing + 'mailing', + # CORS and cross-domain CSRF 'corsheaders', 'cors_csrf', diff --git a/requirements/edx/base.txt b/requirements/edx/base.txt index a3b6cdaeac..4656174ec2 100644 --- a/requirements/edx/base.txt +++ b/requirements/edx/base.txt @@ -149,3 +149,6 @@ testtools==0.9.34 # Used for Segment.io analytics analytics-python==0.4.4 + +# Needed for mailchimp(mailing djangoapp) +mailsnake==1.6.2