From 5c0adf04ad50355755b5c1bbe53f771882b3736b Mon Sep 17 00:00:00 2001 From: Feanil Patel Date: Thu, 19 Feb 2015 16:25:17 -0500 Subject: [PATCH 1/6] Move mailchimp to a newer version of the edxapp. --- lms/djangoapps/mailing/__init__.py | 0 lms/djangoapps/mailing/management/__init__.py | 0 .../mailing/management/commands/__init__.py | 0 .../management/commands/mailchimp_id.py | 43 +++ .../commands/mailchimp_sync_announcements.py | 69 +++++ .../commands/mailchimp_sync_course.py | 274 ++++++++++++++++++ 6 files changed, 386 insertions(+) create mode 100644 lms/djangoapps/mailing/__init__.py create mode 100644 lms/djangoapps/mailing/management/__init__.py create mode 100644 lms/djangoapps/mailing/management/commands/__init__.py create mode 100644 lms/djangoapps/mailing/management/commands/mailchimp_id.py create mode 100644 lms/djangoapps/mailing/management/commands/mailchimp_sync_announcements.py create mode 100644 lms/djangoapps/mailing/management/commands/mailchimp_sync_course.py diff --git a/lms/djangoapps/mailing/__init__.py b/lms/djangoapps/mailing/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/lms/djangoapps/mailing/management/__init__.py b/lms/djangoapps/mailing/management/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/lms/djangoapps/mailing/management/commands/__init__.py b/lms/djangoapps/mailing/management/commands/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/lms/djangoapps/mailing/management/commands/mailchimp_id.py b/lms/djangoapps/mailing/management/commands/mailchimp_id.py new file mode 100644 index 0000000000..4d440be3ac --- /dev/null +++ b/lms/djangoapps/mailing/management/commands/mailchimp_id.py @@ -0,0 +1,43 @@ +import sys +from optparse import make_option + +from django.core.management.base import BaseCommand, CommandError + +from mailsnake import MailSnake + + +class Command(BaseCommand): + args = '' + help = 'Get the list id from a web_id' + + option_list = BaseCommand.option_list + ( + make_option('--key', action='store', help='mailchimp api key'), + make_option('--webid', action='store', dest='web_id', type=int, + help='mailchimp list web id'), + ) + + def parse_options(self, options): + if not options['key']: + raise CommandError('missing key') + + if not options['web_id']: + raise CommandError('missing list web id') + + return options['key'], options['web_id'] + + def handle(self, *args, **options): + key, web_id = self.parse_options(options) + + mailchimp = MailSnake(key) + + lists = mailchimp.lists()['data'] + by_web_id = {l['web_id']: l for l in lists} + + list_with_id = by_web_id.get(web_id, None) + + if list_with_id: + print "id: {} for web_id: {}".format(list_with_id['id'], web_id) + print "list name: {}".format(list_with_id['name']) + else: + print "list with web_id: {} not found.".format(web_id) + sys.exit(1) diff --git a/lms/djangoapps/mailing/management/commands/mailchimp_sync_announcements.py b/lms/djangoapps/mailing/management/commands/mailchimp_sync_announcements.py new file mode 100644 index 0000000000..bc6c86323b --- /dev/null +++ b/lms/djangoapps/mailing/management/commands/mailchimp_sync_announcements.py @@ -0,0 +1,69 @@ +import logging +import math +import random +import itertools +from itertools import chain +from optparse import make_option +from collections import namedtuple + + +from django.core.management.base import BaseCommand, CommandError + +from mailsnake import MailSnake + +from django.contrib.auth.models import User + +from .mailchimp_sync_course import (connect_mailchimp, get_subscribed, + get_unsubscribed, get_cleaned, + subscribe_with_data) + +log = logging.getLogger('edx.mailchimp') + + +class Command(BaseCommand): + args = '' + help = 'Synchronizes a mailchimp list with the students of a course.' + + option_list = BaseCommand.option_list + ( + make_option('--key', action='store', help='mailchimp api key'), + make_option('--list', action='store', dest='list_id', + help='mailchimp list id'), + ) + + def parse_options(self, options): + if not options['key']: + raise CommandError('missing key') + + if not options['list_id']: + raise CommandError('missing list id') + + return (options['key'], options['list_id']) + + def handle(self, *args, **options): + key, list_id = self.parse_options(options) + + log.info('Syncronizing announcement mailing list') + + mailchimp = connect_mailchimp(key) + + subscribed = get_subscribed(mailchimp, list_id) + unsubscribed = get_unsubscribed(mailchimp, list_id) + cleaned = get_cleaned(mailchimp, list_id) + non_subscribed = unsubscribed.union(cleaned) + + enrolled = get_enrolled() + exclude = subscribed.union(non_subscribed) + to_subscribe = get_data(enrolled, exclude=exclude) + + subscribe_with_data(mailchimp, list_id, to_subscribe) + + +def get_enrolled(): + # cdodge: filter out all users who signed up via a Microsite, which UserSignupSource tracks + return User.objects.raw('SELECT * FROM auth_user where id not in (SELECT user_id from student_usersignupsource)') + + +def get_data(users, exclude=None): + exclude = exclude if exclude else set() + emails = (u.email for u in users) + return ({'EMAIL': e} for e in emails if e not in exclude) diff --git a/lms/djangoapps/mailing/management/commands/mailchimp_sync_course.py b/lms/djangoapps/mailing/management/commands/mailchimp_sync_course.py new file mode 100644 index 0000000000..f232f799ca --- /dev/null +++ b/lms/djangoapps/mailing/management/commands/mailchimp_sync_course.py @@ -0,0 +1,274 @@ +import logging +import math +import random +import time +import itertools +from itertools import chain +from optparse import make_option +from collections import namedtuple + + +from django.core.management.base import BaseCommand, CommandError + +from mailsnake import MailSnake + +from student.models import UserProfile, unique_id_for_user + + +BATCH_SIZE = 15000 +# If you try to subscribe with too many users at once +# the transaction times out on the mailchimp side. +SUBSCRIBE_BATCH_SIZE = 1000 + +log = logging.getLogger('edx.mailchimp') + +FIELD_TYPES = {'EDX_ID': 'text'} + + +class Command(BaseCommand): + args = '' + help = 'Synchronizes a mailchimp list with the students of a course.' + + option_list = BaseCommand.option_list + ( + make_option('--key', action='store', help='mailchimp api key'), + make_option('--list', action='store', dest='list_id', + help='mailchimp list id'), + make_option('--course', action='store', dest='course_id', + help='xmodule course_id'), + + make_option('--segments', action='store', dest='segments', + default=0, type=int, + help='number of static random segments to create'), + ) + + def parse_options(self, options): + if not options['key']: + raise CommandError('missing key') + + if not options['list_id']: + raise CommandError('missing list id') + + if not options['course_id']: + raise CommandError('missing course id') + + return (options['key'], options['list_id'], + options['course_id'], options['segments']) + + def handle(self, *args, **options): + key, list_id, course_id, nsegments = self.parse_options(options) + + log.info('Syncronizing email list for {0}'.format(course_id)) + + mailchimp = connect_mailchimp(key) + + # if not verify_list(mailchimp, list_id, course_id): + # raise CommandError('course_id does not match list name') + + subscribed = get_subscribed(mailchimp, list_id) + unsubscribed = get_unsubscribed(mailchimp, list_id) + cleaned = get_cleaned(mailchimp, list_id) + non_subscribed = unsubscribed.union(cleaned) + + enrolled = get_enrolled_students(course_id) + + exclude = subscribed.union(non_subscribed) + to_subscribe = get_student_data(enrolled, exclude=exclude) + + tag_names = set(chain.from_iterable(d.keys() for d in to_subscribe)) + update_merge_tags(mailchimp, list_id, tag_names) + + subscribe_with_data(mailchimp, list_id, to_subscribe) + + enrolled_emails = set(enrolled.values_list('user__email', flat=True)) + non_enrolled_emails = list(subscribed.difference(enrolled_emails)) + + unsubscribe(mailchimp, list_id, non_enrolled_emails) + + subscribed = subscribed.union(set(d['EMAIL'] for d in to_subscribe)) + make_segments(mailchimp, list_id, nsegments, subscribed) + + +def connect_mailchimp(key): + mailchimp = MailSnake(key) + result = mailchimp.ping() + log.debug(result) + + return mailchimp + + +def verify_list(mailchimp, list_id, course_id): + lists = mailchimp.lists(filters={'list_id': list_id})['data'] + + if len(lists) != 1: + log.error('incorrect list id') + return False + + list_name = lists[0]['name'] + + log.debug('list name: %s' % list_name) + + # check that we are connecting to the correct list + parts = course_id.replace('_', ' ').replace('/', ' ').split() + count = sum(1 for p in parts if p in list_name) + if count < 3: + log.info(course_id) + log.info(list_name) + log.error('course_id does not match list name') + return False + + return True + + +def get_student_data(students, exclude=None): + # To speed the query, we won't retrieve the full User object, only + # two of its values. The namedtuple simulates the User object. + FakeUser = namedtuple('Fake', 'id username') + + exclude = exclude if exclude else set() + + def make(v): + e = {'EMAIL': v['user__email'], + 'FULLNAME': v['name'].title()} + + e['EDX_ID'] = unique_id_for_user(FakeUser(v['user_id'], + v['user__username'])) + return e + + fields = 'user__email', 'name', 'user_id', 'user__username' + values = students.values(*fields) + + exclude_func = lambda s: s['user__email'] in exclude + return [make(s) for s in values if not exclude_func(s)] + + +def get_enrolled_students(course_id): + objects = UserProfile.objects + students = objects.filter(user__courseenrollment__course_id=course_id, + user__courseenrollment__is_active=True) + return students + + +def get_subscribed(mailchimp, list_id): + return get_members(mailchimp, list_id, 'subscribed') + + +def get_unsubscribed(mailchimp, list_id): + return get_members(mailchimp, list_id, 'unsubscribed') + + +def get_cleaned(mailchimp, list_id): + return get_members(mailchimp, list_id, 'cleaned') + + +def get_members(mailchimp, list_id, status): + mc_get_members = mailchimp.listMembers + members = set() + + for page in itertools.count(): + response = mc_get_members(id=list_id, + status=status, + start=page, + limit=BATCH_SIZE) + data = response.get('data', []) + + if not data: + break + + members.update(d['email'] for d in data) + + return members + + +def unsubscribe(mailchimp, list_id, emails): + batch_unsubscribe = mailchimp.listBatchUnsubscribe + result = batch_unsubscribe(id=list_id, + emails=emails, + send_goodbye=False, + delete_member=False) + log.debug(result) + + +def update_merge_tags(mailchimp, list_id, tag_names): + mc_vars = mailchimp.listMergeVars(id=list_id) + mc_names = set(v['name'] for v in mc_vars) + + mc_merge = mailchimp.listMergeVarAdd + + tags = [v['tag'] for v in mc_vars] + + for name in tag_names: + tag = name_to_tag(name) + + # verify FULLNAME is present + if 'FULLNAME' not in tags: + result = mc_merge(id=list_id, + tag='FULLNAME', + name='Full Name', + options={'field_type': 'text', + 'public': False}) + tags.append('FULLNAME') + log.debug(result) + + # add extra tags if not present + if name not in mc_names and tag not in ['EMAIL', 'FULLNAME']: + ftype = FIELD_TYPES.get(name, 'number') + result = mc_merge(id=list_id, + tag=tag, + name=name, + options={'field_type': ftype, + 'public': False}) + tags.append(tag) + log.debug(result) + + +def subscribe_with_data(mailchimp, list_id, user_data): + format_entry = lambda e: {name_to_tag(k): v for k, v in e.iteritems()} + formated_data = list(format_entry(e) for e in user_data) + + # send the updates in batches of a fixed size + for batch in batches(formated_data, SUBSCRIBE_BATCH_SIZE): + result = mailchimp.listBatchSubscribe(id=list_id, + batch=batch, + double_optin=False, + update_existing=True) + log.debug("Added: {} Error on: {}".format( + result['add_count'], resurt['error_count'])) + + +def make_segments(mailchimp, list_id, count, emails): + if count > 0: + # reset segments + segments = mailchimp.listStaticSegments(id=list_id) + for s in segments: + if s['name'].startswith('random'): + mailchimp.listStaticSegmentDel(id=list_id, seg_id=s['id']) + + # shuffle and split emails + emails = list(emails) + random.shuffle(emails) + + chunk_size = int(math.ceil(float(len(emails))/count)) + chunks = list(chunk(emails, chunk_size)) + + # create segments and add emails + for n in xrange(count): + name = 'random_{0:002}'.format(n) + seg_id = mailchimp.listStaticSegmentAdd(id=list_id, name=name) + for batch in batches(chunks[n], BATCH_SIZE): + mailchimp.listStaticSegmentMembersAdd(id=list_id, + seg_id=seg_id, + batch=batch) + + +def name_to_tag(name): + return (name[:10] if len(name) > 10 else name).replace(' ', '_').strip() + + +def batches(iterable, size): + slices = range(0, len(iterable), size) + return [iterable[slice(i, i + size)] for i in slices] + + +def chunk(l, n): + for i in xrange(0, len(l), n): + yield l[i:i+n] From 934207bf038a445a81d314a3ee86c1bc29ca810a Mon Sep 17 00:00:00 2001 From: Feanil Patel Date: Fri, 20 Feb 2015 09:24:09 -0500 Subject: [PATCH 2/6] Fixes to get it working with master. --- .../management/commands/mailchimp_sync_course.py | 13 ++++++++----- lms/envs/common.py | 3 +++ requirements/edx/base.txt | 3 +++ 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/lms/djangoapps/mailing/management/commands/mailchimp_sync_course.py b/lms/djangoapps/mailing/management/commands/mailchimp_sync_course.py index f232f799ca..60334eaab6 100644 --- a/lms/djangoapps/mailing/management/commands/mailchimp_sync_course.py +++ b/lms/djangoapps/mailing/management/commands/mailchimp_sync_course.py @@ -13,6 +13,7 @@ from django.core.management.base import BaseCommand, CommandError from mailsnake import MailSnake from student.models import UserProfile, unique_id_for_user +from opaque_keys.edx.keys import CourseKey BATCH_SIZE = 15000 @@ -122,7 +123,7 @@ def verify_list(mailchimp, list_id, course_id): def get_student_data(students, exclude=None): # To speed the query, we won't retrieve the full User object, only # two of its values. The namedtuple simulates the User object. - FakeUser = namedtuple('Fake', 'id username') + FakeUser = namedtuple('Fake', 'id username is_anonymous') exclude = exclude if exclude else set() @@ -130,8 +131,9 @@ def get_student_data(students, exclude=None): e = {'EMAIL': v['user__email'], 'FULLNAME': v['name'].title()} - e['EDX_ID'] = unique_id_for_user(FakeUser(v['user_id'], - v['user__username'])) + fake_user = FakeUser(v['user_id'], v['user__username'], lambda:True) + e['EDX_ID'] = unique_id_for_user(fake_user) + return e fields = 'user__email', 'name', 'user_id', 'user__username' @@ -143,7 +145,8 @@ def get_student_data(students, exclude=None): def get_enrolled_students(course_id): objects = UserProfile.objects - students = objects.filter(user__courseenrollment__course_id=course_id, + course_key = CourseKey.from_string(course_id) + students = objects.filter(user__courseenrollment__course_id=course_key, user__courseenrollment__is_active=True) return students @@ -232,7 +235,7 @@ def subscribe_with_data(mailchimp, list_id, user_data): double_optin=False, update_existing=True) log.debug("Added: {} Error on: {}".format( - result['add_count'], resurt['error_count'])) + result['add_count'], result['error_count'])) def make_segments(mailchimp, list_id, count, emails): diff --git a/lms/envs/common.py b/lms/envs/common.py index d349d691e9..ac4e49cb7a 100644 --- a/lms/envs/common.py +++ b/lms/envs/common.py @@ -1722,6 +1722,9 @@ INSTALLED_APPS = ( 'openedx.core.djangoapps.content.course_structures', 'course_structure_api', + # Mailchimp Syncing + 'mailing', + # CORS and cross-domain CSRF 'corsheaders', 'cors_csrf', diff --git a/requirements/edx/base.txt b/requirements/edx/base.txt index a3b6cdaeac..4656174ec2 100644 --- a/requirements/edx/base.txt +++ b/requirements/edx/base.txt @@ -149,3 +149,6 @@ testtools==0.9.34 # Used for Segment.io analytics analytics-python==0.4.4 + +# Needed for mailchimp(mailing djangoapp) +mailsnake==1.6.2 From d71433e2233405c21fb6c49c51419580d2c72e97 Mon Sep 17 00:00:00 2001 From: Feanil Patel Date: Fri, 20 Feb 2015 17:25:57 -0500 Subject: [PATCH 3/6] Address comments. --- .../mailing/management/commands/mailchimp_sync_course.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/lms/djangoapps/mailing/management/commands/mailchimp_sync_course.py b/lms/djangoapps/mailing/management/commands/mailchimp_sync_course.py index 60334eaab6..49b0e07e1c 100644 --- a/lms/djangoapps/mailing/management/commands/mailchimp_sync_course.py +++ b/lms/djangoapps/mailing/management/commands/mailchimp_sync_course.py @@ -58,13 +58,10 @@ class Command(BaseCommand): def handle(self, *args, **options): key, list_id, course_id, nsegments = self.parse_options(options) - log.info('Syncronizing email list for {0}'.format(course_id)) + log.info('Syncronizing email list for %s', course_id) mailchimp = connect_mailchimp(key) - # if not verify_list(mailchimp, list_id, course_id): - # raise CommandError('course_id does not match list name') - subscribed = get_subscribed(mailchimp, list_id) unsubscribed = get_unsubscribed(mailchimp, list_id) cleaned = get_cleaned(mailchimp, list_id) From f93073401c722a4cb5c658dc7cdb8249f25e1ffd Mon Sep 17 00:00:00 2001 From: Sarina Canelake Date: Mon, 13 Apr 2015 16:05:30 -0400 Subject: [PATCH 4/6] Quality fixups for Mailchimp scripts --- .../management/commands/mailchimp_id.py | 11 +++++ .../commands/mailchimp_sync_announcements.py | 36 +++++++++----- .../commands/mailchimp_sync_course.py | 47 +++++++++++++------ 3 files changed, 66 insertions(+), 28 deletions(-) diff --git a/lms/djangoapps/mailing/management/commands/mailchimp_id.py b/lms/djangoapps/mailing/management/commands/mailchimp_id.py index 4d440be3ac..7159b9478f 100644 --- a/lms/djangoapps/mailing/management/commands/mailchimp_id.py +++ b/lms/djangoapps/mailing/management/commands/mailchimp_id.py @@ -1,3 +1,7 @@ +""" +mailchimp_id: Returns whether or not a given mailchimp key represents +a valid list. +""" import sys from optparse import make_option @@ -7,6 +11,10 @@ from mailsnake import MailSnake class Command(BaseCommand): + """ + Given a mailchimp key, validates that a list with that key + exists in mailchimp. + """ args = '' help = 'Get the list id from a web_id' @@ -26,6 +34,9 @@ class Command(BaseCommand): return options['key'], options['web_id'] def handle(self, *args, **options): + """ + Validates that the id passed in exists in mailchimp. + """ key, web_id = self.parse_options(options) mailchimp = MailSnake(key) diff --git a/lms/djangoapps/mailing/management/commands/mailchimp_sync_announcements.py b/lms/djangoapps/mailing/management/commands/mailchimp_sync_announcements.py index bc6c86323b..e160abfce9 100644 --- a/lms/djangoapps/mailing/management/commands/mailchimp_sync_announcements.py +++ b/lms/djangoapps/mailing/management/commands/mailchimp_sync_announcements.py @@ -1,28 +1,28 @@ +""" +Synchronizes the announcement list with all active students. +""" import logging -import math -import random -import itertools -from itertools import chain from optparse import make_option -from collections import namedtuple - from django.core.management.base import BaseCommand, CommandError -from mailsnake import MailSnake - from django.contrib.auth.models import User -from .mailchimp_sync_course import (connect_mailchimp, get_subscribed, - get_unsubscribed, get_cleaned, - subscribe_with_data) +from .mailchimp_sync_course import ( + connect_mailchimp, get_cleaned, + get_subscribed, get_unsubscribed, + subscribe_with_data +) log = logging.getLogger('edx.mailchimp') class Command(BaseCommand): + """ + Synchronizes the announcement list with all active students. + """ args = '' - help = 'Synchronizes a mailchimp list with the students of a course.' + help = 'Synchronizes the announcement list with all active students.' option_list = BaseCommand.option_list + ( make_option('--key', action='store', help='mailchimp api key'), @@ -59,11 +59,21 @@ class Command(BaseCommand): def get_enrolled(): - # cdodge: filter out all users who signed up via a Microsite, which UserSignupSource tracks + """ + Filter out all users who signed up via a Microsite, which UserSignupSource tracks + """ + ## TODO (Feanil) This grabs all INactive students and MUST be changed + ## TODO (Feanil) blame cdodge for your troubles return User.objects.raw('SELECT * FROM auth_user where id not in (SELECT user_id from student_usersignupsource)') def get_data(users, exclude=None): + """ + users: set of Django users + exclude [optional]: set of Django users to exclude + + returns: {'EMAIL': u.email} for all users in users less those in `exclude` + """ exclude = exclude if exclude else set() emails = (u.email for u in users) return ({'EMAIL': e} for e in emails if e not in exclude) diff --git a/lms/djangoapps/mailing/management/commands/mailchimp_sync_course.py b/lms/djangoapps/mailing/management/commands/mailchimp_sync_course.py index 49b0e07e1c..973f21fb7d 100644 --- a/lms/djangoapps/mailing/management/commands/mailchimp_sync_course.py +++ b/lms/djangoapps/mailing/management/commands/mailchimp_sync_course.py @@ -1,7 +1,9 @@ +""" +Synchronizes a mailchimp list with the students of a course. +""" import logging import math import random -import time import itertools from itertools import chain from optparse import make_option @@ -27,6 +29,9 @@ FIELD_TYPES = {'EDX_ID': 'text'} class Command(BaseCommand): + """ + Synchronizes a mailchimp list with the students of a course. + """ args = '' help = 'Synchronizes a mailchimp list with the students of a course.' @@ -56,6 +61,7 @@ class Command(BaseCommand): options['course_id'], options['segments']) def handle(self, *args, **options): + """Synchronizes a mailchimp list with the students of a course.""" key, list_id, course_id, nsegments = self.parse_options(options) log.info('Syncronizing email list for %s', course_id) @@ -86,8 +92,11 @@ class Command(BaseCommand): make_segments(mailchimp, list_id, nsegments, subscribed) -def connect_mailchimp(key): - mailchimp = MailSnake(key) +def connect_mailchimp(api_key): + """ + Initializes connection to the mailchimp api + """ + mailchimp = MailSnake(api_key) result = mailchimp.ping() log.debug(result) @@ -95,6 +104,10 @@ def connect_mailchimp(key): def verify_list(mailchimp, list_id, course_id): + """ + Verifies that the given list_id corresponds to the course_id + Returns boolean: whether or not course_id matches list_id + """ lists = mailchimp.lists(filters={'list_id': list_id})['data'] if len(lists) != 1: @@ -103,7 +116,7 @@ def verify_list(mailchimp, list_id, course_id): list_name = lists[0]['name'] - log.debug('list name: %s' % list_name) + log.debug('list name: %s', list_name) # check that we are connecting to the correct list parts = course_id.replace('_', ' ').replace('/', ' ').split() @@ -128,7 +141,7 @@ def get_student_data(students, exclude=None): e = {'EMAIL': v['user__email'], 'FULLNAME': v['name'].title()} - fake_user = FakeUser(v['user_id'], v['user__username'], lambda:True) + fake_user = FakeUser(v['user_id'], v['user__username'], lambda: True) e['EDX_ID'] = unique_id_for_user(fake_user) return e @@ -231,8 +244,10 @@ def subscribe_with_data(mailchimp, list_id, user_data): batch=batch, double_optin=False, update_existing=True) - log.debug("Added: {} Error on: {}".format( - result['add_count'], result['error_count'])) + + log.debug( + "Added: %s Error on: %s", result['add_count'], result['error_count'] + ) def make_segments(mailchimp, list_id, count, emails): @@ -247,17 +262,19 @@ def make_segments(mailchimp, list_id, count, emails): emails = list(emails) random.shuffle(emails) - chunk_size = int(math.ceil(float(len(emails))/count)) + chunk_size = int(math.ceil(float(len(emails)) / count)) chunks = list(chunk(emails, chunk_size)) # create segments and add emails - for n in xrange(count): - name = 'random_{0:002}'.format(n) + for seg in xrange(count): + name = 'random_{0:002}'.format(seg) seg_id = mailchimp.listStaticSegmentAdd(id=list_id, name=name) - for batch in batches(chunks[n], BATCH_SIZE): - mailchimp.listStaticSegmentMembersAdd(id=list_id, - seg_id=seg_id, - batch=batch) + for batch in batches(chunks[seg], BATCH_SIZE): + mailchimp.listStaticSegmentMembersAdd( + id=list_id, + seg_id=seg_id, + batch=batch + ) def name_to_tag(name): @@ -271,4 +288,4 @@ def batches(iterable, size): def chunk(l, n): for i in xrange(0, len(l), n): - yield l[i:i+n] + yield l[i:i + n] From 2d3dab99365a18d3b069be973b08ec52554598db Mon Sep 17 00:00:00 2001 From: Sarina Canelake Date: Mon, 13 Apr 2015 20:17:08 -0400 Subject: [PATCH 5/6] Remove 'batches' function, since == to 'chunk' --- .../commands/mailchimp_sync_course.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/lms/djangoapps/mailing/management/commands/mailchimp_sync_course.py b/lms/djangoapps/mailing/management/commands/mailchimp_sync_course.py index 973f21fb7d..ac1e4b04f1 100644 --- a/lms/djangoapps/mailing/management/commands/mailchimp_sync_course.py +++ b/lms/djangoapps/mailing/management/commands/mailchimp_sync_course.py @@ -239,7 +239,7 @@ def subscribe_with_data(mailchimp, list_id, user_data): formated_data = list(format_entry(e) for e in user_data) # send the updates in batches of a fixed size - for batch in batches(formated_data, SUBSCRIBE_BATCH_SIZE): + for batch in chunk(formated_data, SUBSCRIBE_BATCH_SIZE): result = mailchimp.listBatchSubscribe(id=list_id, batch=batch, double_optin=False, @@ -269,7 +269,7 @@ def make_segments(mailchimp, list_id, count, emails): for seg in xrange(count): name = 'random_{0:002}'.format(seg) seg_id = mailchimp.listStaticSegmentAdd(id=list_id, name=name) - for batch in batches(chunks[seg], BATCH_SIZE): + for batch in chunk(chunks[seg], BATCH_SIZE): mailchimp.listStaticSegmentMembersAdd( id=list_id, seg_id=seg_id, @@ -281,11 +281,10 @@ def name_to_tag(name): return (name[:10] if len(name) > 10 else name).replace(' ', '_').strip() -def batches(iterable, size): - slices = range(0, len(iterable), size) - return [iterable[slice(i, i + size)] for i in slices] - - -def chunk(l, n): - for i in xrange(0, len(l), n): - yield l[i:i + n] +def chunk(elist, size): + """ + Generator. Yields a list of size `size` of the given list `elist`, + or a shorter list if at the end of the input. + """ + for i in xrange(0, len(elist), size): + yield elist[i:i + size] From b2d9d339d1ea7da81236363d2b567bb1986f63bb Mon Sep 17 00:00:00 2001 From: Sarina Canelake Date: Tue, 14 Apr 2015 13:01:49 -0400 Subject: [PATCH 6/6] Docstrings for mailchimp scripts --- .../management/commands/mailchimp_id.py | 1 + .../commands/mailchimp_sync_announcements.py | 4 +- .../commands/mailchimp_sync_course.py | 106 +++++++++++++++--- 3 files changed, 95 insertions(+), 16 deletions(-) diff --git a/lms/djangoapps/mailing/management/commands/mailchimp_id.py b/lms/djangoapps/mailing/management/commands/mailchimp_id.py index 7159b9478f..922d10b80d 100644 --- a/lms/djangoapps/mailing/management/commands/mailchimp_id.py +++ b/lms/djangoapps/mailing/management/commands/mailchimp_id.py @@ -25,6 +25,7 @@ class Command(BaseCommand): ) def parse_options(self, options): + """Parses `options` of the command.""" if not options['key']: raise CommandError('missing key') diff --git a/lms/djangoapps/mailing/management/commands/mailchimp_sync_announcements.py b/lms/djangoapps/mailing/management/commands/mailchimp_sync_announcements.py index e160abfce9..718f25b34d 100644 --- a/lms/djangoapps/mailing/management/commands/mailchimp_sync_announcements.py +++ b/lms/djangoapps/mailing/management/commands/mailchimp_sync_announcements.py @@ -31,6 +31,7 @@ class Command(BaseCommand): ) def parse_options(self, options): + """Parses `options` of the command.""" if not options['key']: raise CommandError('missing key') @@ -62,8 +63,7 @@ def get_enrolled(): """ Filter out all users who signed up via a Microsite, which UserSignupSource tracks """ - ## TODO (Feanil) This grabs all INactive students and MUST be changed - ## TODO (Feanil) blame cdodge for your troubles + ## TODO (Feanil) This grabs all inactive students and MUST be changed (or, could exclude inactive users in get_data) return User.objects.raw('SELECT * FROM auth_user where id not in (SELECT user_id from student_usersignupsource)') diff --git a/lms/djangoapps/mailing/management/commands/mailchimp_sync_course.py b/lms/djangoapps/mailing/management/commands/mailchimp_sync_course.py index ac1e4b04f1..3a1919fbbe 100644 --- a/lms/djangoapps/mailing/management/commands/mailchimp_sync_course.py +++ b/lms/djangoapps/mailing/management/commands/mailchimp_sync_course.py @@ -9,7 +9,6 @@ from itertools import chain from optparse import make_option from collections import namedtuple - from django.core.management.base import BaseCommand, CommandError from mailsnake import MailSnake @@ -48,6 +47,7 @@ class Command(BaseCommand): ) def parse_options(self, options): + """Parses `options` of the command.""" if not options['key']: raise CommandError('missing key') @@ -131,29 +131,50 @@ def verify_list(mailchimp, list_id, course_id): def get_student_data(students, exclude=None): + """ + Given a QuerySet of Django users, extracts id, username, and is_anonymous data. + Excludes any users provided in the optional `exclude` set. + + Returns a list of dictionaries for each user, where the dictionary has keys + 'EMAIL', 'FULLNAME', and 'EDX_ID'. + """ # To speed the query, we won't retrieve the full User object, only # two of its values. The namedtuple simulates the User object. - FakeUser = namedtuple('Fake', 'id username is_anonymous') + FakeUser = namedtuple('Fake', 'id username is_anonymous') # pylint: disable=invalid-name exclude = exclude if exclude else set() - def make(v): - e = {'EMAIL': v['user__email'], - 'FULLNAME': v['name'].title()} + def make(svalue): + """ + Given a User value entry `svalue`, extracts the student's email and fullname, + and provides a unique id for the user. - fake_user = FakeUser(v['user_id'], v['user__username'], lambda: True) - e['EDX_ID'] = unique_id_for_user(fake_user) + Returns a dictionary with keys 'EMAIL', 'FULLNAME', and 'EDX_ID'. + """ + fake_user = FakeUser(svalue['user_id'], svalue['user__username'], lambda: True) - return e + entry = { + 'EMAIL': svalue['user__email'], + 'FULLNAME': svalue['name'].title(), + 'EDX_ID': unique_id_for_user(fake_user) + } + + return entry fields = 'user__email', 'name', 'user_id', 'user__username' values = students.values(*fields) + # TODO: Since `students` is a QuerySet, can we chain a filter here that would be more + # performant than calling a lambda for every user? exclude_func = lambda s: s['user__email'] in exclude return [make(s) for s in values if not exclude_func(s)] def get_enrolled_students(course_id): + """ + Given a course_id, returns a QuerySet of all the active students + in the course. + """ objects = UserProfile.objects course_key = CourseKey.from_string(course_id) students = objects.filter(user__courseenrollment__course_id=course_key, @@ -162,18 +183,32 @@ def get_enrolled_students(course_id): def get_subscribed(mailchimp, list_id): + """Returns a set of email addresses subscribed to `list_id`""" return get_members(mailchimp, list_id, 'subscribed') def get_unsubscribed(mailchimp, list_id): + """Returns a set of email addresses that have unsubscribed from `list_id`""" return get_members(mailchimp, list_id, 'unsubscribed') def get_cleaned(mailchimp, list_id): + """ + Returns a set of email addresses that have been cleaned from `list_id` + + These email addresses may be invalid or have caused bounces, so you don't want + to re-add them back to the list. + """ return get_members(mailchimp, list_id, 'cleaned') def get_members(mailchimp, list_id, status): + """ + Given a mailchimp list id and a user status to filter on, returns all + members of the mailchimp list with that status. + + Returns a set of email addresses. + """ mc_get_members = mailchimp.listMembers members = set() @@ -193,6 +228,10 @@ def get_members(mailchimp, list_id, status): def unsubscribe(mailchimp, list_id, emails): + """ + Batch unsubscribe the given email addresses from the list represented + by `list_id` + """ batch_unsubscribe = mailchimp.listBatchUnsubscribe result = batch_unsubscribe(id=list_id, emails=emails, @@ -202,6 +241,15 @@ def unsubscribe(mailchimp, list_id, emails): def update_merge_tags(mailchimp, list_id, tag_names): + """ + This function is rather inscrutable. Given tag_names, which + in this code seems to be a list of ['FULLNAME', 'EMAIL', 'EDX_ID'], + we grab tags from the mailchimp list, then we verify tag_names has + 'FULLNAME' and 'EMAIL' present, we get more data from mailchimp, then + sync the variables up to mailchimp using `listMergeVarAdd`. + + The purpose of this function is unclear. + """ mc_vars = mailchimp.listMergeVars(id=list_id) mc_names = set(v['name'] for v in mc_vars) @@ -213,6 +261,9 @@ def update_merge_tags(mailchimp, list_id, tag_names): tag = name_to_tag(name) # verify FULLNAME is present + # TODO: Why is this under the for loop? It does nothing with the loop + # variable and seems like things would work if this was executed before or + # after the loop. if 'FULLNAME' not in tags: result = mc_merge(id=list_id, tag='FULLNAME', @@ -235,6 +286,13 @@ def update_merge_tags(mailchimp, list_id, tag_names): def subscribe_with_data(mailchimp, list_id, user_data): + """ + Given user_data in the form of a list of dictionaries for each user, + where the dictionary has keys 'EMAIL', 'FULLNAME', and 'EDX_ID', batch + subscribe the users to the given `list_id` via a Mailchimp api method. + + Returns None + """ format_entry = lambda e: {name_to_tag(k): v for k, v in e.iteritems()} formated_data = list(format_entry(e) for e in user_data) @@ -244,23 +302,37 @@ def subscribe_with_data(mailchimp, list_id, user_data): batch=batch, double_optin=False, update_existing=True) - + log.debug( "Added: %s Error on: %s", result['add_count'], result['error_count'] ) def make_segments(mailchimp, list_id, count, emails): + """ + Segments the list of email addresses `emails` into `count` segments, + if count is nonzero. + + For unknown historical reasons, lost to the winds of time, this is done with + a random order to the email addresses. + + First, existing 'random_' mailchimp segments are deleted. + + Then, the list of emails (the whole, large list) is shuffled. + + Finally, the shuffled emails are chunked into `count` segments and re-uploaded + to mailchimp as 'random_'-prefixed segments. + """ if count > 0: # reset segments segments = mailchimp.listStaticSegments(id=list_id) - for s in segments: - if s['name'].startswith('random'): - mailchimp.listStaticSegmentDel(id=list_id, seg_id=s['id']) + for seg in segments: + if seg['name'].startswith('random'): + mailchimp.listStaticSegmentDel(id=list_id, seg_id=seg['id']) # shuffle and split emails emails = list(emails) - random.shuffle(emails) + random.shuffle(emails) # Why do we do this? chunk_size = int(math.ceil(float(len(emails)) / count)) chunks = list(chunk(emails, chunk_size)) @@ -278,7 +350,13 @@ def make_segments(mailchimp, list_id, count, emails): def name_to_tag(name): - return (name[:10] if len(name) > 10 else name).replace(' ', '_').strip() + """ + Returns sanitized str `name`: no more than 10 characters, + with spaces replaced with `_` + """ + if len(name) > 10: + name = name[:10] + return name.replace(' ', '_').strip() def chunk(elist, size):