diff --git a/common/djangoapps/user_api/management/__init__.py b/common/djangoapps/user_api/management/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/common/djangoapps/user_api/management/commands/__init__.py b/common/djangoapps/user_api/management/commands/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/common/djangoapps/user_api/management/commands/email_opt_in_list.py b/common/djangoapps/user_api/management/commands/email_opt_in_list.py
new file mode 100644
index 0000000000..b71f72ef91
--- /dev/null
+++ b/common/djangoapps/user_api/management/commands/email_opt_in_list.py
@@ -0,0 +1,308 @@
+"""Generate a list indicating whether users have opted in or out of receiving email from an org.
+
+Email opt-in is stored as an org-level preference.
+When reports are generated, we need to handle:
+
+1) Org aliases: some organizations might have multiple course key "org" values.
+    We choose the most recently set preference among all org aliases.
+    Since this information isn't stored anywhere in edx-platform,
+    the caller needs to pass in the list of orgs and aliases.
+
+2) No preference set: Some users may not have an opt-in preference set
+    if they enrolled before the preference was introduced.
+    These users are opted in by default.
+
+3) Restricting to a subset of courses in an org: Some orgs have courses
+    that we don't want to include in the results (e.g. EdX-created test courses).
+    Allow the caller to explicitly specify the list of courses in the org.
+
+The command will always use the read replica database if one is configured.
+
+"""
+import os.path
+import csv
+import time
+import contextlib
+import logging
+from optparse import make_option
+
+from django.core.management.base import BaseCommand, CommandError
+from django.conf import settings
+from django.db import connections
+
+from opaque_keys import InvalidKeyError
+from opaque_keys.edx.keys import CourseKey
+from xmodule.modulestore.django import modulestore
+
+
+LOGGER = logging.getLogger(__name__)
+
+
+class Command(BaseCommand):
+    """Generate a list of email opt-in values for user enrollments. """
+
+    args = "<OUTPUT_FILENAME> <ORG_ALIASES> --courses=COURSE_ID_LIST"
+    help = "Generate a list of email opt-in values for user enrollments."
+
+    # Register `--courses` so it can be supplied on the command line;
+    # `handle` reads it from `options["courses"]`.
+    option_list = BaseCommand.option_list + (
+        make_option(
+            "--courses",
+            action="store",
+            dest="courses",
+            default=None,
+            help="Comma-separated list of course keys to include in the report."
+        ),
+    )
+
+    # Fields output in the CSV
+    OUTPUT_FIELD_NAMES = [
+        "email",
+        "full_name",
+        "course_id",
+        "is_opted_in_for_email",
+        "preference_set_date"
+    ]
+
+    # Number of records to read at a time when making
+    # multiple queries over a potentially large dataset.
+    QUERY_INTERVAL = 1000
+
+    def handle(self, *args, **options):
+        """Execute the command.
+
+        Arguments:
+            file_path (str): Path to the output file.
+            *org_list (unicode): List of organization aliases.
+
+        Keyword Arguments:
+            courses (unicode): Comma-separated list of course keys. If provided,
+                include only these courses in the results.
+
+        Raises:
+            CommandError: If the arguments are invalid, a course key cannot
+                be parsed, or no matching courses are found.
+
+        """
+        file_path, org_list = self._parse_args(args)
+
+        # Retrieve all the courses for the org.
+        # If we were given a specific list of courses to include,
+        # filter out anything not in that list.
+        courses = self._get_courses_for_org(org_list)
+        only_courses = options.get("courses")
+        if only_courses is not None:
+            try:
+                only_courses = [
+                    CourseKey.from_string(course_key.strip())
+                    for course_key in only_courses.split(",")
+                ]
+            except InvalidKeyError:
+                # Report a malformed key as a usage error, not a traceback.
+                raise CommandError(
+                    u"Invalid course key in --courses: {courses}".format(courses=only_courses)
+                )
+            courses = list(set(courses) & set(only_courses))
+
+        # Add in organizations from the course keys, to ensure
+        # we're including orgs with different capitalizations
+        org_list = list(set(org_list) | set(course.org for course in courses))
+
+        # If no courses are found, abort
+        if not courses:
+            raise CommandError(
+                u"No courses found for orgs: {orgs}".format(
+                    orgs=", ".join(org_list)
+                )
+            )
+
+        # Let the user know what's about to happen
+        LOGGER.info(
+            u"Retrieving data for courses: {courses}".format(
+                courses=", ".join([unicode(course) for course in courses])
+            )
+        )
+
+        # Open the output file and generate the report.
+        with open(file_path, "w") as file_handle:
+            with self._log_execution_time():
+                self._write_email_opt_in_prefs(file_handle, org_list, courses)
+
+        # Remind the user where the output file is
+        LOGGER.info(u"Output file: {file_path}".format(file_path=file_path))
+
+    def _parse_args(self, args):
+        """Check and parse arguments.
+
+        Validates that the right number of args were provided
+        and that the output file doesn't already exist.
+
+        Arguments:
+            args (list): List of arguments given at the command line.
+
+        Returns:
+            Tuple of (file_path, org_list)
+
+        Raises:
+            CommandError
+
+        """
+        if len(args) < 2:
+            raise CommandError(u"Usage: {args}".format(args=self.args))
+
+        file_path = args[0]
+        org_list = args[1:]
+        if os.path.exists(file_path):
+            raise CommandError("File already exists at '{path}'".format(path=file_path))
+
+        return file_path, org_list
+
+    def _get_courses_for_org(self, org_aliases):
+        """Retrieve all course keys for a particular org.
+
+        Arguments:
+            org_aliases (list): List of aliases for the org.
+
+        Returns:
+            List of `CourseKey`s
+
+        """
+        all_courses = modulestore().get_courses()
+        # Org matching is case-insensitive; a set keeps each lookup O(1).
+        orgs_lowercase = set(org.lower() for org in org_aliases)
+        return [
+            course.id
+            for course in all_courses
+            if course.id.org.lower() in orgs_lowercase
+        ]
+
+    @contextlib.contextmanager
+    def _log_execution_time(self):
+        """Context manager for measuring execution time. """
+        start_time = time.time()
+        try:
+            yield
+        finally:
+            # Log the elapsed time even if the wrapped block raised.
+            execution_time = time.time() - start_time
+            LOGGER.info(u"Execution time: {time} seconds".format(time=execution_time))
+
+    def _write_email_opt_in_prefs(self, file_handle, org_aliases, courses):
+        """Write email opt-in preferences to the output file.
+
+        This will generate a CSV with one row for each enrollment.
+        This means that the user's "opt in" preference will be specified
+        multiple times if the user has enrolled in multiple courses
+        within the org. However, the values should always be the same:
+        if the user is listed as "opted out" for course A, she will
+        also be listed as "opted out" for courses B, C, and D.
+
+        Arguments:
+            file_handle (file): Handle to the output file.
+            org_aliases (list): List of aliases for the org.
+            courses (list): List of course keys in the org.
+
+        Returns:
+            None
+
+        """
+        writer = csv.DictWriter(file_handle, fieldnames=self.OUTPUT_FIELD_NAMES)
+        cursor = self._db_cursor()
+
+        # Only placeholders are formatted into the SQL; the values themselves
+        # are passed to `execute` so the database driver quotes them.
+        query = (
+            u"""
+            SELECT
+                user.`email` AS `email`,
+                profile.`name` AS `full_name`,
+                enrollment.`course_id` AS `course_id`,
+                (
+                    SELECT value
+                    FROM user_api_userorgtag
+                    WHERE org IN ( {org_list} )
+                    AND `key`=\"email-optin\"
+                    AND `user_id`=user.`id`
+                    ORDER BY modified DESC
+                    LIMIT 1
+                ) AS `is_opted_in_for_email`,
+                (
+                    SELECT modified
+                    FROM user_api_userorgtag
+                    WHERE org IN ( {org_list} )
+                    AND `key`=\"email-optin\"
+                    AND `user_id`=user.`id`
+                    ORDER BY modified DESC
+                    LIMIT 1
+                ) AS `preference_set_date`
+            FROM
+                student_courseenrollment AS enrollment
+                LEFT JOIN auth_user AS user ON user.id=enrollment.user_id
+                LEFT JOIN auth_userprofile AS profile ON profile.user_id=user.id
+            WHERE enrollment.course_id IN ( {course_id_list} )
+            """
+        ).format(
+            course_id_list=self._sql_placeholders(courses),
+            org_list=self._sql_placeholders(org_aliases)
+        )
+
+        # Parameters follow the order in which the placeholders appear:
+        # the org list (once per sub-select), then the course list.
+        org_params = list(org_aliases)
+        course_params = [unicode(course) for course in courses]
+        cursor.execute(query, org_params + org_params + course_params)
+
+        row_count = 0
+        for row in self._iterate_results(cursor):
+            email, full_name, course_id, is_opted_in, pref_set_date = row
+            writer.writerow({
+                "email": self._encode(email),
+                "full_name": self._encode(full_name),
+                "course_id": self._encode(course_id),
+                "is_opted_in_for_email": is_opted_in if is_opted_in else "True",
+                "preference_set_date": pref_set_date,
+            })
+            row_count += 1
+
+        # Log the number of rows we processed
+        LOGGER.info(u"Retrieved {num_rows} records.".format(num_rows=row_count))
+
+    def _iterate_results(self, cursor):
+        """Iterate through the results of a database query, fetching in chunks.
+
+        Arguments:
+            cursor: The database cursor
+
+        Yields:
+            tuple of row values from the query
+
+        """
+        while True:
+            rows = cursor.fetchmany(self.QUERY_INTERVAL)
+            if not rows:
+                break
+            for row in rows:
+                yield row
+
+    def _encode(self, value):
+        """Encode a text column as UTF-8, treating NULL as the empty string.
+
+        The user and profile tables are LEFT JOINed, so their columns
+        may be NULL for an enrollment without a matching row.
+        """
+        return (value or u"").encode('utf-8')
+
+    def _sql_placeholders(self, values):
+        """Return one "%s" placeholder per value, for use in a SQL "IN" clause. """
+        return u",".join([u"%s"] * len(values))
+
+    def _db_cursor(self):
+        """Return a database cursor to the read replica if one is available. """
+        # Use the read replica if one has been configured
+        db_alias = (
+            'read_replica'
+            if 'read_replica' in settings.DATABASES
+            else 'default'
+        )
+        return connections[db_alias].cursor()
diff --git a/common/djangoapps/user_api/management/tests/__init__.py b/common/djangoapps/user_api/management/tests/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/common/djangoapps/user_api/management/tests/test_email_opt_in_list.py b/common/djangoapps/user_api/management/tests/test_email_opt_in_list.py
new file mode 100644
index 0000000000..f9b4389da3
--- /dev/null
+++ b/common/djangoapps/user_api/management/tests/test_email_opt_in_list.py
@@ -0,0 +1,409 @@
+# -*- coding: utf-8 -*-
+"""Tests for the email opt-in list management command. """
+import os.path
+import tempfile
+import shutil
+import csv
+from collections import defaultdict
+from unittest import skipUnless
+
+
+import ddt
+from django.conf import settings
+from django.test.utils import override_settings
+from django.core.management.base import CommandError
+
+
+from xmodule.modulestore.tests.django_utils import ModuleStoreTestCase, mixed_store_config
+from xmodule.modulestore.tests.factories import CourseFactory
+from student.tests.factories import UserFactory, CourseEnrollmentFactory
+from student.models import CourseEnrollment
+
+import user_api.api.profile as profile_api
+from user_api.models import UserOrgTag
+from user_api.management.commands import email_opt_in_list
+
+
+MODULESTORE_CONFIG = mixed_store_config(settings.COMMON_TEST_DATA_ROOT, {}, include_xml=False)
+
+
+@ddt.ddt
+@skipUnless(settings.ROOT_URLCONF == 'lms.urls', 'Test only valid in lms')
+@override_settings(MODULESTORE=MODULESTORE_CONFIG)
+class EmailOptInListTest(ModuleStoreTestCase):
+    """Tests for the email opt-in list management command. """
+
+    USER_USERNAME = "test_user"
+    USER_FIRST_NAME = u"Ṫëṡẗ"
+    USER_LAST_NAME = u"Űśéŕ"
+
+    TEST_ORG = u"téśt_őŕǵ"
+
+    OUTPUT_FILE_NAME = "test_org_email_opt_in.csv"
+    OUTPUT_FIELD_NAMES = [
+        "email",
+        "full_name",
+        "course_id",
+        "is_opted_in_for_email",
+        "preference_set_date"
+    ]
+
+    def setUp(self):
+        self.user = UserFactory.create(
+            username=self.USER_USERNAME,
+            first_name=self.USER_FIRST_NAME,
+            last_name=self.USER_LAST_NAME
+        )
+        self.courses = []
+        self.enrollments = defaultdict(list)
+
+    def test_not_enrolled(self):
+        self._create_courses_and_enrollments((self.TEST_ORG, False))
+        output = self._run_command(self.TEST_ORG)
+
+        # The user isn't enrolled in the course, so the output should be empty
+        self._assert_output(output)
+
+    def test_enrolled_no_pref(self):
+        self._create_courses_and_enrollments((self.TEST_ORG, True))
+        output = self._run_command(self.TEST_ORG)
+
+        # By default, if no preference is set but the user is enrolled, opt in
+        self._assert_output(output, (self.user, self.courses[0].id, True))
+
+    def test_enrolled_pref_opted_in(self):
+        self._create_courses_and_enrollments((self.TEST_ORG, True))
+        self._set_opt_in_pref(self.user, self.TEST_ORG, True)
+        output = self._run_command(self.TEST_ORG)
+        self._assert_output(output, (self.user, self.courses[0].id, True))
+
+    def test_enrolled_pref_opted_out(self):
+        self._create_courses_and_enrollments((self.TEST_ORG, True))
+        self._set_opt_in_pref(self.user, self.TEST_ORG, False)
+        output = self._run_command(self.TEST_ORG)
+        self._assert_output(output, (self.user, self.courses[0].id, False))
+
+    def test_opt_in_then_opt_out(self):
+        self._create_courses_and_enrollments((self.TEST_ORG, True))
+        self._set_opt_in_pref(self.user, self.TEST_ORG, True)
+        self._set_opt_in_pref(self.user, self.TEST_ORG, False)
+        output = self._run_command(self.TEST_ORG)
+        self._assert_output(output, (self.user, self.courses[0].id, False))
+
+    def test_exclude_non_org_courses(self):
+        # Enroll in a course that's not in the org
+        self._create_courses_and_enrollments(
+            (self.TEST_ORG, True),
+            ("other_org", True)
+        )
+
+        # Opt out of the other course
+        self._set_opt_in_pref(self.user, "other_org", False)
+
+        # The first course is included in the results,
+        # but the second course is excluded,
+        # so the user should be opted in by default.
+        output = self._run_command(self.TEST_ORG)
+        self._assert_output(
+            output,
+            (self.user, self.courses[0].id, True),
+            expect_pref_datetime=False
+        )
+
+    def test_enrolled_conflicting_prefs(self):
+        # Enroll in two courses, both in the org
+        self._create_courses_and_enrollments(
+            (self.TEST_ORG, True),
+            ("org_alias", True)
+        )
+
+        # Opt into the first course, then opt out of the second course
+        self._set_opt_in_pref(self.user, self.TEST_ORG, True)
+        self._set_opt_in_pref(self.user, "org_alias", False)
+
+        # The second preference change should take precedence
+        # Note that *both* courses are included in the list,
+        # but they should have the same value.
+        output = self._run_command(self.TEST_ORG, other_names=["org_alias"])
+        self._assert_output(
+            output,
+            (self.user, self.courses[0].id, False),
+            (self.user, self.courses[1].id, False)
+        )
+
+        # Opt into the first course
+        # Even though the other course still has a preference set to false,
+        # the newest preference takes precedence
+        self._set_opt_in_pref(self.user, self.TEST_ORG, True)
+        output = self._run_command(self.TEST_ORG, other_names=["org_alias"])
+        self._assert_output(
+            output,
+            (self.user, self.courses[0].id, True),
+            (self.user, self.courses[1].id, True)
+        )
+
+    @ddt.data(True, False)
+    def test_unenrolled_from_all_courses(self, opt_in_pref):
+        # Enroll in the course and set a preference
+        self._create_courses_and_enrollments((self.TEST_ORG, True))
+        self._set_opt_in_pref(self.user, self.TEST_ORG, opt_in_pref)
+
+        # Unenroll from the course
+        CourseEnrollment.unenroll(self.user, self.courses[0].id, skip_refund=True)
+
+        # Enrollments should still appear in the output
+        output = self._run_command(self.TEST_ORG)
+        self._assert_output(output, (self.user, self.courses[0].id, opt_in_pref))
+
+    def test_unenrolled_from_some_courses(self):
+        # Enroll in several courses in the org
+        self._create_courses_and_enrollments(
+            (self.TEST_ORG, True),
+            (self.TEST_ORG, True),
+            (self.TEST_ORG, True),
+            ("org_alias", True)
+        )
+
+        # Set a preference for the aliased course
+        self._set_opt_in_pref(self.user, "org_alias", False)
+
+        # Unenroll from the aliased course
+        CourseEnrollment.unenroll(self.user, self.courses[3].id, skip_refund=True)
+
+        # Expect that the preference still applies,
+        # and all the enrollments should appear in the list
+        output = self._run_command(self.TEST_ORG, other_names=["org_alias"])
+        self._assert_output(
+            output,
+            (self.user, self.courses[0].id, False),
+            (self.user, self.courses[1].id, False),
+            (self.user, self.courses[2].id, False),
+            (self.user, self.courses[3].id, False)
+        )
+
+    def test_no_courses_for_org_name(self):
+        self._create_courses_and_enrollments((self.TEST_ORG, True))
+        self._set_opt_in_pref(self.user, self.TEST_ORG, True)
+
+        # No course available for this particular org
+        with self.assertRaisesRegexp(CommandError, "^No courses found for orgs:"):
+            self._run_command("other_org")
+
+    def test_specify_subset_of_courses(self):
+        # Create several courses in the same org
+        self._create_courses_and_enrollments(
+            (self.TEST_ORG, True),
+            (self.TEST_ORG, True),
+            (self.TEST_ORG, True),
+        )
+
+        # Execute the command, but exclude the third course from the list
+        only_courses = [self.courses[0].id, self.courses[1].id]
+        output = self._run_command(self.TEST_ORG, only_courses=only_courses)
+
+        # Only the two requested courses should appear in the report
+        self._assert_output(
+            output,
+            (self.user, self.courses[0].id, True),
+            (self.user, self.courses[1].id, True)
+        )
+
+    # Choose numbers before and after the query interval boundary
+    @ddt.data(2, 3, 4, 5, 6, 7, 8, 9)
+    def test_many_users(self, num_users):
+        # Create many users and enroll them in the test course
+        course = CourseFactory.create(org=self.TEST_ORG)
+        emails = []
+        for _ in range(num_users):
+            user = UserFactory.create()
+            emails.append(user.email.encode('utf-8'))
+            CourseEnrollmentFactory.create(course_id=course.id, user=user)
+
+        # Generate the report
+        output = self._run_command(self.TEST_ORG, query_interval=4)
+
+        # Expect that every enrollment shows up in the report
+        output_emails = [row["email"] for row in output]
+        self.assertEqual(len(output_emails), num_users)
+        for email in emails:
+            self.assertIn(email, output_emails)
+
+    def test_org_capitalization(self):
+        # Lowercase some of the org names in the course IDs
+        self._create_courses_and_enrollments(
+            ("MyOrg", True),
+            ("myorg", True)
+        )
+
+        # Set preferences for both courses
+        self._set_opt_in_pref(self.user, "MyOrg", True)
+        self._set_opt_in_pref(self.user, "myorg", False)
+
+        # Execute the command, expecting both enrollments to show up
+        # We're passing in the uppercase org, but we set the lowercase
+        # version more recently, so we expect the lowercase org
+        # preference to apply.
+        output = self._run_command("MyOrg")
+        self._assert_output(
+            output,
+            (self.user, self.courses[0].id, False),
+            (self.user, self.courses[1].id, False)
+        )
+
+    @ddt.data(0, 1)
+    def test_not_enough_args(self, num_args):
+        args = ["dummy"] * num_args
+        expected_msg_regex = "^Usage: <OUTPUT_FILENAME> <ORG_ALIASES> --courses=COURSE_ID_LIST$"
+        with self.assertRaisesRegexp(CommandError, expected_msg_regex):
+            email_opt_in_list.Command().handle(*args)
+
+    def test_file_already_exists(self):
+        temp_file = tempfile.NamedTemporaryFile(delete=True)
+
+        def _cleanup():  # pylint: disable=missing-docstring
+            temp_file.close()
+
+        self.addCleanup(_cleanup)
+
+        with self.assertRaisesRegexp(CommandError, "^File already exists"):
+            email_opt_in_list.Command().handle(temp_file.name, self.TEST_ORG)
+
+    def _create_courses_and_enrollments(self, *args):
+        """Create courses and enrollments.
+
+        Created courses and enrollments are stored in instance variables
+        so tests can refer to them later.
+
+        Arguments:
+            *args: Tuples of (course_org, should_enroll), where
+                course_org is the name of the org in the course key
+                and should_enroll is a boolean indicating whether to enroll
+                the user in the course.
+
+        Returns:
+            None
+
+        """
+        for course_number, (course_org, should_enroll) in enumerate(args):
+            course = CourseFactory.create(org=course_org, number=str(course_number))
+            if should_enroll:
+                enrollment = CourseEnrollmentFactory.create(
+                    is_active=True,
+                    course_id=course.id,
+                    user=self.user
+                )
+                self.enrollments[course.id].append(enrollment)
+            self.courses.append(course)
+
+    def _set_opt_in_pref(self, user, org, is_opted_in):
+        """Set the email opt-in preference.
+
+        Arguments:
+            user (User): The user model.
+            org (unicode): The org in the course key.
+            is_opted_in (bool): Whether the user is opted in or out of emails.
+
+        Returns:
+            None
+
+        """
+        profile_api.update_email_opt_in(user.username, org, is_opted_in)
+
+    def _latest_pref_set_date(self, user):
+        """Retrieve the latest opt-in preference for the user,
+        across all orgs and preference keys.
+
+        Arguments:
+            user (User): The user whose preference was set.
+
+        Returns:
+            ISO-formatted date string or empty string
+
+        """
+        pref = UserOrgTag.objects.filter(user=user).order_by("-modified")
+        return pref[0].modified.isoformat(' ') if len(pref) > 0 else ""
+
+    def _run_command(self, org, other_names=None, only_courses=None, query_interval=None):
+        """Execute the management command to generate the email opt-in list.
+
+        Arguments:
+            org (unicode): The org to generate the report for.
+
+        Keyword Arguments:
+            other_names (list): List of other aliases for the org.
+            only_courses (list): If provided, include only these course IDs in the report.
+            query_interval (int): If provided, override the default query interval.
+
+        Returns:
+            list: The rows of the generated CSV report. Each item is a dictionary.
+
+        """
+        # Create a temporary directory for the output
+        # Delete it when we're finished
+        temp_dir_path = tempfile.mkdtemp()
+
+        def _cleanup():  # pylint: disable=missing-docstring
+            shutil.rmtree(temp_dir_path)
+
+        self.addCleanup(_cleanup)
+
+        # Sanitize the arguments
+        if other_names is None:
+            other_names = []
+
+        output_path = os.path.join(temp_dir_path, self.OUTPUT_FILE_NAME)
+        org_list = [org] + other_names
+        if only_courses is not None:
+            only_courses = ",".join(unicode(course_id) for course_id in only_courses)
+
+        command = email_opt_in_list.Command()
+
+        # Override the query interval to speed up the tests
+        if query_interval is not None:
+            command.QUERY_INTERVAL = query_interval
+
+        # Execute the command
+        command.handle(output_path, *org_list, courses=only_courses)
+
+        # Retrieve the output from the file
+        try:
+            with open(output_path) as output_file:
+                reader = csv.DictReader(output_file, fieldnames=self.OUTPUT_FIELD_NAMES)
+                rows = [row for row in reader]
+        except IOError:
+            self.fail("Could not find or open output file at '{path}'".format(path=output_path))
+
+        # Return the output as a list of dictionaries
+        return rows
+
+    def _assert_output(self, output, *args, **kwargs):
+        """Check the output of the report.
+
+        Arguments:
+            output (list): List of rows in the output CSV file.
+            *args: Tuples of (user, course_id, opt_in_pref)
+
+        Keyword Arguments:
+            expect_pref_datetime (bool): If false, expect an empty
+                string for the preference.
+
+        Returns:
+            None
+
+        Raises:
+            AssertionError
+
+        """
+        self.assertEqual(len(output), len(args))
+        for user, course_id, opt_in_pref in args:
+            self.assertIn({
+                "email": user.email.encode('utf-8'),
+                "full_name": user.profile.name.encode('utf-8'),
+                "course_id": unicode(course_id).encode('utf-8'),
+                "is_opted_in_for_email": unicode(opt_in_pref),
+                "preference_set_date": (
+                    self._latest_pref_set_date(user)
+                    if kwargs.get("expect_pref_datetime", True)
+                    else ""
+                )
+            }, output)