From 139cdb8ba201273c6f012577b5c2326dc9f56bc8 Mon Sep 17 00:00:00 2001 From: Michael Youngstrom Date: Thu, 27 Jun 2019 22:04:02 -0400 Subject: [PATCH] Switch testing from ecs to ec2 (#20846) --- .coveragerc | 2 +- lms/envs/test.py | 6 + pavelib/paver_tests/test_paver_pytest_cmds.py | 6 +- pavelib/paver_tests/test_paver_quality.py | 6 +- pavelib/quality.py | 2 +- pavelib/utils/test/suites/pytest_suite.py | 12 +- scripts/Jenkinsfiles/python | 33 +-- scripts/unit-tests.sh | 4 +- scripts/xdist/prepare_xdist_nodes.sh | 32 ++- scripts/xdist/pytest_container_manager.py | 203 ----------------- scripts/xdist/pytest_worker_manager.py | 212 ++++++++++++++++++ scripts/xdist/terminate_xdist_nodes.sh | 10 +- tox.ini | 10 +- 13 files changed, 280 insertions(+), 258 deletions(-) delete mode 100644 scripts/xdist/pytest_container_manager.py create mode 100644 scripts/xdist/pytest_worker_manager.py diff --git a/.coveragerc b/.coveragerc index fab115cb37..44b8c467a2 100644 --- a/.coveragerc +++ b/.coveragerc @@ -49,4 +49,4 @@ output = reports/coverage.xml jenkins_source = /home/jenkins/workspace/$JOB_NAME /home/jenkins/workspace/$SUBSET_JOB - /edx/app/edxapp/edx-platform + /home/jenkins/edx-platform diff --git a/lms/envs/test.py b/lms/envs/test.py index 81b81398c6..504cb49d83 100644 --- a/lms/envs/test.py +++ b/lms/envs/test.py @@ -127,6 +127,12 @@ MOCK_PEER_GRADING = True COMMENTS_SERVICE_URL = 'http://localhost:4567' +DJFS = { + 'type': 'osfs', + 'directory_root': '{}/django-pyfs/static/django-pyfs'.format(DATA_DIR), + 'url_root': '/static/django-pyfs', +} + ############################ STATIC FILES ############################# # TODO (cpennington): We need to figure out how envs/test.py can inject things diff --git a/pavelib/paver_tests/test_paver_pytest_cmds.py b/pavelib/paver_tests/test_paver_pytest_cmds.py index a73ed11c0a..c803c31ca1 100644 --- a/pavelib/paver_tests/test_paver_pytest_cmds.py +++ b/pavelib/paver_tests/test_paver_pytest_cmds.py @@ -57,9 +57,9 @@ class TestPaverPytestCmd(unittest.TestCase): env_var_cmd = "{} DISABLE_COURSEENROLLMENT_HISTORY=1".format(django_env_var_cmd) - xdist_string = u'--tx {}*ssh="ubuntu@{} -o StrictHostKeyChecking=no"' \ - '//python="source /edx/app/edxapp/edxapp_env; {}; python"' \ - '//chdir="/edx/app/edxapp/edx-platform"' \ + xdist_string = u'--tx {}*ssh="jenkins@{} -o StrictHostKeyChecking=no"' \ + '//python="source edx-venv/bin/activate; {}; python"' \ + '//chdir="edx-platform"' \ .format(processes, ip, env_var_cmd) expected_statement.append(xdist_string) for rsync_dir in Env.rsync_dirs(): diff --git a/pavelib/paver_tests/test_paver_quality.py b/pavelib/paver_tests/test_paver_quality.py index 2709b42563..7aef238c04 100644 --- a/pavelib/paver_tests/test_paver_quality.py +++ b/pavelib/paver_tests/test_paver_quality.py @@ -346,8 +346,8 @@ class TestPaverRunQuality(PaverTestCase): def test_no_diff_quality_failures(self): # Assert nothing is raised pavelib.quality.run_quality("") - # And assert that sh was called 7 times: - # 5 for pylint on each of the system directories + # And assert that sh was called 8 times: + # 6 for pylint on each of the system directories # 1 for diff_quality for pylint # 1 for diff_quality for eslint - self.assertEqual(self._mock_paver_sh.call_count, 7) + self.assertEqual(self._mock_paver_sh.call_count, 8) diff --git a/pavelib/quality.py b/pavelib/quality.py index e52bd8c98a..cff66b57ca 100644 --- a/pavelib/quality.py +++ b/pavelib/quality.py @@ -20,7 +20,7 @@ from openedx.core.djangolib.markup import HTML from .utils.envs import Env from .utils.timer import timed -ALL_SYSTEMS = 'lms,cms,common,openedx,pavelib' +ALL_SYSTEMS = 'lms,cms,common,openedx,pavelib,scripts' JUNIT_XML_TEMPLATE = u""" {failure_element} diff --git a/pavelib/utils/test/suites/pytest_suite.py b/pavelib/utils/test/suites/pytest_suite.py index 3360d9a811..53eb0b836a 100644 --- a/pavelib/utils/test/suites/pytest_suite.py +++ b/pavelib/utils/test/suites/pytest_suite.py @@ -170,9 +170,9 @@ class SystemTestSuite(PytestSuite): env_var_cmd = u'export DJANGO_SETTINGS_MODULE={} DISABLE_COURSEENROLLMENT_HISTORY={}'\ .format('{}.envs.{}'.format(self.root, self.settings), self.disable_courseenrollment_history) - xdist_string = u'--tx {}*ssh="ubuntu@{} -o StrictHostKeyChecking=no"' \ - '//python="source /edx/app/edxapp/edxapp_env; {}; python"' \ - '//chdir="/edx/app/edxapp/edx-platform"' \ + xdist_string = u'--tx {}*ssh="jenkins@{} -o StrictHostKeyChecking=no"' \ + '//python="source edx-venv/bin/activate; {}; python"' \ + '//chdir="edx-platform"' \ .format(xdist_remote_processes, ip, env_var_cmd) cmd.append(xdist_string) for rsync_dir in Env.rsync_dirs(): @@ -295,9 +295,9 @@ class LibTestSuite(PytestSuite): env_var_cmd = u'{} DISABLE_COURSEENROLLMENT_HISTORY={}' \ .format(django_env_var_cmd, self.disable_courseenrollment_history) - xdist_string = u'--tx {}*ssh="ubuntu@{} -o StrictHostKeyChecking=no"' \ - '//python="source /edx/app/edxapp/edxapp_env; {}; python"' \ - '//chdir="/edx/app/edxapp/edx-platform"' \ + xdist_string = u'--tx {}*ssh="jenkins@{} -o StrictHostKeyChecking=no"' \ + '//python="source edx-venv/bin/activate; {}; python"' \ + '//chdir="edx-platform"' \ .format(xdist_remote_processes, ip, env_var_cmd) cmd.append(xdist_string) for rsync_dir in Env.rsync_dirs(): diff --git a/scripts/Jenkinsfiles/python b/scripts/Jenkinsfiles/python index 3906c91932..6950148b05 100644 --- a/scripts/Jenkinsfiles/python +++ b/scripts/Jenkinsfiles/python @@ -1,12 +1,7 @@ def runPythonTests() { // Determine git refspec, branch, and clone type - if (env.ghprbActualCommit) { - git_branch = "${ghprbActualCommit}" - git_refspec = "+refs/pull/${ghprbPullId}/*:refs/remotes/origin/pr/${ghprbPullId}/*" - } else { - git_branch = "${BRANCH_NAME}" - git_refspec = "+refs/heads/${BRANCH_NAME}:refs/remotes/origin/${BRANCH_NAME}" - } + git_branch = xdist_git_branch() + git_refspec = xdist_git_refspec() sshagent(credentials: ['jenkins-worker', 'jenkins-worker-pem'], ignoreMissing: true) { checkout changelog: false, poll: false, scm: [$class: 'GitSCM', branches: [[name: git_branch]], doGenerateSubmoduleConfigurations: false, extensions: [[$class: 'CloneOption', honorRefspec: true, @@ -32,6 +27,14 @@ def xdist_git_branch() { } } +def xdist_git_refspec() { + if (env.ghprbActualCommit) { + return "+refs/pull/${ghprbPullId}/*:refs/remotes/origin/pr/${ghprbPullId}/*" + } else { + return "+refs/heads/${BRANCH_NAME}:refs/remotes/origin/${BRANCH_NAME}" + } +} + pipeline { agent { label "jenkins-worker" } options { @@ -39,10 +42,14 @@ pipeline { timeout(60) } environment { - XDIST_CONTAINER_SUBNET = credentials('XDIST_CONTAINER_SUBNET') - XDIST_CONTAINER_SECURITY_GROUP = credentials('XDIST_CONTAINER_SECURITY_GROUP') - XDIST_CONTAINER_TASK_NAME = "jenkins-worker-task" XDIST_GIT_BRANCH = xdist_git_branch() + XDIST_GIT_REFSPEC = xdist_git_refspec() + XDIST_INSTANCE_TYPE = "c5d.large" + XDIST_WORKER_AMI = credentials('XDIST_WORKER_AMI') + XDIST_WORKER_IAM_PROFILE_ARN = credentials('XDIST_WORKER_IAM_PROFILE_ARN') + XDIST_WORKER_KEY_NAME = "jenkins-worker" + XDIST_WORKER_SUBNET = credentials('XDIST_WORKER_SUBNET') + XDIST_WORKER_SECURITY_GROUP = credentials('XDIST_WORKER_SECURITY_GROUP') } stages { stage('Mark build as pending on Github') { @@ -74,7 +81,7 @@ pipeline { agent { label "jenkins-worker" } environment { TEST_SUITE = "lms-unit" - XDIST_NUM_TASKS = 10 + XDIST_NUM_WORKERS = 10 XDIST_REMOTE_NUM_PROCESSES = 1 } steps { @@ -94,7 +101,7 @@ pipeline { agent { label "jenkins-worker" } environment { TEST_SUITE = "cms-unit" - XDIST_NUM_TASKS = 3 + XDIST_NUM_WORKERS = 3 XDIST_REMOTE_NUM_PROCESSES = 1 } steps { @@ -114,7 +121,7 @@ pipeline { agent { label "jenkins-worker" } environment { TEST_SUITE = "commonlib-unit" - XDIST_NUM_TASKS = 3 + XDIST_NUM_WORKERS = 3 XDIST_REMOTE_NUM_PROCESSES = 1 } steps { diff --git a/scripts/unit-tests.sh b/scripts/unit-tests.sh index 938708c8ac..93aa2b0bda 100755 --- a/scripts/unit-tests.sh +++ b/scripts/unit-tests.sh @@ -36,9 +36,9 @@ if [[ -n "$TOXENV" ]]; then export NO_PREREQ_INSTALL="True" fi -if [[ -n "$XDIST_NUM_TASKS" ]]; then +if [[ -n "$XDIST_NUM_WORKERS" ]]; then bash scripts/xdist/prepare_xdist_nodes.sh - PAVER_ARGS="-v --xdist_ip_addresses="$( 0: + logger.info("Still waiting on {} workers to spin up".format(len(not_running))) + time.sleep(5) + else: + logger.info("Finished spinning up workers") + all_running = True + break + + if not all_running: + raise StandardError( + "Timed out waiting to spin up all workers." + ) + logger.info("Successfully booted up {} workers.".format(number_of_workers)) + + not_ready_ip_addresses = ip_addresses[:] + logger.info("Checking ssh connection to workers.") + pool = Pool(processes=number_of_workers) + for ssh_try in range(0, self.WORKER_SSH_ATTEMPTS): + results = pool.map(_check_worker_ready, not_ready_ip_addresses) + deleted_ips = 0 + for num in range(0, len(results)): + if results[num] == 0: + del(not_ready_ip_addresses[num - deleted_ips]) + deleted_ips += 1 + + if len(not_ready_ip_addresses) == 0: + logger.info("All workers are ready for tests.") + break + + if ssh_try == self.WORKER_SSH_ATTEMPTS - 1: + raise StandardError( + "Max ssh tries to remote workers reached." + ) + + logger.info("Not all workers are ready. Sleeping for 5 seconds then retrying.") + time.sleep(5) + + # Generate .txt files containing IP addresses and instance ids + ip_list_string = ",".join(ip_addresses) + logger.info("Worker IP list: {}".format(ip_list_string)) + ip_list_file = open("pytest_worker_ips.txt", "w") + ip_list_file.write(ip_list_string) + ip_list_file.close() + + worker_instance_id_list_string = ",".join(worker_instance_ids) + logger.info("Worker Instance Id list: {}".format(worker_instance_id_list_string)) + worker_arn_file = open("pytest_worker_instance_ids.txt", "w") + worker_arn_file.write(worker_instance_id_list_string) + worker_arn_file.close() + + def terminate_workers(self, worker_instance_ids): + """ + Terminates workers based on a list of worker_instance_ids. + """ + instance_id_list = worker_instance_ids.split(',') + response = self.ec2.terminate_instances( + InstanceIds=instance_id_list + ) + + +if __name__ == "__main__": + + parser = argparse.ArgumentParser( + description="PytestWorkerManager, manages EC2 workers in an AWS cluster." + ) + + parser.add_argument('--action', '-a', choices=['up', 'down'], default=None, + help="Action for PytestWorkerManager to perform. " + "Either up for spinning up AWS EC2 workers or down for terminating them") + + parser.add_argument('--region', '-g', default='us-east-1', + help="AWS region where EC2 infrastructure lives. Defaults to us-east-1") + + # Spinning up workers + parser.add_argument('--num-workers', '-n', type=int, default=None, + help="Number of EC2 workers to spin up") + + parser.add_argument('--ami', '-ami', default=None, + help="AMI for workers") + + parser.add_argument('--instance-type', '-type', default=None, + help="Desired EC2 instance type") + + parser.add_argument('--subnet-id', '-s', default=None, + help="Subnet for the workers to exist in") + + parser.add_argument('--security_groups', '-sg', nargs='+', default=None, + help="List of security group ids to apply to workers") + + parser.add_argument('--key-name', '-key', default=None, + help="Key pair name for sshing to worker") + + parser.add_argument('--iam-arn', '-iam', default=None, + help="Iam Instance Profile ARN for the workers") + + # Terminating workers + parser.add_argument('--instance-ids', '-ids', default=None, + help="Instance ids terminate") + + args = parser.parse_args() + workerManager = PytestWorkerManager(args.region) + + if args.action == 'up': + workerManager.spin_up_workers( + args.num_workers, + args.ami, + args.instance_type, + args.subnet_id, + args.security_groups, + args.key_name, + args.iam_arn + ) + elif args.action == 'down': + workerManager.terminate_workers( + args.instance_ids + ) + else: + logger.info("No action specified for PytestWorkerManager") diff --git a/scripts/xdist/terminate_xdist_nodes.sh b/scripts/xdist/terminate_xdist_nodes.sh index ee077a2c57..e2997da6f7 100644 --- a/scripts/xdist/terminate_xdist_nodes.sh +++ b/scripts/xdist/terminate_xdist_nodes.sh @@ -1,10 +1,10 @@ #!/bin/bash set -e -if [ -f pytest_task_arns.txt ]; then - echo "Terminating xdist containers with pytest_container_manager.py" - xdist_task_arns=$(=2.0,<2.1