Merge pull request #18615 from edx/youngstrom/handle-multi-containers

Xdist: Handle multiple containers per task definition
This commit is contained in:
Michael Youngstrom
2018-07-20 16:38:52 -04:00
committed by GitHub
4 changed files with 48 additions and 46 deletions

2
.gitignore vendored
View File

@@ -74,7 +74,7 @@ reports/
jscover.log
jscover.log.*
.pytest_cache/
pytest_container*.txt
pytest_task*.txt
.tddium*
common/test/data/test_unicode/static/
test_root/courses/

View File

@@ -1,12 +1,12 @@
#!/bin/bash
set -e
python scripts/xdist/pytest_container_manager.py -a up -n ${XDIST_NUM_CONTAINERS} \
python scripts/xdist/pytest_container_manager.py -a up -n ${XDIST_NUM_TASKS} \
-t ${XDIST_CONTAINER_TASK_NAME} \
-s ${XDIST_CONTAINER_SUBNET} \
-sg ${XDIST_CONTAINER_SECURITY_GROUP}
ip_list=$(<pytest_container_ip_list.txt)
ip_list=$(<pytest_task_ips.txt)
for ip in $ip_list
do

View File

@@ -11,36 +11,36 @@ logger = logging.getLogger(__name__)
class PytestContainerManager():
"""
Responsible for spinning up and terminating containers to be used with pytest-xdist
Responsible for spinning up and terminating ECS tasks to be used with pytest-xdist
"""
def __init__(self, region, cluster):
self.ecs = boto3.client('ecs', region)
self.cluster_name = cluster
def spin_up_containers(self, number_of_containers, task_name, subnets, security_groups, public_ip, launch_type):
def spin_up_tasks(self, number_of_tasks, task_name, subnets, security_groups, public_ip, launch_type):
"""
Spins up containers and generates two .txt files, one containing the IP
addresses of the new containers, the other containing their task_arns.
Spins up tasks and generates two .txt files, containing the IP/ arns
of the new tasks.
"""
CONTAINER_RUN_TIME_OUT_MINUTES = 10
TASK_RUN_TIMEOUT_MINUTES = 10
MAX_RUN_TASK_RETRIES = 7
revision = self.ecs.describe_task_definition(taskDefinition=task_name)['taskDefinition']['revision']
task_definition = "{}:{}".format(task_name, revision)
logging.info("Spinning up {} containers based on task definition: {}".format(number_of_containers, task_definition))
logging.info("Spinning up {} tasks based on task definition: {}".format(number_of_tasks, task_definition))
remainder = number_of_containers % 10
quotient = number_of_containers / 10
remainder = number_of_tasks % 10
quotient = number_of_tasks / 10
container_num_list = [10 for i in range(0, quotient)]
task_num_list = [10 for i in range(0, quotient)]
if remainder:
container_num_list.append(remainder)
task_num_list.append(remainder)
# Boot up containers. boto3's run_task only allows 10 containers to be launched at a time
# Spin up tasks. boto3's run_task only allows 10 tasks to be launched at a time
task_arns = []
for num in container_num_list:
for num in task_num_list:
for retry in range(1, MAX_RUN_TASK_RETRIES + 1):
try:
response = self.ecs.run_task(
@@ -60,7 +60,7 @@ class PytestContainerManager():
# Handle AWS throttling with an exponential backoff
if retry == MAX_RUN_TASK_RETRIES:
raise StandardError(
"MAX_RUN_TASK_RETRIES ({}) reached while spinning up containers due to AWS throttling.".format(MAX_RUN_TASK_RETRIES)
"MAX_RUN_TASK_RETRIES ({}) reached while spinning up tasks due to AWS throttling.".format(MAX_RUN_TASK_RETRIES)
)
logger.info("Hit error: {}. Retrying".format(err))
countdown = 2 ** retry
@@ -72,51 +72,53 @@ class PytestContainerManager():
for task_response in response['tasks']:
task_arns.append(task_response['taskArn'])
# Wait for containers to finish spinning up
# Wait for tasks to finish spinning up
not_running = task_arns[:]
ip_addresses = []
all_running = False
for attempt in range(0, CONTAINER_RUN_TIME_OUT_MINUTES * 2):
for attempt in range(0, TASK_RUN_TIMEOUT_MINUTES * 2):
time.sleep(30)
list_tasks_response = self.ecs.describe_tasks(cluster=self.cluster_name, tasks=not_running)['tasks']
del not_running[:]
for task_response in list_tasks_response:
if task_response['lastStatus'] == 'RUNNING':
for container in task_response['containers']:
ip_addresses.append(container["networkInterfaces"][0]["privateIpv4Address"])
container_ip_address = container["networkInterfaces"][0]["privateIpv4Address"]
if container_ip_address not in ip_addresses:
ip_addresses.append(container_ip_address)
else:
not_running.append(task_response['taskArn'])
if not_running:
logger.info("Still waiting on {} containers to spin up".format(len(not_running)))
logger.info("Still waiting on {} tasks to spin up".format(len(not_running)))
else:
logger.info("Finished spinning up containers")
logger.info("Finished spinning up tasks")
all_running = True
break
if not all_running:
raise StandardError(
"Timed out waiting to spin up all containers."
"Timed out waiting to spin up all tasks."
)
logger.info("Successfully booted up {} containers.".format(number_of_containers))
logger.info("Successfully booted up {} tasks.".format(number_of_tasks))
# Generate .txt files containing IP addresses and task arns
ip_list_string = " ".join(ip_addresses)
logger.info("Container IP list: {}".format(ip_list_string))
ip_list_file = open("pytest_container_ip_list.txt", "w")
logger.info("Task IP list: {}".format(ip_list_string))
ip_list_file = open("pytest_task_ips.txt", "w")
ip_list_file.write(ip_list_string)
ip_list_file.close()
task_arn_list_string = " ".join(task_arns)
logger.info("Container task arn list: {}".format(task_arn_list_string))
task_arn_file = open("pytest_container_task_arns.txt", "w")
logger.info("Task arn list: {}".format(task_arn_list_string))
task_arn_file = open("pytest_task_arns.txt", "w")
task_arn_file.write(task_arn_list_string)
task_arn_file.close()
def terminate_containers(self, task_arns, reason):
def terminate_tasks(self, task_arns, reason):
"""
Terminates containers based on a list of task_arns.
Terminates tasks based on a list of task_arns.
"""
for task_arn in task_arns:
response = self.ecs.stop_task(
@@ -129,52 +131,52 @@ class PytestContainerManager():
if __name__ == "__main__":
parser = argparse.ArgumentParser(
description="PytestContainerManager, manages ECS containers in an AWS cluster."
description="PytestContainerManager, manages ECS tasks in an AWS cluster."
)
parser.add_argument('--region', '-g', default='us-east-1',
help="AWS region where ECS infrastructure lives. Defaults to us-east-1")
parser.add_argument('--cluster', '-c', default="jenkins-worker-containers",
help="AWS Cluster name where the containers live. Defaults to"
help="AWS Cluster name where the tasks run. Defaults to"
"the testeng cluster: jenkins-worker-containers")
parser.add_argument('--action', '-a', choices=['up', 'down'], default=None,
help="Action for PytestContainerManager to perform. "
"Either up for spinning up AWS ECS containers or down for stopping them")
"Either up for spinning up AWS ECS tasks or down for stopping them")
# Spinning up containers
parser.add_argument('--num_containers', '-n', type=int, default=None,
help="Number of containers to spin up")
# Spinning up tasks
parser.add_argument('--num_tasks', '-n', type=int, default=None,
help="Number of ECS tasks to spin up")
parser.add_argument('--task_name', '-t', default=None,
help="Name of the task definition for spinning up workers")
help="Name of the task definition")
parser.add_argument('--subnets', '-s', nargs='+', default=None,
help="List of subnets for the containers to exist in")
help="List of subnets for the tasks to exist in")
parser.add_argument('--security_groups', '-sg', nargs='+', default=None,
help="List of security groups to apply to the containers")
help="List of security groups to apply to the tasks")
parser.add_argument('--public_ip', choices=['ENABLED', 'DISABLED'],
default='DISABLED', help="Whether the containers should have a public IP")
default='DISABLED', help="Whether the tasks should have a public IP")
parser.add_argument('--launch_type', default='FARGATE', choices=['EC2', 'FARGATE'],
help="ECS launch type of container. Defaults to FARGATE")
help="ECS launch type for tasks. Defaults to FARGATE")
# Terminating containers
# Terminating tasks
parser.add_argument('--task_arns', '-arns', nargs='+', default=None,
help="Task arns to terminate")
parser.add_argument('--reason', '-r', default="Finished executing tests",
help="Reason for terminating containers")
help="Reason for terminating tasks")
args = parser.parse_args()
containerManager = PytestContainerManager(args.region, args.cluster)
if args.action == 'up':
containerManager.spin_up_containers(
args.num_containers,
containerManager.spin_up_tasks(
args.num_tasks,
args.task_name,
args.subnets,
args.security_groups,
@@ -182,7 +184,7 @@ if __name__ == "__main__":
args.launch_type
)
elif args.action == 'down':
containerManager.terminate_containers(
containerManager.terminate_tasks(
args.task_arns,
args.reason
)

View File

@@ -48,7 +48,7 @@ passenv =
XDIST_CONTAINER_SUBNET
XDIST_CONTAINER_TASK_NAME
XDIST_GIT_BRANCH
XDIST_NUM_CONTAINERS
XDIST_NUM_TASKS
deps =
django18: Django>=1.8,<1.9
django19: Django>=1.9,<1.10