From 2018acbce305d316d1d77c18eee0443cd7a55e06 Mon Sep 17 00:00:00 2001 From: Christine Lytwynec Date: Thu, 18 Dec 2014 17:24:32 -0500 Subject: [PATCH] use pip-accel if it is available, get .pip/download-cache from S3 add script to upload/download pip cache directories from S3 update all-tests.sh to use pip-download-cache from S3 update pip-accel to version 0.22 --- pavelib/prereqs.py | 19 ++++- requirements/edx/paver.txt | 4 + scripts/all-tests.sh | 33 ++++++- scripts/pip_cache_store.py | 170 +++++++++++++++++++++++++++++++++++++ 4 files changed, 222 insertions(+), 4 deletions(-) create mode 100644 scripts/pip_cache_store.py diff --git a/pavelib/prereqs.py b/pavelib/prereqs.py index 0a334a8152..2c84e6610b 100644 --- a/pavelib/prereqs.py +++ b/pavelib/prereqs.py @@ -6,6 +6,7 @@ import os import hashlib from distutils import sysconfig from paver.easy import * +import subprocess from .utils.envs import Env @@ -133,8 +134,24 @@ def python_prereqs_installation(): """ Installs Python prerequisites """ + + try: + # Open /dev/null for *writing*: the child writes its version banner to + # stdout/stderr, and a read-only fd would make those writes fail (EBADF), + # wrongly falling back to plain pip even when pip-accel is available. + subprocess.check_call( + "pip-accel --version", + stdout=open("/dev/null", "w"), + stderr=open("/dev/null", "w"), + shell=True + ) + executable = 'pip-accel' + except subprocess.CalledProcessError: + executable = 'pip' + for req_file in PYTHON_REQ_FILES: - sh("pip install -q --exists-action w -r {req_file}".format(req_file=req_file)) + sh("{ex} install -q --exists-action=w -r {req_file}".format( + ex=executable, + req_file=req_file, + ) + ) @task diff --git a/requirements/edx/paver.txt b/requirements/edx/paver.txt index 2d110fc9dd..e551898e34 100644 --- a/requirements/edx/paver.txt +++ b/requirements/edx/paver.txt @@ -5,3 +5,7 @@ lazy==1.1 path.py==3.0.1 watchdog==0.7.1 python-memcached + +# Requirements to run paver with pip-accel +-e git+https://github.com/jzoldak/pip.git@v1.4.1patch772#egg=pip +pip-accel[s3]==0.22 diff --git a/scripts/all-tests.sh b/scripts/all-tests.sh index cb4f502eb0..a58bd2023f 100755 --- a/scripts/all-tests.sh +++ b/scripts/all-tests.sh @@ -94,8 
+94,22 @@ if [ -e $HOME/edx-venv_clean.tar.gz ]; then tar -C $HOME -xf $HOME/edx-venv_clean.tar.gz fi -# Activate the Python virtualenv -source $HOME/edx-venv/bin/activate +# Activate a new Python virtualenv +virtualenv $HOME/edx-venv-$GIT_COMMIT +source $HOME/edx-venv-$GIT_COMMIT/bin/activate + +# boto and path.py are requirements of scripts/pip_cache_store.py which is used +# to download the pip download cache from S3. +# We are installing just boto and path.py here to avoid installing all of base.txt +# and paver.txt before getting the download cache. If versions of these are changed +# in requirements files, they will be updated by install_prereqs. +# Quote the specifiers: an unquoted ">=" is a shell redirection, which would +# install unpinned packages and create junk files named "=2.32.1"/"=3.0.1". +pip install -q "boto>=2.32.1" "path.py>=3.0.1" + +# Download the pip-download-cache +python scripts/pip_cache_store.py download -b edx-platform.dependency-cache -f v1/master -d $HOME/.pip/download-cache/ -t $HOME/pip-download-cache.tar.gz + +# Now install paver requirements +pip install -q -r requirements/edx/paver.txt # If the environment variable 'SHARD' is not set, default to 'all'. # This could happen if you are trying to use this script from @@ -122,7 +136,15 @@ case "$TEST_SUITE" in END - exit $EXIT + exitcode=$EXIT + + # Update the pip-download-cache.tar.gz in S3 if JOB_NAME starts with "edx-all-tests-auto-master/" + # (for old jenkins) or "edx-platform-all-tests-master/" (for new jenkins). + # The JOB_NAME is something along the lines of "edx-all-tests-auto-master/SHARD=1,TEST_SUITE=quality". 
+ if [[ ${JOB_NAME} == 'edx-all-tests-auto-master/'* ]] || [[ ${JOB_NAME} == 'edx-platform-all-tests-master/'* ]]; then + python scripts/pip_cache_store.py upload -b edx-platform.dependency-cache -f v1/master -d $HOME/.pip/download-cache/ -t $HOME/pip-download-cache.tar.gz + fi + exit $exitcode ;; "unit") @@ -202,3 +224,8 @@ END ;; esac + + +# Deactivate and clean up python virtualenv +deactivate +rm -r $HOME/edx-venv-$GIT_COMMIT diff --git a/scripts/pip_cache_store.py b/scripts/pip_cache_store.py new file mode 100644 index 0000000000..cf02c91644 --- /dev/null +++ b/scripts/pip_cache_store.py @@ -0,0 +1,170 @@ +#!/usr/bin/env python +""" +This script is intended to be used to store the ~/.pip/download-cache +directory in S3. The primary use case, as of this writing, is to help +speed up Jenkins build times for edx-platform tests. + +Before running pip-accel install (or pip install) on a Jenkins worker, +this directory will be downloaded from S3. + +For usage: `python pip_cache_store.py -h`. +""" +import argparse +from boto.s3.connection import S3Connection +from boto.exception import S3ResponseError +import os +from path import path +import sys +import tarfile + + +class S3TarStore(): + """ + Static methods for storing directories in S3 in tar.gz form. + """ + + def __init__(self, *args, **kwargs): + self.dirpath = kwargs['dirpath'] + self.tarpath = kwargs['tarpath'] + self.bucket_name = kwargs['bucket_name'] + self.keyname = path(kwargs['bucket_folder']) / self.tarpath.basename() + + @staticmethod + def bucket(bucket_name): + """ + Returns bucket matching name. If there exists no such bucket + or there is an exception raised, then `None` is returned. 
+ """ + try: + conn = S3Connection() + bucket = conn.get_bucket(bucket_name) + except S3ResponseError: + print ( + "Please check that the bucket {} exists and that you have " + "the proper credentials to access it.".format(bucket_name) + ) + return None + except Exception as e: + print ( + "There was an error while connecting to S3. " + "Please check error log for more details." + ) + sys.stderr.write(e.message) + return None + + if not bucket: + # Static method: there is no `self` here, so use the parameter. + print "No such bucket {}.".format(bucket_name) + + return bucket + + + @staticmethod + def download_dir(bucket, tarpath, dirpath, keyname): + """ + Downloads a file matching `keyname` from `bucket` + to `tarpath`. It then extracts the tar.gz file into `dirpath`. + If no matching `keyname` is found, it does nothing. + + Note that any exceptions that occur while downloading or unpacking + will be logged, but not raised. + """ + key = bucket.lookup(keyname) + if key: + try: + print "Downloading contents of {} from S3.".format(keyname) + key.get_contents_to_filename(tarpath) + + with tarfile.open(tarpath, mode="r:gz") as tar: + print "Unpacking {} to {}".format(tarpath, dirpath) + tar.extractall(path=dirpath.parent) + except Exception as e: + print ("Ignored Exception:\n {}".format(e.message)) + else: + print ( + "Couldn't find anything matching {} in S3 bucket. " + "Doing Nothing.".format(keyname) + ) + + @staticmethod + def upload_dir(bucket, tarpath, dirpath, keyname): + """ + Packs the contents of `dirpath` into a tar.gz file named + `tarpath.basename()`. It then uploads the tar.gz file to `bucket` + as `keyname`. If `dirpath` is not a directory, it does nothing. + + Note that any exceptions that occur while compressing or uploading + will be logged, but not raised. 
+ """ + if dirpath.isdir(): + try: + with tarfile.open(tarpath, "w:gz") as tar: + print "Packing up {} to {}".format(dirpath, tarpath) + tar.add(dirpath, arcname='/') + + print "Uploading {} to S3 bucket.".format(keyname) + existing_key = bucket.lookup(keyname) + key = existing_key if existing_key else bucket.new_key(keyname) + key.set_contents_from_filename(tarpath) + except Exception as e: + print ("Ignored Exception:\n {}".format(e.message)) + sys.stderr.write(e.message) + else: + # Actually emit the message (it was previously built and discarded), + # and use the `dirpath` parameter -- `dirname` is undefined here. + print "Path {} isn't a directory. Doing Nothing.".format(dirpath) + + def download(self): + """ + Checks that bucket is available and downloads self.keyname to self.tarpath. + Then extracts self.tarpath to self.dirpath. + """ + bucket = self.bucket(self.bucket_name) + if not bucket: + return + + self.download_dir(bucket, self.tarpath, self.dirpath, self.keyname) + + def upload(self): + """ + Checks that bucket is available. Then compresses self.dirpath to self.tarpath + and uploads self.tarpath to self.keyname. + """ + bucket = self.bucket(self.bucket_name) + if not bucket: + return + + self.upload_dir(bucket, self.tarpath, self.dirpath, self.keyname) + + +def main(): + """ + Calls S3TarStore.upload or S3TarStore.download using the command line args. + """ + parser = argparse.ArgumentParser(description='Upload/download tar.gz files to/from S3.') + parser.add_argument('action', choices=('upload', 'download')) + parser.add_argument('--bucket', '-b', dest='bucket_name', required=True, + help='Name of S3 bucket.') + parser.add_argument('--folder', '-f', dest='bucket_folder', required=True, + help='Folder within S3 bucket. (ex. "v1/my-branch-name/")') + parser.add_argument('--dir', '-d', dest='dirpath', required=True, + help='Directory to be uploaded from or downloaded to. ' + '(ex. "~/.pip/download-cache/")') + parser.add_argument('--tar', '-t', dest='tarpath', required=True, + help='Path to place newly created or downloaded tarfile. 
' + 'The basename of this should be the basename of the tarfile ' + 'stored in S3. (ex. "~/pip-download-cache.tar.gz")') + args = parser.parse_args() + + store = S3TarStore( + dirpath = path(args.dirpath), + tarpath = path(args.tarpath), + bucket_name = args.bucket_name, + bucket_folder = args.bucket_folder, + ) + + if args.action == 'upload': + store.upload() + elif args.action == 'download': + store.download() + + +if __name__ == '__main__': + main()