From 2018acbce305d316d1d77c18eee0443cd7a55e06 Mon Sep 17 00:00:00 2001 From: Christine Lytwynec Date: Thu, 18 Dec 2014 17:24:32 -0500 Subject: [PATCH] use pip-accel if it is available, get .pip/download-cache from S3 add script to upload/download pip cache directories from S3 update all-tests.sh to use pip-download-cache from S3 update pip-accel to version 0.22 --- pavelib/prereqs.py | 19 ++++- requirements/edx/paver.txt | 4 + scripts/all-tests.sh | 33 ++++++- scripts/pip_cache_store.py | 170 +++++++++++++++++++++++++++++++++++++ 4 files changed, 222 insertions(+), 4 deletions(-) create mode 100644 scripts/pip_cache_store.py diff --git a/pavelib/prereqs.py b/pavelib/prereqs.py index 0a334a8152..2c84e6610b 100644 --- a/pavelib/prereqs.py +++ b/pavelib/prereqs.py @@ -6,6 +6,7 @@ import os import hashlib from distutils import sysconfig from paver.easy import * +import subprocess from .utils.envs import Env @@ -133,8 +134,24 @@ def python_prereqs_installation(): """ Installs Python prerequisites """ + + try: + # Open /dev/null for *writing*: the child writes its version banner to + # stdout/stderr, and a read-only fd would make those writes fail (EBADF), + # wrongly falling back to plain pip even when pip-accel is available. + subprocess.check_call( + "pip-accel --version", + stdout=open("/dev/null", "w"), + stderr=open("/dev/null", "w"), + shell=True + ) + executable = 'pip-accel' + except subprocess.CalledProcessError: + executable = 'pip' + for req_file in PYTHON_REQ_FILES: - sh("pip install -q --exists-action w -r {req_file}".format(req_file=req_file)) + sh("{ex} install -q --exists-action=w -r {req_file}".format( + ex=executable, + req_file=req_file, + ) + ) @task diff --git a/requirements/edx/paver.txt b/requirements/edx/paver.txt index 2d110fc9dd..e551898e34 100644 --- a/requirements/edx/paver.txt +++ b/requirements/edx/paver.txt @@ -5,3 +5,7 @@ lazy==1.1 path.py==3.0.1 watchdog==0.7.1 python-memcached + +# Requirements to run paver with pip-accel +-e git+https://github.com/jzoldak/pip.git@v1.4.1patch772#egg=pip +pip-accel[s3]==0.22 diff --git a/scripts/all-tests.sh b/scripts/all-tests.sh index cb4f502eb0..a58bd2023f 100755 --- a/scripts/all-tests.sh +++ b/scripts/all-tests.sh @@ -94,8 
+94,22 @@ if [ -e $HOME/edx-venv_clean.tar.gz ]; then tar -C $HOME -xf $HOME/edx-venv_clean.tar.gz fi -# Activate the Python virtualenv -source $HOME/edx-venv/bin/activate +# Activate a new Python virtualenv +virtualenv $HOME/edx-venv-$GIT_COMMIT +source $HOME/edx-venv-$GIT_COMMIT/bin/activate + +# boto and path.py are requirements of scripts/pip_cache_store.py which is used +# to download the pip download cache from S3. +# We are installing just boto and path.py here to avoid installing all of base.txt +# and paver.txt before getting the download cache. If versions of these are changed +# in requirements files, they will be updated by install_prereqs. +# Quote the specifiers: an unquoted ">=" is a shell redirection, which would +# install unpinned packages and create junk files named "=2.32.1"/"=3.0.1". +pip install -q "boto>=2.32.1" "path.py>=3.0.1" + +# Download the pip-download-cache +python scripts/pip_cache_store.py download -b edx-platform.dependency-cache -f v1/master -d $HOME/.pip/download-cache/ -t $HOME/pip-download-cache.tar.gz + +# Now install paver requirements +pip install -q -r requirements/edx/paver.txt # If the environment variable 'SHARD' is not set, default to 'all'. # This could happen if you are trying to use this script from @@ -122,7 +136,15 @@ case "$TEST_SUITE" in END - exit $EXIT + exitcode=$EXIT + + # Update the pip-download-cache.tar.gz in S3 if JOB_NAME starts with "edx-all-tests-auto-master/" + # (for old jenkins) or "edx-platform-all-tests-master/" (for new jenkins). + # The JOB_NAME is something along the lines of "edx-all-tests-auto-master/SHARD=1,TEST_SUITE=quality". 
+ if [[ ${JOB_NAME} == 'edx-all-tests-auto-master/'* ]] || [[ ${JOB_NAME} == 'edx-platform-all-tests-master/'* ]]; then + python scripts/pip_cache_store.py upload -b edx-platform.dependency-cache -f v1/master -d $HOME/.pip/download-cache/ -t $HOME/pip-download-cache.tar.gz + fi + exit $exitcode ;; "unit") @@ -202,3 +224,8 @@ END ;; esac + + +# Deactivate and clean up python virtualenv +deactivate +rm -r $HOME/edx-venv-$GIT_COMMIT diff --git a/scripts/pip_cache_store.py b/scripts/pip_cache_store.py new file mode 100644 index 0000000000..cf02c91644 --- /dev/null +++ b/scripts/pip_cache_store.py @@ -0,0 +1,170 @@ +#!/usr/bin/env python +""" +This script is intended to be used to store the ~/.pip/download-cache +directory in S3. The primary use case, as of this writing, is to help +speed up Jenkins build times for edx-platform tests. + +Before running pip-accel install (or pip install) on a Jenkins worker, +this directory will be downloaded from S3. + +For usage: `python pip_cache_store.py -h`. +""" +import argparse +from boto.s3.connection import S3Connection +from boto.exception import S3ResponseError +import os +from path import path +import sys +import tarfile + + +class S3TarStore(): + """ + Static methods for storing directories in S3 in tar.gz form. + """ + + def __init__(self, *args, **kwargs): + self.dirpath = kwargs['dirpath'] + self.tarpath = kwargs['tarpath'] + self.bucket_name = kwargs['bucket_name'] + self.keyname = path(kwargs['bucket_folder']) / self.tarpath.basename() + + @staticmethod + def bucket(bucket_name): + """ + Returns bucket matching name. If there exists no such bucket + or there is an exception raised, then `None` is returned. 
+ """ + try: + conn = S3Connection() + bucket = conn.get_bucket(bucket_name) + except S3ResponseError: + print ( + "Please check that the bucket {} exists and that you have " + "the proper credentials to access it.".format(bucket_name) + ) + return None + except Exception as e: + print ( + "There was an error while connecting to S3. " + "Please check error log for more details." + ) + sys.stderr.write(e.message) + return None + + if not bucket: + # Static method: there is no `self` here, so use the parameter. + print "No such bucket {}.".format(bucket_name) + + return bucket + + + @staticmethod + def download_dir(bucket, tarpath, dirpath, keyname): + """ + Downloads a file matching `keyname` from `bucket` + to `tarpath`. It then extracts the tar.gz file into `dirpath`. + If no matching `keyname` is found, it does nothing. + + Note that any exceptions that occur while downloading or unpacking + will be logged, but not raised. + """ + key = bucket.lookup(keyname) + if key: + try: + print "Downloading contents of {} from S3.".format(keyname) + key.get_contents_to_filename(tarpath) + + with tarfile.open(tarpath, mode="r:gz") as tar: + print "Unpacking {} to {}".format(tarpath, dirpath) + tar.extractall(path=dirpath.parent) + except Exception as e: + print ("Ignored Exception:\n {}".format(e.message)) + else: + print ( + "Couldn't find anything matching {} in S3 bucket. " + "Doing Nothing.".format(keyname) + ) + + @staticmethod + def upload_dir(bucket, tarpath, dirpath, keyname): + """ + Packs the contents of `dirpath` into a tar.gz file named + `tarpath.basename()`. It then uploads the tar.gz file to `bucket` + as `keyname`. If `dirpath` is not a directory, it does nothing. + + Note that any exceptions that occur while compressing or uploading + will be logged, but not raised. 
+ """ + if dirpath.isdir(): + try: + with tarfile.open(tarpath, "w:gz") as tar: + print "Packing up {} to {}".format(dirpath, tarpath) + tar.add(dirpath, arcname='/') + + print "Uploading {} to S3 bucket.".format(keyname) + existing_key = bucket.lookup(keyname) + key = existing_key if existing_key else bucket.new_key(keyname) + key.set_contents_from_filename(tarpath) + except Exception as e: + print ("Ignored Exception:\n {}".format(e.message)) + sys.stderr.write(e.message) + else: + # Actually emit the message (it was previously built and discarded), + # and use the `dirpath` parameter -- `dirname` is undefined here. + print "Path {} isn't a directory. Doing Nothing.".format(dirpath) + + def download(self): + """ + Checks that bucket is available and downloads self.keyname to self.tarpath. + Then extracts self.tarpath to self.dirpath. + """ + bucket = self.bucket(self.bucket_name) + if not bucket: + return + + self.download_dir(bucket, self.tarpath, self.dirpath, self.keyname) + + def upload(self): + """ + Checks that bucket is available. Then compresses self.dirpath to self.tarpath + and uploads self.tarpath to self.keyname. + """ + bucket = self.bucket(self.bucket_name) + if not bucket: + return + + self.upload_dir(bucket, self.tarpath, self.dirpath, self.keyname) + + +def main(): + """ + Calls S3TarStore.upload or S3TarStore.download using the command line args. + """ + parser = argparse.ArgumentParser(description='Upload/download tar.gz files to/from S3.') + parser.add_argument('action', choices=('upload', 'download')) + parser.add_argument('--bucket', '-b', dest='bucket_name', required=True, + help='Name of S3 bucket.') + parser.add_argument('--folder', '-f', dest='bucket_folder', required=True, + help='Folder within S3 bucket. (ex. "v1/my-branch-name/")') + parser.add_argument('--dir', '-d', dest='dirpath', required=True, + help='Directory to be uploaded from or downloaded to. ' + '(ex. "~/.pip/download-cache/")') + parser.add_argument('--tar', '-t', dest='tarpath', required=True, + help='Path to place newly created or downloaded tarfile. 
' + 'The basename of this should be the basename of the tarfile ' + 'stored in S3. (ex. "~/pip-download-cache.tar.gz")') + args = parser.parse_args() + + store = S3TarStore( + dirpath = path(args.dirpath), + tarpath = path(args.tarpath), + bucket_name = args.bucket_name, + bucket_folder = args.bucket_folder, + ) + + if args.action == 'upload': + store.upload() + elif args.action == 'download': + store.download() + + +if __name__ == '__main__': + main()