add tests for paver db cache command

This commit is contained in:
Stuart Young
2017-12-29 12:05:14 -05:00
parent d75bbf05e4
commit 386f0e7cb7
3 changed files with 224 additions and 62 deletions

View File

@@ -2,16 +2,14 @@
Tasks for controlling the databases used in tests
"""
from __future__ import print_function
import os
from paver.easy import needs
from paver.easy import needs, task
from pavelib.utils.db_utils import (
remove_files_from_folder, reset_test_db, compute_fingerprint_and_write_to_disk,
fingerprint_bokchoy_db_files, does_fingerprint_on_disk_match, is_fingerprint_in_bucket,
get_file_from_s3, extract_files_from_zip, create_tarfile_from_db_cache, upload_to_s3
refresh_bokchoy_db_cache_from_s3, upload_db_cache_to_s3
)
from pavelib.utils.passthrough_opts import PassthroughTask
from pavelib.utils.timer import timed
# Bokchoy db schema and data fixtures
@@ -36,7 +34,7 @@ CACHE_FOLDER = 'common/test/db_cache'
@needs('pavelib.prereqs.install_prereqs')
@PassthroughTask
@task
@timed
def update_bokchoy_db_cache():
"""
@@ -54,32 +52,49 @@ def update_bokchoy_db_cache():
@needs('pavelib.prereqs.install_prereqs')
@PassthroughTask
@task
@timed
def update_local_bokchoy_db_from_s3():
"""
Update the MYSQL database for bokchoy testing:
* Determine if your current cache files are up to date
with all the migrations
* If not then check if there is a copy up at s3
* If so then download then extract it
* Otherwise apply migrations as usual
Prepare the local MYSQL test database for running bokchoy tests. Since
most pull requests do not introduce migrations, this task provides
an optimization for caching the state of the db when migrations are
added into a bucket in s3. Subsequent commits can avoid rerunning
migrations by using the cache files from s3, until the local cache files
are updated by running the `update_bokchoy_db_cache` Paver task, and
committing the updated cache files to github.
Steps:
1. Determine which migrations, if any, need to be applied to your current
db cache files to make them up to date
2. Compute the sha1 fingerprint of the local db cache files and the output
of the migration
3a. If the fingerprint computed in step 2 is equal to the local
fingerprint file, load the cache files into the MYSQL test database
3b. If the fingerprints are not equal, but there is a bucket matching the
fingerprint computed in step 2, download and extract the contents of the
bucket (db cache files) and load them into the MYSQL test database
3c. If the fingerprints are not equal AND there is no bucket matching the
fingerprint computed in step 2, load the local db cache files into
the MYSQL test database and apply any needed migrations. Create a
bucket in s3 named the fingerprint computed in step 2 and push the
newly updated db cache files to the bucket.
NOTE: the computed fingerprints referenced in this and related functions
represent the state of the db cache files and migration output PRIOR
to running migrations. The corresponding s3 bucket named for a given
fingerprint contains the db cache files AFTER applying migrations
"""
fingerprint = fingerprint_bokchoy_db_files(MIGRATION_OUTPUT_FILES, ALL_DB_FILES)
if does_fingerprint_on_disk_match(fingerprint):
print ("DB cache files match the current migrations.")
# TODO: we don't really need to apply migrations, just to
# load the db cache files into the database.
reset_test_db(BOKCHOY_DB_FILES, update_cache_files=False)
elif is_fingerprint_in_bucket(fingerprint, CACHE_BUCKET_NAME):
print ("Found updated bokchoy db files at S3.")
refresh_bokchoy_db_cache_from_s3(fingerprint=fingerprint)
refresh_bokchoy_db_cache_from_s3(fingerprint, CACHE_BUCKET_NAME, BOKCHOY_DB_FILES)
reset_test_db(BOKCHOY_DB_FILES, update_cache_files=False)
# Write the new fingerprint to disk so that it reflects the
# current state of the system.
compute_fingerprint_and_write_to_disk(MIGRATION_OUTPUT_FILES, ALL_DB_FILES)
else:
msg = "{} {} {}".format(
@@ -89,45 +104,15 @@ def update_local_bokchoy_db_from_s3():
)
print (msg)
reset_test_db(BOKCHOY_DB_FILES, update_cache_files=True)
# Write the new fingerprint to disk so that it reflects the
# current state of the system.
# E.g. you could have added a new migration in your PR.
compute_fingerprint_and_write_to_disk(MIGRATION_OUTPUT_FILES, ALL_DB_FILES)
@needs('pavelib.prereqs.install_prereqs')
@PassthroughTask
@timed
def refresh_bokchoy_db_cache_from_s3(fingerprint=None):
"""
If the cache files for the current fingerprint exist
in s3 then replace what you have on disk with those.
If no copy exists on s3 then continue without error.
"""
if not fingerprint:
fingerprint = fingerprint_bokchoy_db_files(MIGRATION_OUTPUT_FILES, ALL_DB_FILES)
bucket_name = CACHE_BUCKET_NAME
path = CACHE_FOLDER
if is_fingerprint_in_bucket(fingerprint, bucket_name):
zipfile_name = '{}.tar.gz'.format(fingerprint)
get_file_from_s3(bucket_name, zipfile_name, path)
zipfile_path = os.path.join(path, zipfile_name)
print ("Extracting db cache files.")
extract_files_from_zip(BOKCHOY_DB_FILES, zipfile_path, path)
os.remove(zipfile_path)
@needs('pavelib.prereqs.install_prereqs')
@PassthroughTask
@timed
def upload_db_cache_to_s3():
"""
Update the S3 bucket with the bokchoy DB cache files.
"""
fingerprint = fingerprint_bokchoy_db_files(MIGRATION_OUTPUT_FILES, ALL_DB_FILES)
zipfile_name, zipfile_path = create_tarfile_from_db_cache(
fingerprint, BOKCHOY_DB_FILES, CACHE_FOLDER
)
upload_to_s3(zipfile_name, zipfile_path, CACHE_BUCKET_NAME)
# Check one last time to see if the fingerprint is present in
# the s3 bucket. This could occur because the bokchoy job is
# sharded and running the same task in parallel
if not is_fingerprint_in_bucket(fingerprint, CACHE_BUCKET_NAME):
upload_db_cache_to_s3(fingerprint, BOKCHOY_DB_FILES, CACHE_BUCKET_NAME)
else:
msg = "{} {}. {}".format(
"Found a matching fingerprint in bucket ",
CACHE_BUCKET_NAME,
"Not pushing to s3"
)
print(msg)

View File

@@ -8,10 +8,15 @@ import os
from unittest import TestCase
import boto
from mock import patch
from mock import patch, call
from common.test.utils import MockS3Mixin
from pavelib.utils.db_utils import is_fingerprint_in_bucket, extract_files_from_zip
from pavelib.utils.envs import Env
from pavelib.utils.db_utils import (
is_fingerprint_in_bucket, extract_files_from_zip,
)
from pavelib.utils import db_utils
from pavelib import database
class TestPaverDbS3Utils(MockS3Mixin, TestCase):
@@ -59,3 +64,149 @@ class TestPaverDbUtils(TestCase):
with open(extracted_file, 'r') as test_file:
data = test_file.read()
assert data == 'Test file content'
def _write_temporary_db_cache_files(path, files):
    """
    Populate ``path`` with one dummy file per entry in ``files`` so that a
    deterministic fingerprint can be computed over them.

    Each file's content is its (stringified) position in ``files``, which
    keeps the generated fixture stable across test runs.
    """
    for position, name in enumerate(files):
        with open(os.path.join(path, name), 'w') as handle:
            handle.write(str(position))
class TestPaverDatabaseTasks(MockS3Mixin, TestCase):
    """
    Tests for the high level database tasks

    Exercises the three branches of ``database.update_local_bokchoy_db_from_s3``
    (local cache hit, s3 cache hit, full migration run) against a moto-mocked
    S3 bucket, with ``db_utils.sh`` patched so no shell scripts actually run.
    """
    def setUp(self):
        super(TestPaverDatabaseTasks, self).setUp()
        # MockS3Mixin intercepts boto, so this bucket exists only in memory.
        conn = boto.connect_s3()
        conn.create_bucket('moto_test_bucket')
        self.bucket = conn.get_bucket('moto_test_bucket')
        # This value is the actual sha1 fingerprint calculated for the dummy
        # files used in these tests
        self.expected_fingerprint = 'ccaa8d8dcc7d030cd6a6768db81f90d0ef976c3d'
        self.fingerprint_filename = '{}.tar.gz'.format(self.expected_fingerprint)

    @patch.object(db_utils, 'CACHE_FOLDER', mkdtemp())
    @patch.object(db_utils, 'FINGERPRINT_FILEPATH', os.path.join(mkdtemp(), 'fingerprint'))
    @patch.object(db_utils, 'sh')
    def test_load_data_from_local_cache(self, _mock_sh):
        """
        Assuming that the computed db cache file fingerprint is the same as
        the stored fingerprint, verify that we make a call to load data into
        the database without running migrations
        """
        self.addCleanup(shutil.rmtree, db_utils.CACHE_FOLDER)
        self.addCleanup(os.remove, db_utils.FINGERPRINT_FILEPATH)
        _write_temporary_db_cache_files(db_utils.CACHE_FOLDER, database.ALL_DB_FILES)
        # Write the local fingerprint file with the same value as the
        # computed fingerprint, forcing the "cache is current" branch.
        with open(db_utils.FINGERPRINT_FILEPATH, 'w') as fingerprint_file:
            fingerprint_file.write(self.expected_fingerprint)

        with patch.object(db_utils, 'get_file_from_s3', wraps=db_utils.get_file_from_s3) as _mock_get_file:
            database.update_local_bokchoy_db_from_s3()
            # Make sure that the local cache files are used - NOT downloaded from s3
            self.assertFalse(_mock_get_file.called)
        # reset-test-db.sh is invoked WITHOUT --rebuild_cache, i.e. no
        # migrations are rerun on this branch.
        calls = [
            call('{}/scripts/calculate-bokchoy-migrations.sh'.format(Env.REPO_ROOT)),
            call('{}/scripts/reset-test-db.sh'.format(Env.REPO_ROOT))
        ]
        _mock_sh.assert_has_calls(calls)

    @patch.object(database, 'CACHE_BUCKET_NAME', 'moto_test_bucket')
    @patch.object(db_utils, 'CACHE_FOLDER', mkdtemp())
    @patch.object(db_utils, 'FINGERPRINT_FILEPATH', os.path.join(mkdtemp(), 'fingerprint'))
    @patch.object(db_utils, 'sh')
    def test_load_data_from_s3_fingerprint(self, _mock_sh):
        """
        Assuming that the computed db cache file fingerprint is different
        than the stored fingerprint AND there is a matching fingerprint file
        in s3, verify that we make a call to load data into the database
        without running migrations
        """
        self.addCleanup(shutil.rmtree, db_utils.CACHE_FOLDER)
        self.addCleanup(os.remove, db_utils.FINGERPRINT_FILEPATH)
        _write_temporary_db_cache_files(db_utils.CACHE_FOLDER, database.ALL_DB_FILES)
        # Tar the temporary files and push them to the moto s3 bucket under
        # the expected fingerprint name, simulating a prior cache upload.
        zipfile_path = os.path.join(db_utils.CACHE_FOLDER, self.fingerprint_filename)
        with tarfile.open(name=zipfile_path, mode='w:gz') as tar_file:
            for name in database.ALL_DB_FILES:
                tar_file.add(os.path.join(db_utils.CACHE_FOLDER, name), arcname=name)
        key = boto.s3.key.Key(bucket=self.bucket, name=self.fingerprint_filename)
        key.set_contents_from_filename(zipfile_path, replace=False)

        # Write the local fingerprint file with a different value from
        # the computed fingerprint, so the local cache is considered stale.
        local_fingerprint = '123456789'
        with open(db_utils.FINGERPRINT_FILEPATH, 'w') as fingerprint_file:
            fingerprint_file.write(local_fingerprint)

        with patch.object(db_utils, 'get_file_from_s3', wraps=db_utils.get_file_from_s3) as _mock_get_file:
            database.update_local_bokchoy_db_from_s3()
            # Make sure that the fingerprint file is downloaded from s3
            _mock_get_file.assert_called_once_with(
                'moto_test_bucket', self.fingerprint_filename, db_utils.CACHE_FOLDER
            )
        # No --rebuild_cache flag: the s3 cache was used instead of rerunning
        # migrations.
        calls = [
            call('{}/scripts/calculate-bokchoy-migrations.sh'.format(Env.REPO_ROOT)),
            call('{}/scripts/reset-test-db.sh'.format(Env.REPO_ROOT))
        ]
        _mock_sh.assert_has_calls(calls)

    @patch.object(database, 'CACHE_BUCKET_NAME', 'moto_test_bucket')
    @patch.object(db_utils, 'CACHE_FOLDER', mkdtemp())
    @patch.object(db_utils, 'FINGERPRINT_FILEPATH', os.path.join(mkdtemp(), 'fingerprint'))
    @patch.object(db_utils, 'sh')
    def test_load_data_and_run_migrations(self, _mock_sh):
        """
        Assuming that the computed db cache file fingerprint is different
        than the stored fingerprint AND there is NO matching fingerprint file
        in s3, verify that we make a call to load data into the database, run
        migrations and update the local db cache files
        """
        self.addCleanup(shutil.rmtree, db_utils.CACHE_FOLDER)
        self.addCleanup(os.remove, db_utils.FINGERPRINT_FILEPATH)
        _write_temporary_db_cache_files(db_utils.CACHE_FOLDER, database.ALL_DB_FILES)
        # Write the local fingerprint file with a different value from
        # the computed fingerprint; the bucket is left empty, so neither
        # cache path can be taken.
        local_fingerprint = '123456789'
        with open(db_utils.FINGERPRINT_FILEPATH, 'w') as fingerprint_file:
            fingerprint_file.write(local_fingerprint)

        database.update_local_bokchoy_db_from_s3()
        # --rebuild_cache is present: migrations must be applied and the local
        # cache files refreshed.
        calls = [
            call('{}/scripts/calculate-bokchoy-migrations.sh'.format(Env.REPO_ROOT)),
            call('{}/scripts/reset-test-db.sh --rebuild_cache'.format(Env.REPO_ROOT))
        ]
        _mock_sh.assert_has_calls(calls)

    @patch.object(database, 'CACHE_BUCKET_NAME', 'moto_test_bucket')
    @patch.object(db_utils, 'CACHE_FOLDER', mkdtemp())
    @patch.object(db_utils, 'FINGERPRINT_FILEPATH', os.path.join(mkdtemp(), 'fingerprint'))
    @patch.object(db_utils, 'sh')
    def test_updated_db_cache_pushed_to_s3(self, _mock_sh):
        """
        Assuming that the computed db cache file fingerprint is different
        than the stored fingerprint AND there is NO matching fingerprint file
        in s3, verify that an updated fingerprint file is pushed to s3
        """
        self.addCleanup(shutil.rmtree, db_utils.CACHE_FOLDER)
        self.addCleanup(os.remove, db_utils.FINGERPRINT_FILEPATH)
        _write_temporary_db_cache_files(db_utils.CACHE_FOLDER, database.ALL_DB_FILES)
        # Write the local fingerprint file with a different value from
        # the computed fingerprint.
        local_fingerprint = '123456789'
        with open(db_utils.FINGERPRINT_FILEPATH, 'w') as fingerprint_file:
            fingerprint_file.write(local_fingerprint)

        database.update_local_bokchoy_db_from_s3()
        # The migration branch should have uploaded the rebuilt cache archive
        # to the bucket under the computed fingerprint name.
        self.assertTrue(self.bucket.get_key(self.fingerprint_filename))

View File

@@ -174,6 +174,22 @@ def extract_files_from_zip(files, zipfile_path, to_path):
verify_files_exist(files)
def refresh_bokchoy_db_cache_from_s3(fingerprint, bucket_name, bokchoy_db_files):
    """
    If the cache files for the current fingerprint exist
    in s3 then replace what you have on disk with those.
    If no copy exists on s3 then continue without error.
    """
    if not is_fingerprint_in_bucket(fingerprint, bucket_name):
        # No cached copy for this fingerprint; leave local files untouched.
        return
    archive_name = '{}.tar.gz'.format(fingerprint)
    get_file_from_s3(bucket_name, archive_name, CACHE_FOLDER)

    archive_path = os.path.join(CACHE_FOLDER, archive_name)
    print ("Extracting db cache files.")
    extract_files_from_zip(bokchoy_db_files, archive_path, CACHE_FOLDER)
    # The downloaded archive is only an intermediate; drop it once extracted.
    os.remove(archive_path)
def create_tarfile_from_db_cache(fingerprint, files, path):
"""
Create a tar.gz file with the current bokchoy DB cache files.
@@ -200,3 +216,13 @@ def upload_to_s3(file_name, file_path, bucket_name):
else:
msg = "File {} already existed in bucket {}.".format(key.name, bucket_name)
print (msg)
def upload_db_cache_to_s3(fingerprint, bokchoy_db_files, bucket_name):
    """
    Update the S3 bucket with the bokchoy DB cache files.

    Packs the local cache files into a tar.gz archive named after the
    fingerprint, then pushes that archive to the given bucket.
    """
    archive_name, archive_path = create_tarfile_from_db_cache(
        fingerprint,
        bokchoy_db_files,
        CACHE_FOLDER,
    )
    upload_to_s3(archive_name, archive_path, bucket_name)