diff --git a/pavelib/database.py b/pavelib/database.py index 1f15765fe3..dfae423c97 100644 --- a/pavelib/database.py +++ b/pavelib/database.py @@ -2,16 +2,14 @@ Tasks for controlling the databases used in tests """ from __future__ import print_function -import os -from paver.easy import needs +from paver.easy import needs, task from pavelib.utils.db_utils import ( remove_files_from_folder, reset_test_db, compute_fingerprint_and_write_to_disk, fingerprint_bokchoy_db_files, does_fingerprint_on_disk_match, is_fingerprint_in_bucket, - get_file_from_s3, extract_files_from_zip, create_tarfile_from_db_cache, upload_to_s3 + refresh_bokchoy_db_cache_from_s3, upload_db_cache_to_s3 ) -from pavelib.utils.passthrough_opts import PassthroughTask from pavelib.utils.timer import timed # Bokchoy db schema and data fixtures @@ -36,7 +34,7 @@ CACHE_FOLDER = 'common/test/db_cache' @needs('pavelib.prereqs.install_prereqs') -@PassthroughTask +@task @timed def update_bokchoy_db_cache(): """ @@ -54,32 +52,49 @@ def update_bokchoy_db_cache(): @needs('pavelib.prereqs.install_prereqs') -@PassthroughTask +@task @timed def update_local_bokchoy_db_from_s3(): """ - Update the MYSQL database for bokchoy testing: - * Determine if your current cache files are up to date - with all the migrations - * If not then check if there is a copy up at s3 - * If so then download then extract it - * Otherwise apply migrations as usual + Prepare the local MYSQL test database for running bokchoy tests. Since + most pull requests do not introduce migrations, this task provides + an optimization for caching the state of the db when migrations are + added into a bucket in s3. Subsequent commits can avoid rerunning + migrations by using the cache files from s3, until the local cache files + are updated by running the `update_bokchoy_db_cache` Paver task, and + committing the updated cache files to github. + + Steps: + 1. 
Determine which migrations, if any, need to be applied to your current + db cache files to make them up to date + 2. Compute the sha1 fingerprint of the local db cache files and the output + of the migration + 3a. If the fingerprint computed in step 2 is equal to the local + fingerprint file, load the cache files into the MYSQL test database + 3b. If the fingerprints are not equal, but the s3 bucket has an archive for + the fingerprint computed in step 2, download and extract its contents + (db cache files) and load them into the MYSQL test database + 3c. If the fingerprints are not equal AND the s3 bucket has no archive for + the fingerprint computed in step 2, load the local db cache files into + the MYSQL test database and apply any needed migrations. Then upload + the newly updated db cache files to the s3 bucket as an archive named + for the fingerprint computed in step 2. + + NOTE: the computed fingerprints referenced in this and related functions + represent the state of the db cache files and migration output PRIOR + to running migrations. The corresponding s3 archive named for a given + fingerprint contains the db cache files AFTER applying migrations """ fingerprint = fingerprint_bokchoy_db_files(MIGRATION_OUTPUT_FILES, ALL_DB_FILES) if does_fingerprint_on_disk_match(fingerprint): print ("DB cache files match the current migrations.") - # TODO: we don't really need to apply migrations, just to - # load the db cache files into the database. reset_test_db(BOKCHOY_DB_FILES, update_cache_files=False) elif is_fingerprint_in_bucket(fingerprint, CACHE_BUCKET_NAME): print ("Found updated bokchoy db files at S3.") - refresh_bokchoy_db_cache_from_s3(fingerprint=fingerprint) + refresh_bokchoy_db_cache_from_s3(fingerprint, CACHE_BUCKET_NAME, BOKCHOY_DB_FILES) reset_test_db(BOKCHOY_DB_FILES, update_cache_files=False) - # Write the new fingerprint to disk so that it reflects the - # current state of the system.
- compute_fingerprint_and_write_to_disk(MIGRATION_OUTPUT_FILES, ALL_DB_FILES) else: msg = "{} {} {}".format( @@ -89,45 +104,15 @@ def update_local_bokchoy_db_from_s3(): ) print (msg) reset_test_db(BOKCHOY_DB_FILES, update_cache_files=True) - # Write the new fingerprint to disk so that it reflects the - # current state of the system. - # E.g. you could have added a new migration in your PR. - compute_fingerprint_and_write_to_disk(MIGRATION_OUTPUT_FILES, ALL_DB_FILES) - - -@needs('pavelib.prereqs.install_prereqs') -@PassthroughTask -@timed -def refresh_bokchoy_db_cache_from_s3(fingerprint=None): - """ - If the cache files for the current fingerprint exist - in s3 then replace what you have on disk with those. - If no copy exists on s3 then continue without error. - """ - if not fingerprint: - fingerprint = fingerprint_bokchoy_db_files(MIGRATION_OUTPUT_FILES, ALL_DB_FILES) - - bucket_name = CACHE_BUCKET_NAME - path = CACHE_FOLDER - if is_fingerprint_in_bucket(fingerprint, bucket_name): - zipfile_name = '{}.tar.gz'.format(fingerprint) - get_file_from_s3(bucket_name, zipfile_name, path) - - zipfile_path = os.path.join(path, zipfile_name) - print ("Extracting db cache files.") - extract_files_from_zip(BOKCHOY_DB_FILES, zipfile_path, path) - os.remove(zipfile_path) - - -@needs('pavelib.prereqs.install_prereqs') -@PassthroughTask -@timed -def upload_db_cache_to_s3(): - """ - Update the S3 bucket with the bokchoy DB cache files. - """ - fingerprint = fingerprint_bokchoy_db_files(MIGRATION_OUTPUT_FILES, ALL_DB_FILES) - zipfile_name, zipfile_path = create_tarfile_from_db_cache( - fingerprint, BOKCHOY_DB_FILES, CACHE_FOLDER - ) - upload_to_s3(zipfile_name, zipfile_path, CACHE_BUCKET_NAME) + # Check one last time to see if the fingerprint is present in + # the s3 bucket. 
This could occur because the bokchoy job is + # sharded and running the same task in parallel + if not is_fingerprint_in_bucket(fingerprint, CACHE_BUCKET_NAME): + upload_db_cache_to_s3(fingerprint, BOKCHOY_DB_FILES, CACHE_BUCKET_NAME) + else: + msg = "{} {}. {}".format( + "Found a matching fingerprint in bucket ", + CACHE_BUCKET_NAME, + "Not pushing to s3" + ) + print(msg) diff --git a/pavelib/paver_tests/test_database.py b/pavelib/paver_tests/test_database.py index 6fdc284990..3ec5af6edf 100644 --- a/pavelib/paver_tests/test_database.py +++ b/pavelib/paver_tests/test_database.py @@ -8,10 +8,15 @@ import os from unittest import TestCase import boto -from mock import patch +from mock import patch, call from common.test.utils import MockS3Mixin -from pavelib.utils.db_utils import is_fingerprint_in_bucket, extract_files_from_zip +from pavelib.utils.envs import Env +from pavelib.utils.db_utils import ( + is_fingerprint_in_bucket, extract_files_from_zip, +) +from pavelib.utils import db_utils +from pavelib import database class TestPaverDbS3Utils(MockS3Mixin, TestCase): @@ -59,3 +64,149 @@ class TestPaverDbUtils(TestCase): with open(extracted_file, 'r') as test_file: data = test_file.read() assert data == 'Test file content' + + +def _write_temporary_db_cache_files(path, files): + """ + create some temporary files to act as the local db cache files so that + we can compute a fingerprint + """ + for index, filename in enumerate(files): + filepath = os.path.join(path, filename) + with open(filepath, 'w') as cache_file: + cache_file.write(str(index)) + + +class TestPaverDatabaseTasks(MockS3Mixin, TestCase): + """ + Tests for the high level database tasks + """ + + def setUp(self): + super(TestPaverDatabaseTasks, self).setUp() + conn = boto.connect_s3() + conn.create_bucket('moto_test_bucket') + self.bucket = conn.get_bucket('moto_test_bucket') + # This value is the actual sha1 fingerprint calculated for the dummy + # files used in these tests + self.expected_fingerprint = 
'ccaa8d8dcc7d030cd6a6768db81f90d0ef976c3d' + self.fingerprint_filename = '{}.tar.gz'.format(self.expected_fingerprint) + + @patch.object(db_utils, 'CACHE_FOLDER', mkdtemp()) + @patch.object(db_utils, 'FINGERPRINT_FILEPATH', os.path.join(mkdtemp(), 'fingerprint')) + @patch.object(db_utils, 'sh') + def test_load_data_from_local_cache(self, _mock_sh): + """ + Assuming that the computed db cache file fingerprint is the same as + the stored fingerprint, verify that we make a call to load data into + the database without running migrations + """ + self.addCleanup(shutil.rmtree, db_utils.CACHE_FOLDER) + self.addCleanup(os.remove, db_utils.FINGERPRINT_FILEPATH) + _write_temporary_db_cache_files(db_utils.CACHE_FOLDER, database.ALL_DB_FILES) + # write the local fingerprint file with the same value as the + # computed fingerprint + with open(db_utils.FINGERPRINT_FILEPATH, 'w') as fingerprint_file: + fingerprint_file.write(self.expected_fingerprint) + + with patch.object(db_utils, 'get_file_from_s3', wraps=db_utils.get_file_from_s3) as _mock_get_file: + database.update_local_bokchoy_db_from_s3() + # Make sure that the local cache files are used - NOT downloaded from s3 + self.assertFalse(_mock_get_file.called) + calls = [ + call('{}/scripts/calculate-bokchoy-migrations.sh'.format(Env.REPO_ROOT)), + call('{}/scripts/reset-test-db.sh'.format(Env.REPO_ROOT)) + ] + _mock_sh.assert_has_calls(calls) + + @patch.object(database, 'CACHE_BUCKET_NAME', 'moto_test_bucket') + @patch.object(db_utils, 'CACHE_FOLDER', mkdtemp()) + @patch.object(db_utils, 'FINGERPRINT_FILEPATH', os.path.join(mkdtemp(), 'fingerprint')) + @patch.object(db_utils, 'sh') + def test_load_data_from_s3_fingerprint(self, _mock_sh): + """ + Assuming that the computed db cache file fingerprint is different + than the stored fingerprint AND there is a matching fingerprint file + in s3, verify that we make a call to load data into the database + without running migrations + """ + self.addCleanup(shutil.rmtree,
db_utils.CACHE_FOLDER) + self.addCleanup(os.remove, db_utils.FINGERPRINT_FILEPATH) + _write_temporary_db_cache_files(db_utils.CACHE_FOLDER, database.ALL_DB_FILES) + + # zip the temporary files and push them to a moto s3 bucket + zipfile_path = os.path.join(db_utils.CACHE_FOLDER, self.fingerprint_filename) + with tarfile.open(name=zipfile_path, mode='w:gz') as tar_file: + for name in database.ALL_DB_FILES: + tar_file.add(os.path.join(db_utils.CACHE_FOLDER, name), arcname=name) + key = boto.s3.key.Key(bucket=self.bucket, name=self.fingerprint_filename) + key.set_contents_from_filename(zipfile_path, replace=False) + + # write the local fingerprint file with a different value than + # the computed fingerprint + local_fingerprint = '123456789' + with open(db_utils.FINGERPRINT_FILEPATH, 'w') as fingerprint_file: + fingerprint_file.write(local_fingerprint) + + with patch.object(db_utils, 'get_file_from_s3', wraps=db_utils.get_file_from_s3) as _mock_get_file: + database.update_local_bokchoy_db_from_s3() + # Make sure that the fingerprint file is downloaded from s3 + _mock_get_file.assert_called_once_with( + 'moto_test_bucket', self.fingerprint_filename, db_utils.CACHE_FOLDER + ) + calls = [ + call('{}/scripts/calculate-bokchoy-migrations.sh'.format(Env.REPO_ROOT)), + call('{}/scripts/reset-test-db.sh'.format(Env.REPO_ROOT)) + ] + _mock_sh.assert_has_calls(calls) + + @patch.object(database, 'CACHE_BUCKET_NAME', 'moto_test_bucket') + @patch.object(db_utils, 'CACHE_FOLDER', mkdtemp()) + @patch.object(db_utils, 'FINGERPRINT_FILEPATH', os.path.join(mkdtemp(), 'fingerprint')) + @patch.object(db_utils, 'sh') + def test_load_data_and_run_migrations(self, _mock_sh): + """ + Assuming that the computed db cache file fingerprint is different + than the stored fingerprint AND there is NO matching fingerprint file + in s3, verify that we make a call to load data into the database, run + migrations and update the local db cache files + """ + self.addCleanup(shutil.rmtree, 
db_utils.CACHE_FOLDER) + self.addCleanup(os.remove, db_utils.FINGERPRINT_FILEPATH) + _write_temporary_db_cache_files(db_utils.CACHE_FOLDER, database.ALL_DB_FILES) + + # write the local fingerprint file with a different value than + # the computed fingerprint + local_fingerprint = '123456789' + with open(db_utils.FINGERPRINT_FILEPATH, 'w') as fingerprint_file: + fingerprint_file.write(local_fingerprint) + + database.update_local_bokchoy_db_from_s3() + calls = [ + call('{}/scripts/calculate-bokchoy-migrations.sh'.format(Env.REPO_ROOT)), + call('{}/scripts/reset-test-db.sh --rebuild_cache'.format(Env.REPO_ROOT)) + ] + _mock_sh.assert_has_calls(calls) + + @patch.object(database, 'CACHE_BUCKET_NAME', 'moto_test_bucket') + @patch.object(db_utils, 'CACHE_FOLDER', mkdtemp()) + @patch.object(db_utils, 'FINGERPRINT_FILEPATH', os.path.join(mkdtemp(), 'fingerprint')) + @patch.object(db_utils, 'sh') + def test_updated_db_cache_pushed_to_s3(self, _mock_sh): + """ + Assuming that the computed db cache file fingerprint is different + than the stored fingerprint AND there is NO matching fingerprint file + in s3, verify that an updated fingerprint file is pushed to s3 + """ + self.addCleanup(shutil.rmtree, db_utils.CACHE_FOLDER) + self.addCleanup(os.remove, db_utils.FINGERPRINT_FILEPATH) + _write_temporary_db_cache_files(db_utils.CACHE_FOLDER, database.ALL_DB_FILES) + + # write the local fingerprint file with a different value than + # the computed fingerprint + local_fingerprint = '123456789' + with open(db_utils.FINGERPRINT_FILEPATH, 'w') as fingerprint_file: + fingerprint_file.write(local_fingerprint) + + database.update_local_bokchoy_db_from_s3() + self.assertTrue(self.bucket.get_key(self.fingerprint_filename)) diff --git a/pavelib/utils/db_utils.py b/pavelib/utils/db_utils.py index a3aae3698b..e0fecede98 100644 --- a/pavelib/utils/db_utils.py +++ b/pavelib/utils/db_utils.py @@ -174,6 +174,22 @@ def extract_files_from_zip(files, zipfile_path, to_path): verify_files_exist(files)
+def refresh_bokchoy_db_cache_from_s3(fingerprint, bucket_name, bokchoy_db_files): + """ + If the cache files for the current fingerprint exist + in s3 then replace what you have on disk with those. + If no copy exists on s3 then continue without error. + """ + path = CACHE_FOLDER + if is_fingerprint_in_bucket(fingerprint, bucket_name): + zipfile_name = '{}.tar.gz'.format(fingerprint) + get_file_from_s3(bucket_name, zipfile_name, path) + zipfile_path = os.path.join(path, zipfile_name) + print ("Extracting db cache files.") + extract_files_from_zip(bokchoy_db_files, zipfile_path, path) + os.remove(zipfile_path) + + def create_tarfile_from_db_cache(fingerprint, files, path): """ Create a tar.gz file with the current bokchoy DB cache files. @@ -200,3 +216,13 @@ def upload_to_s3(file_name, file_path, bucket_name): else: msg = "File {} already existed in bucket {}.".format(key.name, bucket_name) print (msg) + + +def upload_db_cache_to_s3(fingerprint, bokchoy_db_files, bucket_name): + """ + Update the S3 bucket with the bokchoy DB cache files. + """ + zipfile_name, zipfile_path = create_tarfile_from_db_cache( + fingerprint, bokchoy_db_files, CACHE_FOLDER + ) + upload_to_s3(zipfile_name, zipfile_path, bucket_name)