add tests for paver db cache command

This commit is contained in:
Stuart Young
2017-12-29 12:05:14 -05:00
parent d75bbf05e4
commit 386f0e7cb7
3 changed files with 224 additions and 62 deletions

View File

@@ -2,16 +2,14 @@
Tasks for controlling the databases used in tests
"""
from __future__ import print_function
import os
from paver.easy import needs
from paver.easy import needs, task
from pavelib.utils.db_utils import (
remove_files_from_folder, reset_test_db, compute_fingerprint_and_write_to_disk,
fingerprint_bokchoy_db_files, does_fingerprint_on_disk_match, is_fingerprint_in_bucket,
get_file_from_s3, extract_files_from_zip, create_tarfile_from_db_cache, upload_to_s3
refresh_bokchoy_db_cache_from_s3, upload_db_cache_to_s3
)
from pavelib.utils.passthrough_opts import PassthroughTask
from pavelib.utils.timer import timed
# Bokchoy db schema and data fixtures
@@ -36,7 +34,7 @@ CACHE_FOLDER = 'common/test/db_cache'
@needs('pavelib.prereqs.install_prereqs')
@PassthroughTask
@task
@timed
def update_bokchoy_db_cache():
"""
@@ -54,32 +52,49 @@ def update_bokchoy_db_cache():
@needs('pavelib.prereqs.install_prereqs')
@PassthroughTask
@task
@timed
def update_local_bokchoy_db_from_s3():
"""
Update the MYSQL database for bokchoy testing:
* Determine if your current cache files are up to date
with all the migrations
* If not then check if there is a copy up at s3
* If so then download then extract it
* Otherwise apply migrations as usual
Prepare the local MYSQL test database for running bokchoy tests. Since
most pull requests do not introduce migrations, this task provides
an optimization for caching the state of the db when migrations are
added into a bucket in s3. Subsequent commits can avoid rerunning
migrations by using the cache files from s3, until the local cache files
are updated by running the `update_bokchoy_db_cache` Paver task, and
committing the updated cache files to github.
Steps:
1. Determine which migrations, if any, need to be applied to your current
db cache files to make them up to date
2. Compute the sha1 fingerprint of the local db cache files and the output
of the migration
3a. If the fingerprint computed in step 2 is equal to the local
fingerprint file, load the cache files into the MYSQL test database
3b. If the fingerprints are not equal, but there is a bucket matching the
fingerprint computed in step 2, download and extract the contents of the
bucket (db cache files) and load them into the MYSQL test database
3c. If the fingerprints are not equal AND there is no bucket matching the
fingerprint computed in step 2, load the local db cache files into
the MYSQL test database and apply any needed migrations. Create a
bucket in s3 named the fingerprint computed in step 2 and push the
newly updated db cache files to the bucket.
NOTE: the computed fingerprints referenced in this and related functions
represent the state of the db cache files and migration output PRIOR
to running migrations. The corresponding s3 bucket named for a given
fingerprint contains the db cache files AFTER applying migrations
"""
fingerprint = fingerprint_bokchoy_db_files(MIGRATION_OUTPUT_FILES, ALL_DB_FILES)
if does_fingerprint_on_disk_match(fingerprint):
print ("DB cache files match the current migrations.")
# TODO: we don't really need to apply migrations, just to
# load the db cache files into the database.
reset_test_db(BOKCHOY_DB_FILES, update_cache_files=False)
elif is_fingerprint_in_bucket(fingerprint, CACHE_BUCKET_NAME):
print ("Found updated bokchoy db files at S3.")
refresh_bokchoy_db_cache_from_s3(fingerprint=fingerprint)
refresh_bokchoy_db_cache_from_s3(fingerprint, CACHE_BUCKET_NAME, BOKCHOY_DB_FILES)
reset_test_db(BOKCHOY_DB_FILES, update_cache_files=False)
# Write the new fingerprint to disk so that it reflects the
# current state of the system.
compute_fingerprint_and_write_to_disk(MIGRATION_OUTPUT_FILES, ALL_DB_FILES)
else:
msg = "{} {} {}".format(
@@ -89,45 +104,15 @@ def update_local_bokchoy_db_from_s3():
)
print (msg)
reset_test_db(BOKCHOY_DB_FILES, update_cache_files=True)
# Write the new fingerprint to disk so that it reflects the
# current state of the system.
# E.g. you could have added a new migration in your PR.
compute_fingerprint_and_write_to_disk(MIGRATION_OUTPUT_FILES, ALL_DB_FILES)
@needs('pavelib.prereqs.install_prereqs')
@PassthroughTask
@timed
def refresh_bokchoy_db_cache_from_s3(fingerprint=None):
"""
If the cache files for the current fingerprint exist
in s3 then replace what you have on disk with those.
If no copy exists on s3 then continue without error.
"""
if not fingerprint:
fingerprint = fingerprint_bokchoy_db_files(MIGRATION_OUTPUT_FILES, ALL_DB_FILES)
bucket_name = CACHE_BUCKET_NAME
path = CACHE_FOLDER
if is_fingerprint_in_bucket(fingerprint, bucket_name):
zipfile_name = '{}.tar.gz'.format(fingerprint)
get_file_from_s3(bucket_name, zipfile_name, path)
zipfile_path = os.path.join(path, zipfile_name)
print ("Extracting db cache files.")
extract_files_from_zip(BOKCHOY_DB_FILES, zipfile_path, path)
os.remove(zipfile_path)
@needs('pavelib.prereqs.install_prereqs')
@PassthroughTask
@timed
def upload_db_cache_to_s3():
"""
Update the S3 bucket with the bokchoy DB cache files.
"""
fingerprint = fingerprint_bokchoy_db_files(MIGRATION_OUTPUT_FILES, ALL_DB_FILES)
zipfile_name, zipfile_path = create_tarfile_from_db_cache(
fingerprint, BOKCHOY_DB_FILES, CACHE_FOLDER
)
upload_to_s3(zipfile_name, zipfile_path, CACHE_BUCKET_NAME)
# Check one last time to see if the fingerprint is present in
# the s3 bucket. This could occur because the bokchoy job is
# sharded and running the same task in parallel
if not is_fingerprint_in_bucket(fingerprint, CACHE_BUCKET_NAME):
upload_db_cache_to_s3(fingerprint, BOKCHOY_DB_FILES, CACHE_BUCKET_NAME)
else:
msg = "{} {}. {}".format(
"Found a matching fingerprint in bucket ",
CACHE_BUCKET_NAME,
"Not pushing to s3"
)
print(msg)

View File

@@ -8,10 +8,15 @@ import os
from unittest import TestCase
import boto
from mock import patch
from mock import patch, call
from common.test.utils import MockS3Mixin
from pavelib.utils.db_utils import is_fingerprint_in_bucket, extract_files_from_zip
from pavelib.utils.envs import Env
from pavelib.utils.db_utils import (
is_fingerprint_in_bucket, extract_files_from_zip,
)
from pavelib.utils import db_utils
from pavelib import database
class TestPaverDbS3Utils(MockS3Mixin, TestCase):
@@ -59,3 +64,149 @@ class TestPaverDbUtils(TestCase):
with open(extracted_file, 'r') as test_file:
data = test_file.read()
assert data == 'Test file content'
def _write_temporary_db_cache_files(path, files):
    """
    Populate ``path`` with one dummy file per entry in ``files`` so that a
    deterministic fingerprint can be computed over them.

    Each file's content is its (stringified) position in ``files``, which
    keeps the generated fixture stable across test runs.
    """
    for position, name in enumerate(files):
        with open(os.path.join(path, name), 'w') as handle:
            handle.write(str(position))
class TestPaverDatabaseTasks(MockS3Mixin, TestCase):
    """
    Tests for the high level database tasks

    Exercises the three branches of ``database.update_local_bokchoy_db_from_s3``
    (local cache hit, s3 cache hit, full migration run) against a moto-mocked
    S3 bucket, with ``db_utils.sh`` patched so no shell scripts actually run.
    """
    def setUp(self):
        super(TestPaverDatabaseTasks, self).setUp()
        # MockS3Mixin intercepts boto, so this bucket exists only in memory.
        conn = boto.connect_s3()
        conn.create_bucket('moto_test_bucket')
        self.bucket = conn.get_bucket('moto_test_bucket')
        # This value is the actual sha1 fingerprint calculated for the dummy
        # files used in these tests
        self.expected_fingerprint = 'ccaa8d8dcc7d030cd6a6768db81f90d0ef976c3d'
        self.fingerprint_filename = '{}.tar.gz'.format(self.expected_fingerprint)

    @patch.object(db_utils, 'CACHE_FOLDER', mkdtemp())
    @patch.object(db_utils, 'FINGERPRINT_FILEPATH', os.path.join(mkdtemp(), 'fingerprint'))
    @patch.object(db_utils, 'sh')
    def test_load_data_from_local_cache(self, _mock_sh):
        """
        Assuming that the computed db cache file fingerprint is the same as
        the stored fingerprint, verify that we make a call to load data into
        the database without running migrations
        """
        self.addCleanup(shutil.rmtree, db_utils.CACHE_FOLDER)
        self.addCleanup(os.remove, db_utils.FINGERPRINT_FILEPATH)
        _write_temporary_db_cache_files(db_utils.CACHE_FOLDER, database.ALL_DB_FILES)
        # Write the local fingerprint file with the same value as the
        # computed fingerprint, forcing the "cache is current" branch.
        with open(db_utils.FINGERPRINT_FILEPATH, 'w') as fingerprint_file:
            fingerprint_file.write(self.expected_fingerprint)

        with patch.object(db_utils, 'get_file_from_s3', wraps=db_utils.get_file_from_s3) as _mock_get_file:
            database.update_local_bokchoy_db_from_s3()
            # Make sure that the local cache files are used - NOT downloaded from s3
            self.assertFalse(_mock_get_file.called)
        # reset-test-db.sh is invoked WITHOUT --rebuild_cache, i.e. no
        # migrations are rerun on this branch.
        calls = [
            call('{}/scripts/calculate-bokchoy-migrations.sh'.format(Env.REPO_ROOT)),
            call('{}/scripts/reset-test-db.sh'.format(Env.REPO_ROOT))
        ]
        _mock_sh.assert_has_calls(calls)

    @patch.object(database, 'CACHE_BUCKET_NAME', 'moto_test_bucket')
    @patch.object(db_utils, 'CACHE_FOLDER', mkdtemp())
    @patch.object(db_utils, 'FINGERPRINT_FILEPATH', os.path.join(mkdtemp(), 'fingerprint'))
    @patch.object(db_utils, 'sh')
    def test_load_data_from_s3_fingerprint(self, _mock_sh):
        """
        Assuming that the computed db cache file fingerprint is different
        than the stored fingerprint AND there is a matching fingerprint file
        in s3, verify that we make a call to load data into the database
        without running migrations
        """
        self.addCleanup(shutil.rmtree, db_utils.CACHE_FOLDER)
        self.addCleanup(os.remove, db_utils.FINGERPRINT_FILEPATH)
        _write_temporary_db_cache_files(db_utils.CACHE_FOLDER, database.ALL_DB_FILES)
        # Tar the temporary files and push them to the moto s3 bucket under
        # the expected fingerprint name, simulating a prior cache upload.
        zipfile_path = os.path.join(db_utils.CACHE_FOLDER, self.fingerprint_filename)
        with tarfile.open(name=zipfile_path, mode='w:gz') as tar_file:
            for name in database.ALL_DB_FILES:
                tar_file.add(os.path.join(db_utils.CACHE_FOLDER, name), arcname=name)
        key = boto.s3.key.Key(bucket=self.bucket, name=self.fingerprint_filename)
        key.set_contents_from_filename(zipfile_path, replace=False)

        # Write the local fingerprint file with a different value from
        # the computed fingerprint, so the local cache is considered stale.
        local_fingerprint = '123456789'
        with open(db_utils.FINGERPRINT_FILEPATH, 'w') as fingerprint_file:
            fingerprint_file.write(local_fingerprint)

        with patch.object(db_utils, 'get_file_from_s3', wraps=db_utils.get_file_from_s3) as _mock_get_file:
            database.update_local_bokchoy_db_from_s3()
            # Make sure that the fingerprint file is downloaded from s3
            _mock_get_file.assert_called_once_with(
                'moto_test_bucket', self.fingerprint_filename, db_utils.CACHE_FOLDER
            )
        # No --rebuild_cache flag: the s3 cache was used instead of rerunning
        # migrations.
        calls = [
            call('{}/scripts/calculate-bokchoy-migrations.sh'.format(Env.REPO_ROOT)),
            call('{}/scripts/reset-test-db.sh'.format(Env.REPO_ROOT))
        ]
        _mock_sh.assert_has_calls(calls)

    @patch.object(database, 'CACHE_BUCKET_NAME', 'moto_test_bucket')
    @patch.object(db_utils, 'CACHE_FOLDER', mkdtemp())
    @patch.object(db_utils, 'FINGERPRINT_FILEPATH', os.path.join(mkdtemp(), 'fingerprint'))
    @patch.object(db_utils, 'sh')
    def test_load_data_and_run_migrations(self, _mock_sh):
        """
        Assuming that the computed db cache file fingerprint is different
        than the stored fingerprint AND there is NO matching fingerprint file
        in s3, verify that we make a call to load data into the database, run
        migrations and update the local db cache files
        """
        self.addCleanup(shutil.rmtree, db_utils.CACHE_FOLDER)
        self.addCleanup(os.remove, db_utils.FINGERPRINT_FILEPATH)
        _write_temporary_db_cache_files(db_utils.CACHE_FOLDER, database.ALL_DB_FILES)
        # Write the local fingerprint file with a different value from
        # the computed fingerprint; the bucket is left empty, so neither
        # cache path can be taken.
        local_fingerprint = '123456789'
        with open(db_utils.FINGERPRINT_FILEPATH, 'w') as fingerprint_file:
            fingerprint_file.write(local_fingerprint)

        database.update_local_bokchoy_db_from_s3()
        # --rebuild_cache is present: migrations must be applied and the local
        # cache files refreshed.
        calls = [
            call('{}/scripts/calculate-bokchoy-migrations.sh'.format(Env.REPO_ROOT)),
            call('{}/scripts/reset-test-db.sh --rebuild_cache'.format(Env.REPO_ROOT))
        ]
        _mock_sh.assert_has_calls(calls)

    @patch.object(database, 'CACHE_BUCKET_NAME', 'moto_test_bucket')
    @patch.object(db_utils, 'CACHE_FOLDER', mkdtemp())
    @patch.object(db_utils, 'FINGERPRINT_FILEPATH', os.path.join(mkdtemp(), 'fingerprint'))
    @patch.object(db_utils, 'sh')
    def test_updated_db_cache_pushed_to_s3(self, _mock_sh):
        """
        Assuming that the computed db cache file fingerprint is different
        than the stored fingerprint AND there is NO matching fingerprint file
        in s3, verify that an updated fingerprint file is pushed to s3
        """
        self.addCleanup(shutil.rmtree, db_utils.CACHE_FOLDER)
        self.addCleanup(os.remove, db_utils.FINGERPRINT_FILEPATH)
        _write_temporary_db_cache_files(db_utils.CACHE_FOLDER, database.ALL_DB_FILES)
        # Write the local fingerprint file with a different value from
        # the computed fingerprint.
        local_fingerprint = '123456789'
        with open(db_utils.FINGERPRINT_FILEPATH, 'w') as fingerprint_file:
            fingerprint_file.write(local_fingerprint)

        database.update_local_bokchoy_db_from_s3()
        # The migration branch should have uploaded the rebuilt cache archive
        # to the bucket under the computed fingerprint name.
        self.assertTrue(self.bucket.get_key(self.fingerprint_filename))

View File

@@ -174,6 +174,22 @@ def extract_files_from_zip(files, zipfile_path, to_path):
verify_files_exist(files)
def refresh_bokchoy_db_cache_from_s3(fingerprint, bucket_name, bokchoy_db_files):
    """
    If the cache files for the current fingerprint exist
    in s3 then replace what you have on disk with those.
    If no copy exists on s3 then continue without error.
    """
    if not is_fingerprint_in_bucket(fingerprint, bucket_name):
        # No cached copy for this fingerprint; leave local files untouched.
        return
    archive_name = '{}.tar.gz'.format(fingerprint)
    get_file_from_s3(bucket_name, archive_name, CACHE_FOLDER)

    archive_path = os.path.join(CACHE_FOLDER, archive_name)
    print ("Extracting db cache files.")
    extract_files_from_zip(bokchoy_db_files, archive_path, CACHE_FOLDER)
    # The downloaded archive is only an intermediate; drop it once extracted.
    os.remove(archive_path)
def create_tarfile_from_db_cache(fingerprint, files, path):
"""
Create a tar.gz file with the current bokchoy DB cache files.
@@ -200,3 +216,13 @@ def upload_to_s3(file_name, file_path, bucket_name):
else:
msg = "File {} already existed in bucket {}.".format(key.name, bucket_name)
print (msg)
def upload_db_cache_to_s3(fingerprint, bokchoy_db_files, bucket_name):
    """
    Update the S3 bucket with the bokchoy DB cache files.

    Packs the local cache files into a tar.gz archive named after the
    fingerprint, then pushes that archive to the given bucket.
    """
    archive_name, archive_path = create_tarfile_from_db_cache(
        fingerprint,
        bokchoy_db_files,
        CACHE_FOLDER,
    )
    upload_to_s3(archive_name, archive_path, bucket_name)