fix: remove bokchoy db cache uploader paver tasks (#28841)
This commit is contained in:
@@ -3,4 +3,4 @@ paver commands
|
||||
"""
|
||||
|
||||
|
||||
from . import assets, bok_choy, database, docs, i18n, js_test, prereqs, quality, servers, tests
|
||||
from . import assets, bok_choy, docs, i18n, js_test, prereqs, quality, servers, tests
|
||||
|
||||
@@ -1,136 +0,0 @@
|
||||
"""
|
||||
Tasks for controlling the databases used in tests
|
||||
"""
|
||||
|
||||
|
||||
from paver.easy import cmdopts, needs, task
|
||||
|
||||
from pavelib.utils.db_utils import (
|
||||
compute_fingerprint_and_write_to_disk,
|
||||
does_fingerprint_on_disk_match,
|
||||
fingerprint_bokchoy_db_files,
|
||||
is_fingerprint_in_bucket,
|
||||
refresh_bokchoy_db_cache_from_s3,
|
||||
remove_files_from_folder,
|
||||
reset_test_db,
|
||||
upload_db_cache_to_s3
|
||||
)
|
||||
from pavelib.utils.timer import timed
|
||||
|
||||
# Bokchoy db schema and data fixtures
BOKCHOY_DB_FILES = [
    'bok_choy_data_default.json',
    'bok_choy_data_student_module_history.json',
    'bok_choy_migrations_data_default.sql',
    'bok_choy_migrations_data_student_module_history.sql',
    'bok_choy_schema_default.sql',
    'bok_choy_schema_student_module_history.sql',
]

# Output files from scripts/reset-test-db.sh --calculate_migrations
MIGRATION_OUTPUT_FILES = [
    'bok_choy_default_migrations.yaml',
    'bok_choy_student_module_history_migrations.yaml',
]

# Every file that participates in fingerprinting the db cache state.
ALL_DB_FILES = BOKCHOY_DB_FILES + MIGRATION_OUTPUT_FILES

# S3 bucket holding tarballs of migrated db cache files, keyed by fingerprint.
CACHE_BUCKET_NAME = 'edx-tools-database-caches'
# Repo-relative folder where the db cache files live.
CACHE_FOLDER = 'common/test/db_cache'
||||
@needs('pavelib.prereqs.install_prereqs')
@task
@timed
def update_bokchoy_db_cache():
    """
    Update and cache the MYSQL database for bokchoy testing:

    * Remove any previously cached database files
    * Apply migrations on a fresh db
    * Write the collective sha1 checksum for all of these files to disk

    WARNING: This will take several minutes.
    """
    print('Removing cached db files for bokchoy tests')
    # Start from a clean slate so stale fixtures cannot leak into the new cache.
    remove_files_from_folder(BOKCHOY_DB_FILES, CACHE_FOLDER)
    # Rebuild the test db and regenerate the cache files from scratch.
    reset_test_db(BOKCHOY_DB_FILES, update_cache_files=True)
    # Record the new collective sha1 so later runs can detect staleness.
    compute_fingerprint_and_write_to_disk(MIGRATION_OUTPUT_FILES, ALL_DB_FILES)
||||
|
||||
|
||||
@needs('pavelib.prereqs.install_prereqs')
@task
@timed
@cmdopts([
    ("rewrite_fingerprint", None, "Optional flag that will write the new sha1 fingerprint to disk")
])
def update_local_bokchoy_db_from_s3(options):
    """
    Prepare the local MYSQL test database for running bokchoy tests.

    Since most pull requests do not introduce migrations, this task provides
    an optimization for caching the state of the db when migrations are
    added into a bucket in s3. Subsequent commits can avoid rerunning
    migrations by using the cache files from s3, until the local cache files
    are updated by running the `update_bokchoy_db_cache` Paver task, and
    committing the updated cache files to github.

    Steps:
    1. Determine which migrations, if any, need to be applied to your current
       db cache files to make them up to date
    2. Compute the sha1 fingerprint of the local db cache files and the output
       of the migration
    3a. If the fingerprint computed in step 2 is equal to the local
        fingerprint file, load the cache files into the MYSQL test database
    3b. If the fingerprints are not equal, but there is a bucket matching the
        fingerprint computed in step 2, download and extract the contents of
        the bucket (db cache files) and load them into the MYSQL test database
    3c. If the fingerprints are not equal AND there is no bucket matching the
        fingerprint computed in step 2, load the local db cache files into
        the MYSQL test database and apply any needed migrations. Create a
        bucket in s3 named the fingerprint computed in step 2 and push the
        newly updated db cache files to the bucket.

    NOTE: the computed fingerprints referenced in this and related functions
    represent the state of the db cache files and migration output PRIOR
    to running migrations. The corresponding s3 bucket named for a given
    fingerprint contains the db cache files AFTER applying migrations.
    """
    fingerprint = fingerprint_bokchoy_db_files(MIGRATION_OUTPUT_FILES, ALL_DB_FILES)
    fingerprints_match = does_fingerprint_on_disk_match(fingerprint)

    # Calculating the fingerprint already reset the DB, so we don't need to
    # do it again (hence use_existing_db=True below)
    if fingerprints_match:
        # Case 3a: local cache is already current — just load it.
        print("DB cache files match the current migrations.")
        reset_test_db(BOKCHOY_DB_FILES, update_cache_files=False, use_existing_db=True)
    elif is_fingerprint_in_bucket(fingerprint, CACHE_BUCKET_NAME):
        # Case 3b: someone already migrated for this fingerprint — pull their cache.
        print("Found updated bokchoy db files at S3.")
        refresh_bokchoy_db_cache_from_s3(fingerprint, CACHE_BUCKET_NAME, BOKCHOY_DB_FILES)
        reset_test_db(BOKCHOY_DB_FILES, update_cache_files=False, use_existing_db=True)
    else:
        # Case 3c: no cached result anywhere — run migrations locally.
        print(
            "Did not find updated bokchoy db files at S3. "
            "Loading the bokchoy db files from disk "
            "and running migrations."
        )
        reset_test_db(BOKCHOY_DB_FILES, update_cache_files=True, use_existing_db=True)
        # Check one last time to see if the fingerprint is present in
        # the s3 bucket. This could occur because the bokchoy job is
        # sharded and running the same task in parallel.
        if is_fingerprint_in_bucket(fingerprint, CACHE_BUCKET_NAME):
            print(
                f"Found a matching fingerprint in bucket  {CACHE_BUCKET_NAME}. "
                "Not pushing to s3"
            )
        else:
            upload_db_cache_to_s3(fingerprint, BOKCHOY_DB_FILES, CACHE_BUCKET_NAME)

    # If the rewrite_fingerprint flag is set, and the fingerprint has changed,
    # write it to disk.
    rewrite_fingerprint = getattr(options, 'rewrite_fingerprint', False)
    if rewrite_fingerprint and not fingerprints_match:
        print("Updating fingerprint and writing to disk.")
        compute_fingerprint_and_write_to_disk(MIGRATION_OUTPUT_FILES, ALL_DB_FILES)
||||
@@ -1,194 +0,0 @@
|
||||
"""
|
||||
Tests for the Paver commands for updating test databases and its utility methods
|
||||
"""
|
||||
|
||||
|
||||
import os
|
||||
import shutil
|
||||
import tarfile
|
||||
from tempfile import mkdtemp
|
||||
from unittest import TestCase
|
||||
from unittest.mock import call, patch, Mock
|
||||
|
||||
import boto
|
||||
|
||||
from pavelib import database
|
||||
from pavelib.utils import db_utils
|
||||
from pavelib.utils.db_utils import extract_files_from_zip
|
||||
from pavelib.utils.envs import Env
|
||||
|
||||
from .utils import PaverTestCase
|
||||
|
||||
|
||||
class TestPaverDbUtils(TestCase):
    """ Tests for paver bokchoy database utils """
    @patch('pavelib.utils.db_utils.verify_files_exist')
    def test_extract_files_from_zip(self, _mock_verify):
        """Round-trip a file through a gzipped tarball via extract_files_from_zip."""
        source_dir = mkdtemp()
        dest_dir = mkdtemp()
        self.addCleanup(shutil.rmtree, source_dir)
        self.addCleanup(shutil.rmtree, dest_dir)

        # Create a small text file to pack into the archive.
        text_path = os.path.join(source_dir, 'test.txt')
        with open(text_path, 'w') as handle:
            handle.write('Test file content')

        archive_path = os.path.join(source_dir, 'test.tar.gz')
        with tarfile.open(name=archive_path, mode='w:gz') as archive:
            archive.add(text_path, arcname='test.txt')

        extract_files_from_zip(['test.txt'], archive_path, dest_dir)

        extracted_path = os.path.join(dest_dir, 'test.txt')
        assert os.path.isfile(extracted_path)
        with open(extracted_path) as extracted:
            assert extracted.read() == 'Test file content'
||||
|
||||
|
||||
def _write_temporary_db_cache_files(path, files):
|
||||
"""
|
||||
create some temporary files to act as the local db cache files so that
|
||||
we can compute a fingerprint
|
||||
"""
|
||||
for index, filename in enumerate(files):
|
||||
filepath = os.path.join(path, filename)
|
||||
with open(filepath, 'w') as cache_file:
|
||||
cache_file.write(str(index))
|
||||
|
||||
|
||||
class TestPaverDatabaseTasks(PaverTestCase):
    """
    Tests for the high level database tasks
    """

    def setUp(self):
        super().setUp()
        # This value is the actual sha1 fingerprint calculated for the dummy
        # files used in these tests
        self.expected_fingerprint = 'ccaa8d8dcc7d030cd6a6768db81f90d0ef976c3d'
        self.fingerprint_filename = f'{self.expected_fingerprint}.tar.gz'
        self.bucket = Mock(name='test_bucket')

    def _seed_local_cache(self, fingerprint):
        """
        Populate the (patched) CACHE_FOLDER with dummy db cache files and
        write *fingerprint* to the (patched) local fingerprint file,
        registering cleanup for both.
        """
        self.addCleanup(shutil.rmtree, db_utils.CACHE_FOLDER)
        self.addCleanup(os.remove, db_utils.FINGERPRINT_FILEPATH)
        _write_temporary_db_cache_files(db_utils.CACHE_FOLDER, database.ALL_DB_FILES)
        with open(db_utils.FINGERPRINT_FILEPATH, 'w') as fingerprint_file:
            fingerprint_file.write(fingerprint)

    @patch.object(db_utils, 'CACHE_FOLDER', mkdtemp())
    @patch.object(db_utils, 'FINGERPRINT_FILEPATH', os.path.join(mkdtemp(), 'fingerprint'))
    @patch.object(db_utils, 'sh')
    def test_load_data_from_local_cache(self, _mock_sh):
        """
        Assuming that the computed db cache file fingerprint is the same as
        the stored fingerprint, verify that we make a call to load data into
        the database without running migrations
        """
        # Stored fingerprint matches the computed one for the dummy files.
        self._seed_local_cache(self.expected_fingerprint)

        with patch.object(db_utils, 'get_file_from_s3', wraps=db_utils.get_file_from_s3) as _mock_get_file:
            database.update_local_bokchoy_db_from_s3()  # pylint: disable=no-value-for-parameter
            # Make sure that the local cache files are used - NOT downloaded from s3
            assert not _mock_get_file.called
        expected_shell_calls = [
            call(f'{Env.REPO_ROOT}/scripts/reset-test-db.sh --calculate_migrations'),
            call(f'{Env.REPO_ROOT}/scripts/reset-test-db.sh --use-existing-db')
        ]
        _mock_sh.assert_has_calls(expected_shell_calls)

    @patch.object(database, 'CACHE_BUCKET_NAME', 'test_bucket')
    @patch.object(db_utils, 'CACHE_FOLDER', mkdtemp())
    @patch.object(db_utils, 'FINGERPRINT_FILEPATH', os.path.join(mkdtemp(), 'fingerprint'))
    @patch.object(db_utils, 'sh')
    def test_load_data_from_s3_fingerprint(self, _mock_sh):
        """
        Assuming that the computed db cache file fingerprint is different
        than the stored fingerprint AND there is a matching fingerprint file
        in s3, verify that we make a call to load data into the database
        without running migrations
        """
        # Stored fingerprint deliberately differs from the computed one.
        self._seed_local_cache('123456789')

        # Bundle the dummy cache files into a tarball and stage it in the
        # mock bucket under the expected fingerprint name.
        archive_path = os.path.join(db_utils.CACHE_FOLDER, self.fingerprint_filename)
        with tarfile.open(name=archive_path, mode='w:gz') as archive:
            for name in database.ALL_DB_FILES:
                archive.add(os.path.join(db_utils.CACHE_FOLDER, name), arcname=name)
        key = boto.s3.key.Key(bucket=self.bucket, name=self.fingerprint_filename)
        key.set_contents_from_filename(archive_path, replace=False)

        with patch('boto.connect_s3', Mock(return_value=Mock())):
            with patch.object(db_utils, 'get_file_from_s3') as _mock_get_file:
                database.update_local_bokchoy_db_from_s3()  # pylint: disable=no-value-for-parameter
                # Make sure that the fingerprint file is downloaded from s3
                _mock_get_file.assert_called_once_with(
                    'test_bucket', self.fingerprint_filename, db_utils.CACHE_FOLDER
                )

        expected_shell_calls = [
            call(f'{Env.REPO_ROOT}/scripts/reset-test-db.sh --calculate_migrations'),
            call(f'{Env.REPO_ROOT}/scripts/reset-test-db.sh --use-existing-db')
        ]
        _mock_sh.assert_has_calls(expected_shell_calls)

    @patch.object(database, 'CACHE_BUCKET_NAME', 'test_bucket')
    @patch.object(db_utils, 'CACHE_FOLDER', mkdtemp())
    @patch.object(db_utils, 'FINGERPRINT_FILEPATH', os.path.join(mkdtemp(), 'fingerprint'))
    @patch.object(db_utils, 'sh')
    def test_load_data_and_run_migrations(self, _mock_sh):
        """
        Assuming that the computed db cache file fingerprint is different
        than the stored fingerprint AND there is NO matching fingerprint file
        in s3, verify that we make a call to load data into the database, run
        migrations and update the local db cache files
        """
        self._seed_local_cache('123456789')

        database.update_local_bokchoy_db_from_s3()  # pylint: disable=no-value-for-parameter
        expected_shell_calls = [
            call(f'{Env.REPO_ROOT}/scripts/reset-test-db.sh --calculate_migrations'),
            call(f'{Env.REPO_ROOT}/scripts/reset-test-db.sh --rebuild_cache --use-existing-db')
        ]
        _mock_sh.assert_has_calls(expected_shell_calls)

    @patch.object(database, 'CACHE_BUCKET_NAME', 'test_bucket')
    @patch.object(db_utils, 'CACHE_FOLDER', mkdtemp())
    @patch.object(db_utils, 'FINGERPRINT_FILEPATH', os.path.join(mkdtemp(), 'fingerprint'))
    @patch.object(db_utils, 'sh')
    def test_updated_db_cache_pushed_to_s3(self, _mock_sh):
        """
        Assuming that the computed db cache file fingerprint is different
        than the stored fingerprint AND there is NO matching fingerprint file
        in s3, verify that an updated fingeprint file is pushed to s3
        """
        self._seed_local_cache('123456789')

        database.update_local_bokchoy_db_from_s3()  # pylint: disable=no-value-for-parameter
        # NOTE(review): self.bucket is a Mock, so get_key() returns a truthy
        # Mock unconditionally — this assertion looks like it cannot fail.
        # Verify whether the upload path is actually exercised here.
        assert self.bucket.get_key(self.fingerprint_filename)
||||
@@ -9,7 +9,6 @@ from time import sleep
|
||||
from paver.easy import call_task, cmdopts, dry, might_call, needs, sh, task
|
||||
|
||||
from common.test.acceptance.fixtures.course import CourseFixture, FixtureError
|
||||
from pavelib.database import update_local_bokchoy_db_from_s3
|
||||
from pavelib.utils.envs import Env
|
||||
from pavelib.utils.test import utils as test_utils
|
||||
from pavelib.utils.test.bokchoy_options import (
|
||||
@@ -102,18 +101,7 @@ def update_fixtures():
|
||||
|
||||
|
||||
@task
@timed
def reset_test_database():
    """
    Reset the database used by the bokchoy tests.

    Use the database cache automation defined in pavelib/database.py
    """
    # Delegates entirely to the s3-backed cache task; see pavelib/database.py.
    update_local_bokchoy_db_from_s3()  # pylint: disable=no-value-for-parameter
||||
|
||||
|
||||
@task
|
||||
@needs(['reset_test_database', 'clear_mongo', 'load_bok_choy_data', 'load_courses', 'update_fixtures'])
|
||||
@needs(['clear_mongo', 'load_bok_choy_data', 'load_courses', 'update_fixtures'])
|
||||
@might_call('start_servers')
|
||||
@cmdopts([BOKCHOY_FASTTEST], share_with=['start_servers'])
|
||||
@timed
|
||||
|
||||
Reference in New Issue
Block a user