From e71cac0a0116707e160d1010ccddd665dae15c0a Mon Sep 17 00:00:00 2001 From: Aarif Date: Wed, 22 Dec 2021 13:42:47 +0500 Subject: [PATCH] fix: remove bokchoy db cache uploader paver tasks (#28841) --- pavelib/__init__.py | 2 +- pavelib/database.py | 136 --------------- pavelib/paver_tests/test_database.py | 194 --------------------- pavelib/utils/test/suites/bokchoy_suite.py | 14 +- 4 files changed, 2 insertions(+), 344 deletions(-) delete mode 100644 pavelib/database.py delete mode 100644 pavelib/paver_tests/test_database.py diff --git a/pavelib/__init__.py b/pavelib/__init__.py index 747031759d..b0f80a6a87 100644 --- a/pavelib/__init__.py +++ b/pavelib/__init__.py @@ -3,4 +3,4 @@ paver commands """ -from . import assets, bok_choy, database, docs, i18n, js_test, prereqs, quality, servers, tests +from . import assets, bok_choy, docs, i18n, js_test, prereqs, quality, servers, tests diff --git a/pavelib/database.py b/pavelib/database.py deleted file mode 100644 index 1249329085..0000000000 --- a/pavelib/database.py +++ /dev/null @@ -1,136 +0,0 @@ -""" -Tasks for controlling the databases used in tests -""" - - -from paver.easy import cmdopts, needs, task - -from pavelib.utils.db_utils import ( - compute_fingerprint_and_write_to_disk, - does_fingerprint_on_disk_match, - fingerprint_bokchoy_db_files, - is_fingerprint_in_bucket, - refresh_bokchoy_db_cache_from_s3, - remove_files_from_folder, - reset_test_db, - upload_db_cache_to_s3 -) -from pavelib.utils.timer import timed - -# Bokchoy db schema and data fixtures -BOKCHOY_DB_FILES = [ - 'bok_choy_data_default.json', - 'bok_choy_data_student_module_history.json', - 'bok_choy_migrations_data_default.sql', - 'bok_choy_migrations_data_student_module_history.sql', - 'bok_choy_schema_default.sql', - 'bok_choy_schema_student_module_history.sql' -] - -# Output files from scripts/reset-test-db.sh --calculate_migrations -MIGRATION_OUTPUT_FILES = [ - 'bok_choy_default_migrations.yaml', - 'bok_choy_student_module_history_migrations.yaml' -] - -ALL_DB_FILES = BOKCHOY_DB_FILES + MIGRATION_OUTPUT_FILES -CACHE_BUCKET_NAME = 'edx-tools-database-caches' -CACHE_FOLDER = 'common/test/db_cache' - - -@needs('pavelib.prereqs.install_prereqs') -@task -@timed -def update_bokchoy_db_cache(): - """ - Update and cache the MYSQL database for bokchoy testing: - * Remove any previously cached database files - * Apply migrations on a fresh db - * Write the collective sha1 checksum for all of these files to disk - - WARNING: This will take several minutes. - """ - print('Removing cached db files for bokchoy tests') - remove_files_from_folder(BOKCHOY_DB_FILES, CACHE_FOLDER) - reset_test_db(BOKCHOY_DB_FILES, update_cache_files=True) - compute_fingerprint_and_write_to_disk(MIGRATION_OUTPUT_FILES, ALL_DB_FILES) - - -@needs('pavelib.prereqs.install_prereqs') -@task -@timed -@cmdopts([ - ("rewrite_fingerprint", None, "Optional flag that will write the new sha1 fingerprint to disk") -]) -def update_local_bokchoy_db_from_s3(options): - """ - Prepare the local MYSQL test database for running bokchoy tests. Since - most pull requests do not introduce migrations, this task provides - an optimization for caching the state of the db when migrations are - added into a bucket in s3. Subsequent commits can avoid rerunning - migrations by using the cache files from s3, until the local cache files - are updated by running the `update_bokchoy_db_cache` Paver task, and - committing the updated cache files to github. - - Steps: - 1. Determine which migrations, if any, need to be applied to your current - db cache files to make them up to date - 2. Compute the sha1 fingerprint of the local db cache files and the output - of the migration - 3a. If the fingerprint computed in step 2 is equal to the local - fingerprint file, load the cache files into the MYSQL test database - 3b. If the fingerprints are not equal, but there is bucket matching the - fingerprint computed in step 2, download and extract the contents of - bucket (db cache files) and load them into the MYSQL test database - 3c. If the fingerprints are not equal AND there is no bucket matching the - fingerprint computed in step 2, load the local db cache files into - the MYSQL test database and apply any needed migrations. Create a - bucket in s3 named the fingerprint computed in step 2 and push the - newly updated db cache files to the bucket. - - NOTE: the computed fingerprints referenced in this and related functions - represent the state of the db cache files and migration output PRIOR - to running migrations. The corresponding s3 bucket named for a given - fingerprint contains the db cache files AFTER applying migrations - """ - fingerprint = fingerprint_bokchoy_db_files(MIGRATION_OUTPUT_FILES, ALL_DB_FILES) - fingerprints_match = does_fingerprint_on_disk_match(fingerprint) - - # Calculating the fingerprint already reset the DB, so we don't need to - # do it again (hence use_existing_db=True below) - if fingerprints_match: - print("DB cache files match the current migrations.") - reset_test_db(BOKCHOY_DB_FILES, update_cache_files=False, use_existing_db=True) - - elif is_fingerprint_in_bucket(fingerprint, CACHE_BUCKET_NAME): - print("Found updated bokchoy db files at S3.") - refresh_bokchoy_db_cache_from_s3(fingerprint, CACHE_BUCKET_NAME, BOKCHOY_DB_FILES) - reset_test_db(BOKCHOY_DB_FILES, update_cache_files=False, use_existing_db=True) - - else: - msg = "{} {} {}".format( - "Did not find updated bokchoy db files at S3.", - "Loading the bokchoy db files from disk", - "and running migrations." - ) - print(msg) - reset_test_db(BOKCHOY_DB_FILES, update_cache_files=True, use_existing_db=True) - # Check one last time to see if the fingerprint is present in - # the s3 bucket. This could occur because the bokchoy job is - # sharded and running the same task in parallel - if not is_fingerprint_in_bucket(fingerprint, CACHE_BUCKET_NAME): - upload_db_cache_to_s3(fingerprint, BOKCHOY_DB_FILES, CACHE_BUCKET_NAME) - else: - msg = "{} {}. {}".format( - "Found a matching fingerprint in bucket ", - CACHE_BUCKET_NAME, - "Not pushing to s3" - ) - print(msg) - - rewrite_fingerprint = getattr(options, 'rewrite_fingerprint', False) - # If the rewrite_fingerprint flag is set, and the fingerpint has changed, - # write it to disk. - if not fingerprints_match and rewrite_fingerprint: - print("Updating fingerprint and writing to disk.") - compute_fingerprint_and_write_to_disk(MIGRATION_OUTPUT_FILES, ALL_DB_FILES) diff --git a/pavelib/paver_tests/test_database.py b/pavelib/paver_tests/test_database.py deleted file mode 100644 index 6dd28527d7..0000000000 --- a/pavelib/paver_tests/test_database.py +++ /dev/null @@ -1,194 +0,0 @@ -""" -Tests for the Paver commands for updating test databases and its utility methods -""" - - -import os -import shutil -import tarfile -from tempfile import mkdtemp -from unittest import TestCase -from unittest.mock import call, patch, Mock - -import boto - -from pavelib import database -from pavelib.utils import db_utils -from pavelib.utils.db_utils import extract_files_from_zip -from pavelib.utils.envs import Env - -from .utils import PaverTestCase - - -class TestPaverDbUtils(TestCase): - """ Tests for paver bokchoy database utils """ - @patch('pavelib.utils.db_utils.verify_files_exist') - def test_extract_files_from_zip(self, _mock_verify): - test_dir = mkdtemp() - output_dir = mkdtemp() - self.addCleanup(shutil.rmtree, test_dir) - self.addCleanup(shutil.rmtree, output_dir) - - tmp_file_name = os.path.join(test_dir, 'test.txt') - with open(tmp_file_name, 'w') as tmp_file: - tmp_file.write('Test file content') - - tmp_tarfile = os.path.join(test_dir, 'test.tar.gz') - - with tarfile.open(name=tmp_tarfile, mode='w:gz') as tar_file: - tar_file.add(tmp_file_name, arcname='test.txt') - - extract_files_from_zip(['test.txt'], tmp_tarfile, output_dir) - - extracted_file = os.path.join(output_dir, 'test.txt') - assert os.path.isfile(extracted_file) - - with open(extracted_file) as test_file: - data = test_file.read() - assert data == 'Test file content' - - -def _write_temporary_db_cache_files(path, files): - """ - create some temporary files to act as the local db cache files so that - we can compute a fingerprint - """ - for index, filename in enumerate(files): - filepath = os.path.join(path, filename) - with open(filepath, 'w') as cache_file: - cache_file.write(str(index)) - - -class TestPaverDatabaseTasks(PaverTestCase): - """ - Tests for the high level database tasks - """ - - def setUp(self): - super().setUp() - # This value is the actual sha1 fingerprint calculated for the dummy - # files used in these tests - self.expected_fingerprint = 'ccaa8d8dcc7d030cd6a6768db81f90d0ef976c3d' - self.fingerprint_filename = f'{self.expected_fingerprint}.tar.gz' - self.bucket = Mock(name='test_bucket') - - @patch.object(db_utils, 'CACHE_FOLDER', mkdtemp()) - @patch.object(db_utils, 'FINGERPRINT_FILEPATH', os.path.join(mkdtemp(), 'fingerprint')) - @patch.object(db_utils, 'sh') - def test_load_data_from_local_cache(self, _mock_sh): - """ - Assuming that the computed db cache file fingerprint is the same as - the stored fingerprint, verify that we make a call to load data into - the database without running migrations - """ - self.addCleanup(shutil.rmtree, db_utils.CACHE_FOLDER) - self.addCleanup(os.remove, db_utils.FINGERPRINT_FILEPATH) - _write_temporary_db_cache_files(db_utils.CACHE_FOLDER, database.ALL_DB_FILES) - # write the local fingerprint file with the same value than the - # computed fingerprint - with open(db_utils.FINGERPRINT_FILEPATH, 'w') as fingerprint_file: - fingerprint_file.write(self.expected_fingerprint) - - with patch.object(db_utils, 'get_file_from_s3', wraps=db_utils.get_file_from_s3) as _mock_get_file: - database.update_local_bokchoy_db_from_s3() # pylint: disable=no-value-for-parameter - # Make sure that the local cache files are used - NOT downloaded from s3 - assert not _mock_get_file.called - calls = [ - call(f'{Env.REPO_ROOT}/scripts/reset-test-db.sh --calculate_migrations'), - call(f'{Env.REPO_ROOT}/scripts/reset-test-db.sh --use-existing-db') - ] - _mock_sh.assert_has_calls(calls) - - @patch.object(database, 'CACHE_BUCKET_NAME', 'test_bucket') - @patch.object(db_utils, 'CACHE_FOLDER', mkdtemp()) - @patch.object(db_utils, 'FINGERPRINT_FILEPATH', os.path.join(mkdtemp(), 'fingerprint')) - @patch.object(db_utils, 'sh') - def test_load_data_from_s3_fingerprint(self, _mock_sh): - """ - Assuming that the computed db cache file fingerprint is different - than the stored fingerprint AND there is a matching fingerprint file - in s3, verify that we make a call to load data into the database - without running migrations - """ - self.addCleanup(shutil.rmtree, db_utils.CACHE_FOLDER) - self.addCleanup(os.remove, db_utils.FINGERPRINT_FILEPATH) - _write_temporary_db_cache_files(db_utils.CACHE_FOLDER, database.ALL_DB_FILES) - - # zip the temporary files and push them to s3 bucket - zipfile_path = os.path.join(db_utils.CACHE_FOLDER, self.fingerprint_filename) - with tarfile.open(name=zipfile_path, mode='w:gz') as tar_file: - for name in database.ALL_DB_FILES: - tar_file.add(os.path.join(db_utils.CACHE_FOLDER, name), arcname=name) - key = boto.s3.key.Key(bucket=self.bucket, name=self.fingerprint_filename) - key.set_contents_from_filename(zipfile_path, replace=False) - - # write the local fingerprint file with a different value than - # the computed fingerprint - local_fingerprint = '123456789' - with open(db_utils.FINGERPRINT_FILEPATH, 'w') as fingerprint_file: - fingerprint_file.write(local_fingerprint) - - with patch('boto.connect_s3', Mock(return_value=Mock())): - with patch.object(db_utils, 'get_file_from_s3') as _mock_get_file: - database.update_local_bokchoy_db_from_s3() # pylint: disable=no-value-for-parameter - # Make sure that the fingerprint file is downloaded from s3 - _mock_get_file.assert_called_once_with( - 'test_bucket', self.fingerprint_filename, db_utils.CACHE_FOLDER - ) - - calls = [ - call(f'{Env.REPO_ROOT}/scripts/reset-test-db.sh --calculate_migrations'), - call(f'{Env.REPO_ROOT}/scripts/reset-test-db.sh --use-existing-db') - ] - _mock_sh.assert_has_calls(calls) - - @patch.object(database, 'CACHE_BUCKET_NAME', 'test_bucket') - @patch.object(db_utils, 'CACHE_FOLDER', mkdtemp()) - @patch.object(db_utils, 'FINGERPRINT_FILEPATH', os.path.join(mkdtemp(), 'fingerprint')) - @patch.object(db_utils, 'sh') - def test_load_data_and_run_migrations(self, _mock_sh): - """ - Assuming that the computed db cache file fingerprint is different - than the stored fingerprint AND there is NO matching fingerprint file - in s3, verify that we make a call to load data into the database, run - migrations and update the local db cache files - """ - self.addCleanup(shutil.rmtree, db_utils.CACHE_FOLDER) - self.addCleanup(os.remove, db_utils.FINGERPRINT_FILEPATH) - _write_temporary_db_cache_files(db_utils.CACHE_FOLDER, database.ALL_DB_FILES) - - # write the local fingerprint file with a different value than - # the computed fingerprint - local_fingerprint = '123456789' - with open(db_utils.FINGERPRINT_FILEPATH, 'w') as fingerprint_file: - fingerprint_file.write(local_fingerprint) - - database.update_local_bokchoy_db_from_s3() # pylint: disable=no-value-for-parameter - calls = [ - call(f'{Env.REPO_ROOT}/scripts/reset-test-db.sh --calculate_migrations'), - call(f'{Env.REPO_ROOT}/scripts/reset-test-db.sh --rebuild_cache --use-existing-db') - ] - _mock_sh.assert_has_calls(calls) - - @patch.object(database, 'CACHE_BUCKET_NAME', 'test_bucket') - @patch.object(db_utils, 'CACHE_FOLDER', mkdtemp()) - @patch.object(db_utils, 'FINGERPRINT_FILEPATH', os.path.join(mkdtemp(), 'fingerprint')) - @patch.object(db_utils, 'sh') - def test_updated_db_cache_pushed_to_s3(self, _mock_sh): - """ - Assuming that the computed db cache file fingerprint is different - than the stored fingerprint AND there is NO matching fingerprint file - in s3, verify that an updated fingeprint file is pushed to s3 - """ - self.addCleanup(shutil.rmtree, db_utils.CACHE_FOLDER) - self.addCleanup(os.remove, db_utils.FINGERPRINT_FILEPATH) - _write_temporary_db_cache_files(db_utils.CACHE_FOLDER, database.ALL_DB_FILES) - - # write the local fingerprint file with a different value than - # the computed fingerprint - local_fingerprint = '123456789' - with open(db_utils.FINGERPRINT_FILEPATH, 'w') as fingerprint_file: - fingerprint_file.write(local_fingerprint) - - database.update_local_bokchoy_db_from_s3() # pylint: disable=no-value-for-parameter - assert self.bucket.get_key(self.fingerprint_filename) diff --git a/pavelib/utils/test/suites/bokchoy_suite.py b/pavelib/utils/test/suites/bokchoy_suite.py index 1155c8c54f..19fbacd960 100644 --- a/pavelib/utils/test/suites/bokchoy_suite.py +++ b/pavelib/utils/test/suites/bokchoy_suite.py @@ -9,7 +9,6 @@ from time import sleep from paver.easy import call_task, cmdopts, dry, might_call, needs, sh, task from common.test.acceptance.fixtures.course import CourseFixture, FixtureError -from pavelib.database import update_local_bokchoy_db_from_s3 from pavelib.utils.envs import Env from pavelib.utils.test import utils as test_utils from pavelib.utils.test.bokchoy_options import ( @@ -102,18 +101,7 @@ def update_fixtures(): @task -@timed -def reset_test_database(): - """ - Reset the database used by the bokchoy tests. - - Use the database cache automation defined in pavelib/database.py - """ - update_local_bokchoy_db_from_s3() # pylint: disable=no-value-for-parameter - - -@task -@needs(['reset_test_database', 'clear_mongo', 'load_bok_choy_data', 'load_courses', 'update_fixtures']) +@needs(['clear_mongo', 'load_bok_choy_data', 'load_courses', 'update_fixtures']) @might_call('start_servers') @cmdopts([BOKCHOY_FASTTEST], share_with=['start_servers']) @timed