fix: remove bokchoy db cache uploader paver tasks (#28841)

Author: Aarif
Date: 2021-12-22 13:42:47 +05:00
Committed by: GitHub
parent 51b67ad1df
commit e71cac0a01
4 changed files with 2 additions and 344 deletions

View File

@@ -3,4 +3,4 @@ paver commands
 """
-from . import assets, bok_choy, database, docs, i18n, js_test, prereqs, quality, servers, tests
+from . import assets, bok_choy, docs, i18n, js_test, prereqs, quality, servers, tests

View File

@@ -1,136 +0,0 @@
-"""
-Tasks for controlling the databases used in tests
-"""
-from paver.easy import cmdopts, needs, task
-
-from pavelib.utils.db_utils import (
-    compute_fingerprint_and_write_to_disk,
-    does_fingerprint_on_disk_match,
-    fingerprint_bokchoy_db_files,
-    is_fingerprint_in_bucket,
-    refresh_bokchoy_db_cache_from_s3,
-    remove_files_from_folder,
-    reset_test_db,
-    upload_db_cache_to_s3
-)
-from pavelib.utils.timer import timed
-
-# Bokchoy db schema and data fixtures
-BOKCHOY_DB_FILES = [
-    'bok_choy_data_default.json',
-    'bok_choy_data_student_module_history.json',
-    'bok_choy_migrations_data_default.sql',
-    'bok_choy_migrations_data_student_module_history.sql',
-    'bok_choy_schema_default.sql',
-    'bok_choy_schema_student_module_history.sql'
-]
-
-# Output files from scripts/reset-test-db.sh --calculate_migrations
-MIGRATION_OUTPUT_FILES = [
-    'bok_choy_default_migrations.yaml',
-    'bok_choy_student_module_history_migrations.yaml'
-]
-
-ALL_DB_FILES = BOKCHOY_DB_FILES + MIGRATION_OUTPUT_FILES
-CACHE_BUCKET_NAME = 'edx-tools-database-caches'
-CACHE_FOLDER = 'common/test/db_cache'
-
-
-@needs('pavelib.prereqs.install_prereqs')
-@task
-@timed
-def update_bokchoy_db_cache():
-    """
-    Update and cache the MySQL database for bokchoy testing:
-    * Remove any previously cached database files
-    * Apply migrations on a fresh db
-    * Write the collective sha1 checksum for all of these files to disk
-
-    WARNING: This will take several minutes.
-    """
-    print('Removing cached db files for bokchoy tests')
-    remove_files_from_folder(BOKCHOY_DB_FILES, CACHE_FOLDER)
-    reset_test_db(BOKCHOY_DB_FILES, update_cache_files=True)
-    compute_fingerprint_and_write_to_disk(MIGRATION_OUTPUT_FILES, ALL_DB_FILES)
-
-
-@needs('pavelib.prereqs.install_prereqs')
-@task
-@timed
-@cmdopts([
-    ("rewrite_fingerprint", None, "Optional flag that will write the new sha1 fingerprint to disk")
-])
-def update_local_bokchoy_db_from_s3(options):
-    """
-    Prepare the local MySQL test database for running bokchoy tests. Since
-    most pull requests do not introduce migrations, this task provides an
-    optimization: when migrations are added, the resulting state of the db
-    is cached in a bucket in s3. Subsequent commits can avoid rerunning
-    migrations by using the cache files from s3, until the local cache files
-    are updated by running the `update_bokchoy_db_cache` Paver task and
-    committing the updated cache files to github.
-
-    Steps:
-    1. Determine which migrations, if any, need to be applied to your current
-       db cache files to make them up to date
-    2. Compute the sha1 fingerprint of the local db cache files and the output
-       of the migration calculation
-    3a. If the fingerprint computed in step 2 is equal to the local
-        fingerprint file, load the cache files into the MySQL test database
-    3b. If the fingerprints are not equal, but there is a bucket matching the
-        fingerprint computed in step 2, download and extract the contents of
-        that bucket (db cache files) and load them into the MySQL test database
-    3c. If the fingerprints are not equal AND there is no bucket matching the
-        fingerprint computed in step 2, load the local db cache files into
-        the MySQL test database and apply any needed migrations. Create a
-        bucket in s3 named for the fingerprint computed in step 2 and push
-        the newly updated db cache files to the bucket.
-
-    NOTE: the computed fingerprints referenced in this and related functions
-    represent the state of the db cache files and migration output PRIOR
-    to running migrations. The corresponding s3 bucket named for a given
-    fingerprint contains the db cache files AFTER applying migrations.
-    """
-    fingerprint = fingerprint_bokchoy_db_files(MIGRATION_OUTPUT_FILES, ALL_DB_FILES)
-    fingerprints_match = does_fingerprint_on_disk_match(fingerprint)
-
-    # Calculating the fingerprint already reset the DB, so we don't need to
-    # do it again (hence use_existing_db=True below)
-    if fingerprints_match:
-        print("DB cache files match the current migrations.")
-        reset_test_db(BOKCHOY_DB_FILES, update_cache_files=False, use_existing_db=True)
-    elif is_fingerprint_in_bucket(fingerprint, CACHE_BUCKET_NAME):
-        print("Found updated bokchoy db files at S3.")
-        refresh_bokchoy_db_cache_from_s3(fingerprint, CACHE_BUCKET_NAME, BOKCHOY_DB_FILES)
-        reset_test_db(BOKCHOY_DB_FILES, update_cache_files=False, use_existing_db=True)
-    else:
-        msg = "{} {} {}".format(
-            "Did not find updated bokchoy db files at S3.",
-            "Loading the bokchoy db files from disk",
-            "and running migrations."
-        )
-        print(msg)
-        reset_test_db(BOKCHOY_DB_FILES, update_cache_files=True, use_existing_db=True)
-        # Check one last time to see if the fingerprint is present in
-        # the s3 bucket. This could occur because the bokchoy job is
-        # sharded and running the same task in parallel
-        if not is_fingerprint_in_bucket(fingerprint, CACHE_BUCKET_NAME):
-            upload_db_cache_to_s3(fingerprint, BOKCHOY_DB_FILES, CACHE_BUCKET_NAME)
-        else:
-            msg = "{} {}. {}".format(
-                "Found a matching fingerprint in bucket",
-                CACHE_BUCKET_NAME,
-                "Not pushing to s3."
-            )
-            print(msg)
-
-    rewrite_fingerprint = getattr(options, 'rewrite_fingerprint', False)
-    # If the rewrite_fingerprint flag is set, and the fingerprint has changed,
-    # write it to disk.
-    if not fingerprints_match and rewrite_fingerprint:
-        print("Updating fingerprint and writing to disk.")
-        compute_fingerprint_and_write_to_disk(MIGRATION_OUTPUT_FILES, ALL_DB_FILES)
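
The fingerprint helpers imported above (fingerprint_bokchoy_db_files, compute_fingerprint_and_write_to_disk, and friends) live in pavelib/utils/db_utils.py, which is not part of this diff. As a rough illustration of the "collective sha1 checksum" the docstrings describe, hashing a set of cache files into a single fingerprint could look like the sketch below; fingerprint_files is a hypothetical stand-in, not the real helper.

import hashlib
import os

CACHE_FOLDER = 'common/test/db_cache'  # mirrors the constant from the removed module


def fingerprint_files(filenames, folder=CACHE_FOLDER):
    """
    Hypothetical sketch: return one sha1 hex digest covering the contents
    of all the given files, read in sorted order so the result is stable
    regardless of how the names are listed.
    """
    hasher = hashlib.sha1()
    for name in sorted(filenames):
        with open(os.path.join(folder, name), 'rb') as cache_file:
            hasher.update(cache_file.read())
    return hasher.hexdigest()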

View File

@@ -1,194 +0,0 @@
-"""
-Tests for the Paver commands for updating test databases and their utility methods
-"""
-import os
-import shutil
-import tarfile
-from tempfile import mkdtemp
-from unittest import TestCase
-from unittest.mock import call, patch, Mock
-
-import boto
-
-from pavelib import database
-from pavelib.utils import db_utils
-from pavelib.utils.db_utils import extract_files_from_zip
-from pavelib.utils.envs import Env
-
-from .utils import PaverTestCase
-
-
-class TestPaverDbUtils(TestCase):
-    """ Tests for paver bokchoy database utils """
-
-    @patch('pavelib.utils.db_utils.verify_files_exist')
-    def test_extract_files_from_zip(self, _mock_verify):
-        test_dir = mkdtemp()
-        output_dir = mkdtemp()
-        self.addCleanup(shutil.rmtree, test_dir)
-        self.addCleanup(shutil.rmtree, output_dir)
-
-        tmp_file_name = os.path.join(test_dir, 'test.txt')
-        with open(tmp_file_name, 'w') as tmp_file:
-            tmp_file.write('Test file content')
-
-        tmp_tarfile = os.path.join(test_dir, 'test.tar.gz')
-        with tarfile.open(name=tmp_tarfile, mode='w:gz') as tar_file:
-            tar_file.add(tmp_file_name, arcname='test.txt')
-
-        extract_files_from_zip(['test.txt'], tmp_tarfile, output_dir)
-
-        extracted_file = os.path.join(output_dir, 'test.txt')
-        assert os.path.isfile(extracted_file)
-
-        with open(extracted_file) as test_file:
-            data = test_file.read()
-        assert data == 'Test file content'
-
-
-def _write_temporary_db_cache_files(path, files):
-    """
-    Create some temporary files to act as the local db cache files so that
-    we can compute a fingerprint.
-    """
-    for index, filename in enumerate(files):
-        filepath = os.path.join(path, filename)
-        with open(filepath, 'w') as cache_file:
-            cache_file.write(str(index))
-
-
-class TestPaverDatabaseTasks(PaverTestCase):
-    """
-    Tests for the high level database tasks
-    """
-
-    def setUp(self):
-        super().setUp()
-        # This value is the actual sha1 fingerprint calculated for the dummy
-        # files used in these tests
-        self.expected_fingerprint = 'ccaa8d8dcc7d030cd6a6768db81f90d0ef976c3d'
-        self.fingerprint_filename = f'{self.expected_fingerprint}.tar.gz'
-        self.bucket = Mock(name='test_bucket')
-
-    @patch.object(db_utils, 'CACHE_FOLDER', mkdtemp())
-    @patch.object(db_utils, 'FINGERPRINT_FILEPATH', os.path.join(mkdtemp(), 'fingerprint'))
-    @patch.object(db_utils, 'sh')
-    def test_load_data_from_local_cache(self, _mock_sh):
-        """
-        Assuming that the computed db cache file fingerprint is the same as
-        the stored fingerprint, verify that we make a call to load data into
-        the database without running migrations.
-        """
-        self.addCleanup(shutil.rmtree, db_utils.CACHE_FOLDER)
-        self.addCleanup(os.remove, db_utils.FINGERPRINT_FILEPATH)
-        _write_temporary_db_cache_files(db_utils.CACHE_FOLDER, database.ALL_DB_FILES)
-
-        # Write the local fingerprint file with the same value as the
-        # computed fingerprint
-        with open(db_utils.FINGERPRINT_FILEPATH, 'w') as fingerprint_file:
-            fingerprint_file.write(self.expected_fingerprint)
-
-        with patch.object(db_utils, 'get_file_from_s3', wraps=db_utils.get_file_from_s3) as _mock_get_file:
-            database.update_local_bokchoy_db_from_s3()  # pylint: disable=no-value-for-parameter
-            # Make sure that the local cache files are used - NOT downloaded from s3
-            assert not _mock_get_file.called
-        calls = [
-            call(f'{Env.REPO_ROOT}/scripts/reset-test-db.sh --calculate_migrations'),
-            call(f'{Env.REPO_ROOT}/scripts/reset-test-db.sh --use-existing-db')
-        ]
-        _mock_sh.assert_has_calls(calls)
-
-    @patch.object(database, 'CACHE_BUCKET_NAME', 'test_bucket')
-    @patch.object(db_utils, 'CACHE_FOLDER', mkdtemp())
-    @patch.object(db_utils, 'FINGERPRINT_FILEPATH', os.path.join(mkdtemp(), 'fingerprint'))
-    @patch.object(db_utils, 'sh')
-    def test_load_data_from_s3_fingerprint(self, _mock_sh):
-        """
-        Assuming that the computed db cache file fingerprint is different
-        from the stored fingerprint AND there is a matching fingerprint file
-        in s3, verify that we make a call to load data into the database
-        without running migrations.
-        """
-        self.addCleanup(shutil.rmtree, db_utils.CACHE_FOLDER)
-        self.addCleanup(os.remove, db_utils.FINGERPRINT_FILEPATH)
-        _write_temporary_db_cache_files(db_utils.CACHE_FOLDER, database.ALL_DB_FILES)
-
-        # Zip the temporary files and push them to the s3 bucket
-        zipfile_path = os.path.join(db_utils.CACHE_FOLDER, self.fingerprint_filename)
-        with tarfile.open(name=zipfile_path, mode='w:gz') as tar_file:
-            for name in database.ALL_DB_FILES:
-                tar_file.add(os.path.join(db_utils.CACHE_FOLDER, name), arcname=name)
-        key = boto.s3.key.Key(bucket=self.bucket, name=self.fingerprint_filename)
-        key.set_contents_from_filename(zipfile_path, replace=False)
-
-        # Write the local fingerprint file with a different value from the
-        # computed fingerprint
-        local_fingerprint = '123456789'
-        with open(db_utils.FINGERPRINT_FILEPATH, 'w') as fingerprint_file:
-            fingerprint_file.write(local_fingerprint)
-
-        with patch('boto.connect_s3', Mock(return_value=Mock())):
-            with patch.object(db_utils, 'get_file_from_s3') as _mock_get_file:
-                database.update_local_bokchoy_db_from_s3()  # pylint: disable=no-value-for-parameter
-                # Make sure that the fingerprint file is downloaded from s3
-                _mock_get_file.assert_called_once_with(
-                    'test_bucket', self.fingerprint_filename, db_utils.CACHE_FOLDER
-                )
-        calls = [
-            call(f'{Env.REPO_ROOT}/scripts/reset-test-db.sh --calculate_migrations'),
-            call(f'{Env.REPO_ROOT}/scripts/reset-test-db.sh --use-existing-db')
-        ]
-        _mock_sh.assert_has_calls(calls)
-
-    @patch.object(database, 'CACHE_BUCKET_NAME', 'test_bucket')
-    @patch.object(db_utils, 'CACHE_FOLDER', mkdtemp())
-    @patch.object(db_utils, 'FINGERPRINT_FILEPATH', os.path.join(mkdtemp(), 'fingerprint'))
-    @patch.object(db_utils, 'sh')
-    def test_load_data_and_run_migrations(self, _mock_sh):
-        """
-        Assuming that the computed db cache file fingerprint is different
-        from the stored fingerprint AND there is NO matching fingerprint file
-        in s3, verify that we make a call to load data into the database, run
-        migrations and update the local db cache files.
-        """
-        self.addCleanup(shutil.rmtree, db_utils.CACHE_FOLDER)
-        self.addCleanup(os.remove, db_utils.FINGERPRINT_FILEPATH)
-        _write_temporary_db_cache_files(db_utils.CACHE_FOLDER, database.ALL_DB_FILES)
-
-        # Write the local fingerprint file with a different value from the
-        # computed fingerprint
-        local_fingerprint = '123456789'
-        with open(db_utils.FINGERPRINT_FILEPATH, 'w') as fingerprint_file:
-            fingerprint_file.write(local_fingerprint)
-
-        database.update_local_bokchoy_db_from_s3()  # pylint: disable=no-value-for-parameter
-        calls = [
-            call(f'{Env.REPO_ROOT}/scripts/reset-test-db.sh --calculate_migrations'),
-            call(f'{Env.REPO_ROOT}/scripts/reset-test-db.sh --rebuild_cache --use-existing-db')
-        ]
-        _mock_sh.assert_has_calls(calls)
-
-    @patch.object(database, 'CACHE_BUCKET_NAME', 'test_bucket')
-    @patch.object(db_utils, 'CACHE_FOLDER', mkdtemp())
-    @patch.object(db_utils, 'FINGERPRINT_FILEPATH', os.path.join(mkdtemp(), 'fingerprint'))
-    @patch.object(db_utils, 'sh')
-    def test_updated_db_cache_pushed_to_s3(self, _mock_sh):
-        """
-        Assuming that the computed db cache file fingerprint is different
-        from the stored fingerprint AND there is NO matching fingerprint file
-        in s3, verify that an updated fingerprint file is pushed to s3.
-        """
-        self.addCleanup(shutil.rmtree, db_utils.CACHE_FOLDER)
-        self.addCleanup(os.remove, db_utils.FINGERPRINT_FILEPATH)
-        _write_temporary_db_cache_files(db_utils.CACHE_FOLDER, database.ALL_DB_FILES)
-
-        # Write the local fingerprint file with a different value from the
-        # computed fingerprint
-        local_fingerprint = '123456789'
-        with open(db_utils.FINGERPRINT_FILEPATH, 'w') as fingerprint_file:
-            fingerprint_file.write(local_fingerprint)
-
-        database.update_local_bokchoy_db_from_s3()  # pylint: disable=no-value-for-parameter
-        assert self.bucket.get_key(self.fingerprint_filename)
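
extract_files_from_zip, exercised by the first test above, also lives in pavelib/utils/db_utils.py and is not shown in this diff. Going only by what test_extract_files_from_zip asserts, a minimal equivalent could look like the following sketch; extract_named_files is a hypothetical name, and the real helper also calls verify_files_exist, which the test patches out.

import os
import tarfile


def extract_named_files(filenames, tarfile_path, output_dir):
    """
    Hypothetical sketch: pull only the named members out of a gzipped
    tarball into output_dir, which is the behavior the test expects.
    """
    with tarfile.open(name=tarfile_path, mode='r:gz') as tar_file:
        for name in filenames:
            tar_file.extract(tar_file.getmember(name), path=output_dir)
    # Return the extracted paths so callers can verify them, as the test does.
    return [os.path.join(output_dir, name) for name in filenames]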

View File

@@ -9,7 +9,6 @@ from time import sleep
 from paver.easy import call_task, cmdopts, dry, might_call, needs, sh, task
 from common.test.acceptance.fixtures.course import CourseFixture, FixtureError
-from pavelib.database import update_local_bokchoy_db_from_s3
 from pavelib.utils.envs import Env
 from pavelib.utils.test import utils as test_utils
 from pavelib.utils.test.bokchoy_options import (
@@ -102,18 +101,7 @@ def update_fixtures():
-@task
-@timed
-def reset_test_database():
-    """
-    Reset the database used by the bokchoy tests.
-    Use the database cache automation defined in pavelib/database.py
-    """
-    update_local_bokchoy_db_from_s3()  # pylint: disable=no-value-for-parameter
-
-
 @task
-@needs(['reset_test_database', 'clear_mongo', 'load_bok_choy_data', 'load_courses', 'update_fixtures'])
+@needs(['clear_mongo', 'load_bok_choy_data', 'load_courses', 'update_fixtures'])
 @might_call('start_servers')
 @cmdopts([BOKCHOY_FASTTEST], share_with=['start_servers'])
 @timed
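
For reference, the bucket lookups these removed tasks depended on (is_fingerprint_in_bucket in pavelib/utils/db_utils.py, and the get_key assertion in the tests) used the legacy boto library. A minimal sketch of that existence check, assuming default boto credentials and using a hypothetical function name, might look like:

import boto


def fingerprint_in_bucket(fingerprint, bucket_name):
    """
    Hypothetical sketch: return True if <fingerprint>.tar.gz already
    exists as a key in the given s3 bucket (boto 2 API).
    """
    bucket = boto.connect_s3().get_bucket(bucket_name)
    return bucket.get_key(f'{fingerprint}.tar.gz') is not None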