From fb73888ebacc3b93cdc58fcb86cc5fe1224d9142 Mon Sep 17 00:00:00 2001
From: Nimisha Asthagiri
Date: Sun, 26 Feb 2017 21:12:51 -0500
Subject: [PATCH] Storage-backed versioned Block Structures: Models

---
 .../migrations/0002_blockstructuremodel.py    |  35 +++
 .../content/block_structure/models.py         | 209 ++++++++++++++++++
 .../block_structure/tests/test_models.py      | 146 ++++++++++++
 3 files changed, 390 insertions(+)
 create mode 100644 openedx/core/djangoapps/content/block_structure/migrations/0002_blockstructuremodel.py
 create mode 100644 openedx/core/djangoapps/content/block_structure/models.py
 create mode 100644 openedx/core/djangoapps/content/block_structure/tests/test_models.py

diff --git a/openedx/core/djangoapps/content/block_structure/migrations/0002_blockstructuremodel.py b/openedx/core/djangoapps/content/block_structure/migrations/0002_blockstructuremodel.py
new file mode 100644
index 0000000000..04c33ff223
--- /dev/null
+++ b/openedx/core/djangoapps/content/block_structure/migrations/0002_blockstructuremodel.py
@@ -0,0 +1,35 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+from django.db import migrations, models
+import django.utils.timezone
+import openedx.core.djangoapps.xmodule_django.models
+import model_utils.fields
+import openedx.core.djangoapps.content.block_structure.models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('block_structure', '0001_config'),
+    ]
+
+    operations = [
+        migrations.CreateModel(
+            name='BlockStructureModel',
+            fields=[
+                ('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)),
+                ('created', model_utils.fields.AutoCreatedField(default=django.utils.timezone.now, verbose_name='created', editable=False)),
+                ('modified', model_utils.fields.AutoLastModifiedField(default=django.utils.timezone.now, verbose_name='modified', editable=False)),
+                ('data_usage_key', openedx.core.djangoapps.xmodule_django.models.UsageKeyField(unique=True, max_length=255, verbose_name='Identifier of the data being collected.')),
+                ('data_version', models.CharField(max_length=255, null=True, verbose_name='Version of the data at the time of collection.', blank=True)),
+                ('data_edit_timestamp', models.DateTimeField(null=True, verbose_name='Edit timestamp of the data at the time of collection.', blank=True)),
+                ('transformers_schema_version', models.CharField(max_length=255, verbose_name='Representation of the schema version of the transformers used during collection.')),
+                ('block_structure_schema_version', models.CharField(max_length=255, verbose_name='Version of the block structure schema at the time of collection.')),
+                ('data', models.FileField(max_length=500, upload_to=openedx.core.djangoapps.content.block_structure.models._path_name)),
+            ],
+            options={
+                'db_table': 'block_structure',
+            },
+        ),
+    ]
diff --git a/openedx/core/djangoapps/content/block_structure/models.py b/openedx/core/djangoapps/content/block_structure/models.py
new file mode 100644
index 0000000000..a0ad5927de
--- /dev/null
+++ b/openedx/core/djangoapps/content/block_structure/models.py
@@ -0,0 +1,209 @@
+"""
+Models used by the block structure framework.
+"""
+
+from datetime import datetime
+from django.conf import settings
+from django.core.files.base import ContentFile
+from django.db import models
+from logging import getLogger
+
+from model_utils.models import TimeStampedModel
+from openedx.core.djangoapps.xmodule_django.models import UsageKeyField
+from openedx.core.lib.block_structure.exceptions import BlockStructureNotFound
+from openedx.core.storage import get_storage
+
+import openedx.core.djangoapps.content.block_structure.config as config
+
+
+log = getLogger(__name__)
+
+
+def _create_path(directory, filename):
+    """
+    Returns the full path for the given directory and filename.
+    """
+    return '{}/{}'.format(directory, filename)
+
+
+def _directory_name(data_usage_key):
+    """
+    Returns the directory name for the given
+    data_usage_key.
+    """
+    return '{}{}'.format(
+        settings.BLOCK_STRUCTURES_SETTINGS.get('DIRECTORY_PREFIX', ''),
+        unicode(data_usage_key),
+    )
+
+
+def _path_name(bs_model, filename):  # pylint:disable=unused-argument
+    """
+    Returns a new UTC-timestamp-named path for the given
+    BlockStructureModel instance; the passed-in filename is ignored.
+    """
+    filename = datetime.utcnow().strftime('%Y-%m-%d-%H:%M:%S-%f')
+    return _create_path(
+        _directory_name(bs_model.data_usage_key),
+        filename,
+    )
+
+
+def _bs_model_storage():
+    """
+    Get django Storage object for BlockStructureModel.
+    """
+    return get_storage(
+        settings.BLOCK_STRUCTURES_SETTINGS.get('STORAGE_CLASS'),
+        **settings.BLOCK_STRUCTURES_SETTINGS.get('STORAGE_KWARGS', {})
+    )
+
+
+class BlockStructureModel(TimeStampedModel):
+    """
+    Model for storing Block Structure information.
+    """
+    VERSION_FIELDS = [
+        u'data_version',
+        u'data_edit_timestamp',
+        u'transformers_schema_version',
+        u'block_structure_schema_version',
+    ]
+    UNIQUENESS_FIELDS = [u'data_usage_key'] + VERSION_FIELDS
+
+    class Meta(object):
+        db_table = 'block_structure'
+
+    data_usage_key = UsageKeyField(
+        u'Identifier of the data being collected.',
+        blank=False,
+        max_length=255,
+        unique=True,
+    )
+    data_version = models.CharField(
+        u'Version of the data at the time of collection.',
+        blank=True,
+        null=True,
+        max_length=255,
+    )
+    data_edit_timestamp = models.DateTimeField(
+        u'Edit timestamp of the data at the time of collection.',
+        blank=True,
+        null=True,
+    )
+    transformers_schema_version = models.CharField(
+        u'Representation of the schema version of the transformers used during collection.',
+        blank=False,
+        max_length=255,
+    )
+    block_structure_schema_version = models.CharField(
+        u'Version of the block structure schema at the time of collection.',
+        blank=False,
+        max_length=255,
+    )
+    data = models.FileField(
+        upload_to=_path_name,
+        max_length=500,  # allocate enough for base path + prefix + usage_key + timestamp in filepath
+    )
+
+    def get_serialized_data(self):
+        """
+        Returns the collected data for this instance.
+        """
+        serialized_data = self.data.read()
+        log.info("BlockStructure: Read data from store; %r, size: %d", self, len(serialized_data))
+        return serialized_data
+
+    @classmethod
+    def get(cls, data_usage_key):
+        """
+        Returns the entry associated with the given data_usage_key.
+        Raises:
+             BlockStructureNotFound if an entry for data_usage_key is not found.
+        """
+        try:
+            return cls.objects.get(data_usage_key=data_usage_key)
+        except cls.DoesNotExist:
+            log.info("BlockStructure: Not found in table; %r.", data_usage_key)
+            raise BlockStructureNotFound(data_usage_key)
+
+    @classmethod
+    def update_or_create(cls, serialized_data, data_usage_key, **kwargs):
+        """
+        Updates or creates the BlockStructureModel entry for the given
+        data_usage_key (kwargs hold the other field values), uploading
+        serialized_data as its content. Returns (bs_model, created).
+        """
+        bs_model, created = cls.objects.update_or_create(defaults=kwargs, data_usage_key=data_usage_key)
+        bs_model.data.save('', ContentFile(serialized_data))
+        log.info(
+            'BlockStructure: %s in store; %r, size: %d',
+            'Created' if created else 'Updated',
+            bs_model,
+            len(serialized_data),
+        )
+        if not created:
+            cls._prune_files(data_usage_key)
+
+        return bs_model, created
+
+    def __unicode__(self):
+        """
+        Returns a string representation of this model.
+        """
+        return u', '.join(
+            u'{}: {}'.format(field_name, unicode(getattr(self, field_name)))
+            for field_name in self.UNIQUENESS_FIELDS
+        )
+
+    @classmethod
+    def _prune_files(cls, data_usage_key, num_to_keep=None):
+        """
+        Deletes old file versions for data_usage_key (best effort; errors are only logged).
+        """
+        if not config.is_enabled(config.PRUNE_OLD_VERSIONS):
+            return
+
+        if num_to_keep is None:
+            num_to_keep = config.num_versions_to_keep()
+
+        try:
+            all_files_by_date = sorted(cls._get_all_files(data_usage_key))  # timestamp filenames sort chronologically
+            files_to_delete = all_files_by_date[:-num_to_keep] if num_to_keep > 0 else all_files_by_date
+            cls._delete_files(files_to_delete)
+            log.info(
+                'BlockStructure: Deleted %d out of total %d files in store; data_usage_key: %r, num_to_keep: %d.',
+                len(files_to_delete),
+                len(all_files_by_date),
+                data_usage_key,
+                num_to_keep,
+            )
+
+        except Exception as error:  # pylint: disable=broad-except
+            log.exception(
+                'BlockStructure: Exception when deleting old files; data_usage_key: %r, %r',
+                data_usage_key,
+                error,
+            )
+
+    @classmethod
+    def _delete_files(cls, files):
+        """
+        Deletes each of the given file paths from storage, one at a time.
+        """
+        storage = _bs_model_storage()
+        for file_path in files:
+            storage.delete(file_path)
+
+    @classmethod
+    def _get_all_files(cls, data_usage_key):
+        """
+        Returns all filenames that exist for the given key.
+        """
+        directory = _directory_name(data_usage_key)
+        _, filenames = _bs_model_storage().listdir(directory)
+        return [
+            _create_path(directory, filename)
+            for filename in filenames
+            if filename and not filename.startswith('.')
+        ]
diff --git a/openedx/core/djangoapps/content/block_structure/tests/test_models.py b/openedx/core/djangoapps/content/block_structure/tests/test_models.py
new file mode 100644
index 0000000000..74d41fc2d2
--- /dev/null
+++ b/openedx/core/djangoapps/content/block_structure/tests/test_models.py
@@ -0,0 +1,146 @@
+"""
+Unit tests for Block Structure models.
+"""
+# pylint: disable=protected-access
+import ddt
+from django.test import TestCase
+from django.utils.timezone import now
+from itertools import product
+from mock import patch, Mock
+from uuid import uuid4
+
+from opaque_keys.edx.locator import CourseLocator, BlockUsageLocator
+from openedx.core.lib.block_structure.exceptions import BlockStructureNotFound
+
+from ..config import PRUNE_OLD_VERSIONS
+from ..models import BlockStructureModel
+from .helpers import override_config_setting
+
+
+@ddt.ddt
+class BlockStructureModelTestCase(TestCase):
+    """
+    Tests for BlockStructureModel.
+    """
+    def setUp(self):
+        super(BlockStructureModelTestCase, self).setUp()
+        self.course_key = CourseLocator('org', 'course', unicode(uuid4()))
+        self.usage_key = BlockUsageLocator(course_key=self.course_key, block_type='course', block_id='course')
+
+        self.params = self._create_bsm_params()
+
+    def tearDown(self):
+        with override_config_setting(PRUNE_OLD_VERSIONS, active=True):
+            BlockStructureModel._prune_files(self.usage_key, num_to_keep=0)
+        super(BlockStructureModelTestCase, self).tearDown()
+
+    def _assert_bsm_fields(self, bsm, expected_serialized_data):
+        """
+        Verifies that the field values and serialized data
+        on the given bsm are as expected.
+        """
+        for field_name, field_value in self.params.iteritems():
+            self.assertEqual(field_value, getattr(bsm, field_name))
+
+        self.assertEqual(bsm.get_serialized_data(), expected_serialized_data)
+        self.assertIn(unicode(self.usage_key), bsm.data.name)
+
+    def _assert_file_count_equal(self, expected_count):
+        """
+        Asserts the number of files for self.usage_key
+        is as expected.
+        """
+        self.assertEqual(len(BlockStructureModel._get_all_files(self.usage_key)), expected_count)
+
+    def _create_bsm_params(self):
+        """
+        Returns the parameters for creating a BlockStructureModel.
+        """
+        return dict(
+            data_usage_key=self.usage_key,
+            data_version='DV',
+            data_edit_timestamp=now(),
+            transformers_schema_version='TV',
+            block_structure_schema_version=unicode(1),
+        )
+
+    def _verify_update_or_create_call(self, serialized_data, mock_log=None, expect_created=None):
+        """
+        Calls BlockStructureModel.update_or_create
+        and verifies the response.
+        """
+        bsm, created = BlockStructureModel.update_or_create(serialized_data, **self.params)
+        if mock_log:
+            self.assertEqual("Created" if expect_created else "Updated", mock_log.info.call_args[0][1])
+            self.assertEqual(len(serialized_data), mock_log.info.call_args[0][3])
+        self._assert_bsm_fields(bsm, serialized_data)
+        if expect_created is not None:
+            self.assertEqual(created, expect_created)
+        return bsm
+
+    @patch('openedx.core.djangoapps.content.block_structure.models.log')
+    def test_update_or_create(self, mock_log):
+        serialized_data = 'initial data'
+
+        # shouldn't already exist
+        with self.assertRaises(BlockStructureNotFound):
+            BlockStructureModel.get(self.usage_key)
+        self.assertIn("BlockStructure: Not found in table;", mock_log.info.call_args[0][0])
+
+        # create an entry
+        bsm = self._verify_update_or_create_call(serialized_data, mock_log, expect_created=True)
+
+        # get entry
+        found_bsm = BlockStructureModel.get(self.usage_key)
+        self._assert_bsm_fields(found_bsm, serialized_data)
+        self.assertIn("BlockStructure: Read data from store;", mock_log.info.call_args[0][0])
+
+        # update entry
+        self.params.update(dict(data_version='new version'))
+        updated_serialized_data = 'updated data'
+        updated_bsm = self._verify_update_or_create_call(updated_serialized_data, mock_log, expect_created=False)
+        self.assertNotEqual(bsm.data.name, updated_bsm.data.name)
+
+        # old files not pruned
+        self._assert_file_count_equal(2)
+
+    @override_config_setting(PRUNE_OLD_VERSIONS, active=True)
+    @patch('openedx.core.djangoapps.content.block_structure.config.num_versions_to_keep', Mock(return_value=1))
+    def test_prune_files(self):
+        self._verify_update_or_create_call('test data', expect_created=True)
+        self._verify_update_or_create_call('updated data', expect_created=False)
+        self._assert_file_count_equal(1)
+
+    @override_config_setting(PRUNE_OLD_VERSIONS, active=True)
+    @patch('openedx.core.djangoapps.content.block_structure.config.num_versions_to_keep', Mock(return_value=1))
+    @patch('openedx.core.djangoapps.content.block_structure.models.BlockStructureModel._delete_files')
+    @patch('openedx.core.djangoapps.content.block_structure.models.log')
+    def test_prune_exception(self, mock_log, mock_delete):
+        mock_delete.side_effect = Exception
+        self._verify_update_or_create_call('test data', expect_created=True)
+        self._verify_update_or_create_call('updated data', expect_created=False)
+
+        self.assertIn('BlockStructure: Exception when deleting old files', mock_log.exception.call_args[0][0])
+        self._assert_file_count_equal(2)  # old files not pruned
+
+    @ddt.data(
+        *product(
+            range(1, 3),  # prune_keep_count
+            range(4),  # num_prior_edits
+        )
+    )
+    @ddt.unpack
+    def test_prune_keep_count(self, prune_keep_count, num_prior_edits):
+        with patch(
+            'openedx.core.djangoapps.content.block_structure.config.num_versions_to_keep',
+            return_value=prune_keep_count,
+        ):
+            for _ in range(num_prior_edits):
+                self._verify_update_or_create_call('data')
+
+            if num_prior_edits:
+                self._assert_file_count_equal(num_prior_edits)
+
+            with override_config_setting(PRUNE_OLD_VERSIONS, active=True):
+                self._verify_update_or_create_call('data')
+                self._assert_file_count_equal(min(prune_keep_count, num_prior_edits + 1))