From a4bbed3bc1abf4dac34ffa16cff50a5263c0c7fb Mon Sep 17 00:00:00 2001 From: John Eskew Date: Thu, 15 Jan 2015 17:20:23 -0500 Subject: [PATCH] Add performance test which finds the BSON size of varying amounts of asset metadata in both the old Mongo and Split modulestores. --- .../perf_tests/generate_asset_xml.py | 69 ++++++++------- .../perf_tests/test_asset_import_export.py | 83 ++++++++++++++++++- .../test_cross_modulestore_import_export.py | 43 ++++++++-- 3 files changed, 153 insertions(+), 42 deletions(-) diff --git a/common/lib/xmodule/xmodule/modulestore/perf_tests/generate_asset_xml.py b/common/lib/xmodule/xmodule/modulestore/perf_tests/generate_asset_xml.py index 854c3c7603..101544b7c2 100644 --- a/common/lib/xmodule/xmodule/modulestore/perf_tests/generate_asset_xml.py +++ b/common/lib/xmodule/xmodule/modulestore/perf_tests/generate_asset_xml.py @@ -6,12 +6,15 @@ Generates fake XML for asset metadata. """ import random -#import click from lxml import etree from datetime import datetime, timedelta from xmodule.assetstore import AssetMetadata from opaque_keys.edx.keys import CourseKey +try: + import click +except ImportError: + click = None # Name of the asset metadata XML schema definition file. 
ASSET_XSD_FILE = 'assets.xsd' @@ -145,7 +148,7 @@ def generate_random_asset_md(): return AssetMetadata( asset_key, pathname=pathname(), - internal_name=filename(), + internal_name=str([filename() for __ in xrange(10)]), locked=locked(), contenttype=contenttype(), thumbnail=filename(), @@ -199,33 +202,37 @@ def validate_xml(xsd_filename, xml_filename): with open(xml_filename, 'r') as f: etree.fromstring(f.read(), xmlparser) +if click is not None: + # pylint: disable=bad-continuation + @click.command() + @click.option('--num_assets', + type=click.INT, + default=10, + help="Number of assets to be generated by the script.", + required=False + ) + @click.option('--output_xml', + type=click.File('w'), + default=AssetMetadata.EXPORTED_ASSET_FILENAME, + help="Filename for the output XML file.", + required=False + ) + @click.option('--input_xsd', + type=click.File('r'), + default=ASSET_XSD_FILE, + help="Filename for the XSD (schema) file to read in.", + required=False + ) + def cli(num_assets, output_xml, input_xsd): + """ + Generates a number of fake asset metadata items as XML - and validates the XML against the schema. + """ + make_asset_xml(num_assets, output_xml) + # Now - validate the XML against the XSD. + validate_xml(input_xsd, output_xml) -# @click.command() -# @click.option('--numAssets', -# type=click.INT, -# default=10, -# help="Number of assets to be generated by the script.", -# required=False -# ) -# @click.option('--outputXml', -# type=click.File('w'), -# default=AssetMetadata.EXPORTED_ASSET_FILENAME, -# help="Filename for the output XML file.", -# required=False -# ) -# @click.option('--inputXsd', -# type=click.File('r'), -# default=ASSET_XSD_FILE, -# help="Filename for the XSD (schema) file to read in.", -# required=False -# ) -# def cli(numAssets, outputXml, inputXsd): -# """ -# Generates a number of fake asset metadata items as XML - and validates the XML against the schema. 
-# """ -# make_asset_xml(numAssets, outputXml) -# # Now - validate the XML against the XSD. -# validate_xml(inputXsd, outputXml) - -# if __name__ == '__main__': -# cli() +if __name__ == '__main__': + if click is not None: + cli() # pylint: disable=no-value-for-parameter + else: + print "Aborted! Module 'click' is not installed." diff --git a/common/lib/xmodule/xmodule/modulestore/perf_tests/test_asset_import_export.py b/common/lib/xmodule/xmodule/modulestore/perf_tests/test_asset_import_export.py index d87b6d07d4..f241e92085 100644 --- a/common/lib/xmodule/xmodule/modulestore/perf_tests/test_asset_import_export.py +++ b/common/lib/xmodule/xmodule/modulestore/perf_tests/test_asset_import_export.py @@ -6,7 +6,8 @@ import unittest from tempfile import mkdtemp import itertools from shutil import rmtree - +from bson.code import Code +import datetime import ddt #from nose.plugins.attrib import attr @@ -31,7 +32,7 @@ except ImportError: CodeBlockTimer = None # Number of assets saved in the modulestore per test run. -ASSET_AMOUNT_PER_TEST = (1, 10, 100, 1000, 10000) +ASSET_AMOUNT_PER_TEST = (0, 1, 10, 100, 1000, 10000) # Use only this course in asset metadata performance testing. COURSE_NAME = 'manual-testing-complete' @@ -160,7 +161,7 @@ class FindAssetTest(unittest.TestCase): classes with different amounts of asset metadata. """ - # Use this attribute to skip this test on regular unittest CI runs. + # Use this attr to skip this test on regular unittest CI runs. perf_test = True def setUp(self): @@ -233,3 +234,79 @@ class FindAssetTest(unittest.TestCase): __ = source_store.get_all_asset_metadata( source_course_key, 'asset', start=start_middle, sort=sort, maxresults=50 ) + + +@ddt.ddt +# Eventually, exclude this attribute from regular unittests while running *only* tests +# with this attribute during regular performance tests. 
+# @attr("perf_test") +@unittest.skip +class TestModulestoreAssetSize(unittest.TestCase): + """ + This class exists to measure the size of asset metadata in different modulestore + classes with different amounts of asset metadata. + """ + + # Use this attribute to skip this test on regular unittest CI runs. + perf_test = True + + test_run_time = datetime.datetime.now() + + @ddt.data(*itertools.product( + MODULESTORE_SETUPS, + ASSET_AMOUNT_PER_TEST + )) + @ddt.unpack + def test_asset_sizes(self, source_ms, num_assets): + """ + Generate timings for different amounts of asset metadata and different modulestores. + """ + # First, make the fake asset metadata. + make_asset_xml(num_assets, ASSET_XML_PATH) + validate_xml(ASSET_XSD_PATH, ASSET_XML_PATH) + + # Construct the contentstore for storing the first import + with MongoContentstoreBuilder().build() as source_content: + # Construct the modulestore for storing the first import (using the previously created contentstore) + with source_ms.build(source_content) as source_store: + source_course_key = source_store.make_course_key('a', 'course', 'course') + + import_from_xml( + source_store, + 'test_user', + TEST_DATA_ROOT, + course_dirs=TEST_COURSE, + static_content_store=source_content, + target_course_id=source_course_key, + create_course_if_not_present=True, + raise_on_failure=True, + ) + + asset_collection = source_ms.asset_collection() + # Ensure the asset collection exists. + if asset_collection.name in asset_collection.database.collection_names(): + + # Map gets the size of each structure. + mapper = Code(""" + function() { emit("size", (this == null) ? 0 : Object.bsonsize(this)) } + """) + + # Reduce finds the largest structure size and returns only it. 
+ reducer = Code(""" + function(key, values) { + var max_size = 0; + for (var i=0; i < values.length; i++) { + if (values[i] > max_size) { + max_size = values[i]; + } + } + return max_size; + } + """) + + results = asset_collection.map_reduce(mapper, reducer, "size_results") + result_str = "{} - Store: {:<15} - Num Assets: {:>6} - Result: {}\n".format( + self.test_run_time, SHORT_NAME_MAP[source_ms], num_assets, [r for r in results.find()] + ) + with open("bson_sizes.txt", "a") as f: + f.write(result_str) diff --git a/common/lib/xmodule/xmodule/modulestore/tests/test_cross_modulestore_import_export.py b/common/lib/xmodule/xmodule/modulestore/tests/test_cross_modulestore_import_export.py index 736b9626f3..358322d2eb 100644 --- a/common/lib/xmodule/xmodule/modulestore/tests/test_cross_modulestore_import_export.py +++ b/common/lib/xmodule/xmodule/modulestore/tests/test_cross_modulestore_import_export.py @@ -100,7 +100,8 @@ class MongoModulestoreBuilder(object): # Set up a temp directory for storing filesystem content created during import fs_root = mkdtemp() - modulestore = DraftModuleStore( + # pylint: disable=attribute-defined-outside-init + self.modulestore = DraftModuleStore( contentstore, doc_store_config, fs_root, @@ -109,13 +110,13 @@ class MongoModulestoreBuilder(object): metadata_inheritance_cache_subsystem=MemoryCache(), xblock_mixins=XBLOCK_MIXINS, ) - modulestore.ensure_indexes() + self.modulestore.ensure_indexes() try: - yield modulestore + yield self.modulestore finally: # Delete the created database - modulestore._drop_database() + self.modulestore._drop_database() # pylint: disable=protected-access # Delete the created directory on the filesystem rmtree(fs_root, ignore_errors=True) @@ -123,6 +124,12 @@ class MongoModulestoreBuilder(object): def __repr__(self): return 'MongoModulestoreBuilder()' + def asset_collection(self): + """ + Returns the collection storing the asset metadata. 
+ """ + return self.modulestore.asset_collection + class VersioningModulestoreBuilder(object): """ @@ -160,7 +167,7 @@ class VersioningModulestoreBuilder(object): yield modulestore finally: # Delete the created database - modulestore._drop_database() + modulestore._drop_database() # pylint: disable=protected-access # Delete the created directory on the filesystem rmtree(fs_root, ignore_errors=True) @@ -206,6 +213,7 @@ class MixedModulestoreBuilder(object): """ self.store_builders = store_builders self.mappings = mappings or {} + self.modulestore = None @contextmanager def build(self, contentstore): @@ -227,7 +235,7 @@ class MixedModulestoreBuilder(object): # Generate a fake list of stores to give the already generated stores appropriate names stores = [{'NAME': name, 'ENGINE': 'This space deliberately left blank'} for name in names] - modulestore = MixedModuleStore( + self.modulestore = MixedModuleStore( contentstore, self.mappings, stores, @@ -235,11 +243,29 @@ class MixedModulestoreBuilder(object): xblock_mixins=XBLOCK_MIXINS, ) - yield modulestore + yield self.modulestore def __repr__(self): return 'MixedModulestoreBuilder({!r}, {!r})'.format(self.store_builders, self.mappings) + def asset_collection(self): + """ + Returns the collection storing the asset metadata. + """ + all_stores = self.modulestore.modulestores + if len(all_stores) > 1: + return None + + store = all_stores[0] + if hasattr(store, 'asset_collection'): + # Mongo modulestore beneath mixed. + # Returns the entire collection with *all* courses' asset metadata. + return store.asset_collection + else: + # Split modulestore beneath mixed. + # Split stores all asset metadata in the structure collection. 
+ return store.db_connection.structures + class MongoContentstoreBuilder(object): """ @@ -276,7 +302,8 @@ MIXED_MODULESTORE_SETUPS = ( MixedModulestoreBuilder([('split', VersioningModulestoreBuilder())]), ) MIXED_MS_SETUPS_SHORT = ( - 'mixed_mongo', 'mixed_split' + 'mixed_mongo', + 'mixed_split', ) DIRECT_MODULESTORE_SETUPS = ( MongoModulestoreBuilder(),