# Placeholder rendered in a table cell when no measurement exists for it.
_MISSING_VALUE = 'n/a'


def _amount_sort_key(amount):
    """
    Sort key for asset-amount labels.

    Numeric labels sort in numeric order (so '2' comes before '10'); any
    non-numeric labels sort alphabetically after all the numeric ones.
    """
    try:
        return (0, int(amount), '')
    except (TypeError, ValueError):
        return (1, 0, str(amount))


def _format_cell(value):
    """
    Render one table cell, substituting a placeholder for a missing value.
    """
    return _MISSING_VALUE if value is None else "{}".format(value)


def _append_table(html, columns, rows):
    """
    Build an HTMLTable with the given header columns and rows and add it to the document body.
    """
    table = HTMLTable(columns)
    for row in rows:
        table.add_row(row)
    html.add_to_body(table.table)


class ReportGenerator(object):
    """
    Base class for report generation.

    Reads every row of the ``block_times`` table into ``self.all_rows``.
    Subclasses convert those rows into report data and implement ``generate_html``.
    """
    def __init__(self, db_name):
        """
        Read all timing rows from the sqlite DB.

        Arguments:
            db_name: name of the sqlite database file to read from.
        """
        sel_sql = 'select id, run_id, block_desc, elapsed, timestamp FROM block_times ORDER BY run_id DESC'
        conn = sqlite3.connect(db_name)
        try:
            conn.row_factory = sqlite3.Row
            cur = conn.cursor()
            cur.execute(sel_sql)
            # Columns: (id, run_id, block_desc, elapsed, timestamp), highest run_id first.
            self.all_rows = cur.fetchall()
        finally:
            # All rows are already fetched, so release the connection even if the query failed.
            conn.close()

    def generate_html(self):
        """
        Generate the HTML report - implemented by subclasses.
        """
        raise NotImplementedError


class ImportExportReportGen(ReportGenerator):
    """
    Class which generates report for course import/export performance test data.
    """
    # First part of every block description written by the import/export timing test.
    TEST_NAME = 'XMLRoundTrip'

    # Title of each table type, in the order the tables are output.
    TABLE_TYPES = ('duration', 'ratio', 'variable_cost')
    TITLE_MAP = {
        'duration': 'Total Duration (ms)',
        'ratio': 'Total Duration Per Number of Assets (ms/asset)',
        'variable_cost': 'Asset Export Duration Per Number of Assets (ms/asset)'
    }

    def __init__(self, db_name):
        super(ImportExportReportGen, self).__init__(db_name)
        self._read_timing_data()

    def _read_timing_data(self):
        """
        Convert the timing rows read from the sqlite DB into a multi-level dict.

        Sets:
            self.all_modulestore_combos: set of all "source->dest" modulestore combos seen.
            self.run_data: { phase1: { amount1: {ms1->ms2: duration, ...}, ...}, ...}
        """
        self.run_data = {}
        self.all_modulestore_combos = set()
        for row in self.all_rows:
            time_taken = row[3]

            # Split apart the description into its parts.
            desc_parts = row[2].split(':')
            if len(desc_parts) < 3 or desc_parts[0] != self.TEST_NAME:
                # Not import/export data (the DB may also hold other tests' timings) - skip it.
                continue
            modulestores, amount_md = desc_parts[1:3]
            self.all_modulestore_combos.add(modulestores)
            test_phase = desc_parts[3] if len(desc_parts) > 3 else 'all'

            # Rows arrive with the highest run_id first and setdefault keeps the
            # first value seen, so only that run's duration is recorded.
            phase_data = self.run_data.setdefault(test_phase, {})
            amount_data = phase_data.setdefault(amount_md, {})
            amount_data.setdefault(modulestores, time_taken)

    def _cell_value(self, table_type, per_phase, amount, modulestore):
        """
        Compute a single table value, or None when the needed timing data is missing.

        Arguments:
            table_type: 'duration', 'ratio', or 'variable_cost'.
            per_phase: { amount: { modulestore_combo: duration } } for one phase.
            amount: the asset amount (a string key of per_phase).
            modulestore: the modulestore combo (a key of per_phase[amount]).
        """
        duration = per_phase[amount].get(modulestore)
        if duration is None:
            return None
        if table_type == 'duration':
            return duration

        num_assets = int(amount)
        if table_type == 'ratio':
            return duration / float(num_assets) if num_assets != 0 else 0
        if table_type == 'variable_cost':
            if num_assets == 0:
                return 0
            # The zero-asset run is the fixed cost; without it no variable cost can be computed.
            baseline = per_phase.get('0', {}).get(modulestore)
            if baseline is None:
                return None
            return (duration - baseline) / float(num_assets)
        raise ValueError("Unknown table type: {}".format(table_type))

    def _table_rows(self, per_phase, ms_keys, table_type):
        """
        Return the table rows (lists of strings) for one phase and table type.

        There is one row per asset amount, in numeric order of amount. Each row
        holds the amount followed by one value per modulestore combo in ms_keys.
        """
        rows = []
        for amount in sorted(per_phase, key=_amount_sort_key):
            row = [amount, ]
            for modulestore in ms_keys:
                row.append(_format_cell(self._cell_value(table_type, per_phase, amount, modulestore)))
            rows.append(row)
        return rows

    def generate_html(self):
        """
        Generate HTML.

        Returns an HTMLDocument with a section per test phase, each containing
        one table per applicable table type.
        """
        html = HTMLDocument("Results")
        ms_keys = sorted(self.all_modulestore_combos)

        # Output comparison of each phase to a different table.
        for phase in sorted(self.run_data):
            if phase in ('fake_assets',):
                continue
            per_phase = self.run_data[phase]
            html.add_header(1, phase)

            for table_type in self.TABLE_TYPES:
                if phase == 'all' and table_type in ('ratio', 'variable_cost'):
                    continue
                # Make the table header columns.
                columns = ["Asset Metadata Amount", ]
                columns.extend("{} ({})".format(k, table_type) for k in ms_keys)

                # Add the table title and the table.
                html.add_header(2, self.TITLE_MAP[table_type])
                _append_table(html, columns, self._table_rows(per_phase, ms_keys, table_type))

        return html


class FindReportGen(ReportGenerator):
    """
    Class which generates report for asset access performance test data.
    """
    # First part of every block description written by the asset find timing test.
    TEST_NAME = 'FindAssetTest'

    # Phase whose timings are additionally broken down by sort.
    SORTED_PHASE = 'get_asset_list'

    def __init__(self, db_name):
        super(FindReportGen, self).__init__(db_name)
        self._read_timing_data()

    def _read_timing_data(self):
        """
        Convert the timing rows read from the sqlite DB into a multi-level dict.

        Sets:
            self.all_modulestores: set of all modulestores seen.
            self.run_data:
                { phase1: { [sort1: {] amount1: { modulestore1: duration, ...}, ...}, ...}
                where the sort level only exists for the 'get_asset_list' phase.
        """
        self.run_data = {}
        self.all_modulestores = set()
        for row in self.all_rows:
            time_taken = row[3]

            # Split apart the description into its parts.
            desc_parts = row[2].split(':')
            if len(desc_parts) < 3 or desc_parts[0] != self.TEST_NAME:
                continue
            modulestore, amount_md = desc_parts[1:3]
            self.all_modulestores.add(modulestore)
            test_phase = desc_parts[3] if len(desc_parts) >= 4 else 'all'
            sort = desc_parts[4] if len(desc_parts) >= 5 else None

            phase_data = self.run_data.setdefault(test_phase, {})
            if test_phase == self.SORTED_PHASE:
                # Add a level here for the sort.
                phase_data = phase_data.setdefault(sort, {})
            # Rows arrive with the highest run_id first and setdefault keeps the
            # first value seen, so only that run's duration is recorded.
            amount_data = phase_data.setdefault(amount_md, {})
            amount_data.setdefault(modulestore, time_taken)

    def _table_rows(self, per_amount_map, ms_keys, num_ranges=None):
        """
        Return the table rows (lists of strings) for one table.

        Arguments:
            per_amount_map: { amount: { modulestore: duration } }
            ms_keys: modulestores to output, in column order.
            num_ranges: when set, each duration is divided by this number to
                give the time taken per retrieved range.
        """
        rows = []
        for amount in sorted(per_amount_map, key=_amount_sort_key):
            per_amount = per_amount_map[amount]
            row = [amount, ]
            for modulestore in ms_keys:
                time_taken = per_amount.get(modulestore)
                if time_taken is not None and num_ranges:
                    time_taken = time_taken / float(num_ranges)
                row.append(_format_cell(time_taken))
            rows.append(row)
        return rows

    def generate_html(self):
        """
        Generate HTML.

        Returns an HTMLDocument with one table per phase, except for the
        'get_asset_list' phase, which gets one table per sort.
        """
        html = HTMLDocument("Results")

        # Make the table header columns - identical for every table.
        ms_keys = sorted(self.all_modulestores)
        columns = ["Asset Metadata Amount", ]
        columns.extend("Time Taken (ms) ({})".format(k) for k in ms_keys)

        for phase in sorted(self.run_data):
            per_phase = self.run_data[phase]
            html.add_header(2, phase)
            if phase != self.SORTED_PHASE:
                _append_table(html, columns, self._table_rows(per_phase, ms_keys))
                continue

            # get_asset_list phase includes the sort as well.
            for sort in sorted(per_phase, key=str):
                html.add_header(3, sort)
                # Each sort has two different ranges retrieved.
                _append_table(html, columns, self._table_rows(per_phase[sort], ms_keys, num_ranges=2))

        return html
if click is not None:
    # The command is only defined when the optional 'click' dependency imported successfully.
    # click.Choice rejects an unknown --data_type with a usage error, instead of
    # the command failing later on a report that was never generated.
    @click.command()
    @click.argument('outfile', type=click.File('w'), default='-', required=False)
    @click.option('--db_name', help='Name of sqlite database from which to read data.', default=DB_NAME)
    @click.option(
        '--data_type',
        type=click.Choice(['imp_exp', 'find']),
        help='Data type to process. One of: "imp_exp" or "find"',
        default="find",
    )
    def cli(outfile, db_name, data_type):
        """
        Generate an HTML report from the sqlite timing data.
        """
        # Map each supported data type to the report generator which handles it.
        generator_classes = {
            'imp_exp': ImportExportReportGen,
            'find': FindReportGen,
        }
        report_gen = generator_classes[data_type](db_name)
        html = report_gen.generate_html()
        click.echo(html.tostring(), file=outfile)


if __name__ == '__main__':
    if click is None:
        # SystemExit with a message writes it to stderr and exits with a non-zero status.
        raise SystemExit("Aborted! Module 'click' is not installed.")
    cli()  # pylint: disable=no-value-for-parameter
@ddt.ddt
# Eventually, exclude this attribute from regular unittests while running *only* tests
# with this attribute during regular performance tests.
# @attr("perf_test")
# unittest.skip must be called with a reason: used bare on a class, the class itself
# is taken as the reason and the class name ends up bound to a decorator function.
@unittest.skip("Performance test - not run as part of the regular unit tests.")
class FindAssetTest(unittest.TestCase):
    """
    This class exists to time asset finding in different modulestore
    classes with different amounts of asset metadata.
    """

    # Use this attribute to skip this test on regular unittest CI runs.
    perf_test = True

    # Number of assets requested by each get_all_asset_metadata call.
    # Why 50? That's how many are displayed on the current Studio "Files & Uploads" page.
    ASSETS_PER_PAGE = 50

    def setUp(self):
        """
        Create a temporary export directory which is removed again after the test.
        """
        super(FindAssetTest, self).setUp()
        self.export_dir = mkdtemp()
        self.addCleanup(rmtree, self.export_dir, ignore_errors=True)

    @ddt.data(*itertools.product(
        MODULESTORE_SETUPS,
        ASSET_AMOUNT_PER_TEST,
    ))
    @ddt.unpack
    def test_generate_find_timings(self, source_ms, num_assets):
        """
        Generate timings for different amounts of asset metadata and different modulestores.

        Arguments:
            source_ms: builder of the modulestore to time (one of MODULESTORE_SETUPS).
            num_assets: number of fake assets to generate (one of ASSET_AMOUNT_PER_TEST).
        """
        if CodeBlockTimer is None:
            raise SkipTest("CodeBlockTimer undefined.")

        desc = "FindAssetTest:{}:{}".format(
            SHORT_NAME_MAP[source_ms],
            num_assets,
        )

        with CodeBlockTimer(desc):

            with CodeBlockTimer("fake_assets"):
                # First, make the fake asset metadata.
                make_asset_xml(num_assets, ASSET_XML_PATH)
                validate_xml(ASSET_XSD_PATH, ASSET_XML_PATH)

            # Construct the contentstore for storing the first import
            with MongoContentstoreBuilder().build() as source_content:
                # Construct the modulestore for storing the first import (using the previously created contentstore)
                with source_ms.build(source_content) as source_store:
                    source_course_key = source_store.make_course_key('a', 'course', 'course')
                    asset_key = source_course_key.make_asset_key(
                        AssetMetadata.GENERAL_ASSET_TYPE, 'silly_cat_picture.gif'
                    )

                    with CodeBlockTimer("initial_import"):
                        import_from_xml(
                            source_store,
                            'test_user',
                            TEST_DATA_ROOT,
                            course_dirs=TEST_COURSE,
                            static_content_store=source_content,
                            target_course_id=source_course_key,
                            create_course_if_not_present=True,
                            raise_on_failure=True,
                        )

                    with CodeBlockTimer("find_nonexistent_asset"):
                        # More correct would be using the AssetManager.find() - but since the test
                        # has created its own test modulestore, the AssetManager can't be used.
                        __ = source_store.find_asset_metadata(asset_key)

                    # Integer division keeps the start index an int under true division as well.
                    start_middle = num_assets // 2

                    # Perform get_all_asset_metadata for each sort.
                    for sort in ALL_SORTS:
                        sort_direction = 'asc' if sort[1] == ModuleStoreEnum.SortOrder.ascending else 'desc'
                        with CodeBlockTimer("get_asset_list:{}-{}".format(sort[0], sort_direction)):
                            # Grab two ranges of assets using the same sort - one from the
                            # start of the list and one from the middle.
                            for start in (0, start_middle):
                                __ = source_store.get_all_asset_metadata(
                                    source_course_key, 'asset', start=start, sort=sort,
                                    maxresults=self.ASSETS_PER_PAGE
                                )