# Name of the asset metadata XML schema definition file.
ASSET_XSD_FILE = 'assets.xsd'

# Characters used in name generation below.
NAME_CHARS = u'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-'
NAME_CHARS_W_UNICODE = NAME_CHARS + u'àĚŘDžΦШΩΣӔ'


def coin_flip():
    """
    Return True or False with equal probability.
    """
    return random.choice((True, False))


def asset_type():
    """
    Pick an asset type at random: "asset" for rolls 1-95, "video" for 96-100.
    """
    # Each entry is (inclusive upper bound of the d100 roll, asset type).
    asset_type_choices = (
        (95, "asset"),
        (100, "video"),
    )
    # Roll 1..100 inclusive. Rolling 0..100 would give 101 outcomes and skew
    # the split to 96/101 vs. 5/101 instead of the 95/5 the table describes.
    d100 = random.randint(1, 100)
    for upper_bound, type_name in asset_type_choices:
        if d100 <= upper_bound:
            return type_name
    return asset_type_choices[-1][1]


def filename():
    """
    Fake a filename.

    Returns a unicode string of 10-30 characters drawn from
    NAME_CHARS_W_UNICODE followed by one of a few fixed extensions.
    """
    stem_length = random.randint(10, 30)
    stem = u''.join(random.choice(NAME_CHARS_W_UNICODE) for __ in range(stem_length))
    return stem + random.choice(('.jpg', '.pdf', '.png', '.txt'))


def pathname():
    """
    Fake a pathname.

    Returns 2-3 segments of 5-10 NAME_CHARS characters, each segment followed
    by a '/' (so the result always ends with '/').
    """
    segments = []
    for __ in range(random.randint(2, 3)):
        segment_length = random.randint(5, 10)
        segments.append(u''.join(random.choice(NAME_CHARS) for __ in range(segment_length)))
    return u''.join(segment + u'/' for segment in segments)


def locked():
    """
    Locked or unlocked, with equal probability.
    """
    return coin_flip()


def fields():
    """
    Generate some fake extra fields.

    Returns a dict that is empty half of the time; otherwise each of the keys
    'copyrighted', 'size' and 'color' is independently present with a 50%
    chance.
    """
    extra = {}
    if coin_flip():
        if coin_flip():
            extra['copyrighted'] = coin_flip()
        if coin_flip():
            extra['size'] = random.randint(100, 10000000)
        if coin_flip():
            extra['color'] = random.choice(('blue', 'pink', 'fuchsia', 'rose', 'mauve', 'black'))
    return extra


def user_id():
    """
    Fake user id in the range 1..100000000.
    """
    return random.randint(1, 100000000)


def versions():
    """
    Fake versions.

    Returns a (current, previous) tuple of strings such as ('v7.0', 'v6.0'),
    where the previous version number is always one less than the current.
    """
    curr_ver = random.randint(1, 500)
    prev_ver = curr_ver - 1

    def ver_str(ver):
        """
        Version string.
        """
        return 'v{}.0'.format(ver)
    return (ver_str(curr_ver), ver_str(prev_ver))


def date_and_time():
    """
    Fake date/time: a naive datetime between now and 15 years (of 365 days) ago.
    """
    start_date = datetime.now()
    time_back = timedelta(seconds=random.randint(0, 473040000))  # 15 year interval
    return start_date - time_back


def contenttype():
    """
    Random MIME type.
    """
    return random.choice((
        'image/jpeg',
        'text/html',
        'audio/aiff',
        'video/avi',
        'text/plain',
        'application/msword',
        'application/x-gzip',
        'application/javascript',
    ))


def generate_random_asset_md():
    """
    Generates a single AssetMetadata object with semi-random data.

    The asset always belongs to the fixed course 'org/course/run'; every other
    attribute comes from the random helpers in this module.
    """
    course_key = CourseKey.from_string('org/course/run')
    asset_key = course_key.make_asset_key(asset_type(), filename())
    (curr_version, prev_version) = versions()
    return AssetMetadata(
        asset_key,
        pathname=pathname(),
        internal_name=filename(),
        locked=locked(),
        contenttype=contenttype(),
        thumbnail=filename(),
        fields=fields(),
        curr_version=curr_version,
        prev_version=prev_version,
        edited_by=user_id(),
        edited_by_email='staff@edx.org',
        edited_on=date_and_time(),
        created_by=user_id(),
        created_by_email='staff@edx.org',
        created_on=date_and_time(),
    )


def make_asset_md(amount):
    """
    Make a number of fake AssetMetadata objects.

    Returns a list containing `amount` items.
    """
    return [generate_random_asset_md() for __ in range(amount)]

# pylint: disable=no-member


def make_asset_xml(amount, xml_filename):
    """
    Make an XML file filled with fake AssetMetadata.

    Writes an <assets> root element containing one <asset> child per generated
    item to the file at `xml_filename`.
    """
    all_md = make_asset_md(amount)
    xml_root = etree.Element("assets")
    for mdata in all_md:
        asset_element = etree.SubElement(xml_root, "asset")
        mdata.to_xml(asset_element)
    # lxml serializes to bytes, so the target file must be opened in binary mode.
    with open(xml_filename, "wb") as xml_file:
        etree.ElementTree(xml_root).write(xml_file)


def validate_xml(xsd_filename, xml_filename):
    """
    Validate a generated XML file against the XSD.

    Parsing with a schema-aware parser raises an lxml error if the document
    does not conform; nothing is returned on success.
    """
    # Read both files as bytes: lxml refuses unicode strings that carry an XML
    # encoding declaration, and bytes let it honour the declared encoding.
    with open(xsd_filename, 'rb') as xsd_file:
        schema_root = etree.XML(xsd_file.read())

    schema = etree.XMLSchema(schema_root)
    xmlparser = etree.XMLParser(schema=schema)

    with open(xml_filename, 'rb') as xml_file:
        etree.fromstring(xml_file.read(), xmlparser)
DB_NAME = 'block_times.db'


class HTMLTable(object):
    """
    Simple wrapper for an HTML table.
    """
    def __init__(self, hdr_columns):
        """Create the table with a single header row built from `hdr_columns`."""
        self.table = E.TABLE()
        col_headers = [E.TH(x) for x in hdr_columns]
        header_row = E.TR(*col_headers)
        self.table.append(header_row)

    def add_row(self, items):
        """Add row to table, one cell per entry of `items`."""
        row_items = [E.TD(x) for x in items]
        self.table.append(E.TR(*row_items))

    def tostring(self):
        """Output table HTML as string."""
        return lxml.html.tostring(self.table)

    @staticmethod
    def style():
        """ Return a hard-coded table style."""
        return E.style("""
            table, th, td {
                border: 1px solid black;
                border-collapse: collapse;
            }
            th, td {
                padding: 5px;
            }"""
        )  # pylint: disable=bad-continuation


class HTMLDocument(object):
    """
    Simple wrapper for an entire HTML document.
    """
    def __init__(self, title):
        """Create an <html> element with a titled, styled <head> and an empty <body>."""
        self.html = E.html(E.head(E.title(title), HTMLTable.style()))
        self.body = E.body()
        self.html.append(self.body)

    def add_header(self, level, text):
        """Add a header (<H{level}>) containing `text` to the document body."""
        func_name = "H{}".format(level)
        self.body.append(getattr(E, func_name)(text))

    def add_to_body(self, elem):
        """Add to document body."""
        self.body.append(elem)

    def tostring(self, pretty_print=False):
        """Output HTML document as string."""
        return lxml.html.tostring(self.html, pretty_print=pretty_print)


def read_timing_data(db_name=None):
    """
    Read in the timing data from the sqlite DB and save into a dict.

    Args:
        db_name: path of the sqlite database to read; defaults to DB_NAME.

    Returns:
        A tuple ``(all_modulestore_combos, run_data)`` where the first item is
        the set of modulestore descriptions seen and the second is a
        multi-level dict ``{phase: {amount: {modulestores: elapsed}}}``.
        Descriptions are ':'-separated; a description without a fourth part is
        filed under the phase 'all'. Rows are read in descending run_id order
        and only the first value seen for each slot is kept.
    """
    if db_name is None:
        db_name = DB_NAME

    sel_sql = 'select id, run_id, block_desc, elapsed, timestamp FROM block_times ORDER BY run_id DESC'

    # Read data from all modulestore combos, making sure the connection is
    # released even if the query fails.
    conn = sqlite3.connect(db_name)
    try:
        conn.row_factory = sqlite3.Row
        cur = conn.cursor()
        cur.execute(sel_sql)
        rows = cur.fetchall()
    finally:
        conn.close()

    run_data = {}
    all_modulestore_combos = set()
    for row in rows:
        time_taken = row['elapsed']

        # Split apart the description into its parts.
        desc_parts = row['block_desc'].split(':')
        modulestores = desc_parts[1]
        all_modulestore_combos.add(modulestores)
        amount_md = desc_parts[2]
        test_phase = desc_parts[3] if len(desc_parts) > 3 else 'all'

        # Save the data in a multi-level dict - { phase1: { amount1: {ms1->ms2: duration, ...}, ...}, ...}.
        phase_data = run_data.setdefault(test_phase, {})
        amount_data = phase_data.setdefault(amount_md, {})
        amount_data.setdefault(modulestores, time_taken)

    return all_modulestore_combos, run_data


def _cell_value(table_type, per_phase, amount, modulestore):
    """
    Compute one table cell, or return None when the needed data is missing.

    `amount` is the string key into `per_phase`; `table_type` is one of
    'duration', 'ratio' or 'variable_cost'.
    """
    duration = per_phase[amount].get(modulestore)
    if duration is None:
        return None
    if table_type == 'duration':
        return duration

    num_assets = int(amount)
    if num_assets == 0:
        # Per-asset figures are meaningless without assets.
        return 0
    if table_type == 'ratio':
        return duration / float(num_assets)

    # 'variable_cost': subtract the zero-asset run to isolate the per-asset cost.
    baseline = per_phase.get('0', {}).get(modulestore)
    if baseline is None:
        return None
    return (duration - baseline) / float(num_assets)


def generate_html(all_ms_combos, run_data):
    """
    Generate HTML.

    Builds an HTMLDocument with one section per test phase (except
    'fake_assets'), each holding up to three tables: total duration, duration
    per asset, and duration per asset after subtracting the zero-asset run.
    The 'all' phase only gets the total duration table. Cells whose data (or
    zero-asset baseline) is missing are rendered as "n/a".
    """
    title_map = {
        'duration': 'Total Duration (ms)',
        'ratio': 'Total Duration Per Number of Assets (ms/asset)',
        'variable_cost': 'Asset Export Duration Per Number of Assets (ms/asset)'
    }
    ms_keys = sorted(all_ms_combos)

    html = HTMLDocument("Results")

    # Output comparison of each phase to a different table. Phases are sorted
    # so that the report layout is stable between runs.
    for phase in sorted(run_data):
        if phase in ('fake_assets',):
            continue
        per_phase = run_data[phase]
        html.add_header(1, phase)

        for table_type in ('duration', 'ratio', 'variable_cost'):
            if phase == 'all' and table_type in ('ratio', 'variable_cost'):
                continue
            # Make the table header columns and the table.
            columns = ["Asset Metadata Amount", ]
            columns.extend("{} ({})".format(k, table_type) for k in ms_keys)
            phase_table = HTMLTable(columns)

            # Make a row for each amount of asset metadata. Amounts are
            # strings, so sort them by numeric value rather than lexically.
            for amount in sorted(per_phase, key=int):
                row = [amount, ]
                for modulestore in ms_keys:
                    value = _cell_value(table_type, per_phase, amount, modulestore)
                    row.append("n/a" if value is None else "{}".format(value))
                phase_table.add_row(row)

            # Add the table title and the table.
            html.add_header(2, title_map[table_type])
            html.add_to_body(phase_table.table)

    return html
class CodeBlockTimer(object):
    """
    To fake out the tests below, this class definition is used. Remove it when
    uncommenting the real ``code_block_timer`` import above.

    The tests use the timer as a context manager (``with CodeBlockTimer(desc):``),
    so this stand-in implements the context manager protocol as a no-op.
    """
    def __init__(self, desc):
        pass

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Returning False lets any exception raised in the block propagate.
        return False
@ddt.ddt
# Eventually, exclude this attribute from regular unittests while running *only* tests
# with this attribute during regular performance tests.
# @attr("perf_test")
# NOTE: unittest.skip must be *called* with a reason. Used bare on a class, the
# class itself is taken as the reason and the name ends up bound to the inner
# decorator function instead of a skipped TestCase.
@unittest.skip("Asset metadata performance test - not run as part of the regular unit tests.")
class CrossStoreXMLRoundtrip(unittest.TestCase):
    """
    This class exists to time XML import and export between different modulestore
    classes with different amount of asset metadata.
    """

    # Use this attribute to skip this test on regular unittest CI runs.
    perf_test = True

    def setUp(self):
        """Create a temporary export directory that is removed after the test."""
        super(CrossStoreXMLRoundtrip, self).setUp()
        self.export_dir = mkdtemp()
        self.addCleanup(rmtree, self.export_dir, ignore_errors=True)

    @ddt.data(*itertools.product(
        MODULESTORE_SETUPS,
        MODULESTORE_SETUPS,
        ASSET_AMOUNT_PER_TEST
    ))
    @ddt.unpack
    def test_generate_timings(self, source_ms, dest_ms, num_assets):
        """
        Generate timings for different amounts of asset metadata and different modulestores.

        For one (source modulestore, destination modulestore, asset count)
        combination: generate and validate the fake asset XML, import the test
        course into the source store, export it, and import the export into
        the destination store - each step inside its own CodeBlockTimer.
        """
        desc = "XMLRoundTrip:{}->{}:{}".format(
            SHORT_NAME_MAP[source_ms],
            SHORT_NAME_MAP[dest_ms],
            num_assets
        )

        with CodeBlockTimer(desc):

            with CodeBlockTimer("fake_assets"):
                # First, make the fake asset metadata.
                make_asset_xml(num_assets, ASSET_XML_PATH)
                validate_xml(ASSET_XSD_PATH, ASSET_XML_PATH)

            # Construct the contentstore for storing the first import
            with MongoContentstoreBuilder().build() as source_content:
                # Construct the modulestore for storing the first import (using the previously created contentstore)
                with source_ms.build(source_content) as source_store:
                    # Construct the contentstore for storing the second import
                    with MongoContentstoreBuilder().build() as dest_content:
                        # Construct the modulestore for storing the second import (using the second contentstore)
                        with dest_ms.build(dest_content) as dest_store:
                            source_course_key = source_store.make_course_key('a', 'course', 'course')
                            dest_course_key = dest_store.make_course_key('a', 'course', 'course')

                            with CodeBlockTimer("initial_import"):
                                import_from_xml(
                                    source_store,
                                    'test_user',
                                    TEST_DATA_ROOT,
                                    course_dirs=TEST_COURSE,
                                    static_content_store=source_content,
                                    target_course_id=source_course_key,
                                    create_course_if_not_present=True,
                                    raise_on_failure=True,
                                )

                            with CodeBlockTimer("export"):
                                export_to_xml(
                                    source_store,
                                    source_content,
                                    source_course_key,
                                    self.export_dir,
                                    'exported_source_course',
                                )

                            with CodeBlockTimer("second_import"):
                                import_from_xml(
                                    dest_store,
                                    'test_user',
                                    self.export_dir,
                                    course_dirs=['exported_source_course'],
                                    static_content_store=dest_content,
                                    target_course_id=dest_course_key,
                                    create_course_if_not_present=True,
                                    raise_on_failure=True,
                                )
MixedModulestoreBuilder([('draft', MongoModulestoreBuilder())]), MixedModulestoreBuilder([('split', VersioningModulestoreBuilder())]), ) +MIXED_MS_SETUPS_SHORT = ( + 'mixed_mongo', 'mixed_split' +) DIRECT_MODULESTORE_SETUPS = ( MongoModulestoreBuilder(), # VersioningModulestoreBuilder(), # FUTUREDO: LMS-11227 ) +DIRECT_MS_SETUPS_SHORT = ( + 'mongo', + #'split', +) MODULESTORE_SETUPS = DIRECT_MODULESTORE_SETUPS + MIXED_MODULESTORE_SETUPS +MODULESTORE_SHORTNAMES = DIRECT_MS_SETUPS_SHORT + MIXED_MS_SETUPS_SHORT +SHORT_NAME_MAP = dict(zip(MODULESTORE_SETUPS, MODULESTORE_SHORTNAMES)) CONTENTSTORE_SETUPS = (MongoContentstoreBuilder(),) COURSE_DATA_NAMES = ( @@ -312,7 +321,6 @@ class CrossStoreXMLRoundtrip(CourseComparisonTest, PartitionTestCase): )) @ddt.unpack def test_round_trip(self, source_builder, dest_builder, source_content_builder, dest_content_builder, course_data_name): - # Construct the contentstore for storing the first import with source_content_builder.build() as source_content: # Construct the modulestore for storing the first import (using the previously created contentstore) @@ -354,14 +362,14 @@ class CrossStoreXMLRoundtrip(CourseComparisonTest, PartitionTestCase): raise_on_failure=True, ) -# NOT CURRENTLY USED -# export_to_xml( -# dest_store, -# dest_content, -# dest_course_key, -# self.export_dir, -# 'exported_dest_course', -# ) + # NOT CURRENTLY USED + # export_to_xml( + # dest_store, + # dest_content, + # dest_course_key, + # self.export_dir, + # 'exported_dest_course', + # ) self.exclude_field(None, 'wiki_slug') self.exclude_field(None, 'xml_attributes') diff --git a/requirements/edx/development.txt b/requirements/edx/development.txt new file mode 100644 index 0000000000..e7d763d7b6 --- /dev/null +++ b/requirements/edx/development.txt @@ -0,0 +1,10 @@ +# +# Dependencies that are used in development only - and are *NOT* needed to be installed in staging/production. 
+# + +# Python libraries to install directly from github / PyPi + +click==3.3 + +# Third-party: +-e git+https://github.com/doctoryes/code_block_timer.git@f3d0629f086bcc649c3c77f4bc5b9c2c8172c3bf#egg=code_block_timer diff --git a/requirements/edx/github.txt b/requirements/edx/github.txt index 53e07030c4..607517c332 100644 --- a/requirements/edx/github.txt +++ b/requirements/edx/github.txt @@ -2,7 +2,7 @@ # # If you open a pull request that adds a new dependency, you should notify: # * @mollydb - to check licensing -# * One of @e0d, @jarv, or @feanil - to check system requirements +# * One of @e0d or @feanil - to check system requirements # Python libraries to install directly from github