Merge pull request #6627 from edx/jeskew/perf_test_asset_metadata_find_mongo
Add performance test when finding asset metadata.
This commit is contained in:
@@ -8,7 +8,10 @@ various parts of the system.
|
||||
import sqlite3
|
||||
from lxml.builder import E
|
||||
import lxml.html
|
||||
#import click
|
||||
try:
|
||||
import click
|
||||
except ImportError:
|
||||
click = None
|
||||
|
||||
|
||||
DB_NAME = 'block_times.db'
|
||||
@@ -70,105 +73,217 @@ class HTMLDocument(object):
|
||||
return lxml.html.tostring(self.html, pretty_print=pretty_print)
|
||||
|
||||
|
||||
def read_timing_data():
|
||||
class ReportGenerator(object):
    """
    Base class for report generation.

    Loads every row of the ``block_times`` table from a sqlite database and
    stores the result on ``self.all_rows``. Subclasses interpret those rows.
    """
    def __init__(self, db_name):
        """
        Read all timing rows from the sqlite database.

        Arguments:
            db_name: name (path) of the sqlite database file to read.

        Each entry of ``self.all_rows`` is a ``sqlite3.Row`` with the columns
        ``id, run_id, block_desc, elapsed, timestamp``, ordered by descending
        ``run_id``.
        """
        # Read data from all modulestore combos.
        conn = sqlite3.connect(db_name)
        try:
            conn.row_factory = sqlite3.Row
            sel_sql = 'select id, run_id, block_desc, elapsed, timestamp FROM block_times ORDER BY run_id DESC'
            cur = conn.cursor()
            cur.execute(sel_sql)
            # fetchall() materializes the rows, so they can still be read
            # after the connection has been closed.
            self.all_rows = cur.fetchall()
        finally:
            # Always release the database handle, even if the query fails.
            conn.close()
|
||||
|
||||
|
||||
def generate_html(all_ms_combos, run_data):
|
||||
class ImportExportReportGen(ReportGenerator):
    """
    Class which generates report for course import/export performance test data.
    """
    # First component of the block descriptions written by the import/export
    # round-trip test ("XMLRoundTrip:<src>-><dest>:<amount>[:<phase>]").
    DESC_PREFIX = 'XMLRoundTrip'
    # Phases that are recorded in the timing data but left out of the report.
    SKIPPED_PHASES = ('fake_assets',)
    # Cell text used when a value cannot be computed from the available data.
    MISSING = 'n/a'

    def __init__(self, db_name):
        """
        Load the timing rows from ``db_name`` and index them for reporting.
        """
        super(ImportExportReportGen, self).__init__(db_name)
        self._read_timing_data()

    def _read_timing_data(self):
        """
        Read in the timing data from the sqlite DB and save into a dict.

        Populates ``self.all_modulestore_combos`` (set of "src->dest" strings)
        and ``self.run_data``, a multi-level dict:
        { phase1: { amount1: {ms1->ms2: duration, ...}, ...}, ...}.
        """
        self.run_data = {}
        self.all_modulestore_combos = set()

        for row in self.all_rows:
            time_taken = row[3]

            # Split apart the description into its parts.
            desc_parts = row[2].split(':')
            # Ignore rows written by other timing tests (or malformed rows);
            # mixing them in would corrupt the modulestore/amount tables.
            if len(desc_parts) < 3 or desc_parts[0] != self.DESC_PREFIX:
                continue
            modulestores = desc_parts[1]
            amount_md = desc_parts[2]
            test_phase = desc_parts[3] if len(desc_parts) > 3 else 'all'
            self.all_modulestore_combos.add(modulestores)

            # Rows arrive ordered by descending run_id, so setdefault keeps the
            # value from the most recent run and ignores older duplicates.
            phase_data = self.run_data.setdefault(test_phase, {})
            amount_data = phase_data.setdefault(amount_md, {})
            amount_data.setdefault(modulestores, time_taken)

    @classmethod
    def _cell_value(cls, table_type, num_assets, duration, baseline):
        """
        Compute one table cell.

        Arguments:
            table_type: 'duration', 'ratio' or 'variable_cost'.
            num_assets: number of assets for the row, as an int.
            duration: recorded duration for the cell, or None if not recorded.
            baseline: recorded duration of the zero-asset run for the same
                modulestore combo, or None if there was no such run.

        Returns the value to display, or ``cls.MISSING`` when the inputs
        needed for the computation were not recorded.
        """
        if duration is None:
            return cls.MISSING
        if table_type == 'duration':
            return duration
        if num_assets == 0:
            # Per-asset figures are reported as 0 for the zero-asset row.
            return 0
        if table_type == 'ratio':
            return duration / float(num_assets)
        # table_type == 'variable_cost': cost above the zero-asset baseline.
        if baseline is None:
            return cls.MISSING
        return (duration - baseline) / float(num_assets)

    def generate_html(self):
        """
        Generate HTML.

        Returns an HTMLDocument with one level-1 section per phase, each
        holding one table per table type; rows are asset metadata amounts and
        columns are modulestore combos.
        """
        html = HTMLDocument("Results")

        title_map = {
            'duration': 'Total Duration (ms)',
            'ratio': 'Total Duration Per Number of Assets (ms/asset)',
            'variable_cost': 'Asset Export Duration Per Number of Assets (ms/asset)'
        }
        ms_keys = sorted(self.all_modulestore_combos)

        # Output comparison of each phase to a different table.
        for phase in sorted(self.run_data):
            if phase in self.SKIPPED_PHASES:
                continue
            per_phase = self.run_data[phase]
            html.add_header(1, phase)

            # Timings of the zero-asset run, if one was recorded.
            baseline = per_phase.get('0', {})

            for table_type in ('duration', 'ratio', 'variable_cost'):
                if phase == 'all' and table_type in ('ratio', 'variable_cost'):
                    continue

                # Make the table header columns and the table.
                columns = ["Asset Metadata Amount", ]
                for k in ms_keys:
                    columns.append("{} ({})".format(k, table_type))
                phase_table = HTMLTable(columns)

                # Make a row for each amount of asset metadata. The amounts
                # are strings, so sort by numeric value, not lexicographically.
                for amount in sorted(per_phase, key=int):
                    per_amount = per_phase[amount]
                    num_assets = int(amount)
                    row = [amount, ]
                    for modulestore in ms_keys:
                        value = self._cell_value(
                            table_type,
                            num_assets,
                            per_amount.get(modulestore),
                            baseline.get(modulestore),
                        )
                        row.append("{}".format(value))
                    phase_table.add_row(row)

                # Add the table title and the table.
                html.add_header(2, title_map[table_type])
                html.add_to_body(phase_table.table)

        return html
|
||||
|
||||
|
||||
class FindReportGen(ReportGenerator):
    """
    Class which generates report for asset access performance test data.
    """
    # Cell text used when no timing was recorded for a modulestore.
    MISSING = 'n/a'

    def __init__(self, db_name):
        """
        Load the timing rows from ``db_name`` and index them for reporting.
        """
        super(FindReportGen, self).__init__(db_name)
        self._read_timing_data()

    def _read_timing_data(self):
        """
        Read in the timing data from the sqlite DB and save into a dict.

        Populates ``self.all_modulestores`` (set of modulestore names) and
        ``self.run_data``, a multi-level dict:
        { phase1: { [sort1: {] amount1: { modulestore1: duration, ...}, ...}, ...}.
        The extra sort level exists only under the 'get_asset_list' phase.
        """
        self.run_data = {}
        self.all_modulestores = set()

        for row in self.all_rows:
            time_taken = row[3]

            # Split apart the description into its parts.
            desc_parts = row[2].split(':')
            # Only rows of the form "FindAssetTest:<store>:<amount>[...]" are
            # relevant; skip everything else, including truncated descriptions.
            if desc_parts[0] != 'FindAssetTest' or len(desc_parts) < 3:
                continue
            modulestore, amount_md = desc_parts[1:3]
            self.all_modulestores.add(modulestore)
            test_phase = desc_parts[3] if len(desc_parts) >= 4 else 'all'
            sort = desc_parts[4] if len(desc_parts) >= 5 else None

            phase_data = self.run_data.setdefault(test_phase, {})
            if test_phase == 'get_asset_list':
                # Add a level here for the sort.
                phase_data = phase_data.setdefault(sort, {})
            amount_data = phase_data.setdefault(amount_md, {})
            # Rows arrive ordered by descending run_id, so setdefault keeps the
            # value from the most recent run and ignores older duplicates.
            amount_data.setdefault(modulestore, time_taken)

    def _make_table(self, ms_keys, per_amounts, divisor=None):
        """
        Build an HTMLTable with one row per asset metadata amount.

        Arguments:
            ms_keys: modulestore names, in column order.
            per_amounts: dict of { amount: { modulestore: duration } }.
            divisor: if given, each duration is divided by it before display.
        """
        # Make the table header columns and the table.
        columns = ["Asset Metadata Amount", ]
        for k in ms_keys:
            columns.append("Time Taken (ms) ({})".format(k))
        table = HTMLTable(columns)

        # The amounts are strings, so sort by numeric value.
        for amount in sorted(per_amounts, key=int):
            per_amount = per_amounts[amount]
            row = [amount, ]
            for modulestore in ms_keys:
                time_taken = per_amount.get(modulestore)
                if time_taken is None:
                    time_taken = self.MISSING
                elif divisor is not None:
                    time_taken = time_taken / divisor
                row.append("{}".format(time_taken))
            table.add_row(row)
        return table

    def generate_html(self):
        """
        Generate HTML.

        Returns an HTMLDocument with a level-2 section per phase. Every phase
        gets a single table, except 'get_asset_list', which gets a level-3
        section and table per sort.
        """
        html = HTMLDocument("Results")
        ms_keys = sorted(self.all_modulestores)

        # Output comparison of each phase to a different table.
        for phase in sorted(self.run_data):
            per_phase = self.run_data[phase]
            html.add_header(2, phase)

            if phase != 'get_asset_list':
                html.add_to_body(self._make_table(ms_keys, per_phase).table)
                continue

            # get_asset_list phase includes the sort as well.
            # key=str keeps the ordering well-defined if a sort is None.
            for sort in sorted(per_phase, key=str):
                # Each sort has two different ranges retrieved, so halve the
                # recorded time to get the time per retrieval.
                sort_table = self._make_table(ms_keys, per_phase[sort], divisor=2.0)
                html.add_header(3, sort)
                html.add_to_body(sort_table.table)

        return html
|
||||
|
||||
|
||||
# @click.command()
|
||||
# @click.argument('outfile', type=click.File('w'), default='-', required=False)
|
||||
# def cli(outfile):
|
||||
# """
|
||||
# Generate an HTML report from the sqlite timing data.
|
||||
# """
|
||||
# all_ms_combos, run_data = read_timing_data()
|
||||
# html = generate_html(all_ms_combos, run_data)
|
||||
# click.echo(html.tostring(), file=outfile)
|
||||
# The command is only defined when the optional 'click' package was importable.
if click is not None:
    @click.command()
    @click.argument('outfile', type=click.File('w'), default='-', required=False)
    @click.option('--db_name', help='Name of sqlite database from which to read data.', default=DB_NAME)
    @click.option(
        '--data_type',
        help='Data type to process. One of: "imp_exp" or "find"',
        # Reject unknown values up front instead of failing later with an
        # unbound 'html' variable.
        type=click.Choice(['imp_exp', 'find']),
        default="find",
    )
    def cli(outfile, db_name, data_type):
        """
        Generate an HTML report from the sqlite timing data.
        """
        # Map each supported data type to the generator class that reports it.
        generators = {
            'imp_exp': ImportExportReportGen,
            'find': FindReportGen,
        }
        html = generators[data_type](db_name).generate_html()
        click.echo(html.tostring(), file=outfile)
|
||||
|
||||
# if __name__ == '__main__':
|
||||
# cli() # pylint: disable=no-value-for-parameter
|
||||
if __name__ == '__main__':
    if click is not None:
        cli()  # pylint: disable=no-value-for-parameter
    else:
        # 'cli' is only defined when click could be imported, so there is
        # nothing to run. The call form of print with a single string behaves
        # the same on Python 2 and Python 3.
        print("Aborted! Module 'click' is not installed.")
|
||||
|
||||
@@ -10,7 +10,9 @@ from shutil import rmtree
|
||||
import ddt
|
||||
#from nose.plugins.attrib import attr
|
||||
|
||||
from nose.plugins.skip import SkipTest
|
||||
from xmodule.assetstore import AssetMetadata
|
||||
from xmodule.modulestore import ModuleStoreEnum
|
||||
from xmodule.modulestore.xml_importer import import_from_xml
|
||||
from xmodule.modulestore.xml_exporter import export_to_xml
|
||||
from xmodule.modulestore.tests.test_cross_modulestore_import_export import (
|
||||
@@ -23,15 +25,10 @@ from xmodule.modulestore.perf_tests.generate_asset_xml import make_asset_xml, va
|
||||
|
||||
# The dependency below needs to be installed manually from the development.txt file, which doesn't
|
||||
# get installed during unit tests!
|
||||
#from code_block_timer import CodeBlockTimer
|
||||
|
||||
|
||||
try:
    from code_block_timer import CodeBlockTimer
except ImportError:
    # The timer package is optional. Tests in this module check for None and
    # raise SkipTest rather than running without a timer.
    CodeBlockTimer = None
|
||||
|
||||
# Number of assets saved in the modulestore per test run.
|
||||
ASSET_AMOUNT_PER_TEST = (1, 10, 100, 1000, 10000)
|
||||
@@ -42,6 +39,13 @@ COURSE_NAME = 'manual-testing-complete'
|
||||
# A list of courses to test - only one.
|
||||
TEST_COURSE = (COURSE_NAME, )
|
||||
|
||||
ALL_SORTS = (
|
||||
('displayname', ModuleStoreEnum.SortOrder.ascending),
|
||||
('displayname', ModuleStoreEnum.SortOrder.descending),
|
||||
('uploadDate', ModuleStoreEnum.SortOrder.ascending),
|
||||
('uploadDate', ModuleStoreEnum.SortOrder.descending),
|
||||
)
|
||||
|
||||
# pylint: disable=invalid-name
|
||||
TEST_DIR = path(__file__).dirname()
|
||||
PLATFORM_ROOT = TEST_DIR.parent.parent.parent.parent.parent.parent
|
||||
@@ -80,10 +84,13 @@ class CrossStoreXMLRoundtrip(unittest.TestCase):
|
||||
ASSET_AMOUNT_PER_TEST
|
||||
))
|
||||
@ddt.unpack
|
||||
def test_generate_timings(self, source_ms, dest_ms, num_assets):
|
||||
def test_generate_import_export_timings(self, source_ms, dest_ms, num_assets):
|
||||
"""
|
||||
Generate timings for different amounts of asset metadata and different modulestores.
|
||||
"""
|
||||
if CodeBlockTimer is None:
|
||||
raise SkipTest("CodeBlockTimer undefined.")
|
||||
|
||||
desc = "XMLRoundTrip:{}->{}:{}".format(
|
||||
SHORT_NAME_MAP[source_ms],
|
||||
SHORT_NAME_MAP[dest_ms],
|
||||
@@ -140,3 +147,89 @@ class CrossStoreXMLRoundtrip(unittest.TestCase):
|
||||
create_course_if_not_present=True,
|
||||
raise_on_failure=True,
|
||||
)
|
||||
|
||||
|
||||
@ddt.ddt
# Eventually, exclude this attribute from regular unittests while running *only* tests
# with this attribute during regular performance tests.
# @attr("perf_test")
# NOTE: unittest.skip must be called with a reason; used bare on a class it
# replaces the class with a decorator function instead of marking it skipped.
@unittest.skip("Performance test - not run during regular unit test runs.")
class FindAssetTest(unittest.TestCase):
    """
    This class exists to time asset finding in different modulestore
    classes with different amounts of asset metadata.
    """

    # Use this attribute to skip this test on regular unittest CI runs.
    perf_test = True

    def setUp(self):
        """
        Create a temporary export directory that is removed after the test.
        """
        super(FindAssetTest, self).setUp()
        self.export_dir = mkdtemp()
        self.addCleanup(rmtree, self.export_dir, ignore_errors=True)

    @ddt.data(*itertools.product(
        MODULESTORE_SETUPS,
        ASSET_AMOUNT_PER_TEST,
    ))
    @ddt.unpack
    def test_generate_find_timings(self, source_ms, num_assets):
        """
        Generate timings for different amounts of asset metadata and different modulestores.
        """
        if CodeBlockTimer is None:
            raise SkipTest("CodeBlockTimer undefined.")

        desc = "FindAssetTest:{}:{}".format(
            SHORT_NAME_MAP[source_ms],
            num_assets,
        )

        # Start index of the second range of assets fetched for each sort.
        # Floor division keeps this an integer on both Python 2 and 3.
        start_middle = num_assets // 2

        with CodeBlockTimer(desc):

            with CodeBlockTimer("fake_assets"):
                # First, make the fake asset metadata.
                make_asset_xml(num_assets, ASSET_XML_PATH)
                validate_xml(ASSET_XSD_PATH, ASSET_XML_PATH)

            # Construct the contentstore for storing the first import
            with MongoContentstoreBuilder().build() as source_content:
                # Construct the modulestore for storing the first import (using the previously created contentstore)
                with source_ms.build(source_content) as source_store:
                    source_course_key = source_store.make_course_key('a', 'course', 'course')
                    asset_key = source_course_key.make_asset_key(
                        AssetMetadata.GENERAL_ASSET_TYPE, 'silly_cat_picture.gif'
                    )

                    with CodeBlockTimer("initial_import"):
                        import_from_xml(
                            source_store,
                            'test_user',
                            TEST_DATA_ROOT,
                            course_dirs=TEST_COURSE,
                            static_content_store=source_content,
                            target_course_id=source_course_key,
                            create_course_if_not_present=True,
                            raise_on_failure=True,
                        )

                    with CodeBlockTimer("find_nonexistent_asset"):
                        # More correct would be using the AssetManager.find() - but since the test
                        # has created its own test modulestore, the AssetManager can't be used.
                        __ = source_store.find_asset_metadata(asset_key)

                    # Perform get_all_asset_metadata for each sort.
                    for sort in ALL_SORTS:
                        sort_desc = "get_asset_list:{}-{}".format(
                            sort[0],
                            'asc' if sort[1] == ModuleStoreEnum.SortOrder.ascending else 'desc'
                        )
                        with CodeBlockTimer(sort_desc):
                            # Grab two ranges of 50 assets using different sorts.
                            # Why 50? That's how many are displayed on the current Studio "Files & Uploads" page.
                            __ = source_store.get_all_asset_metadata(
                                source_course_key, 'asset', start=0, sort=sort, maxresults=50
                            )
                            __ = source_store.get_all_asset_metadata(
                                source_course_key, 'asset', start=start_middle, sort=sort, maxresults=50
                            )
|
||||
|
||||
Reference in New Issue
Block a user