diff --git a/common/lib/xmodule/xmodule/assetstore/__init__.py b/common/lib/xmodule/xmodule/assetstore/__init__.py index 63b98c7df3..48e3ff935e 100644 --- a/common/lib/xmodule/xmodule/assetstore/__init__.py +++ b/common/lib/xmodule/xmodule/assetstore/__init__.py @@ -5,6 +5,7 @@ Classes representing asset & asset thumbnail metadata. from datetime import datetime import pytz from contracts import contract, new_contract +from bisect import bisect_left, bisect_right from opaque_keys.edx.keys import CourseKey, AssetKey new_contract('AssetKey', AssetKey) @@ -33,8 +34,8 @@ class AssetMetadata(object): # All AssetMetadata objects should have AssetLocators with this type. ASSET_TYPE = 'asset' - @contract(asset_id='AssetKey', basename='basestring | None', internal_name='str | None', locked='bool | None', contenttype='basestring | None', - md5='str | None', curr_version='str | None', prev_version='str | None', edited_by='int | None', edited_on='datetime | None') + @contract(asset_id='AssetKey', basename='basestring|None', internal_name='basestring|None', locked='bool|None', contenttype='basestring|None', + md5='basestring|None', curr_version='basestring|None', prev_version='basestring|None', edited_by='int|None', edited_on='datetime|None') def __init__(self, asset_id, basename=None, internal_name=None, locked=None, contenttype=None, md5=None, @@ -99,12 +100,10 @@ class AssetMetadata(object): 'locked': self.locked, 'contenttype': self.contenttype, 'md5': self.md5, - 'edit_info': { - 'curr_version': self.curr_version, - 'prev_version': self.prev_version, - 'edited_by': self.edited_by, - 'edited_on': self.edited_on - } + 'curr_version': self.curr_version, + 'prev_version': self.prev_version, + 'edited_by': self.edited_by, + 'edited_on': self.edited_on } @contract(asset_doc='dict | None') @@ -121,11 +120,10 @@ class AssetMetadata(object): self.locked = asset_doc['locked'] self.contenttype = asset_doc['contenttype'] self.md5 = asset_doc['md5'] - edit_info = asset_doc['edit_info'] 
- self.curr_version = edit_info['curr_version'] - self.prev_version = edit_info['prev_version'] - self.edited_by = edit_info['edited_by'] - self.edited_on = edit_info['edited_on'] + self.curr_version = asset_doc['curr_version'] + self.prev_version = asset_doc['prev_version'] + self.edited_by = asset_doc['edited_by'] + self.edited_on = asset_doc['edited_on'] class AssetThumbnailMetadata(object): diff --git a/common/lib/xmodule/xmodule/modulestore/__init__.py b/common/lib/xmodule/xmodule/modulestore/__init__.py index a436bd34c0..64d5c10c01 100644 --- a/common/lib/xmodule/xmodule/modulestore/__init__.py +++ b/common/lib/xmodule/xmodule/modulestore/__init__.py @@ -14,6 +14,8 @@ import collections from contextlib import contextmanager import functools import threading +from operator import itemgetter +from sortedcontainers import SortedListWithKey from abc import ABCMeta, abstractmethod from contracts import contract, new_contract @@ -292,19 +294,23 @@ class ModuleStoreAssetInterface(object): if course_assets is None: return None, None - if get_thumbnail: - all_assets = course_assets['thumbnails'] - else: - all_assets = course_assets['assets'] + info = 'thumbnails' if get_thumbnail else 'assets' + all_assets = SortedListWithKey([], key=itemgetter('filename')) + # Assets should be pre-sorted, so add them efficiently without sorting. + # extend() will raise a ValueError if the passed-in list is not sorted. + all_assets.extend(course_assets.get(info, [])) # See if this asset already exists by checking the external_filename. # Studio doesn't currently support using multiple course assets with the same filename. # So use the filename as the unique identifier. - for idx, asset in enumerate(all_assets): - if asset['filename'] == filename: - return course_assets, idx + idx = None + idx_left = all_assets.bisect_left({'filename': filename}) + idx_right = all_assets.bisect_right({'filename': filename}) + if idx_left != idx_right: + # Asset was found in the list. 
+ idx = idx_left - return course_assets, None + return course_assets, idx @contract(asset_key='AssetKey') def _find_asset_info(self, asset_key, thumbnail=False, **kwargs): @@ -358,14 +364,14 @@ class ModuleStoreAssetInterface(object): """ return self._find_asset_info(asset_key, thumbnail=True, **kwargs) - @contract(course_key='CourseKey', start='int | None', maxresults='int | None', sort='list | None', get_thumbnails='bool') + @contract(course_key='CourseKey', start='int|None', maxresults='int|None', sort='tuple(str,str)|None', get_thumbnails='bool') def _get_all_asset_metadata(self, course_key, start=0, maxresults=-1, sort=None, get_thumbnails=False, **kwargs): """ Returns a list of static asset (or thumbnail) metadata for a course. Args: course_key (CourseKey): course identifier - start (int): optional - start at this asset number + start (int): optional - start at this asset number. Zero-based! maxresults (int): optional - return at most this many, -1 means no limit sort (array): optional - None means no sort (sort_by (str), sort_order (str)) @@ -382,35 +388,60 @@ class ModuleStoreAssetInterface(object): # to distinguish zero assets from "not able to retrieve assets". return None - if get_thumbnails: - all_assets = course_assets.get('thumbnails', []) - else: - all_assets = course_assets.get('assets', []) + # Determine the proper sort - with defaults of ('displayname', 'ascending'). 
+ sort_field = 'filename' + sort_order = 'ascending' + if sort: + if sort[0] == 'uploadDate': + sort_field = 'edited_on' + if sort[1] == 'descending': + sort_order = 'descending' - # DO_NEXT: Add start/maxresults/sort functionality as part of https://openedx.atlassian.net/browse/PLAT-74 - if start and maxresults and sort: - pass + info = 'thumbnails' if get_thumbnails else 'assets' + all_assets = SortedListWithKey(course_assets.get(info, []), key=itemgetter(sort_field)) + num_assets = len(all_assets) + + start_idx = start + end_idx = min(num_assets, start + maxresults) + if maxresults < 0: + # No limit on the results. + end_idx = num_assets + + step_incr = 1 + if sort_order == 'descending': + # Flip the indices and iterate backwards. + step_incr = -1 + start_idx = (num_assets - 1) - start_idx + end_idx = (num_assets - 1) - end_idx ret_assets = [] - for asset in all_assets: + for idx in range(start_idx, end_idx, step_incr): + asset = all_assets[idx] if get_thumbnails: thumb = AssetThumbnailMetadata( course_key.make_asset_key('thumbnail', asset['filename']), - internal_name=asset['filename'], **kwargs + internal_name=asset['filename'], + **kwargs ) ret_assets.append(thumb) else: - asset = AssetMetadata( + new_asset = AssetMetadata( course_key.make_asset_key('asset', asset['filename']), basename=asset['filename'], - edited_on=asset['edit_info']['edited_on'], + internal_name=asset['internal_name'], + locked=asset['locked'], contenttype=asset['contenttype'], - md5=str(asset['md5']), **kwargs + md5=asset['md5'], + curr_version=asset['curr_version'], + prev_version=asset['prev_version'], + edited_on=asset['edited_on'], + edited_by=asset['edited_by'], + **kwargs ) - ret_assets.append(asset) + ret_assets.append(new_asset) return ret_assets - @contract(course_key='CourseKey', start='int | None', maxresults='int | None', sort='list | None') + @contract(course_key='CourseKey', start='int|None', maxresults='int|None', sort='tuple(str,str)|None') def get_all_asset_metadata(self, 
course_key, start=0, maxresults=-1, sort=None, **kwargs): """ Returns a list of static assets for a course. diff --git a/common/lib/xmodule/xmodule/modulestore/mixed.py b/common/lib/xmodule/xmodule/modulestore/mixed.py index 652d09a52c..c7c71fc8fb 100644 --- a/common/lib/xmodule/xmodule/modulestore/mixed.py +++ b/common/lib/xmodule/xmodule/modulestore/mixed.py @@ -370,7 +370,7 @@ class MixedModuleStore(ModuleStoreDraftAndPublished, ModuleStoreWriteBase): return store.find_asset_thumbnail_metadata(asset_key, **kwargs) @strip_key - @contract(course_key='CourseKey', start=int, maxresults=int, sort='list | None') + @contract(course_key='CourseKey', start=int, maxresults=int, sort='tuple|None') def get_all_asset_metadata(self, course_key, start=0, maxresults=-1, sort=None, **kwargs): """ Returns a list of static assets for a course. diff --git a/common/lib/xmodule/xmodule/modulestore/mongo/base.py b/common/lib/xmodule/xmodule/modulestore/mongo/base.py index 6adf74b150..02b33e3fd3 100644 --- a/common/lib/xmodule/xmodule/modulestore/mongo/base.py +++ b/common/lib/xmodule/xmodule/modulestore/mongo/base.py @@ -25,6 +25,8 @@ from path import path from datetime import datetime from pytz import UTC from contracts import contract, new_contract +from operator import itemgetter +from sortedcontainers import SortedListWithKey from importlib import import_module from xmodule.errortracker import null_error_tracker, exc_info_to_str @@ -1493,7 +1495,10 @@ class MongoModuleStore(ModuleStoreDraftAndPublished, ModuleStoreWriteBase, Mongo course_assets, asset_idx = self._find_course_asset(course_key, asset_metadata.asset_id.path, thumbnail) info = 'thumbnails' if thumbnail else 'assets' - all_assets = course_assets[info] + all_assets = SortedListWithKey([], key=itemgetter('filename')) + # Assets should be pre-sorted, so add them efficiently without sorting. + # extend() will raise a ValueError if the passed-in list is not sorted. 
+ all_assets.extend(course_assets[info]) # Set the edited information for assets only - not thumbnails. if not thumbnail: @@ -1502,15 +1507,15 @@ class MongoModuleStore(ModuleStoreDraftAndPublished, ModuleStoreWriteBase, Mongo # Translate metadata to Mongo format. metadata_to_insert = asset_metadata.to_mongo() if asset_idx is None: - # Append new metadata. - # Future optimization: Insert in order & binary search to retrieve. - all_assets.append(metadata_to_insert) + # Add new metadata sorted into the list. + all_assets.add(metadata_to_insert) else: # Replace existing metadata. - all_assets[asset_idx] = metadata_to_insert + all_assets.pop(asset_idx) + all_assets.insert(asset_idx, metadata_to_insert) # Update the document. - self.asset_collection.update({'_id': course_assets['_id']}, {'$set': {info: all_assets}}) + self.asset_collection.update({'_id': course_assets['_id']}, {'$set': {info: all_assets.as_list()}}) return True @contract(asset_key='AssetKey', attr_dict=dict) diff --git a/common/lib/xmodule/xmodule/modulestore/tests/test_assetstore.py b/common/lib/xmodule/xmodule/modulestore/tests/test_assetstore.py index 1b0d83c63a..40498490e2 100644 --- a/common/lib/xmodule/xmodule/modulestore/tests/test_assetstore.py +++ b/common/lib/xmodule/xmodule/modulestore/tests/test_assetstore.py @@ -6,6 +6,7 @@ from datetime import datetime, timedelta import pytz import unittest import ddt +from time import sleep from xmodule.assetstore import AssetMetadata, AssetThumbnailMetadata from xmodule.modulestore import ModuleStoreEnum @@ -79,12 +80,23 @@ class TestMongoAssetMetadataStorage(unittest.TestCase): asset6_vals = ('asset.txt', 'JJJCCC747858', '/dev/null', False, ModuleStoreEnum.UserID.test * 4, datetime.now(pytz.utc), '50', '49') asset6 = dict(zip(asset_fields[1:], asset6_vals[1:])) + # More assets. 
+ asset7_vals = ('roman_history.pdf', 'JASDUNSADK', 'texts/italy', True, ModuleStoreEnum.UserID.test * 7, datetime.now(pytz.utc), '1.1', '1.01') + asset8_vals = ('weather_patterns.bmp', '928SJXX2EB', 'science', False, ModuleStoreEnum.UserID.test * 8, datetime.now(pytz.utc), '52', '51') + asset9_vals = ('demo.swf', 'DFDFGGGG14', 'demos/easy', False, ModuleStoreEnum.UserID.test * 9, datetime.now(pytz.utc), '5', '4') + asset7 = dict(zip(asset_fields[1:], asset7_vals[1:])) + asset8 = dict(zip(asset_fields[1:], asset8_vals[1:])) + asset9 = dict(zip(asset_fields[1:], asset9_vals[1:])) + asset1_key = course1_key.make_asset_key('asset', asset1_vals[0]) asset2_key = course1_key.make_asset_key('asset', asset2_vals[0]) asset3_key = course2_key.make_asset_key('asset', asset3_vals[0]) asset4_key = course2_key.make_asset_key('asset', asset4_vals[0]) asset5_key = course2_key.make_asset_key('asset', asset5_vals[0]) asset6_key = course2_key.make_asset_key('asset', asset6_vals[0]) + asset7_key = course2_key.make_asset_key('asset', asset7_vals[0]) + asset8_key = course2_key.make_asset_key('asset', asset8_vals[0]) + asset9_key = course2_key.make_asset_key('asset', asset9_vals[0]) asset1_md = AssetMetadata(asset1_key, **asset1) asset2_md = AssetMetadata(asset2_key, **asset2) @@ -92,13 +104,26 @@ class TestMongoAssetMetadataStorage(unittest.TestCase): asset4_md = AssetMetadata(asset4_key, **asset4) asset5_md = AssetMetadata(asset5_key, **non_existent_asset) asset6_md = AssetMetadata(asset6_key, **asset6) + asset7_md = AssetMetadata(asset7_key, **asset7) + asset8_md = AssetMetadata(asset8_key, **asset8) + asset9_md = AssetMetadata(asset9_key, **asset9) if store is not None: + # Sleeps are to ensure that edited_on order is correct. 
store.save_asset_metadata(course1_key, asset1_md, ModuleStoreEnum.UserID.test) - store.save_asset_metadata(course1_key, asset2_md, ModuleStoreEnum.UserID.test) - store.save_asset_metadata(course2_key, asset3_md, ModuleStoreEnum.UserID.test) - store.save_asset_metadata(course2_key, asset4_md, ModuleStoreEnum.UserID.test) + sleep(0.0001) + store.save_asset_metadata(course1_key, asset2_md, ModuleStoreEnum.UserID.test * 2) + sleep(0.0001) + store.save_asset_metadata(course2_key, asset3_md, ModuleStoreEnum.UserID.test * 3) + sleep(0.0001) + store.save_asset_metadata(course2_key, asset4_md, ModuleStoreEnum.UserID.test * 4) + sleep(0.0001) # 5 & 6 are not saved on purpose! + store.save_asset_metadata(course2_key, asset7_md, ModuleStoreEnum.UserID.test * 7) + sleep(0.0001) + store.save_asset_metadata(course2_key, asset8_md, ModuleStoreEnum.UserID.test * 8) + sleep(0.0001) + store.save_asset_metadata(course2_key, asset9_md, ModuleStoreEnum.UserID.test * 9) return (asset1_md, asset2_md, asset3_md, asset4_md, asset5_md, asset6_md) @@ -387,8 +412,67 @@ class TestMongoAssetMetadataStorage(unittest.TestCase): store.delete_all_asset_metadata(course.id, ModuleStoreEnum.UserID.test) self.assertEquals(len(store.get_all_asset_thumbnail_metadata(course.id)), 0) - def test_get_all_assets_with_paging(self): - pass + @ddt.data(*MODULESTORE_SETUPS) + def test_get_all_assets_with_paging(self, storebuilder): + """ + Save multiple asset metadata in a course and retrieve them page by page under each supported sort, then all at once, then with out-of-range paging arguments. + """ + # Temporarily only perform this test for Old Mongo - not Split. 
+ if not isinstance(storebuilder, MongoModulestoreBuilder): + raise unittest.SkipTest + with MongoContentstoreBuilder().build() as contentstore: + with storebuilder.build(contentstore) as store: + course1 = CourseFactory.create(modulestore=store) + course2 = CourseFactory.create(modulestore=store) + self.setup_assets(course1.id, course2.id, store) + + expected_sorts_by_2 = ( + ( + ('displayname', 'ascending'), + ('code.tgz', 'demo.swf', 'dog.png', 'roman_history.pdf', 'weather_patterns.bmp'), + (2, 2, 1) + ), + ( + ('displayname', 'descending'), + ('weather_patterns.bmp', 'roman_history.pdf', 'dog.png', 'demo.swf', 'code.tgz'), + (2, 2, 1) + ), + ( + ('uploadDate', 'ascending'), + ('code.tgz', 'dog.png', 'roman_history.pdf', 'weather_patterns.bmp', 'demo.swf'), + (2, 2, 1) + ), + ( + ('uploadDate', 'descending'), + ('demo.swf', 'weather_patterns.bmp', 'roman_history.pdf', 'dog.png', 'code.tgz'), + (2, 2, 1) + ), + ) + # First, with paging across all sorts. + for sort_test in expected_sorts_by_2: + for i in xrange(3): + asset_page = store.get_all_asset_metadata(course2.id, start=2 * i, maxresults=2, sort=sort_test[0]) + self.assertEquals(len(asset_page), sort_test[2][i]) + self.assertEquals(asset_page[0].asset_id.path, sort_test[1][2 * i]) + if sort_test[2][i] == 2: + self.assertEquals(asset_page[1].asset_id.path, sort_test[1][(2 * i) + 1]) + + # Now fetch everything. + asset_page = store.get_all_asset_metadata(course2.id, start=0, sort=('displayname', 'ascending')) + self.assertEquals(len(asset_page), 5) + self.assertEquals(asset_page[0].asset_id.path, 'code.tgz') + self.assertEquals(asset_page[1].asset_id.path, 'demo.swf') + self.assertEquals(asset_page[2].asset_id.path, 'dog.png') + self.assertEquals(asset_page[3].asset_id.path, 'roman_history.pdf') + self.assertEquals(asset_page[4].asset_id.path, 'weather_patterns.bmp') + + # Some odd conditions. 
+ asset_page = store.get_all_asset_metadata(course2.id, start=100, sort=('displayname', 'ascending')) + self.assertEquals(len(asset_page), 0) + asset_page = store.get_all_asset_metadata(course2.id, start=3, maxresults=0, sort=('displayname', 'ascending')) + self.assertEquals(len(asset_page), 0) + asset_page = store.get_all_asset_metadata(course2.id, start=3, maxresults=-12345, sort=('displayname', 'descending')) + self.assertEquals(len(asset_page), 2) def test_copy_all_assets(self): pass diff --git a/docs/shared/requirements.txt b/docs/shared/requirements.txt index 860e55356d..330a7a1a21 100644 --- a/docs/shared/requirements.txt +++ b/docs/shared/requirements.txt @@ -56,6 +56,7 @@ PyYAML==3.10 requests==2.3.0 Shapely==1.2.16 sorl-thumbnail==11.12 +sortedcontainers==0.9.2 South==0.7.6 sympy==0.7.1 xmltodict==0.4.1