From b55aeb47981cbee072c6e2e22e0fde6014febf41 Mon Sep 17 00:00:00 2001 From: Tyler Hallada Date: Fri, 26 Jan 2018 11:48:57 -0500 Subject: [PATCH 1/5] Use Mongo aggregate hack to sort case-insensitive --- .../lib/xmodule/xmodule/contentstore/mongo.py | 59 +++++++++++++++---- 1 file changed, 49 insertions(+), 10 deletions(-) diff --git a/common/lib/xmodule/xmodule/contentstore/mongo.py b/common/lib/xmodule/xmodule/contentstore/mongo.py index 8482de5e27..393fb3ca76 100644 --- a/common/lib/xmodule/xmodule/contentstore/mongo.py +++ b/common/lib/xmodule/xmodule/contentstore/mongo.py @@ -247,19 +247,58 @@ class MongoContentStore(ContentStore): contentType: The mimetype string of the asset md5: An md5 hash of the asset content ''' - query = query_for_course(course_key, "asset" if not get_thumbnails else "thumbnail") - find_args = {"sort": sort} - if maxresults > 0: - find_args.update({ - "skip": start, - "limit": maxresults, - }) + # TODO: Using an aggregate() instead of a find() here is a hack to get around the fact that Mongo 3.2 does not + # support sorting case-insensitively. + # If a sort on displayname is requested, the aggregation pipeline creates a new field: + # `insensitive_displayname`, a lowercase version of `displayname` that is sorted on instead. + # Mongo 3.4 does not require this hack. When upgraded, change this aggregation back to a find and specifiy + # a collation based on user's language locale instead. + pipeline_stages = [] + query = query_for_course(course_key, 'asset' if not get_thumbnails else 'thumbnail') if filter_params: query.update(filter_params) + pipeline_stages.append({'$match': query}) - items = self.fs_files.find(query, **find_args) - count = items.count() - assets = list(items) + sort = dict(sort) + if 'displayname' in sort: + pipeline_stages.append({ + '$project': { + 'contentType': 1, + 'locked': 1, + 'chunkSize': 1, + 'content_son': 1, + 'displayname': 1, + 'filename': 1, + 'length': 1, + 'import_path': 1, + 'uploadDate': 1, + 'thumbnail_location': 1, + 'md5': 1, + 'insensitive_displayname': { + '$toLower': '$displayname' + } + } + }) + sort = {'insensitive_displayname': sort['displayname']} + pipeline_stages.append({'$sort': sort}) + + # This is another hack to get the total query result count, but only the Nth page of actual documents + # See: https://stackoverflow.com/a/39784851/6620612 + pipeline_stages.append({'$group': {'_id': None, 'count': {'$sum': 1}, 'results': {'$push': '$$ROOT'}}}) + if maxresults > 0: + pipeline_stages.append({ + '$project': { + 'count': 1, + 'results': { + '$slice': ['$results', start, maxresults] + } + } + }) + + items = self.fs_files.aggregate(pipeline_stages) + result = items['result'][0] + count = result['count'] + assets = list(result['results']) # We're constructing the asset key immediately after retrieval from the database so that # callers are insulated from knowing how our identifiers are stored. From 1710021c0de98743c26cafe0fbd7a1b03d286620 Mon Sep 17 00:00:00 2001 From: Tyler Hallada Date: Fri, 26 Jan 2018 14:57:26 -0500 Subject: [PATCH 2/5] Don't sort when sort is None --- .../lib/xmodule/xmodule/contentstore/mongo.py | 43 ++++++++++--------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/common/lib/xmodule/xmodule/contentstore/mongo.py b/common/lib/xmodule/xmodule/contentstore/mongo.py index 393fb3ca76..0f2274ed03 100644 --- a/common/lib/xmodule/xmodule/contentstore/mongo.py +++ b/common/lib/xmodule/xmodule/contentstore/mongo.py @@ -259,28 +259,29 @@ class MongoContentStore(ContentStore): query.update(filter_params) pipeline_stages.append({'$match': query}) - sort = dict(sort) - if 'displayname' in sort: - pipeline_stages.append({ - '$project': { - 'contentType': 1, - 'locked': 1, - 'chunkSize': 1, - 'content_son': 1, - 'displayname': 1, - 'filename': 1, - 'length': 1, - 'import_path': 1, - 'uploadDate': 1, - 'thumbnail_location': 1, - 'md5': 1, - 'insensitive_displayname': { - '$toLower': '$displayname' + if sort: + sort = dict(sort) + if 'displayname' in sort: + pipeline_stages.append({ + '$project': { + 'contentType': 1, + 'locked': 1, + 'chunkSize': 1, + 'content_son': 1, + 'displayname': 1, + 'filename': 1, + 'length': 1, + 'import_path': 1, + 'uploadDate': 1, + 'thumbnail_location': 1, + 'md5': 1, + 'insensitive_displayname': { + '$toLower': '$displayname' + } } - } - }) - sort = {'insensitive_displayname': sort['displayname']} - pipeline_stages.append({'$sort': sort}) + }) + sort = {'insensitive_displayname': sort['displayname']} + pipeline_stages.append({'$sort': sort}) # This is another hack to get the total query result count, but only the Nth page of actual documents # See: https://stackoverflow.com/a/39784851/6620612 From df5671927fb3ab924179a69b1afa4fef9563b27c Mon Sep 17 00:00:00 2001 From: Tyler Hallada Date: Fri, 26 Jan 2018 15:49:50 -0500 Subject: [PATCH 3/5] Handle no assets results case properly --- common/lib/xmodule/xmodule/contentstore/mongo.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/common/lib/xmodule/xmodule/contentstore/mongo.py b/common/lib/xmodule/xmodule/contentstore/mongo.py index 0f2274ed03..019f506f79 100644 --- a/common/lib/xmodule/xmodule/contentstore/mongo.py +++ b/common/lib/xmodule/xmodule/contentstore/mongo.py @@ -297,9 +297,14 @@ class MongoContentStore(ContentStore): }) items = self.fs_files.aggregate(pipeline_stages) - result = items['result'][0] - count = result['count'] - assets = list(result['results']) + if items['result']: + result = items['result'][0] + count = result['count'] + assets = list(result['results']) + else: + # no results + count = 0 + assets = [] # We're constructing the asset key immediately after retrieval from the database so that # callers are insulated from knowing how our identifiers are stored. From 45f9b88298aea43402fb2c2ae0d5f6b2c50e8053 Mon Sep 17 00:00:00 2001 From: Tyler Hallada Date: Mon, 29 Jan 2018 13:24:05 -0500 Subject: [PATCH 4/5] Fix modulestore mongo tests --- .../lib/xmodule/xmodule/modulestore/tests/test_mongo.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/common/lib/xmodule/xmodule/modulestore/tests/test_mongo.py b/common/lib/xmodule/xmodule/modulestore/tests/test_mongo.py index 8840633c13..e9a2d40cdf 100644 --- a/common/lib/xmodule/xmodule/modulestore/tests/test_mongo.py +++ b/common/lib/xmodule/xmodule/modulestore/tests/test_mongo.py @@ -846,9 +846,11 @@ def _build_requested_filter(requested_filter): ], } requested_file_types = all_filters.get(requested_filter, None) - where = ["JSON.stringify(this.contentType).toUpperCase() == JSON.stringify('{}').toUpperCase()".format( - req_filter) for req_filter in requested_file_types] filter_params = { - "$where": ' || '.join(where), + '$or': [{ + 'contentType': { + '$in': requested_file_types, + }, + }] } return filter_params From c959ebce01892113924a7cfe874874a69ebea816 Mon Sep 17 00:00:00 2001 From: Tyler Hallada Date: Mon, 5 Feb 2018 11:27:10 -0500 Subject: [PATCH 5/5] Add Jira ticket number to TODO comment --- common/lib/xmodule/xmodule/contentstore/mongo.py | 1 + 1 file changed, 1 insertion(+) diff --git a/common/lib/xmodule/xmodule/contentstore/mongo.py b/common/lib/xmodule/xmodule/contentstore/mongo.py index 019f506f79..a3e1f2110b 100644 --- a/common/lib/xmodule/xmodule/contentstore/mongo.py +++ b/common/lib/xmodule/xmodule/contentstore/mongo.py @@ -253,6 +253,7 @@ class MongoContentStore(ContentStore): # `insensitive_displayname`, a lowercase version of `displayname` that is sorted on instead. # Mongo 3.4 does not require this hack. When upgraded, change this aggregation back to a find and specifiy # a collation based on user's language locale instead. + # See: https://openedx.atlassian.net/browse/EDUCATOR-2221 pipeline_stages = [] query = query_for_course(course_key, 'asset' if not get_thumbnails else 'thumbnail') if filter_params: