Merge pull request #17322 from edx/thallada/assets-case-insensitive-sort

EDUCATOR-2191 Use Mongo aggregate hack to sort case-insensitive
2018-02-05 14:05:32 -05:00
parent be474b5d61 c959ebce01
commit ffbb0c16ef
2 changed files with 61 additions and 13 deletions
--- a/common/lib/xmodule/xmodule/contentstore/mongo.py
+++ b/common/lib/xmodule/xmodule/contentstore/mongo.py
@@ -247,19 +247,65 @@ class MongoContentStore(ContentStore):
            contentType: The mimetype string of the asset
            md5: An md5 hash of the asset content
        '''
-        query = query_for_course(course_key, "asset" if not get_thumbnails else "thumbnail")
-        find_args = {"sort": sort}
-        if maxresults > 0:
-            find_args.update({
-                "skip": start,
-                "limit": maxresults,
-            })
+        # TODO: Using an aggregate() instead of a find() here is a hack to get around the fact that Mongo 3.2 does not
+        # support sorting case-insensitively.
+        # If a sort on displayname is requested, the aggregation pipeline creates a new field:
+        # `insensitive_displayname`, a lowercase version of `displayname` that is sorted on instead.
+        # Mongo 3.4 does not require this hack. When upgraded, change this aggregation back to a find and specify
+        # a collation based on user's language locale instead.
+        # See: https://openedx.atlassian.net/browse/EDUCATOR-2221
+        pipeline_stages = []
+        query = query_for_course(course_key, 'asset' if not get_thumbnails else 'thumbnail')
        if filter_params:
            query.update(filter_params)
+        pipeline_stages.append({'$match': query})

-        items = self.fs_files.find(query, **find_args)
-        count = items.count()
-        assets = list(items)
+        if sort:
+            sort = dict(sort)
+            if 'displayname' in sort:
+                pipeline_stages.append({
+                    '$project': {
+                        'contentType': 1,
+                        'locked': 1,
+                        'chunkSize': 1,
+                        'content_son': 1,
+                        'displayname': 1,
+                        'filename': 1,
+                        'length': 1,
+                        'import_path': 1,
+                        'uploadDate': 1,
+                        'thumbnail_location': 1,
+                        'md5': 1,
+                        'insensitive_displayname': {
+                            '$toLower': '$displayname'
+                        }
+                    }
+                })
+                sort = {'insensitive_displayname': sort['displayname']}
+            pipeline_stages.append({'$sort': sort})
+
+        # This is another hack to get the total query result count, but only the Nth page of actual documents
+        # See: https://stackoverflow.com/a/39784851/6620612
+        pipeline_stages.append({'$group': {'_id': None, 'count': {'$sum': 1}, 'results': {'$push': '$$ROOT'}}})
+        if maxresults > 0:
+            pipeline_stages.append({
+                '$project': {
+                    'count': 1,
+                    'results': {
+                        '$slice': ['$results', start, maxresults]
+                    }
+                }
+            })
+
+        items = self.fs_files.aggregate(pipeline_stages)
+        if items['result']:
+            result = items['result'][0]
+            count = result['count']
+            assets = list(result['results'])
+        else:
+            # no results
+            count = 0
+            assets = []

        # We're constructing the asset key immediately after retrieval from the database so that
        # callers are insulated from knowing how our identifiers are stored.
--- a/common/lib/xmodule/xmodule/modulestore/tests/test_mongo.py
+++ b/common/lib/xmodule/xmodule/modulestore/tests/test_mongo.py
@@ -868,9 +868,11 @@ def _build_requested_filter(requested_filter):
        ],
    }
    requested_file_types = all_filters.get(requested_filter, None)
-    where = ["JSON.stringify(this.contentType).toUpperCase() == JSON.stringify('{}').toUpperCase()".format(
-        req_filter) for req_filter in requested_file_types]
    filter_params = {
-        "$where": ' || '.join(where),
+        '$or': [{
+            'contentType': {
+                '$in': requested_file_types,
+            },
+        }]
    }
    return filter_params