Use Mongo aggregate hack to sort case-insensitive

2018-01-26 11:48:57 -05:00
parent f4a5a68568
commit b55aeb4798
1 changed files with 49 additions and 10 deletions
--- a/common/lib/xmodule/xmodule/contentstore/mongo.py
+++ b/common/lib/xmodule/xmodule/contentstore/mongo.py
@@ -247,19 +247,58 @@ class MongoContentStore(ContentStore):
            contentType: The mimetype string of the asset
            md5: An md5 hash of the asset content
        '''
-        query = query_for_course(course_key, "asset" if not get_thumbnails else "thumbnail")
-        find_args = {"sort": sort}
-        if maxresults > 0:
-            find_args.update({
-                "skip": start,
-                "limit": maxresults,
-            })
+        # TODO: Using an aggregate() instead of a find() here is a hack to get around the fact that Mongo 3.2 does not
+        # support sorting case-insensitively.
+        # If a sort on displayname is requested, the aggregation pipeline creates a new field:
+        # `insensitive_displayname`, a lowercase version of `displayname` that is sorted on instead.
+        # Mongo 3.4 does not require this hack. When upgraded, change this aggregation back to a find and specify
+        # a collation based on the user's language locale instead.
+        pipeline_stages = []
+        query = query_for_course(course_key, 'asset' if not get_thumbnails else 'thumbnail')
        if filter_params:
            query.update(filter_params)
+        pipeline_stages.append({'$match': query})

-        items = self.fs_files.find(query, **find_args)
-        count = items.count()
-        assets = list(items)
+        sort = dict(sort)
+        if 'displayname' in sort:
+            pipeline_stages.append({
+                '$project': {
+                    'contentType': 1,
+                    'locked': 1,
+                    'chunkSize': 1,
+                    'content_son': 1,
+                    'displayname': 1,
+                    'filename': 1,
+                    'length': 1,
+                    'import_path': 1,
+                    'uploadDate': 1,
+                    'thumbnail_location': 1,
+                    'md5': 1,
+                    'insensitive_displayname': {
+                        '$toLower': '$displayname'
+                    }
+                }
+            })
+            sort = {'insensitive_displayname': sort['displayname']}
+        pipeline_stages.append({'$sort': sort})
+
+        # This is another hack to get the total query result count, but only the Nth page of actual documents
+        # See: https://stackoverflow.com/a/39784851/6620612
+        pipeline_stages.append({'$group': {'_id': None, 'count': {'$sum': 1}, 'results': {'$push': '$$ROOT'}}})
+        if maxresults > 0:
+            pipeline_stages.append({
+                '$project': {
+                    'count': 1,
+                    'results': {
+                        '$slice': ['$results', start, maxresults]
+                    }
+                }
+            })
+
+        items = self.fs_files.aggregate(pipeline_stages)
+        result = items['result'][0]
+        count = result['count']
+        assets = list(result['results'])

        # We're constructing the asset key immediately after retrieval from the database so that
        # callers are insulated from knowing how our identifiers are stored.