Use Mongo aggregate hack to sort case-insensitive

This commit is contained in:
Tyler Hallada
2018-01-26 11:48:57 -05:00
parent f4a5a68568
commit b55aeb4798

View File

@@ -247,19 +247,58 @@ class MongoContentStore(ContentStore):
contentType: The mimetype string of the asset
md5: An md5 hash of the asset content
'''
query = query_for_course(course_key, "asset" if not get_thumbnails else "thumbnail")
find_args = {"sort": sort}
if maxresults > 0:
find_args.update({
"skip": start,
"limit": maxresults,
})
# TODO: Using an aggregate() instead of a find() here is a hack to get around the fact that Mongo 3.2 does not
# support sorting case-insensitively.
# If a sort on displayname is requested, the aggregation pipeline creates a new field:
# `insensitive_displayname`, a lowercase version of `displayname` that is sorted on instead.
# Mongo 3.4 does not require this hack. When upgraded, change this aggregation back to a find and specify
# a collation based on the user's language locale instead.
pipeline_stages = []
query = query_for_course(course_key, 'asset' if not get_thumbnails else 'thumbnail')
if filter_params:
query.update(filter_params)
pipeline_stages.append({'$match': query})
items = self.fs_files.find(query, **find_args)
count = items.count()
assets = list(items)
sort = dict(sort)
if 'displayname' in sort:
pipeline_stages.append({
'$project': {
'contentType': 1,
'locked': 1,
'chunkSize': 1,
'content_son': 1,
'displayname': 1,
'filename': 1,
'length': 1,
'import_path': 1,
'uploadDate': 1,
'thumbnail_location': 1,
'md5': 1,
'insensitive_displayname': {
'$toLower': '$displayname'
}
}
})
sort = {'insensitive_displayname': sort['displayname']}
pipeline_stages.append({'$sort': sort})
# This is another hack to get the total query result count while returning only the requested page of documents.
# See: https://stackoverflow.com/a/39784851/6620612
pipeline_stages.append({'$group': {'_id': None, 'count': {'$sum': 1}, 'results': {'$push': '$$ROOT'}}})
if maxresults > 0:
pipeline_stages.append({
'$project': {
'count': 1,
'results': {
'$slice': ['$results', start, maxresults]
}
}
})
items = self.fs_files.aggregate(pipeline_stages)
result = items['result'][0]
count = result['count']
assets = list(result['results'])
# We're constructing the asset key immediately after retrieval from the database so that
# callers are insulated from knowing how our identifiers are stored.