feat!: upgrade pymongo (#34675)

This commit is contained in:
Muhammad Umar Khan
2024-07-25 16:03:06 +05:00
committed by GitHub
parent 03a8f5daf7
commit cbd4904e1b
15 changed files with 248 additions and 83 deletions

View File

@@ -137,6 +137,8 @@ compile-requirements: pre-requirements $(COMMON_CONSTRAINTS_TXT) ## Re-compile *
mv requirements/common_constraints.tmp requirements/common_constraints.txt
sed 's/Django<4.0//g' requirements/common_constraints.txt > requirements/common_constraints.tmp
mv requirements/common_constraints.tmp requirements/common_constraints.txt
sed 's/event-tracking<2.4.1//g' requirements/common_constraints.txt > requirements/common_constraints.tmp
mv requirements/common_constraints.tmp requirements/common_constraints.txt
pip-compile -v --allow-unsafe ${COMPILE_OPTS} -o requirements/pip.txt requirements/pip.in
pip install -r requirements/pip.txt

View File

@@ -40,4 +40,4 @@ importlib-metadata<7
# We pin event-tracking so as not to break existing installations
# This can be unpinned once https://github.com/openedx/edx-platform/issues/34586
# has been resolved and edx-platform is running with pymongo>=4.4.0
event-tracking<2.4.1

View File

@@ -32,9 +32,13 @@ django-oauth-toolkit==1.7.1
# incremental upgrade
django-simple-history==3.4.0
# constrained in opaque_keys. migration guide here: https://pymongo.readthedocs.io/en/4.0/migrate-to-pymongo4.html
# Major upgrade will be done in separate ticket.
pymongo<4.0.0
# Adding pin to avoid any major upgrade
pymongo<4.4.1
# To override the constraint of edx-lint
# This can be removed once https://github.com/openedx/edx-platform/issues/34586 is resolved
# and the upstream constraint in edx-lint has been removed.
event-tracking==3.0.0
# greater version has breaking changes and requires some migration steps.
django-webpack-loader==0.7.0
@@ -125,4 +129,4 @@ numpy<2.0.0
# django-storages==1.14.4 breaks course imports
# Two lines were added in 1.14.4 that make file_exists_in_storage function always return False,
# as the default value of AWS_S3_FILE_OVERWRITE is True
django-storages<1.14.4
django-storages<1.14.4

View File

@@ -376,6 +376,10 @@ djangorestframework==3.14.0
# super-csv
djangorestframework-xml==2.0.0
# via edx-enterprise
dnspython==2.6.1
# via
# -r requirements/edx/paver.txt
# pymongo
done-xblock==2.3.0
# via -r requirements/edx/bundled.in
drf-jwt==1.19.2
@@ -536,9 +540,9 @@ enmerkar==0.7.1
# via enmerkar-underscore
enmerkar-underscore==2.3.0
# via -r requirements/edx/kernel.in
event-tracking==2.4.0
event-tracking==3.0.0
# via
# -c requirements/edx/../common_constraints.txt
# -c requirements/edx/../constraints.txt
# -r requirements/edx/kernel.in
# edx-completion
# edx-proctoring
@@ -865,7 +869,7 @@ pylti1p3==2.0.0
# via -r requirements/edx/kernel.in
pymemcache==4.0.0
# via -r requirements/edx/paver.txt
pymongo==3.13.0
pymongo==4.4.0
# via
# -c requirements/edx/../constraints.txt
# -r requirements/edx/kernel.in

View File

@@ -614,8 +614,10 @@ djangorestframework-xml==2.0.0
# edx-enterprise
dnspython==2.6.1
# via
# -r requirements/edx/doc.txt
# -r requirements/edx/testing.txt
# email-validator
# pymongo
docutils==0.21.2
# via
# -r requirements/edx/doc.txt
@@ -853,9 +855,9 @@ enmerkar-underscore==2.3.0
# via
# -r requirements/edx/doc.txt
# -r requirements/edx/testing.txt
event-tracking==2.4.0
event-tracking==3.0.0
# via
# -c requirements/edx/../common_constraints.txt
# -c requirements/edx/../constraints.txt
# -r requirements/edx/doc.txt
# -r requirements/edx/testing.txt
# edx-completion
@@ -1535,7 +1537,7 @@ pymemcache==4.0.0
# via
# -r requirements/edx/doc.txt
# -r requirements/edx/testing.txt
pymongo==3.13.0
pymongo==4.4.0
# via
# -c requirements/edx/../constraints.txt
# -r requirements/edx/doc.txt

View File

@@ -440,6 +440,10 @@ djangorestframework-xml==2.0.0
# via
# -r requirements/edx/base.txt
# edx-enterprise
dnspython==2.6.1
# via
# -r requirements/edx/base.txt
# pymongo
docutils==0.21.2
# via
# pydata-sphinx-theme
@@ -614,9 +618,9 @@ enmerkar==0.7.1
# enmerkar-underscore
enmerkar-underscore==2.3.0
# via -r requirements/edx/base.txt
event-tracking==2.4.0
event-tracking==3.0.0
# via
# -c requirements/edx/../common_constraints.txt
# -c requirements/edx/../constraints.txt
# -r requirements/edx/base.txt
# edx-completion
# edx-proctoring
@@ -1026,7 +1030,7 @@ pylti1p3==2.0.0
# via -r requirements/edx/base.txt
pymemcache==4.0.0
# via -r requirements/edx/base.txt
pymongo==3.13.0
pymongo==4.4.0
# via
# -c requirements/edx/../constraints.txt
# -r requirements/edx/base.txt

View File

@@ -10,6 +10,8 @@ charset-normalizer==2.0.12
# via
# -c requirements/edx/../constraints.txt
# requests
dnspython==2.6.1
# via pymongo
edx-opaque-keys==2.10.0
# via -r requirements/edx/paver.in
idna==3.7
@@ -36,7 +38,7 @@ psutil==6.0.0
# via -r requirements/edx/paver.in
pymemcache==4.0.0
# via -r requirements/edx/paver.in
pymongo==3.13.0
pymongo==4.4.0
# via
# -c requirements/edx/../constraints.txt
# -r requirements/edx/paver.in

View File

@@ -476,7 +476,10 @@ djangorestframework-xml==2.0.0
# -r requirements/edx/base.txt
# edx-enterprise
dnspython==2.6.1
# via email-validator
# via
# -r requirements/edx/base.txt
# email-validator
# pymongo
done-xblock==2.3.0
# via -r requirements/edx/base.txt
drf-jwt==1.19.2
@@ -650,9 +653,9 @@ enmerkar==0.7.1
# enmerkar-underscore
enmerkar-underscore==2.3.0
# via -r requirements/edx/base.txt
event-tracking==2.4.0
event-tracking==3.0.0
# via
# -c requirements/edx/../common_constraints.txt
# -c requirements/edx/../constraints.txt
# -r requirements/edx/base.txt
# edx-completion
# edx-proctoring
@@ -1141,7 +1144,7 @@ pylti1p3==2.0.0
# via -r requirements/edx/base.txt
pymemcache==4.0.0
# via -r requirements/edx/base.txt
pymongo==3.13.0
pymongo==4.4.0
# via
# -c requirements/edx/../constraints.txt
# -r requirements/edx/base.txt

View File

@@ -11,11 +11,13 @@ click==8.1.6
# click-log
click-log==0.4.0
# via -r scripts/structures_pruning/requirements/base.in
dnspython==2.6.1
# via pymongo
edx-opaque-keys==2.10.0
# via -r scripts/structures_pruning/requirements/base.in
pbr==6.0.0
# via stevedore
pymongo==3.13.0
pymongo==4.4.0
# via
# -c scripts/structures_pruning/requirements/../../../requirements/constraints.txt
# -r scripts/structures_pruning/requirements/base.in

View File

@@ -12,6 +12,10 @@ click-log==0.4.0
# via -r scripts/structures_pruning/requirements/base.txt
ddt==1.7.2
# via -r scripts/structures_pruning/requirements/testing.in
dnspython==2.6.1
# via
# -r scripts/structures_pruning/requirements/base.txt
# pymongo
edx-opaque-keys==2.10.0
# via -r scripts/structures_pruning/requirements/base.txt
iniconfig==2.0.0
@@ -24,7 +28,7 @@ pbr==6.0.0
# stevedore
pluggy==1.5.0
# via pytest
pymongo==3.13.0
pymongo==4.4.0
# via
# -r scripts/structures_pruning/requirements/base.txt
# edx-opaque-keys

View File

@@ -3,6 +3,7 @@ MongoDB/GridFS-level code for the contentstore.
"""
import hashlib
import json
import os
@@ -40,16 +41,29 @@ class MongoContentStore(ContentStore):
# GridFS will throw an exception if the Database is wrapped in a MongoProxy. So don't wrap it.
# The appropriate methods below are marked as autoretry_read - those methods will handle
# the AutoReconnect errors.
proxy = False
mongo_db = connect_to_mongodb(
db, host,
port=port, tz_aware=tz_aware, user=user, password=password, proxy=proxy, **kwargs
)
self.connection_params = {
'db': db,
'host': host,
'port': port,
'tz_aware': tz_aware,
'user': user,
'password': password,
'proxy': False,
**kwargs
}
self.bucket = bucket
self.do_connection()
self.fs = gridfs.GridFS(mongo_db, bucket) # pylint: disable=invalid-name
def do_connection(self):
"""
Connects to mongodb.
"""
mongo_db = connect_to_mongodb(**self.connection_params)
self.fs_files = mongo_db[bucket + ".files"] # the underlying collection GridFS uses
self.chunks = mongo_db[bucket + ".chunks"]
self.fs = gridfs.GridFS(mongo_db, self.bucket) # pylint: disable=invalid-name
self.fs_files = mongo_db[self.bucket + ".files"] # the underlying collection GridFS uses
self.chunks = mongo_db[self.bucket + ".chunks"]
def close_connections(self):
"""
@@ -57,6 +71,25 @@ class MongoContentStore(ContentStore):
"""
self.fs_files.database.client.close()
def ensure_connection(self):
"""
Ensure that mongodb connection is open.
"""
if self.check_connection():
return
self.do_connection()
def check_connection(self):
"""
Check if mongodb connection is open or not.
"""
connection = self.fs_files.database.client
try:
connection.admin.command('ping')
return True
except pymongo.errors.InvalidOperation:
return False
def _drop_database(self, database=True, collections=True, connections=True):
"""
A destructive operation to drop the underlying database and close all connections.
@@ -69,8 +102,8 @@ class MongoContentStore(ContentStore):
If connections is True, then close the connection to the database as well.
"""
self.ensure_connection()
connection = self.fs_files.database.client
if database:
connection.drop_database(self.fs_files.database.name)
elif collections:
@@ -103,16 +136,22 @@ class MongoContentStore(ContentStore):
# but many more objects have this in python3 and shouldn't be using the chunking logic. For string and
# byte streams we write them directly to gridfs and convert them to bytearrays if necessary.
if hasattr(content.data, '__iter__') and not isinstance(content.data, (bytes, (str,))):
custom_md5 = hashlib.md5()
for chunk in content.data:
fp.write(chunk)
custom_md5.update(chunk)
fp.custom_md5 = custom_md5.hexdigest()
else:
# Ideally we could just ensure that we don't get strings in here and only byte streams
# but being confident of that would be a lot more work than we have time for so we just
# handle both cases here.
if isinstance(content.data, str):
fp.write(content.data.encode('utf-8'))
encoded_data = content.data.encode('utf-8')
fp.write(encoded_data)
fp.custom_md5 = hashlib.md5(encoded_data).hexdigest()
else:
fp.write(content.data)
fp.custom_md5 = hashlib.md5(content.data).hexdigest()
return content
@@ -142,12 +181,13 @@ class MongoContentStore(ContentStore):
'thumbnail',
thumbnail_location[4]
)
return StaticContentStream(
location, fp.displayname, fp.content_type, fp, last_modified_at=fp.uploadDate,
thumbnail_location=thumbnail_location,
import_path=getattr(fp, 'import_path', None),
length=fp.length, locked=getattr(fp, 'locked', False),
content_digest=getattr(fp, 'md5', None),
content_digest=getattr(fp, 'custom_md5', None),
)
else:
with self.fs.get(content_id) as fp:
@@ -161,12 +201,13 @@ class MongoContentStore(ContentStore):
'thumbnail',
thumbnail_location[4]
)
return StaticContent(
location, fp.displayname, fp.content_type, fp.read(), last_modified_at=fp.uploadDate,
thumbnail_location=thumbnail_location,
import_path=getattr(fp, 'import_path', None),
length=fp.length, locked=getattr(fp, 'locked', False),
content_digest=getattr(fp, 'md5', None),
content_digest=getattr(fp, 'custom_md5', None),
)
except NoFile:
if throw_on_not_found: # lint-amnesty, pylint: disable=no-else-raise

View File

@@ -473,30 +473,9 @@ class MongoModuleStore(ModuleStoreDraftAndPublished, ModuleStoreWriteBase, Mongo
super().__init__(contentstore=contentstore, **kwargs)
def do_connection(
db, collection, host, port=27017, tz_aware=True, user=None, password=None, asset_collection=None, **kwargs
):
"""
Create & open the connection, authenticate, and provide pointers to the collection
"""
# Set a write concern of 1, which makes writes complete successfully to the primary
# only before returning. Also makes pymongo report write errors.
kwargs['w'] = 1
self.database = connect_to_mongodb(
db, host,
port=port, tz_aware=tz_aware, user=user, password=password,
retry_wait_time=retry_wait_time, **kwargs
)
self.collection = self.database[collection]
# Collection which stores asset metadata.
if asset_collection is None:
asset_collection = self.DEFAULT_ASSET_COLLECTION_NAME
self.asset_collection = self.database[asset_collection]
do_connection(**doc_store_config)
self.doc_store_config = doc_store_config
self.retry_wait_time = retry_wait_time
self.do_connection(**self.doc_store_config)
if default_class is not None:
module_path, _, class_name = default_class.rpartition('.')
@@ -523,6 +502,48 @@ class MongoModuleStore(ModuleStoreDraftAndPublished, ModuleStoreWriteBase, Mongo
self._course_run_cache = {}
self.signal_handler = signal_handler
def check_connection(self):
"""
Check if mongodb connection is open or not.
"""
try:
# The ismaster command is cheap and does not require auth.
self.database.client.admin.command('ismaster')
return True
except pymongo.errors.InvalidOperation:
return False
def ensure_connection(self):
"""
Ensure that mongodb connection is open.
"""
if self.check_connection():
return
self.do_connection(**self.doc_store_config)
def do_connection(
self, db, collection, host, port=27017, tz_aware=True, user=None, password=None, asset_collection=None, **kwargs
):
"""
Create & open the connection, authenticate, and provide pointers to the collection
"""
# Set a write concern of 1, which makes writes complete successfully to the primary
# only before returning. Also makes pymongo report write errors.
kwargs['w'] = 1
self.database = connect_to_mongodb(
db, host,
port=port, tz_aware=tz_aware, user=user, password=password,
retry_wait_time=self.retry_wait_time, **kwargs
)
self.collection = self.database[collection]
# Collection which stores asset metadata.
if asset_collection is None:
asset_collection = self.DEFAULT_ASSET_COLLECTION_NAME
self.asset_collection = self.database[asset_collection]
def close_connections(self):
"""
Closes any open connections to the underlying database
@@ -541,6 +562,7 @@ class MongoModuleStore(ModuleStoreDraftAndPublished, ModuleStoreWriteBase, Mongo
If connections is True, then close the connection to the database as well.
"""
self.ensure_connection()
# drop the assets
super()._drop_database(database, collections, connections)
@@ -872,6 +894,8 @@ class MongoModuleStore(ModuleStoreDraftAndPublished, ModuleStoreWriteBase, Mongo
course_query[key] = re.compile(r"(?i)^{}$".format(course_query[key]))
else:
course_query = {'_id': location.to_deprecated_son()}
self.ensure_connection()
course = self.collection.find_one(course_query, projection={'_id': True})
if course:
return CourseKey.from_string('/'.join([

View File

@@ -279,20 +279,30 @@ class MongoPersistenceBackend:
#make sure the course index cache is fresh.
RequestCache(namespace="course_index_cache").clear()
self.database = connect_to_mongodb(
db, host,
port=port, tz_aware=tz_aware, user=user, password=password,
retry_wait_time=retry_wait_time, **kwargs
)
self.collection = collection
self.connection_params = {
'db': db,
'host': host,
'port': port,
'tz_aware': tz_aware,
'user': user,
'password': password,
'retry_wait_time': retry_wait_time,
**kwargs
}
self.course_index = self.database[collection + '.active_versions']
self.structures = self.database[collection + '.structures']
self.definitions = self.database[collection + '.definitions']
self.do_connection()
# Is the MySQL subclass in use, passing through some reads/writes to us? If so this will be True.
# If this MongoPersistenceBackend is being used directly (only MongoDB is involved), this is False.
self.with_mysql_subclass = with_mysql_subclass
def do_connection(self):
self.database = connect_to_mongodb(**self.connection_params)
self.course_index = self.database[self.collection + '.active_versions']
self.structures = self.database[self.collection + '.structures']
self.definitions = self.database[self.collection + '.definitions']
def heartbeat(self):
"""
Check that the db is reachable.
@@ -304,6 +314,24 @@ class MongoPersistenceBackend:
except pymongo.errors.ConnectionFailure:
raise HeartbeatFailure(f"Can't connect to {self.database.name}", 'mongo') # lint-amnesty, pylint: disable=raise-missing-from
def check_connection(self):
"""
Check if mongodb connection is open or not.
"""
try:
self.database.client.admin.command("ping")
return True
except pymongo.errors.InvalidOperation:
return False
def ensure_connection(self):
"""
Ensure that mongodb connection is open.
"""
if self.check_connection():
return
self.do_connection()
def get_structure(self, key, course_context=None):
"""
Get the structure from the persistence mechanism whose id is the given key.
@@ -502,6 +530,7 @@ class MongoPersistenceBackend:
"""
Delete the course_index from the persistence mechanism whose id is the given course_index
"""
self.ensure_connection()
with TIMER.timer("delete_course_index", course_key):
query = {
key_attr: getattr(course_key, key_attr)
@@ -561,7 +590,8 @@ class MongoPersistenceBackend:
Closes any open connections to the underlying databases
"""
RequestCache(namespace="course_index_cache").clear()
self.database.client.close()
if self.check_connection():
self.database.client.close()
def _drop_database(self, database=True, collections=True, connections=True):
"""
@@ -576,6 +606,8 @@ class MongoPersistenceBackend:
If connections is True, then close the connection to the database as well.
"""
RequestCache(namespace="course_index_cache").clear()
self.ensure_connection()
connection = self.database.client
if database:

View File

@@ -156,8 +156,8 @@ class CommonMixedModuleStoreSetup(CourseComparisonTest, OpenEdxEventsTestMixin):
tz_aware=True,
)
self.connection.drop_database(self.DB)
self.addCleanup(self.connection.drop_database, self.DB)
self.addCleanup(self.connection.close)
self.addCleanup(self._drop_database)
self.addCleanup(self._close_connection)
# define attrs which get set in initdb to quell pylint
self.writable_chapter_location = self.store = self.fake_location = None
@@ -165,6 +165,43 @@ class CommonMixedModuleStoreSetup(CourseComparisonTest, OpenEdxEventsTestMixin):
self.user_id = ModuleStoreEnum.UserID.test
def _check_connection(self):
"""
Check mongodb connection is open or not.
"""
try:
self.connection.admin.command('ping')
return True
except pymongo.errors.InvalidOperation:
return False
def _ensure_connection(self):
"""
Make sure that mongodb connection is open.
"""
if not self._check_connection():
self.connection = pymongo.MongoClient(
host=self.HOST,
port=self.PORT,
tz_aware=True,
)
def _drop_database(self):
"""
Drop mongodb database.
"""
self._ensure_connection()
self.connection.drop_database(self.DB)
def _close_connection(self):
"""
Close mongodb connection.
"""
try:
self.connection.close()
except pymongo.errors.InvalidOperation:
pass
def _create_course(self, course_key, asides=None):
"""
Create a course w/ one item in the persistence store using the given course & item location.

View File

@@ -51,27 +51,31 @@ def connect_to_mongodb(
if read_preference is not None:
kwargs['read_preference'] = read_preference
mongo_conn = pymongo.database.Database(
pymongo.MongoClient(
host=host,
port=port,
tz_aware=tz_aware,
document_class=dict,
**kwargs
),
db
)
if 'replicaSet' in kwargs and kwargs['replicaSet'] == '':
kwargs['replicaSet'] = None
connection_params = {
'host': host,
'port': port,
'tz_aware': tz_aware,
'document_class': dict,
'directConnection': True,
**kwargs,
}
if user is not None and password is not None and not db.startswith('test_'):
connection_params.update({'username': user, 'password': password, 'authSource': db})
mongo_conn = pymongo.MongoClient(**connection_params)
if proxy:
mongo_conn = MongoProxy(
mongo_conn,
mongo_conn[db],
wait_time=retry_wait_time
)
# If credentials were provided, authenticate the user.
if user is not None and password is not None:
mongo_conn.authenticate(user, password, source=auth_source)
return mongo_conn
return mongo_conn
return mongo_conn[db]
def create_collection_index(