Files
edx-platform/common/djangoapps/student/management/commands/backfill_is_disabled.py
Eemaan Amir 8c8a567dd0 feat: added batching to managemnet command to avoid queueing errors (#36720)
* feat: added batching to managemnet command to avoid queueing errors

* fix: refactored code to fix pylint errors

* fix: fixed quality check errors

* fix: fixed quality check errors
2025-05-15 17:02:37 +05:00

153 lines
5.5 KiB
Python

"""
Backfill the is_disabled attribute for existing users in Segment and Braze.
This management command identifies users with unusable passwords (starting with
UNUSABLE_PASSWORD_PREFIX) and syncs the is_disabled=true attribute to their Segment
profiles using the segment.identify() function. It processes users in
batches to minimize memory usage and supports a dry-run mode for testing.
"""
import logging
import time
from django.core.management.base import BaseCommand
from django.contrib.auth import get_user_model
from django.contrib.auth.hashers import UNUSABLE_PASSWORD_PREFIX
from django.db import DatabaseError
from common.djangoapps.track import segment
import requests
LOGGER = logging.getLogger(__name__)
User = get_user_model()
class Command(BaseCommand):
"""
Backfill is_disabled attribute for users with unusable passwords in Segment.
"""
help = 'Backfill is_disabled attribute for existing disabled users in Segment'
def add_arguments(self, parser):
parser.add_argument(
'--batch-size',
type=int,
default=9000,
help='Number of users to process per batch'
)
parser.add_argument(
'--dry-run',
action='store_true',
help='Simulate the back fill without calling Segment'
)
parser.add_argument(
'--retry-limit',
type=int, default=3,
help='Retry attempts for failed API calls'
)
def _process_user(self, user_id, batch_number, dry_run):
"""Process a single user, logging success or failure."""
if dry_run:
LOGGER.info(
f"[Dry Run] Would update user {user_id} with is_disabled=true "
f"in batch {batch_number}"
)
return True
try:
segment.analytics.identify(user_id=user_id, traits={'is_disabled': True})
LOGGER.info(
f"Successfully updated user {user_id} with is_disabled=true "
f"in batch {batch_number}"
)
return True
except (ConnectionError, ValueError) as e:
LOGGER.error(
f"Failed to update user {user_id} in batch {batch_number}: "
f"{str(e)}"
)
return False
def _process_batch(self, users_batch, batch_number, dry_run, retry_limit):
"""Process a batch of users with retries."""
current_batch_size = len(users_batch)
if dry_run:
for user_id in users_batch:
self._process_user(user_id, batch_number, dry_run)
return current_batch_size
retry_count = 0
success = False
while not success and retry_count <= retry_limit:
try:
for user_id in users_batch:
self._process_user(user_id, batch_number, dry_run)
segment.analytics.flush()
LOGGER.info(f"Successfully processed batch {batch_number}")
success = True
return current_batch_size
except (requests.exceptions.RequestException, segment.analytics.errors.APIError) as e:
retry_count += 1
if retry_count <= retry_limit:
LOGGER.warning(
f"Batch {batch_number} failed (attempt {retry_count}/"
f"{retry_limit}): {str(e)}"
)
time.sleep(2 * retry_count)
else:
LOGGER.error(
f"Batch {batch_number} failed after {retry_limit} attempts, "
f"processed {{processed}} users: {str(e)}"
)
return None
def handle(self, *args, **options):
batch_size = options['batch_size']
dry_run = options['dry_run']
retry_limit = options['retry_limit']
try:
LOGGER.info(
f"Starting backfill (batch_size={batch_size}, dry_run={dry_run}, "
f"retry_limit={retry_limit})"
)
total_users = User.objects.filter(
password__startswith=UNUSABLE_PASSWORD_PREFIX
).count()
if total_users == 0:
LOGGER.info("No users to process, exiting")
return
LOGGER.info(f"Found {total_users} users that are disabled")
offset = 0
processed = 0
batch_number = 0
while offset < total_users:
batch_number += 1
users_batch = User.objects.filter(
password__startswith=UNUSABLE_PASSWORD_PREFIX
).values_list('id', flat=True)[offset:offset + batch_size]
LOGGER.info(f"Processing batch {batch_number} ({len(users_batch)} users)")
batch_result = self._process_batch(
users_batch, batch_number, dry_run, retry_limit
)
if batch_result is None:
LOGGER.error(
f"Backfill stopped, processed {processed} users"
)
return
processed += batch_result
offset += batch_size
LOGGER.info(f"Processed {processed} / {total_users} users")
LOGGER.info(
f"Completed: processed {processed} / {total_users} users in "
f"{batch_number} batches"
)
except DatabaseError as e:
LOGGER.error(f"Back fill failed: {str(e)}")