#!/usr/bin/env python3
"""
Cleanup script to delete ALL tenants, admin users, and courses before running e2e tests.

Usage:
    python cleanup.py                      # Full cleanup (tenants + admins + courses + search index)
    python cleanup.py --tenants-only       # Only delete tenants
    python cleanup.py --admins-only        # Only delete admin users
    python cleanup.py --courses-only       # Only delete courses (and their search index entries)
    python cleanup.py --search-only        # Only clean the Meilisearch course index
    python cleanup.py --course-filter ORG  # Restrict course deletion to one org
    python cleanup.py --dry-run            # Show what would be deleted without deleting
"""

import argparse
import ast
import json
import logging
import os
import subprocess
import sys
import time
import urllib.error
import urllib.request
from pathlib import Path
from typing import List, Optional, Tuple

import requests
from dotenv import load_dotenv

logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
logger = logging.getLogger(__name__)

# Load .env from this directory
TEST_E2E_DIR = Path(__file__).parent
load_dotenv(TEST_E2E_DIR / ".env")

LMS_BASE_URL = "http://local.openedx.io:8000"
API_ADMIN_USERNAME = "admin"
API_ADMIN_PASSWORD = "admin123"

# Tutor command prefix - adjust if tutor binary is not in PATH or project dir differs
_TUTOR_RAW = "tutor dev"

# Tutor modes accepted as the last token of the command prefix.
_TUTOR_MODES = ("dev", "local")


def _get_tutor_cmd() -> List[str]:
    """
    Resolve the tutor executable as a subprocess argument list.

    Returns e.g. ['C:\\...\\tutor.exe', 'dev'] or ['tutor', 'dev'].

    Checks (in order):
      1. TUTOR_CMD env var (e.g. 'tutor dev' or 'C:\\...\\tutor.exe dev')
      2. .venv/Scripts/tutor.exe relative to this file (project venv)
      3. .venv/Scripts/tutor.exe relative to CWD
      4. Falls back to 'tutor dev' (relies on PATH)
    """
    env_val = os.getenv("TUTOR_CMD", "").strip()
    if env_val:
        # The env var is a whitespace-separated command line ("<exe> [mode]").
        tokens = env_val.split()
    else:
        # This file is inside openedx-tenant-api/test_e2e, so the project
        # root is three levels up from the resolved file path.
        project_root = Path(__file__).resolve().parent.parent.parent
        candidates = (
            project_root / ".venv" / "Scripts" / "tutor.exe",
            Path.cwd() / ".venv" / "Scripts" / "tutor.exe",
        )
        found = next((path for path in candidates if path.exists()), None)
        # A discovered path is kept as ONE token: splitting it on whitespace
        # would break any path that contains spaces.
        tokens = [str(found)] if found is not None else ["tutor"]

    # Ensure a mode is always present; default to "dev".
    if len(tokens) == 1 or tokens[-1] not in _TUTOR_MODES:
        tokens.append("dev")
    return tokens


TUTOR_CMD = _get_tutor_cmd()


def get_auth():
    """Return the (username, password) basic-auth tuple for the tenant API."""
    return (API_ADMIN_USERNAME, API_ADMIN_PASSWORD)


# ---------------------------------------------------------------------------
# Tutor subprocess helpers
# ---------------------------------------------------------------------------

def run_tutor_command(args: list, timeout: int = 120):
    """
    Run a tutor command and return the result.

    Args:
        args: Full argument list (executable first); run without a shell.
        timeout: Seconds before subprocess.TimeoutExpired is raised.

    Returns:
        subprocess.CompletedProcess with text-mode stdout and stderr captured.
    """
    return subprocess.run(
        args,
        capture_output=True,
        text=True,
        timeout=timeout,
    )


def _tutor_exec_cmd(service: str, *args: str) -> List[str]:
    """Build a 'tutor <mode> exec -T <service> ...' argument list."""
    # -T disables pseudo-TTY allocation, which is what we want when the
    # output is captured by subprocess instead of shown in a terminal.
    return TUTOR_CMD + ["exec", "-T", service, *args]


def _django_shell_cmd(service: str, code: str) -> List[str]:
    """Build the argument list that runs *code* in the Django shell of *service*."""
    return _tutor_exec_cmd(service, "python", "manage.py", service, "shell", "-c", code)


def _lms_not_running(stderr: str) -> bool:
    """Heuristically detect a 'service is not running' failure from stderr text."""
    lowered = stderr.lower()
    return "is not running" in lowered or ("service" in lowered and "not" in lowered)


# ---------------------------------------------------------------------------
# Tenant operations (via REST API)
# ---------------------------------------------------------------------------

def list_tenants():
    """
    List all tenants via the tenant API.

    Returns the value of the 'tenants' key of the JSON response
    (an empty list when the key is absent).

    Raises:
        requests.HTTPError: on a non-2xx response.
    """
    url = f"{LMS_BASE_URL}/api/tenant/v1/list"
    resp = requests.get(url, auth=get_auth(), timeout=30)
    resp.raise_for_status()
    return resp.json().get("tenants", [])


def delete_tenant(tenant_name: str):
    """
    Delete a single tenant via the tenant API and return the decoded JSON response.

    Raises:
        requests.HTTPError: on a non-2xx response.
    """
    url = f"{LMS_BASE_URL}/api/tenant/v1/delete/{tenant_name}"
    resp = requests.delete(url, auth=get_auth(), timeout=30)
    resp.raise_for_status()
    return resp.json()


def cleanup_tenants(dry_run: bool = False) -> list:
    """
    Delete all tenants. Returns list of deleted tenant names.

    Best-effort: a failure to list tenants, or to delete an individual
    tenant, is logged as a warning and does not raise.
    """
    deleted = []
    try:
        tenants = list_tenants()
        logger.info(f"Found {len(tenants)} tenant(s): {[t['tenant_name'] for t in tenants]}")
        for tenant in tenants:
            name = tenant["tenant_name"]
            if dry_run:
                logger.info(f"[DRY RUN] Would delete tenant: {name}")
                continue
            logger.info(f"Deleting tenant: {name}")
            try:
                delete_tenant(name)
            except Exception as e:
                logger.warning(f"  Failed to delete tenant '{name}': {e}")
            else:
                deleted.append(name)
                logger.info(f"  Deleted tenant: {name}")
    except Exception as e:
        logger.warning(f"Could not list/delete tenants: {e}")
    return deleted


# ---------------------------------------------------------------------------
# Admin user cleanup (via LMS Django shell)
# ---------------------------------------------------------------------------

def list_admins_via_django():
    """
    List admin usernames (pattern: {tenant_name}_admin) via LMS Django shell.

    Returns a list of usernames; an empty list on any failure.
    """
    code = (
        "from django.contrib.auth import get_user_model; "
        "User = get_user_model(); "
        "admins = list(User.objects.filter(username__endswith='_admin').values_list('username', flat=True)); "
        "print(repr(admins))"
    )
    try:
        result = run_tutor_command(_django_shell_cmd("lms", code), timeout=60)
        if result.returncode != 0:
            if _lms_not_running(result.stderr):
                logger.error(
                    "LMS service is not running. Start it first:\n"
                    f"  {' '.join(TUTOR_CMD)} start\n"
                    "Or clean only tenants/courses without admin cleanup:\n"
                    "  python cleanup.py --tenants-only"
                )
            else:
                logger.warning(f"Failed to list admins (rc={result.returncode}): {result.stderr}")
            return []

        # The shell prints repr(list) on a line of its own, but startup noise
        # may surround it (and may itself look bracketed), so try every
        # "[...]" line and accept the first that parses to a real list.
        output = result.stdout + result.stderr
        for line in output.splitlines():
            candidate = line.strip()
            if not (candidate.startswith("[") and candidate.endswith("]")):
                continue
            try:
                parsed = ast.literal_eval(candidate)
            except (ValueError, SyntaxError):
                continue  # a bracketed log line, not our list
            if isinstance(parsed, list):
                return parsed
        return []
    except Exception as e:
        logger.warning(f"Could not list admins: {e}")
        return []


def delete_admin_via_tutor(username: str, timeout: int = 60):
    """
    Delete an admin user via LMS Django shell using direct SQL.

    Uses SET FOREIGN_KEY_CHECKS=0 to bypass FK constraints from Open edX
    historical tables.

    Args:
        username: Exact username of the row to delete from auth_user.
        timeout: Seconds allowed for the tutor command.

    Returns:
        True when the shell snippet ran to completion (this includes the case
        where no such user existed, since deleting zero rows still succeeds);
        False on any failure or timeout.
    """
    marker = f"DELETED_{username}"
    # repr() yields a valid Python string literal for any username, so the
    # value cannot break out of the generated code. The SQL itself stays
    # parameterized via %s.
    code = (
        "from django.db import connection; "
        "cursor = connection.cursor(); "
        "cursor.execute('SET FOREIGN_KEY_CHECKS = 0'); "
        f"cursor.execute('DELETE FROM auth_user WHERE username = %s', [{username!r}]); "
        "cursor.execute('SET FOREIGN_KEY_CHECKS = 1'); "
        f"print({marker!r})"
    )
    try:
        result = run_tutor_command(_django_shell_cmd("lms", code), timeout=timeout)
        output = result.stdout + result.stderr
        if result.returncode == 0 and marker in output:
            return True
        # The marker is printed even when no row matched, so reaching this
        # point means the snippet itself failed (service down, SQL error...).
        logger.warning(
            f"  Failed to delete admin '{username}' (rc={result.returncode}): "
            f"{result.stderr.strip()[:200]}"
        )
        return False
    except subprocess.TimeoutExpired:
        logger.warning(f"  Timeout deleting admin '{username}'")
        return False
    except Exception as e:
        logger.warning(f"  Error deleting admin '{username}': {e}")
        return False


def cleanup_admins(dry_run: bool = False) -> list:
    """
    Delete all admin users (username ending with _admin) created by the tenant API.

    Returns list of deleted usernames.
    """
    deleted = []
    admins = list_admins_via_django()
    logger.info(f"Found {len(admins)} admin user(s): {admins}")
    for username in admins:
        if dry_run:
            logger.info(f"[DRY RUN] Would delete admin: {username}")
            continue
        logger.info(f"Deleting admin: {username}")
        if delete_admin_via_tutor(username):
            deleted.append(username)
            logger.info(f"  Deleted admin: {username}")
    return deleted


# ---------------------------------------------------------------------------
# Meilisearch course index cleanup (course discovery / search API)
# ---------------------------------------------------------------------------

def get_meilisearch_info() -> Tuple[Optional[str], Optional[str]]:
    """
    Get Meilisearch host URL and master key from the LMS container.

    Returns (host_url, master_key). host_url defaults to http://127.0.0.1:7700
    if not configured. Returns (None, None) on failure.
    """
    # Master key: read from Django settings inside the LMS container.
    key_cmd = _tutor_exec_cmd(
        "lms", "python", "-c",
        "from django.conf import settings; k=getattr(settings,'MEILISEARCH_MASTER_KEY',None); print(k or '')",
    )
    try:
        result = run_tutor_command(key_cmd, timeout=30)
    except Exception as e:
        logger.warning(f"Could not get Meilisearch key: {e}")
        return None, None
    if result.returncode != 0:
        # Do not mistake output of a failed command for a key.
        logger.warning(
            f"Could not get Meilisearch key (rc={result.returncode}): {result.stderr.strip()[:200]}"
        )
        return None, None
    # Last non-empty line is the key (after startup noise)
    key_lines = [line.strip() for line in result.stdout.splitlines() if line.strip()]
    master_key = key_lines[-1] if key_lines else None
    if not master_key:
        return None, None

    # Host: MEILISEARCH_HOST env var inside the container, else the default
    # address that is reachable from the host machine.
    host = "http://127.0.0.1:7700"
    host_cmd = _tutor_exec_cmd(
        "lms", "python", "-c",
        "import os; print(os.environ.get('MEILISEARCH_HOST','notset'))",
    )
    try:
        host_result = run_tutor_command(host_cmd, timeout=15)
        host_lines = [
            line.strip()
            for line in host_result.stdout.splitlines()
            if line.strip() and not line.startswith("[")
        ]
        if host_lines and host_lines[-1] != "notset":
            host = host_lines[-1]
    except Exception as e:
        # Best-effort lookup: keep the default host, but leave a trace.
        logger.debug(f"Could not resolve MEILISEARCH_HOST, using default {host}: {e}")
    return host, master_key


def _meili_request(url: str, headers: dict, method: str = "GET"):
    """Send one request to Meilisearch and return the decoded JSON body."""
    req = urllib.request.Request(url, headers=headers, method=method)
    with urllib.request.urlopen(req, timeout=15) as resp:
        return json.loads(resp.read())


def cleanup_meilisearch(dry_run: bool = False) -> int:
    """
    Delete all documents from Meilisearch course discovery indexes.

    This ensures courses no longer appear on the /courses page once they are
    removed from the database (modulestore / CourseOverview).

    Every index whose uid contains "course" (case-insensitive) is cleared.
    Deletion is submitted as a Meilisearch task; this function does not wait
    for the task to finish.

    Returns:
        Number of documents deleted (in dry-run mode: that would be deleted),
        taken from each index's reported document count.
    """
    host, master_key = get_meilisearch_info()
    if not host or not master_key:
        logger.warning("Meilisearch credentials not found, skipping Meilisearch cleanup")
        return 0

    headers = {
        "Authorization": f"Bearer {master_key}",
        "Content-Type": "application/json",
    }

    # The exact index name depends on the deployment prefix, so list all
    # indexes and pick the course-related ones.
    try:
        data = _meili_request(f"{host}/indexes", headers)
    except Exception as e:
        logger.warning(f"Could not list Meilisearch indexes: {e}")
        return 0

    course_indexes = [
        idx["uid"] for idx in data.get("results", []) if "course" in idx["uid"].lower()
    ]

    deleted_total = 0
    for idx_uid in course_indexes:
        try:
            stats = _meili_request(f"{host}/indexes/{idx_uid}/stats", headers)
            doc_count = stats.get("numberOfDocuments", 0)

            if dry_run:
                logger.info(f"[DRY RUN] Would delete {doc_count} doc(s) from Meilisearch index: {idx_uid}")
                deleted_total += doc_count
                continue

            # Delete all documents in the index using DELETE /indexes/{uid}/documents
            task = _meili_request(f"{host}/indexes/{idx_uid}/documents", headers, method="DELETE")
            logger.info(f"  Meilisearch delete task submitted: {idx_uid} (taskUid={task.get('taskUid')})")
            deleted_total += doc_count
        except Exception as e:
            logger.warning(f"  Could not delete from Meilisearch index '{idx_uid}': {e}")
    return deleted_total


# ---------------------------------------------------------------------------
# Course operations (via LMS/CMS Django shell / modulestore)
# ---------------------------------------------------------------------------

def delete_course_via_tutor(course_key: str, timeout: int = 120):
    """
    Delete a course via Django shell using the modulestore API (CMS container).

    Uses CourseKey.from_string + store.delete_course() to bypass interactive
    prompts. Also explicitly deletes CourseOverview records to avoid orphaned
    cache rows that cause courses to still appear in Studio even after
    modulestore deletion.

    Args:
        course_key: Course key string such as 'course-v1:Org+Num+Run'.
        timeout: Seconds allowed for the tutor command.

    Returns:
        True if the course was deleted or reported as not existing; False on
        any other failure or timeout.
    """
    # repr() produces a valid Python string literal for the generated code.
    # The command is passed as an argv list (no shell), so shell-style
    # quoting would be wrong here.
    code = (
        "from xmodule.modulestore.django import modulestore; "
        "from opaque_keys.edx.keys import CourseKey; "
        "from openedx.core.djangoapps.content.course_overviews.models import CourseOverview; "
        "store = modulestore(); "
        f"key = CourseKey.from_string({course_key!r}); "
        "store.delete_course(key, None); "
        "CourseOverview.objects.filter(id=key).delete(); "
        "print('DELETED: ' + str(key))"
    )
    try:
        result = run_tutor_command(_django_shell_cmd("cms", code), timeout=timeout)
        output = result.stdout + result.stderr
        if result.returncode == 0 and f"DELETED: {course_key}" in output:
            return True
        err_lower = result.stderr.lower()
        if "not found" in err_lower or "does not exist" in err_lower or "does not have" in err_lower:
            logger.info(f"  Course '{course_key}' not found (already deleted)")
            return True
        logger.warning(f"  Failed to delete '{course_key}': {result.stderr.strip()[:200]}")
        return False
    except subprocess.TimeoutExpired:
        logger.warning(f"  Timeout deleting course '{course_key}'")
        return False
    except Exception as e:
        logger.warning(f"  Error deleting course '{course_key}': {e}")
        return False


def list_courses_via_django():
    """
    List all course keys via Django ORM using tutor exec (LMS container).

    Returns a list of course_key strings like 'course-v1:Org+Num+Run';
    an empty list on any failure.
    """
    code = (
        "from openedx.core.djangoapps.content.course_overviews.models import CourseOverview; "
        "print('\\n'.join([str(c.id) for c in CourseOverview.objects.all()]))"
    )
    try:
        result = run_tutor_command(_django_shell_cmd("lms", code), timeout=60)
        if result.returncode != 0:
            if _lms_not_running(result.stderr):
                logger.error(
                    "LMS service is not running. Start it first:\n"
                    f"  {' '.join(TUTOR_CMD)} start\n"
                    "Or clean only tenants without course cleanup:\n"
                    "  python cleanup.py --tenants-only"
                )
            else:
                logger.warning(f"Failed to list courses (rc={result.returncode}): {result.stderr}")
            return []
        # Course data may be in stdout or stderr (Django logs to stderr).
        # Filter out noise lines (warnings, info, etc.) and keep only
        # lines that start with "course-v1:".
        output = result.stdout + result.stderr
        return [
            line.strip()
            for line in output.splitlines()
            if line.strip().startswith("course-v1:")
        ]
    except Exception as e:
        logger.warning(f"Could not list courses: {e}")
        return []


def cleanup_courses(dry_run: bool = False, course_filter: Optional[str] = None) -> list:
    """
    Delete all courses (optionally filtered by org prefix).

    Args:
        dry_run: When True, only log what would be deleted.
        course_filter: When set, only keys starting with
            'course-v1:<course_filter>+' are deleted.

    Returns list of deleted course keys.
    """
    deleted = []
    courses = list_courses_via_django()
    logger.info(f"Found {len(courses)} course(s): {courses}")
    for course_key in courses:
        # Optional: filter by org (e.g. only delete 'course' org courses)
        if course_filter and not course_key.startswith(f"course-v1:{course_filter}+"):
            logger.info(f"  Skipping filtered course: {course_key}")
            continue
        if dry_run:
            logger.info(f"[DRY RUN] Would delete course: {course_key}")
            continue
        logger.info(f"Deleting course: {course_key}")
        if delete_course_via_tutor(course_key):
            deleted.append(course_key)
            logger.info(f"  Deleted course: {course_key}")
        # Small delay to avoid overwhelming the system
        time.sleep(1)
    return deleted


# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------

def main():
    """Parse command-line flags and run the selected cleanup steps in order."""
    parser = argparse.ArgumentParser(description="Clean up all tenants, admins, and courses before e2e tests.")
    parser.add_argument("--tenants-only", action="store_true", help="Only delete tenants")
    parser.add_argument("--courses-only", action="store_true", help="Only delete courses")
    parser.add_argument("--admins-only", action="store_true", help="Only delete admin users")
    parser.add_argument("--search-only", action="store_true", help="Only clean Meilisearch search index")
    parser.add_argument("--dry-run", action="store_true", help="Show what would be deleted without deleting")
    parser.add_argument("--course-filter", default=None, help="Only delete courses with this org (e.g. 'course')")
    args = parser.parse_args()

    # Default (no --*-only flag): run every step. Each step is switched off
    # by the "only" flags of the other steps. The search index step is NOT
    # switched off by --courses-only, so that flag also clears the index.
    do_tenants = not args.courses_only and not args.admins_only and not args.search_only
    do_admins = not args.tenants_only and not args.courses_only and not args.search_only
    do_courses = not args.tenants_only and not args.admins_only and not args.search_only
    do_search = not args.tenants_only and not args.admins_only

    mode = "DRY RUN" if args.dry_run else "LIVE"
    logger.info(f"=== Cleanup [{mode}] ===")

    if do_tenants:
        logger.info("--- Cleaning up tenants ---")
        cleanup_tenants(dry_run=args.dry_run)

    if do_admins:
        logger.info("--- Cleaning up admin users ---")
        cleanup_admins(dry_run=args.dry_run)

    if do_courses:
        logger.info("--- Cleaning up courses (database) ---")
        cleanup_courses(dry_run=args.dry_run, course_filter=args.course_filter)

    if do_search:
        logger.info("--- Cleaning up Meilisearch course index ---")
        count = cleanup_meilisearch(dry_run=args.dry_run)
        logger.info(f"  Meilisearch: {count} document(s) cleaned")

    if args.dry_run:
        logger.info("=== Dry run complete — no actual changes made ===")
    else:
        logger.info("=== Cleanup complete ===")


if __name__ == "__main__":
    main()