Files
plugins/openedx-tenant-api/test_e2e/cleanup.py
DamarKusumo 2b7027e37d Add openedx-tenant-api plugin
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-10 08:20:57 +07:00

575 lines
22 KiB
Python

#!/usr/bin/env python3
"""
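Cleanup script to delete ALL tenants, admin users, and courses (plus the
Meilisearch course index) before running e2e tests.

Usage:
    python cleanup.py                      # Full cleanup (tenants + admins + courses + search index)
    python cleanup.py --tenants-only       # Only delete tenants
    python cleanup.py --admins-only        # Only delete admin users
    python cleanup.py --courses-only       # Only delete courses (also clears the Meilisearch course index)
    python cleanup.py --search-only        # Only clear the Meilisearch course index
    python cleanup.py --course-filter ORG  # Restrict course deletion to courses of org ORG
    python cleanup.py --dry-run            # Show what would be deleted without deleting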
"""
import argparse
import logging
import os
import subprocess
import sys
import time
from pathlib import Path
import requests
import ast
from dotenv import load_dotenv
# Configure logging once for the whole script: INFO and above, timestamped.
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
logger = logging.getLogger(__name__)
# Load .env from this directory
TEST_E2E_DIR = Path(__file__).parent
load_dotenv(TEST_E2E_DIR / ".env")
# Base URL of the LMS whose tenant REST API is called by the tenant operations below.
LMS_BASE_URL = "http://local.openedx.io:8000"
# Credentials returned by get_auth() and passed as the `auth` argument of the HTTP requests.
# NOTE(review): these values are hard-coded; the .env file loaded above does not
# override them (only TUTOR_CMD is read from the environment in this file).
API_ADMIN_USERNAME = "admin"
API_ADMIN_PASSWORD = "admin123"
# Tutor command prefix - adjust if tutor binary is not in PATH or project dir differs
# NOTE(review): _TUTOR_RAW does not appear to be referenced elsewhere in this file;
# the command actually used is resolved by _get_tutor_cmd() — confirm before relying on it.
_TUTOR_RAW = "tutor dev"
def _get_tutor_cmd():
    """
    Resolve the tutor executable as a subprocess argument list.

    Returns e.g. ['C:\\...\\tutor.exe', 'dev'] or ['tutor', 'dev'].

    Checks (in order):
    1. TUTOR_CMD env var (e.g. 'tutor dev' or 'C:\\...\\tutor.exe dev'),
       split on whitespace into separate arguments
    2. .venv/Scripts/tutor.exe under the directory three levels above this file
    3. .venv/Scripts/tutor.exe relative to CWD
    4. Falls back to 'tutor' (relies on PATH)

    The mode 'dev' is appended unless the command already has more than one
    token and ends with 'dev' or 'local'.
    """
    mode = "dev"
    env_val = os.getenv("TUTOR_CMD", "").strip()
    if env_val:
        # The env var is a full command line, so split it into separate args.
        tokens = env_val.split()
    else:
        project_root = Path(__file__).resolve().parent.parent.parent
        candidates = (
            project_root / ".venv" / "Scripts" / "tutor.exe",
            Path.cwd() / ".venv" / "Scripts" / "tutor.exe",
        )
        found = next((path for path in candidates if path.exists()), None)
        # A discovered path is ONE argument: never whitespace-split it, or a
        # location such as 'C:\\Program Files\\...' is torn into bogus tokens.
        tokens = [str(found)] if found is not None else ["tutor"]

    # Ensure a mode is always present.
    if len(tokens) == 1 or tokens[-1] not in ("dev", "local"):
        tokens.append(mode)
    return tokens


# Resolved once at import time and reused by every tutor invocation below.
TUTOR_CMD = _get_tutor_cmd()
def get_auth():
    """Return the (username, password) tuple passed as `auth` to the tenant API requests."""
    credentials = (API_ADMIN_USERNAME, API_ADMIN_PASSWORD)
    return credentials
# ---------------------------------------------------------------------------
# Tenant operations (via REST API)
# ---------------------------------------------------------------------------
def list_tenants():
    """
    Fetch all tenants from the tenant API.

    Returns the value stored under the "tenants" key of the JSON response,
    or an empty list when that key is absent. An HTTP error status raises
    via ``raise_for_status()``.
    """
    response = requests.get(
        f"{LMS_BASE_URL}/api/tenant/v1/list",
        auth=get_auth(),
        timeout=30,
    )
    response.raise_for_status()
    return response.json().get("tenants", [])
def delete_tenant(tenant_name: str):
    """
    Issue a DELETE for one tenant against the tenant API.

    Returns the decoded JSON body of the response. An HTTP error status
    raises via ``raise_for_status()``.
    """
    response = requests.delete(
        f"{LMS_BASE_URL}/api/tenant/v1/delete/{tenant_name}",
        auth=get_auth(),
        timeout=30,
    )
    response.raise_for_status()
    payload = response.json()
    return payload
def cleanup_tenants(dry_run: bool = False) -> list:
    """
    Remove every tenant reported by the tenant API.

    With ``dry_run`` set, only logs what would be removed. A failure on one
    tenant is logged and the loop continues; a failure while listing is
    logged and ends the run. Returns the names that were actually deleted.
    """
    removed = []
    try:
        tenants = list_tenants()
        logger.info(f"Found {len(tenants)} tenant(s): {[t['tenant_name'] for t in tenants]}")
        for entry in tenants:
            name = entry["tenant_name"]
            if dry_run:
                logger.info(f"[DRY RUN] Would delete tenant: {name}")
                continue
            logger.info(f"Deleting tenant: {name}")
            try:
                delete_tenant(name)
                removed.append(name)
                logger.info(f" Deleted tenant: {name}")
            except Exception as e:
                # Best effort: keep going with the remaining tenants.
                logger.warning(f" Failed to delete tenant '{name}': {e}")
    except Exception as e:
        logger.warning(f"Could not list/delete tenants: {e}")
    return removed
# ---------------------------------------------------------------------------
# Admin user cleanup (via LMS Django shell)
# ---------------------------------------------------------------------------
def list_admins_via_django():
    """
    List admin usernames (pattern: {tenant_name}_admin) via LMS Django shell.

    Runs a one-line ORM query inside the LMS container that prints the
    ``repr`` of the username list, then scans the command output for that
    list. Returns a list of usernames; returns an empty list when the
    command fails or no list can be parsed.
    """
    cmd = TUTOR_CMD + [
        "exec", "-T", "lms",
        "python", "manage.py", "lms", "shell", "-c",
        (
            "from django.contrib.auth import get_user_model; "
            "User = get_user_model(); "
            "admins = list(User.objects.filter(username__endswith='_admin').values_list('username', flat=True)); "
            "print(repr(admins))"
        ),
    ]
    try:
        result = run_tutor_command(cmd, timeout=60)
        if result.returncode != 0:
            stderr_lower = result.stderr.lower()
            if "is not running" in stderr_lower or ("service" in stderr_lower and "not" in stderr_lower):
                logger.error(
                    "LMS service is not running. Start it first:\n"
                    f" {' '.join(TUTOR_CMD)} start\n"
                    "Or clean only tenants/courses without admin cleanup:\n"
                    " python cleanup.py --tenants-only"
                )
            else:
                logger.warning(f"Failed to list admins (rc={result.returncode}): {result.stderr}")
            return []
        # Split the two streams separately so the last stdout line can never
        # be fused with the first stderr line.
        lines = result.stdout.splitlines() + result.stderr.splitlines()
        for line in lines:
            candidate = line.strip()
            if not (candidate.startswith("[") and candidate.endswith("]")):
                continue
            try:
                parsed = ast.literal_eval(candidate)
            except (ValueError, SyntaxError, TypeError):
                # A log line such as "[WARNING] ... [x]" only looks like a
                # list; skip it instead of abandoning the whole parse.
                continue
            if isinstance(parsed, list) and all(isinstance(name, str) for name in parsed):
                return parsed
        return []
    except Exception as e:
        logger.warning(f"Could not list admins: {e}")
        return []
def delete_admin_via_tutor(username: str, timeout: int = 60):
    """
    Delete an admin user via LMS Django shell using direct SQL.

    Uses SET FOREIGN_KEY_CHECKS=0 to bypass FK constraints from Open edX
    historical tables. The shell snippet prints a ``DELETED_<username>``
    marker after the DELETE statement has run; a DELETE that matches no row
    still reaches the marker, so "user already gone" counts as success.

    Returns True when the command exits 0 and the marker is present in its
    output, False on any failure or timeout.
    """
    # Embed the username as a Python literal via repr(): the value travels
    # inside Python source passed to `shell -c`, so SQL-style quote doubling
    # would silently alter it ('a''b' is 'ab' in Python) and allow code
    # injection. The SQL itself stays parameterised (%s).
    user_literal = repr(username)
    cmd = TUTOR_CMD + [
        "exec", "-T", "lms",
        "python", "manage.py", "lms", "shell", "-c",
        (
            "from django.db import connection; "
            "cursor = connection.cursor(); "
            "cursor.execute('SET FOREIGN_KEY_CHECKS = 0'); "
            "cursor.execute('DELETE FROM auth_user WHERE username = %s', [" + user_literal + "]); "
            "cursor.execute('SET FOREIGN_KEY_CHECKS = 1'); "
            "print('DELETED_' + " + user_literal + ")"
        ),
    ]
    try:
        result = run_tutor_command(cmd, timeout=timeout)
        output = result.stdout + result.stderr
        marker = f"DELETED_{username}"
        if result.returncode == 0 and marker in output:
            return True
        # No marker means the snippet never completed (service down, SQL
        # error, ...). Report it as a failure rather than "already deleted".
        logger.warning(
            f" Failed to delete admin '{username}' (rc={result.returncode}): "
            f"{result.stderr.strip()[:200]}"
        )
        return False
    except subprocess.TimeoutExpired:
        logger.warning(f" Timeout deleting admin '{username}'")
        return False
    except Exception as e:
        logger.warning(f" Error deleting admin '{username}': {e}")
        return False
def cleanup_admins(dry_run: bool = False) -> list:
    """
    Remove every user whose username ends with ``_admin``.

    Usernames come from ``list_admins_via_django()``. With ``dry_run`` set,
    only logs what would be removed. Returns the usernames for which
    ``delete_admin_via_tutor()`` reported success.
    """
    removed = []
    usernames = list_admins_via_django()
    logger.info(f"Found {len(usernames)} admin user(s): {usernames}")
    for username in usernames:
        if dry_run:
            logger.info(f"[DRY RUN] Would delete admin: {username}")
            continue
        logger.info(f"Deleting admin: {username}")
        if not delete_admin_via_tutor(username):
            continue
        removed.append(username)
        logger.info(f" Deleted admin: {username}")
    return removed
# ---------------------------------------------------------------------------
# Shared helper: run a tutor command as a subprocess
# ---------------------------------------------------------------------------
def run_tutor_command(args: list, timeout: int = 120):
    """
    Execute ``args`` as a subprocess and hand back the completed process.

    stdout and stderr are captured as text. ``subprocess.TimeoutExpired``
    propagates when the command runs longer than ``timeout`` seconds.
    """
    return subprocess.run(args, capture_output=True, text=True, timeout=timeout)
# ---------------------------------------------------------------------------
# Meilisearch course index cleanup (course discovery / search API)
# ---------------------------------------------------------------------------
def get_meilisearch_info():
    """
    Get Meilisearch host URL and master key from the LMS container.

    The master key is read from the Django setting MEILISEARCH_MASTER_KEY;
    the host is read from the container's MEILISEARCH_HOST environment
    variable and defaults to http://127.0.0.1:7700 when that is unset or
    cannot be read.

    Returns (host_url, master_key). Returns (None, None) when the key
    cannot be obtained.
    """
    key_cmd = TUTOR_CMD + [
        "exec", "lms", "python", "-c",
        "from django.conf import settings; k=getattr(settings,'MEILISEARCH_MASTER_KEY',None); print(k or '')",
    ]
    try:
        key_result = run_tutor_command(key_cmd, timeout=30)
    except Exception as e:
        logger.warning(f"Could not get Meilisearch key: {e}")
        return None, None
    if key_result.returncode != 0:
        # Without this check, any noise on stdout of a failed command would
        # be returned as if it were the key.
        logger.warning(
            f"Could not get Meilisearch key (rc={key_result.returncode}): "
            f"{key_result.stderr.strip()[:200]}"
        )
        return None, None
    # Last non-empty line is the key (after startup noise)
    key_lines = [line.strip() for line in key_result.stdout.splitlines() if line.strip()]
    if not key_lines:
        return None, None
    master_key = key_lines[-1]

    host = "http://127.0.0.1:7700"  # default: accessible from host machine
    host_cmd = TUTOR_CMD + [
        "exec", "lms", "python", "-c",
        "import os; print(os.environ.get('MEILISEARCH_HOST','notset'))",
    ]
    try:
        host_result = run_tutor_command(host_cmd, timeout=15)
        if host_result.returncode == 0:
            # Ignore bracket-prefixed lines, which are treated as log noise.
            host_lines = [
                line.strip()
                for line in host_result.stdout.splitlines()
                if line.strip() and not line.strip().startswith("[")
            ]
            if host_lines and host_lines[-1] != "notset":
                host = host_lines[-1]
    except Exception as e:
        # Best effort: fall back to the default host, but leave a trace.
        logger.debug(f"Could not read MEILISEARCH_HOST, using default {host}: {e}")
    return host, master_key
def _meilisearch_request(url: str, headers: dict, method: str = "GET", timeout: int = 15):
    """Send one HTTP request to Meilisearch and return the decoded JSON body."""
    import json
    import urllib.request

    req = urllib.request.Request(url, headers=headers, method=method)
    with urllib.request.urlopen(req, timeout=timeout) as resp:
        return json.loads(resp.read())


def cleanup_meilisearch(dry_run: bool = False) -> int:
    """
    Delete all documents from Meilisearch indexes whose uid contains "course".

    This keeps deleted courses from still appearing on the /courses page
    after they have been removed from the database (modulestore /
    CourseOverview).

    Deletion is submitted as a Meilisearch task per index; this function
    does not wait for the task to finish. With ``dry_run`` set, nothing is
    deleted and the count reflects what would be removed.

    Returns the number of documents reported by the index stats at the time
    of the call (0 when credentials or the index list are unavailable).
    """
    host, master_key = get_meilisearch_info()
    if not host or not master_key:
        logger.warning("Meilisearch credentials not found, skipping Meilisearch cleanup")
        return 0

    headers = {
        "Authorization": f"Bearer {master_key}",
        "Content-Type": "application/json",
    }
    base_url = host.rstrip("/")

    # List all indexes to find the course indexes. Ask for a large page so
    # indexes beyond the server's default page size are not missed.
    try:
        data = _meilisearch_request(f"{base_url}/indexes?limit=1000", headers)
    except Exception as e:
        logger.warning(f"Could not list Meilisearch indexes: {e}")
        return 0

    # The listing is normally an object with a "results" array; tolerate a
    # bare list so an unexpected shape does not crash the cleanup.
    if isinstance(data, dict):
        indexes = data.get("results", [])
    elif isinstance(data, list):
        indexes = data
    else:
        indexes = []
    course_indexes = [
        idx["uid"]
        for idx in indexes
        if isinstance(idx, dict) and "course" in str(idx.get("uid", "")).lower()
    ]

    deleted_total = 0
    for idx_uid in course_indexes:
        try:
            # Get current document count
            stats = _meilisearch_request(f"{base_url}/indexes/{idx_uid}/stats", headers)
            doc_count = stats.get("numberOfDocuments", 0)
            if dry_run:
                logger.info(f"[DRY RUN] Would delete {doc_count} doc(s) from Meilisearch index: {idx_uid}")
                deleted_total += doc_count
                continue
            # Delete all documents in the index using DELETE /indexes/{uid}/documents
            task = _meilisearch_request(
                f"{base_url}/indexes/{idx_uid}/documents", headers, method="DELETE"
            )
            task_uid = task.get("taskUid")
            logger.info(f" Meilisearch delete task submitted: {idx_uid} (taskUid={task_uid})")
            deleted_total += doc_count
        except Exception as e:
            logger.warning(f" Could not delete from Meilisearch index '{idx_uid}': {e}")
    return deleted_total


# ---------------------------------------------------------------------------
# Course operations (via CMS Django shell / modulestore)
# ---------------------------------------------------------------------------
def delete_course_via_tutor(course_key: str, timeout: int = 120):
    """
    Delete a course via Django shell using the modulestore API (CMS container).

    Uses CourseKey.from_string + store.delete_course() to bypass interactive
    prompts. Also explicitly deletes CourseOverview records to avoid orphaned
    cache rows that cause courses to still appear in Studio even after
    modulestore deletion.

    Returns True when the command exits 0 and prints the ``DELETED: <key>``
    marker, or when stderr indicates the course does not exist; False on any
    other failure or on timeout.
    """
    # Embed the key as a Python literal via repr(): the snippet is Python
    # source passed straight to `shell -c` (no shell involved), so
    # shell-style quote escaping would produce broken or injectable code.
    key_literal = repr(course_key)
    cmd = TUTOR_CMD + [
        "exec", "-T", "cms",
        "python", "manage.py", "cms", "shell", "-c",
        (
            "from xmodule.modulestore.django import modulestore; "
            "from opaque_keys.edx.keys import CourseKey; "
            "from openedx.core.djangoapps.content.course_overviews.models import CourseOverview; "
            "store = modulestore(); "
            "key = CourseKey.from_string(" + key_literal + "); "
            "store.delete_course(key, None); "
            # Also explicitly delete CourseOverview to avoid orphaned cache rows
            # that cause courses to still appear in Studio UI after modulestore deletion
            "CourseOverview.objects.filter(id=key).delete(); "
            "print('DELETED: ' + str(key))"
        ),
    ]
    try:
        result = run_tutor_command(cmd, timeout=timeout)
        output = result.stdout + result.stderr
        if result.returncode == 0 and f"DELETED: {course_key}" in output:
            return True
        err_lower = result.stderr.lower()
        # NOTE(review): these substrings are matched against all of stderr,
        # so unrelated log noise could be read as "already deleted".
        if "not found" in err_lower or "does not exist" in err_lower or "does not have" in err_lower:
            logger.info(f" Course '{course_key}' not found (already deleted)")
            return True
        logger.warning(f" Failed to delete '{course_key}': {result.stderr.strip()[:200]}")
        return False
    except subprocess.TimeoutExpired:
        logger.warning(f" Timeout deleting course '{course_key}'")
        return False
    except Exception as e:
        logger.warning(f" Error deleting course '{course_key}': {e}")
        return False
def list_courses_via_django():
    """
    Collect course keys by querying CourseOverview through the LMS Django shell.

    Returns the output lines that start with 'course-v1:' (e.g.
    'course-v1:Org+Num+Run'); returns an empty list when the command fails
    or raises.
    """
    orm_snippet = (
        "from openedx.core.djangoapps.content.course_overviews.models import CourseOverview; "
        "print('\\n'.join([str(c.id) for c in CourseOverview.objects.all()]))"
    )
    cmd = TUTOR_CMD + [
        "exec", "lms",
        "python", "manage.py", "lms",
        "shell", "-c",
        orm_snippet,
    ]
    try:
        result = run_tutor_command(cmd, timeout=60)
        # Keys are searched in both streams, since Django logs to stderr.
        combined = result.stdout + result.stderr
        if result.returncode == 0:
            # Drop noise lines (warnings, info, ...) and keep only course keys.
            stripped_lines = (raw.strip() for raw in combined.strip().split("\n"))
            return [entry for entry in stripped_lines if entry.startswith("course-v1:")]

        stderr_lower = result.stderr.lower()
        service_down = "is not running" in stderr_lower or (
            "service" in stderr_lower and "not" in stderr_lower
        )
        if service_down:
            logger.error(
                "LMS service is not running. Start it first:\n"
                f" {' '.join(TUTOR_CMD)} start\n"
                "Or clean only tenants without course cleanup:\n"
                " python cleanup.py --tenants-only"
            )
        else:
            logger.warning(f"Failed to list courses (rc={result.returncode}): {result.stderr}")
        return []
    except Exception as e:
        logger.warning(f"Could not list courses: {e}")
        return []
def cleanup_courses(dry_run: bool = False, course_filter: str = None) -> list:
    """
    Remove every course returned by ``list_courses_via_django()``.

    When ``course_filter`` is given, only keys starting with
    'course-v1:<course_filter>+' are considered. With ``dry_run`` set, only
    logs what would be removed. Returns the course keys for which
    ``delete_course_via_tutor()`` reported success.
    """
    removed = []
    all_courses = list_courses_via_django()
    logger.info(f"Found {len(all_courses)} course(s): {all_courses}")
    for course_key in all_courses:
        # Optional org filter (e.g. only delete courses of org 'course').
        if course_filter and not course_key.startswith(f"course-v1:{course_filter}+"):
            logger.info(f" Skipping filtered course: {course_key}")
            continue
        if dry_run:
            logger.info(f"[DRY RUN] Would delete course: {course_key}")
            continue
        logger.info(f"Deleting course: {course_key}")
        if delete_course_via_tutor(course_key):
            removed.append(course_key)
            logger.info(f" Deleted course: {course_key}")
        # Brief pause between deletions so the system is not overwhelmed.
        time.sleep(1)
    return removed
# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------
def main():
    """
    Parse command-line flags and run the selected cleanup steps in order:
    tenants, admin users, courses, Meilisearch course index.

    With no ``--*-only`` flag, all four steps run. The ``--*-only`` flags
    are additive, so e.g. ``--tenants-only --admins-only`` runs both of
    those steps. ``--courses-only`` also cleans the Meilisearch index so
    deleted courses disappear from search results.
    """
    parser = argparse.ArgumentParser(description="Clean up all tenants, admins, and courses before e2e tests.")
    parser.add_argument("--tenants-only", action="store_true", help="Only delete tenants")
    parser.add_argument(
        "--courses-only",
        action="store_true",
        help="Only delete courses (also cleans the Meilisearch course index)",
    )
    parser.add_argument("--admins-only", action="store_true", help="Only delete admin users")
    parser.add_argument("--search-only", action="store_true", help="Only clean Meilisearch search index")
    parser.add_argument("--dry-run", action="store_true", help="Show what would be deleted without deleting")
    parser.add_argument("--course-filter", default=None, help="Only delete courses with this org (e.g. 'course')")
    args = parser.parse_args()

    # Default (no --*-only flag): run all four steps. Otherwise run exactly
    # the steps that were asked for; combining flags used to cancel them out.
    run_everything = not (
        args.tenants_only or args.admins_only or args.courses_only or args.search_only
    )
    do_tenants = run_everything or args.tenants_only
    do_admins = run_everything or args.admins_only
    do_courses = run_everything or args.courses_only
    # Course deletion is paired with search-index cleanup.
    do_search = run_everything or args.search_only or args.courses_only

    mode = "DRY RUN" if args.dry_run else "LIVE"
    logger.info(f"=== Cleanup [{mode}] ===")
    if do_tenants:
        logger.info("--- Cleaning up tenants ---")
        cleanup_tenants(dry_run=args.dry_run)
    if do_admins:
        logger.info("--- Cleaning up admin users ---")
        cleanup_admins(dry_run=args.dry_run)
    if do_courses:
        logger.info("--- Cleaning up courses (database) ---")
        cleanup_courses(dry_run=args.dry_run, course_filter=args.course_filter)
    if do_search:
        logger.info("--- Cleaning up Meilisearch course index ---")
        count = cleanup_meilisearch(dry_run=args.dry_run)
        logger.info(f" Meilisearch: {count} document(s) cleaned")
    if args.dry_run:
        logger.info("=== Dry run complete — no actual changes made ===")
    else:
        logger.info("=== Cleanup complete ===")


if __name__ == "__main__":
    main()