From 8d070e64220f3407d8c569e3338eabd69a52b3d5 Mon Sep 17 00:00:00 2001
From: Don Mitchell
Date: Mon, 18 Aug 2014 08:31:56 -0400
Subject: [PATCH] Rewrite course url matcher to not be greedy

LMS-11181
---
 lms/envs/common.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/lms/envs/common.py b/lms/envs/common.py
index dbca00c584..7d3611e665 100644
--- a/lms/envs/common.py
+++ b/lms/envs/common.py
@@ -410,8 +410,12 @@ LMS_MIGRATION_ALLOWED_IPS = []
 
 # These are standard regexes for pulling out info like course_ids, usage_ids, etc.
 # They are used so that URLs with deprecated-format strings still work.
-COURSE_ID_PATTERN = r'(?P<course_id>(?:[^/]+/[^/]+/[^/]+)|(?:[^/]+))'
-COURSE_KEY_PATTERN = r'(?P<course_key_string>(?:[^/]+/[^/]+/[^/]+)|(?:[^/]+))'
+# Note: these intentionally greedily grab all chars up to the next slash including any pluses
+# DHM: I really wanted to ensure the separators were the same (+ or /) but all patts I tried had
+# too many inadvertent side effects :-(
+COURSE_KEY_PATTERN = r'(?P<course_key_string>[^/+]+(/|\+)[^/+]+(/|\+)[^/]+)'
+COURSE_ID_PATTERN = COURSE_KEY_PATTERN.replace('course_key_string', 'course_id')
+
 USAGE_KEY_PATTERN = r'(?P<usage_key_string>(?:i4x://?[^/]+/[^/]+/[^/]+/[^@]+(?:@[^/]+)?)|(?:[^/]+))'
 ASSET_KEY_PATTERN = r'(?P<asset_key_string>(?:/?c4x(:/)?/[^/]+/[^/]+/[^/]+/[^@]+(?:@[^/]+)?)|(?:[^/]+))'
 USAGE_ID_PATTERN = r'(?P<usage_id>(?:i4x://?[^/]+/[^/]+/[^/]+/[^@]+(?:@[^/]+)?)|(?:[^/]+))'