fix: protect video transcript handler from scraper (#34017)

Web scrapers do annoying stuff like visit urls they shouldn't know about and cause xblock handlers to break.
I tested this by:
Making sure video transcripts worked as normal while logged in
making sure that I got no 500s in my logs while attempting to view it logged out.
This commit is contained in:
connorhaugh
2024-01-08 18:14:03 -05:00
committed by GitHub
parent 83d104f50d
commit efaef4510e
2 changed files with 39 additions and 14 deletions

View File

@@ -12,9 +12,12 @@ import ddt
import freezegun
from django.core.files.base import ContentFile
from django.utils.timezone import now
from django.test import RequestFactory
from edxval import api
from xblock.django.request import DjangoWebobRequest
from webob import Request, Response
from common.djangoapps.student.tests.factories import UserFactory
from common.test.utils import normalize_repr
from openedx.core.djangoapps.contentserver.caching import del_cached_content
from xmodule.contentstore.content import StaticContent # lint-amnesty, pylint: disable=wrong-import-order
@@ -36,6 +39,7 @@ from xmodule.x_module import STUDENT_VIEW
from .helpers import BaseTestXmodule
from .test_video_xml import SOURCE_XML
TEST_USERNAME = 'test-user'
TRANSCRIPT = {"start": [10], "end": [100], "text": ["Hi, welcome to Edx."]}
BUMPER_TRANSCRIPT = {"start": [1], "end": [10], "text": ["A bumper"]}
SRT_content = textwrap.dedent("""
@@ -574,6 +578,16 @@ class TestTranscriptDownloadDispatch(TestVideo): # lint-amnesty, pylint: disabl
assert response.headers[attribute] == value
def _create_djangowebobrequest_object_for_url(url: str) -> DjangoWebobRequest:
"""
create a DjangoWebobRequest (The type of requests used by xblocks)
with a relevant user,
"""
django_request = RequestFactory().get(url)
django_request.user = UserFactory.create(username=TEST_USERNAME)
return DjangoWebobRequest(django_request)
@ddt.ddt
class TestTranscriptTranslationGetDispatch(TestVideo): # lint-amnesty, pylint: disable=test-inherits-tests
"""
@@ -620,7 +634,7 @@ class TestTranscriptTranslationGetDispatch(TestVideo): # lint-amnesty, pylint:
)
@ddt.unpack
def test_translation_fails(self, url, dispatch, status_code):
request = Request.blank(url)
request = _create_djangowebobrequest_object_for_url(url)
response = self.block.transcript(request=request, dispatch=dispatch)
assert response.status == status_code
@@ -635,7 +649,7 @@ class TestTranscriptTranslationGetDispatch(TestVideo): # lint-amnesty, pylint:
subs_id = _get_subs_id(good_sjson.name)
attach(self.block, subs_id)
request = Request.blank(url.format(subs_id))
request = _create_djangowebobrequest_object_for_url(url.format(subs_id))
response = self.block.transcript(request=request, dispatch=dispatch)
self.assertDictEqual(json.loads(response.body.decode('utf-8')), subs)
@@ -655,12 +669,12 @@ class TestTranscriptTranslationGetDispatch(TestVideo): # lint-amnesty, pylint:
self.block.youtube_id_1_0 = subs_id
self.block.youtube_id_0_75 = '0_75'
self.store.update_item(self.block, self.user.id)
request = Request.blank(f'/translation/uk?videoId={subs_id}')
request = _create_djangowebobrequest_object_for_url(f'/translation/uk?videoId={subs_id}')
response = self.block.transcript(request=request, dispatch='translation/uk')
self.assertDictEqual(json.loads(response.body.decode('utf-8')), subs)
# 0_75 subs are exist
request = Request.blank('/translation/uk?videoId={}'.format('0_75'))
request = _create_djangowebobrequest_object_for_url('/translation/uk?videoId={}'.format('0_75'))
response = self.block.transcript(request=request, dispatch='translation/uk')
calculated_0_75 = {
'end': [75],
@@ -674,7 +688,7 @@ class TestTranscriptTranslationGetDispatch(TestVideo): # lint-amnesty, pylint:
# 1_5 will be generated from 1_0
self.block.youtube_id_1_5 = '1_5'
self.store.update_item(self.block, self.user.id)
request = Request.blank('/translation/uk?videoId={}'.format('1_5'))
request = _create_djangowebobrequest_object_for_url('/translation/uk?videoId={}'.format('1_5'))
response = self.block.transcript(request=request, dispatch='translation/uk')
calculated_1_5 = {
'end': [150],
@@ -696,7 +710,7 @@ class TestTranscriptTranslationGetDispatch(TestVideo): # lint-amnesty, pylint:
attach(self.block, subs_id)
self.store.update_item(self.block, self.user.id)
request = Request.blank(url)
request = _create_djangowebobrequest_object_for_url(url)
response = self.block.transcript(request=request, dispatch=dispatch)
self.assertDictEqual(json.loads(response.body.decode('utf-8')), TRANSCRIPT)
@@ -713,7 +727,7 @@ class TestTranscriptTranslationGetDispatch(TestVideo): # lint-amnesty, pylint:
# manually clean youtube_id_1_0, as it has default value
self.block.youtube_id_1_0 = ""
request = Request.blank('/translation/uk')
request = _create_djangowebobrequest_object_for_url('/translation/uk')
response = self.block.transcript(request=request, dispatch='translation/uk')
self.assertDictEqual(json.loads(response.body.decode('utf-8')), subs)
@@ -731,21 +745,21 @@ class TestTranscriptTranslationGetDispatch(TestVideo): # lint-amnesty, pylint:
self.block.runtime.modulestore = test_modulestore
# Test youtube style en
request = Request.blank('/translation/en?videoId=12345')
request = _create_djangowebobrequest_object_for_url('/translation/en?videoId=12345')
response = self.block.transcript(request=request, dispatch='translation/en')
assert response.status == '307 Temporary Redirect'
assert ('Location', '/static/dummy/static/subs_12345.srt.sjson') in response.headerlist
# Test HTML5 video style
self.block.sub = 'OEoXaMPEzfM'
request = Request.blank('/translation/en')
request = _create_djangowebobrequest_object_for_url('/translation/en')
response = self.block.transcript(request=request, dispatch='translation/en')
assert response.status == '307 Temporary Redirect'
assert ('Location', '/static/dummy/static/subs_OEoXaMPEzfM.srt.sjson') in response.headerlist
# Test different language to ensure we are just ignoring it since we can't
# translate with static fallback
request = Request.blank('/translation/uk')
request = _create_djangowebobrequest_object_for_url('/translation/uk')
response = self.block.transcript(request=request, dispatch='translation/uk')
assert response.status == '404 Not Found'
@@ -773,7 +787,7 @@ class TestTranscriptTranslationGetDispatch(TestVideo): # lint-amnesty, pylint:
if attach:
attach(self.block, sub)
request = Request.blank(url)
request = _create_djangowebobrequest_object_for_url(url)
response = self.block.transcript(request=request, dispatch=dispatch)
assert response.status == status_code
if sub:
@@ -788,7 +802,7 @@ class TestTranscriptTranslationGetDispatch(TestVideo): # lint-amnesty, pylint:
self._set_static_asset_path()
# When course_id is not mocked out, these values would result in 307, as tested above.
request = Request.blank('/translation/en?videoId=12345')
request = _create_djangowebobrequest_object_for_url('/translation/en?videoId=12345')
response = self.block.transcript(request=request, dispatch='translation/en')
assert response.status == '404 Not Found'
@@ -819,7 +833,8 @@ class TestTranscriptTranslationGetDispatch(TestVideo): # lint-amnesty, pylint:
mock_get_video_transcript_data.return_value = transcript
# Make request to XModule transcript handler
response = self.block.transcript(request=Request.blank('/translation/en'), dispatch='translation/en')
request = _create_djangowebobrequest_object_for_url('/translation/en')
response = self.block.transcript(request=request, dispatch='translation/en')
# Expected headers
expected_headers = {
@@ -840,7 +855,10 @@ class TestTranscriptTranslationGetDispatch(TestVideo): # lint-amnesty, pylint:
Verify that val transcript is not returned when its feature is disabled.
"""
# Make request to XModule transcript handler
response = self.block.transcript(request=Request.blank('/translation/en'), dispatch='translation/en')
response = self.block.transcript(
request=_create_djangowebobrequest_object_for_url('/translation/en'),
dispatch='translation/en'
)
# Assert the actual response
assert response.status_code == 404

View File

@@ -315,6 +315,12 @@ class VideoStudentViewHandlers:
if dispatch.startswith('translation'):
language = dispatch.replace('translation', '').strip('/')
# Because scrapers hit video blocks, verify that a user exists.
# use the _request attr to get the django request object.
if not request._request.user: # pylint: disable=protected-access
log.info("Transcript: user must be logged or public view enabled to get transcript")
return Response(status=403)
if not language:
log.info("Invalid /translation request: no language.")
return Response(status=400)
@@ -324,6 +330,7 @@ class VideoStudentViewHandlers:
return Response(status=404)
if language != self.transcript_language:
self.transcript_language = language
try: