From 7698a277cdd2e7de4ded61b67fbb7ce898b41554 Mon Sep 17 00:00:00 2001
From: Mushtaq Ali
Date: Wed, 1 Aug 2018 16:15:27 +0500
Subject: [PATCH] Scrape video thumbnails from youtube util - EDUCATOR-3109

---
 .../contentstore/tests/test_video_utils.py    | 372 ++++++++++++++++++
 cms/djangoapps/contentstore/video_utils.py    | 137 +++++++
 .../contentstore/views/tests/test_videos.py   |  21 -
 cms/djangoapps/contentstore/views/videos.py   |  56 +--
 4 files changed, 510 insertions(+), 76 deletions(-)
 create mode 100644 cms/djangoapps/contentstore/tests/test_video_utils.py
 create mode 100644 cms/djangoapps/contentstore/video_utils.py

diff --git a/cms/djangoapps/contentstore/tests/test_video_utils.py b/cms/djangoapps/contentstore/tests/test_video_utils.py
new file mode 100644
index 0000000000..d772cd0947
--- /dev/null
+++ b/cms/djangoapps/contentstore/tests/test_video_utils.py
@@ -0,0 +1,372 @@
+#-*- coding: utf-8 -*-
+"""
+Unit tests for video utils.
+"""
+
+from unittest import TestCase
+from datetime import datetime
+import ddt
+import pytz
+import requests
+from django.conf import settings
+from django.core.files.uploadedfile import UploadedFile
+from django.test.utils import override_settings
+from edxval.api import (
+    create_profile,
+    create_video,
+    get_course_video_image_url,
+    update_video_image
+)
+from openedx.core.djangoapps.profile_images.tests.helpers import make_image_file
+
+from mock import patch
+
+from contentstore.tests.utils import CourseTestCase
+from contentstore.video_utils import (
+    download_youtube_video_thumbnail,
+    scrape_youtube_thumbnail,
+    validate_video_image,
+    YOUTUBE_THUMBNAIL_SIZES
+)
+
+
+class ValidateVideoImageTestCase(TestCase):
+    """
+    Tests for `validate_video_image` method.
+    """
+    def test_invalid_image_file_info(self):
+        """
+        Test that when no file information is provided to validate_video_image, it gives proper error message.
+        """
+        error = validate_video_image({})
+        self.assertEqual(error, 'The image must have name, content type, and size information.')
+
+    def test_corrupt_image_file(self):
+        """
+        Test that when corrupt file is provided to validate_video_image, it gives proper error message.
+        """
+        with open(settings.MEDIA_ROOT + '/test-corrupt-image.png', 'w+') as image_file:
+            uploaded_image_file = UploadedFile(
+                image_file,
+                content_type='image/png',
+                size=settings.VIDEO_IMAGE_SETTINGS['VIDEO_IMAGE_MIN_BYTES']
+            )
+            error = validate_video_image(uploaded_image_file)
+            self.assertEqual(error, 'There is a problem with this image file. Try to upload a different file.')
+
+
+@ddt.ddt
+class ScrapeVideoThumbnailsTestCase(CourseTestCase):
+    """
+    Test cases for scraping video thumbnails from youtube.
+    """
+
+    def setUp(self):
+        super(ScrapeVideoThumbnailsTestCase, self).setUp()
+        course_ids = [unicode(self.course.id)]
+        profiles = ['youtube']
+        created = datetime.now(pytz.utc)
+        previous_uploads = [
+            {
+                'edx_video_id': 'test1',
+                'client_video_id': 'test1.mp4',
+                'duration': 42.0,
+                'status': 'upload',
+                'courses': course_ids,
+                'encoded_videos': [],
+                'created': created
+            },
+            {
+                'edx_video_id': 'test-youtube-video-1',
+                'client_video_id': 'test-youtube-id.mp4',
+                'duration': 128.0,
+                'status': 'file_complete',
+                'courses': course_ids,
+                'created': created,
+                'encoded_videos': [
+                    {
+                        'profile': 'youtube',
+                        'url': '3_yD_cEKoCk',
+                        'file_size': 1600,
+                        'bitrate': 100,
+                    }
+                ],
+            },
+            {
+                'edx_video_id': 'test-youtube-video-2',
+                'client_video_id': 'test-youtube-id.mp4',
+                'image': 'image2.jpg',
+                'duration': 128.0,
+                'status': 'file_complete',
+                'courses': course_ids,
+                'created': created,
+                'encoded_videos': [
+                    {
+                        'profile': 'youtube',
+                        'url': '3_yD_cEKoCk',
+                        'file_size': 1600,
+                        'bitrate': 100,
+                    }
+                ],
+            },
+        ]
+        for profile in profiles:
+            create_profile(profile)
+
+        for video in previous_uploads:
+            create_video(video)
+
+        # Attach an image to video2 only, so that video1 starts without one.
+        with make_image_file() as image_file:
+            update_video_image(
+                'test-youtube-video-2', unicode(self.course.id), image_file, 'image.jpg'
+            )
+
+    def mocked_youtube_thumbnail_response(
+            self,
+            mocked_content=None,
+            error_response=False,
+            image_width=settings.VIDEO_IMAGE_MIN_WIDTH,
+            image_height=settings.VIDEO_IMAGE_MIN_HEIGHT
+    ):
+        """
+        Returns a mocked youtube thumbnail response: 200 with image content, or 404 when the content is empty.
+        """
+        image_content = ''
+        with make_image_file(dimensions=(image_width, image_height), ) as image_file:
+            image_content = image_file.read()
+
+        if mocked_content or error_response:
+            image_content = mocked_content
+
+        mocked_response = requests.Response()
+        mocked_response.status_code = requests.codes.ok if image_content else requests.codes.not_found  # pylint: disable=no-member
+        mocked_response._content = image_content  # pylint: disable=protected-access
+        mocked_response.headers = {'content-type': 'image/jpeg'}
+        return mocked_response
+
+    @override_settings(AWS_ACCESS_KEY_ID='test_key_id', AWS_SECRET_ACCESS_KEY='test_secret')
+    @patch('requests.get')
+    @ddt.data(
+        (
+            {
+                'maxresdefault': 'maxresdefault-result-image-content',
+                'sddefault': 'sddefault-result-image-content',
+                'hqdefault': 'hqdefault-result-image-content',
+                'mqdefault': 'mqdefault-result-image-content',
+                'default': 'default-result-image-content'
+            },
+            'maxresdefault-result-image-content'
+        ),
+        (
+            {
+                'maxresdefault': '',
+                'sddefault': 'sddefault-result-image-content',
+                'hqdefault': 'hqdefault-result-image-content',
+                'mqdefault': 'mqdefault-result-image-content',
+                'default': 'default-result-image-content'
+            },
+            'sddefault-result-image-content'
+        ),
+        (
+            {
+                'maxresdefault': '',
+                'sddefault': '',
+                'hqdefault': 'hqdefault-result-image-content',
+                'mqdefault': 'mqdefault-result-image-content',
+                'default': 'default-result-image-content'
+            },
+            'hqdefault-result-image-content'
+        ),
+        (
+            {
+                'maxresdefault': '',
+                'sddefault': '',
+                'hqdefault': '',
+                'mqdefault': 'mqdefault-result-image-content',
+                'default': 'default-result-image-content'
+            },
+            'mqdefault-result-image-content'
+        ),
+        (
+            {
+                'maxresdefault': '',
+                'sddefault': '',
+                'hqdefault': '',
+                'mqdefault': '',
+                'default': 'default-result-image-content'
+            },
+            'default-result-image-content'
+        ),
+    )
+    @ddt.unpack
+    def test_youtube_video_thumbnail_download(
+            self,
+            thumbnail_content_data,
+            expected_thumbnail_content,
+            mocked_request
+    ):
+        """
+        Test that we get highest resolution video thumbnail available from youtube.
+        """
+        # Mock get youtube thumbnail responses.
+        def mocked_youtube_thumbnail_responses(resolutions):
+            """
+            Returns a list of mocked responses containing youtube thumbnails.
+            """
+            mocked_responses = []
+            for resolution in YOUTUBE_THUMBNAIL_SIZES:
+                mocked_content = resolutions.get(resolution, '')
+                error_response = not mocked_content
+                mocked_responses.append(self.mocked_youtube_thumbnail_response(mocked_content, error_response))
+            return mocked_responses
+
+        mocked_request.side_effect = mocked_youtube_thumbnail_responses(thumbnail_content_data)
+
+        thumbnail_content, thumbnail_content_type = download_youtube_video_thumbnail('test-yt-id')
+
+        # Verify that we get the expected thumbnail content.
+        self.assertEqual(thumbnail_content, expected_thumbnail_content)
+        self.assertEqual(thumbnail_content_type, 'image/jpeg')
+
+    @override_settings(AWS_ACCESS_KEY_ID='test_key_id', AWS_SECRET_ACCESS_KEY='test_secret')
+    @patch('requests.get')
+    def test_scrape_youtube_thumbnail(self, mocked_request):
+        """
+        Test that youtube thumbnails are correctly scraped.
+        """
+        course_id = unicode(self.course.id)
+        video1_edx_video_id = 'test-youtube-video-1'
+        video2_edx_video_id = 'test-youtube-video-2'
+
+        # Mock a single youtube thumbnail response.
+        mocked_request.side_effect = [self.mocked_youtube_thumbnail_response()]
+
+        # Verify that video1 has no image attached.
+        video1_image_url = get_course_video_image_url(course_id=course_id, edx_video_id=video1_edx_video_id)
+        self.assertIsNone(video1_image_url)
+
+        # Verify that video2 already has an image attached.
+        video2_image_url = get_course_video_image_url(course_id=course_id, edx_video_id=video2_edx_video_id)
+        self.assertIsNotNone(video2_image_url)
+
+        # Scrape video thumbnails.
+        scrape_youtube_thumbnail(course_id, video1_edx_video_id, 'test-yt-id')
+        scrape_youtube_thumbnail(course_id, video2_edx_video_id, 'test-yt-id2')
+
+        # Verify that video1 now has an image attached.
+        video1_image_url = get_course_video_image_url(course_id=course_id, edx_video_id=video1_edx_video_id)
+        self.assertIsNotNone(video1_image_url)
+
+        # Also verify that video2's image is not updated.
+        video2_image_url_latest = get_course_video_image_url(course_id=course_id, edx_video_id=video2_edx_video_id)
+        self.assertEqual(video2_image_url, video2_image_url_latest)
+
+    @ddt.data(
+        (
+            100,  # image_width
+            100,  # image_height
+            False  # is_success
+        ),
+        (
+            640,
+            360,
+            True
+        )
+    )
+    @override_settings(AWS_ACCESS_KEY_ID='test_key_id', AWS_SECRET_ACCESS_KEY='test_secret')
+    @patch('contentstore.video_utils.LOGGER')
+    @patch('requests.get')
+    @ddt.unpack
+    def test_scrape_youtube_thumbnail_logging(
+            self,
+            image_width,
+            image_height,
+            is_success,
+            mocked_request,
+            mock_logger
+    ):
+        """
+        Test that we get correct logs in case of failure as well as success.
+        """
+        course_id = unicode(self.course.id)
+        video1_edx_video_id = 'test-youtube-video-1'
+        mocked_request.side_effect = [
+            self.mocked_youtube_thumbnail_response(
+                image_width=image_width,
+                image_height=image_height
+            )
+        ]
+        scrape_youtube_thumbnail(course_id, video1_edx_video_id, 'test-yt-id')
+        if is_success:
+            mock_logger.info.assert_called_with(
+                'VIDEOS: Scraping youtube video thumbnail for edx_video_id [%s] in course [%s]',
+                video1_edx_video_id,
+                course_id
+            )
+        else:
+            mock_logger.info.assert_called_with(
+                'VIDEOS: Scraping youtube video thumbnail failed for edx_video_id [%s] in course [%s] with error: %s',
+                video1_edx_video_id,
+                course_id,
+                'This image file must be larger than 2 KB.'
+            )
+
+    @ddt.data(
+        (
+            None,
+            'image/jpeg',
+            'This image file must be larger than {image_min_size}.'.format(
+                image_min_size=settings.VIDEO_IMAGE_MIN_FILE_SIZE_KB
+            )
+        ),
+        (
+            'dummy-content',
+            None,
+            'This image file type is not supported. Supported file types are {supported_file_formats}.'.format(
+                supported_file_formats=settings.VIDEO_IMAGE_SUPPORTED_FILE_FORMATS.keys()
+            )
+        ),
+        (
+            None,
+            None,
+            'This image file type is not supported. Supported file types are {supported_file_formats}.'.format(
+                supported_file_formats=settings.VIDEO_IMAGE_SUPPORTED_FILE_FORMATS.keys()
+            )
+        ),
+    )
+    @patch('contentstore.video_utils.LOGGER')
+    @patch('contentstore.video_utils.download_youtube_video_thumbnail')
+    @ddt.unpack
+    def test_no_video_thumbnail_downloaded(
+            self,
+            image_content,
+            image_content_type,
+            error_message,
+            mock_download_youtube_thumbnail,
+            mock_logger
+    ):
+        """
+        Test that when no thumbnail is downloaded, video image is not updated.
+        """
+        mock_download_youtube_thumbnail.return_value = image_content, image_content_type
+        course_id = unicode(self.course.id)
+        video1_edx_video_id = 'test-youtube-video-1'
+
+        # Verify that video1 has no image attached.
+        video1_image_url = get_course_video_image_url(course_id=course_id, edx_video_id=video1_edx_video_id)
+        self.assertIsNone(video1_image_url)
+
+        # Scrape video thumbnail.
+        scrape_youtube_thumbnail(course_id, video1_edx_video_id, 'test-yt-id')
+
+        mock_logger.info.assert_called_with(
+            'VIDEOS: Scraping youtube video thumbnail failed for edx_video_id [%s] in course [%s] with error: %s',
+            video1_edx_video_id,
+            course_id,
+            error_message
+        )
+
+        # Verify that no image is attached to video1.
+        video1_image_url = get_course_video_image_url(course_id=course_id, edx_video_id=video1_edx_video_id)
+        self.assertIsNone(video1_image_url)
diff --git a/cms/djangoapps/contentstore/video_utils.py b/cms/djangoapps/contentstore/video_utils.py
new file mode 100644
index 0000000000..1021c410b7
--- /dev/null
+++ b/cms/djangoapps/contentstore/video_utils.py
@@ -0,0 +1,137 @@
+#-*- coding: utf-8 -*-
+"""
+Utils related to the videos.
+"""
+import logging
+from urlparse import urljoin
+import requests
+
+from django.conf import settings
+from django.core.files.images import get_image_dimensions
+from django.core.files.uploadedfile import SimpleUploadedFile
+from django.utils.translation import ugettext as _
+from edxval.api import get_course_video_image_url, update_video_image
+
+
+# Youtube thumbnail sizes.
+# https://img.youtube.com/vi/{youtube_id}/{thumbnail_quality}.jpg
+# High Quality Thumbnail - hqdefault (480x360 pixels)
+# Medium Quality Thumbnail - mqdefault (320x180 pixels)
+# Normal Quality Thumbnail - default (120x90 pixels)
+# And additionally, the next two thumbnails may or may not exist. For HQ videos they exist.
+# Standard Definition Thumbnail - sddefault (640x480 pixels)
+# Maximum Resolution Thumbnail - maxresdefault (1920x1080 pixels)
+YOUTUBE_THUMBNAIL_SIZES = ['maxresdefault', 'sddefault', 'hqdefault', 'mqdefault', 'default']
+
+LOGGER = logging.getLogger(__name__)
+
+
+def validate_video_image(image_file, skip_aspect_ratio=False):
+    """
+    Validates video image file.
+
+    Arguments:
+        image_file: The selected image file.
+        skip_aspect_ratio (bool): When True, the aspect ratio check is not enforced.
+    Returns:
+        error (String or None): If there is error returns error message otherwise None.
+    """
+    error = None
+
+    if not all(hasattr(image_file, attr) for attr in ['name', 'content_type', 'size']):
+        error = _('The image must have name, content type, and size information.')
+    elif image_file.content_type not in settings.VIDEO_IMAGE_SUPPORTED_FILE_FORMATS.values():
+        error = _('This image file type is not supported. Supported file types are {supported_file_formats}.').format(
+            supported_file_formats=settings.VIDEO_IMAGE_SUPPORTED_FILE_FORMATS.keys()
+        )
+    elif image_file.size > settings.VIDEO_IMAGE_SETTINGS['VIDEO_IMAGE_MAX_BYTES']:
+        error = _('This image file must be smaller than {image_max_size}.').format(
+            image_max_size=settings.VIDEO_IMAGE_MAX_FILE_SIZE_MB
+        )
+    elif image_file.size < settings.VIDEO_IMAGE_SETTINGS['VIDEO_IMAGE_MIN_BYTES']:
+        error = _('This image file must be larger than {image_min_size}.').format(
+            image_min_size=settings.VIDEO_IMAGE_MIN_FILE_SIZE_KB
+        )
+    else:
+        try:
+            image_file_width, image_file_height = get_image_dimensions(image_file)
+        except TypeError:
+            return _('There is a problem with this image file. Try to upload a different file.')
+        if image_file_width is None or image_file_height is None:
+            return _('There is a problem with this image file. Try to upload a different file.')
+        image_file_aspect_ratio = abs(image_file_width / float(image_file_height) - settings.VIDEO_IMAGE_ASPECT_RATIO)
+        if image_file_width < settings.VIDEO_IMAGE_MIN_WIDTH or image_file_height < settings.VIDEO_IMAGE_MIN_HEIGHT:
+            error = _('Recommended image resolution is {image_file_max_width}x{image_file_max_height}. '
+                      'The minimum resolution is {image_file_min_width}x{image_file_min_height}.').format(
+                image_file_max_width=settings.VIDEO_IMAGE_MAX_WIDTH,
+                image_file_max_height=settings.VIDEO_IMAGE_MAX_HEIGHT,
+                image_file_min_width=settings.VIDEO_IMAGE_MIN_WIDTH,
+                image_file_min_height=settings.VIDEO_IMAGE_MIN_HEIGHT
+            )
+        elif not skip_aspect_ratio and image_file_aspect_ratio > settings.VIDEO_IMAGE_ASPECT_RATIO_ERROR_MARGIN:
+            error = _('This image file must have an aspect ratio of {video_image_aspect_ratio_text}.').format(
+                video_image_aspect_ratio_text=settings.VIDEO_IMAGE_ASPECT_RATIO_TEXT
+            )
+        else:
+            try:
+                image_file.name.encode('ascii')
+            except UnicodeEncodeError:
+                error = _('The image file name can only contain letters, numbers, hyphens (-), and underscores (_).')
+    return error
+
+
+def download_youtube_video_thumbnail(youtube_id):
+    """
+    Download the highest resolution thumbnail available from youtube as (content, content_type), or (None, None).
+    """
+    thumbnail_content = thumbnail_content_type = None
+    # Try each thumbnail size in turn, from the highest resolution to the lowest.
+    for thumbnail_quality in YOUTUBE_THUMBNAIL_SIZES:
+        thumbnail_url = urljoin('https://img.youtube.com', '/vi/{youtube_id}/{thumbnail_quality}.jpg'.format(
+            youtube_id=youtube_id, thumbnail_quality=thumbnail_quality
+        ))
+        response = requests.get(thumbnail_url, timeout=10)  # Do not hang forever on a stalled connection.
+        if response.status_code == requests.codes.ok:  # pylint: disable=no-member
+            thumbnail_content = response.content
+            thumbnail_content_type = response.headers.get('content-type')
+            # If best available resolution is found, skip looking for lower resolutions.
+            break
+    return thumbnail_content, thumbnail_content_type
+
+
+def validate_and_update_video_image(course_key_string, edx_video_id, image_file, image_filename):
+    """
+    Validates image content and updates video image; a failed validation is logged and the image is not updated.
+    """
+    error = validate_video_image(image_file, skip_aspect_ratio=True)
+    if error:
+        LOGGER.info(
+            'VIDEOS: Scraping youtube video thumbnail failed for edx_video_id [%s] in course [%s] with error: %s',
+            edx_video_id,
+            course_key_string,
+            error
+        )
+        return
+
+    update_video_image(edx_video_id, course_key_string, image_file, image_filename)
+    LOGGER.info(
+        'VIDEOS: Scraping youtube video thumbnail for edx_video_id [%s] in course [%s]', edx_video_id, course_key_string
+    )
+
+
+def scrape_youtube_thumbnail(course_id, edx_video_id, youtube_id):
+    """
+    Scrapes youtube thumbnail for a given video, unless the course video already has an image.
+    """
+    # Scrape when course video image does not exist for edx_video_id.
+    if not get_course_video_image_url(course_id, edx_video_id):
+        thumbnail_content, thumbnail_content_type = download_youtube_video_thumbnail(youtube_id)
+        supported_content_types = {v: k for k, v in settings.VIDEO_IMAGE_SUPPORTED_FILE_FORMATS.iteritems()}
+        image_filename = '{youtube_id}{image_extention}'.format(
+            youtube_id=youtube_id,
+            image_extention=supported_content_types.get(
+                thumbnail_content_type, supported_content_types['image/jpeg']
+            )
+        )
+        image_file = SimpleUploadedFile(image_filename, thumbnail_content, thumbnail_content_type)
+        validate_and_update_video_image(course_id, edx_video_id, image_file, image_filename)
diff --git a/cms/djangoapps/contentstore/views/tests/test_videos.py b/cms/djangoapps/contentstore/views/tests/test_videos.py
index 94136bf3bf..4a6176baa8 100644
--- a/cms/djangoapps/contentstore/views/tests/test_videos.py
+++ b/cms/djangoapps/contentstore/views/tests/test_videos.py
@@ -29,7 +29,6 @@ from contentstore.tests.utils import CourseTestCase
 from contentstore.utils import reverse_course_url
 from contentstore.views.videos import (
_get_default_video_image_url, - validate_video_image, VIDEO_IMAGE_UPLOAD_ENABLED, WAFFLE_SWITCHES, TranscriptProvider @@ -784,26 +783,6 @@ class VideoImageTestCase(VideoUploadTestBase, CourseTestCase): response = self.client.post(video_image_upload_url, {}) self.verify_error_message(response, 'An image file is required.') - def test_invalid_image_file_info(self): - """ - Test that when no file information is provided to validate_video_image, it gives proper error message. - """ - error = validate_video_image({}) - self.assertEquals(error, 'The image must have name, content type, and size information.') - - def test_corrupt_image_file(self): - """ - Test that when corrupt file is provided to validate_video_image, it gives proper error message. - """ - with open(settings.MEDIA_ROOT + '/test-corrupt-image.png', 'w+') as file: - image_file = UploadedFile( - file, - content_type='image/png', - size=settings.VIDEO_IMAGE_SETTINGS['VIDEO_IMAGE_MIN_BYTES'] - ) - error = validate_video_image(image_file) - self.assertEquals(error, 'There is a problem with this image file. 
Try to upload a different file.') - @override_switch(VIDEO_IMAGE_UPLOAD_ENABLED, True) def test_no_video_image(self): """ diff --git a/cms/djangoapps/contentstore/views/videos.py b/cms/djangoapps/contentstore/views/videos.py index 102f699e00..da8a065982 100644 --- a/cms/djangoapps/contentstore/views/videos.py +++ b/cms/djangoapps/contentstore/views/videos.py @@ -14,7 +14,6 @@ from boto import s3 from django.conf import settings from django.contrib.auth.decorators import login_required from django.contrib.staticfiles.storage import staticfiles_storage -from django.core.files.images import get_image_dimensions from django.urls import reverse from django.http import HttpResponse, HttpResponseNotFound from django.utils.translation import ugettext as _ @@ -40,6 +39,7 @@ from xmodule.video_module.transcripts_utils import Transcript from contentstore.models import VideoUploadConfig from contentstore.utils import reverse_course_url +from contentstore.video_utils import validate_video_image from edxmako.shortcuts import render_to_response from openedx.core.djangoapps.video_config.models import VideoTranscriptEnabledFlag from openedx.core.djangoapps.waffle_utils import WaffleSwitchNamespace @@ -187,60 +187,6 @@ def videos_handler(request, course_key_string, edx_video_id=None): return videos_post(course, request) -def validate_video_image(image_file): - """ - Validates video image file. - - Arguments: - image_file: The selected image file. - - Returns: - error (String or None): If there is error returns error message otherwise None. - """ - error = None - - if not all(hasattr(image_file, attr) for attr in ['name', 'content_type', 'size']): - error = _('The image must have name, content type, and size information.') - elif image_file.content_type not in settings.VIDEO_IMAGE_SUPPORTED_FILE_FORMATS.values(): - error = _('This image file type is not supported. 
Supported file types are {supported_file_formats}.').format( - supported_file_formats=settings.VIDEO_IMAGE_SUPPORTED_FILE_FORMATS.keys() - ) - elif image_file.size > settings.VIDEO_IMAGE_SETTINGS['VIDEO_IMAGE_MAX_BYTES']: - error = _('This image file must be smaller than {image_max_size}.').format( - image_max_size=settings.VIDEO_IMAGE_MAX_FILE_SIZE_MB - ) - elif image_file.size < settings.VIDEO_IMAGE_SETTINGS['VIDEO_IMAGE_MIN_BYTES']: - error = _('This image file must be larger than {image_min_size}.').format( - image_min_size=settings.VIDEO_IMAGE_MIN_FILE_SIZE_KB - ) - else: - try: - image_file_width, image_file_height = get_image_dimensions(image_file) - except TypeError: - return _('There is a problem with this image file. Try to upload a different file.') - if image_file_width is None or image_file_height is None: - return _('There is a problem with this image file. Try to upload a different file.') - image_file_aspect_ratio = abs(image_file_width / float(image_file_height) - settings.VIDEO_IMAGE_ASPECT_RATIO) - if image_file_width < settings.VIDEO_IMAGE_MIN_WIDTH or image_file_height < settings.VIDEO_IMAGE_MIN_HEIGHT: - error = _('Recommended image resolution is {image_file_max_width}x{image_file_max_height}. 
' - 'The minimum resolution is {image_file_min_width}x{image_file_min_height}.').format( - image_file_max_width=settings.VIDEO_IMAGE_MAX_WIDTH, - image_file_max_height=settings.VIDEO_IMAGE_MAX_HEIGHT, - image_file_min_width=settings.VIDEO_IMAGE_MIN_WIDTH, - image_file_min_height=settings.VIDEO_IMAGE_MIN_HEIGHT - ) - elif image_file_aspect_ratio > settings.VIDEO_IMAGE_ASPECT_RATIO_ERROR_MARGIN: - error = _('This image file must have an aspect ratio of {video_image_aspect_ratio_text}.').format( - video_image_aspect_ratio_text=settings.VIDEO_IMAGE_ASPECT_RATIO_TEXT - ) - else: - try: - image_file.name.encode('ascii') - except UnicodeEncodeError: - error = _('The image file name can only contain letters, numbers, hyphens (-), and underscores (_).') - return error - - @expect_json @login_required @require_POST