Scrape video thumbnails from youtube util - EDUCATOR-3109

This commit is contained in:
Mushtaq Ali
2018-08-01 16:15:27 +05:00
committed by M. Rehan
parent 565670323d
commit 7698a277cd
4 changed files with 510 additions and 76 deletions

View File

@@ -0,0 +1,372 @@
#-*- coding: utf-8 -*-
"""
Unit tests for video utils.
"""
from unittest import TestCase
from datetime import datetime
import ddt
import pytz
import requests
from django.conf import settings
from django.core.files.uploadedfile import UploadedFile
from django.test.utils import override_settings
from edxval.api import (
create_profile,
create_video,
get_course_video_image_url,
update_video_image
)
from openedx.core.djangoapps.profile_images.tests.helpers import make_image_file
from mock import patch
from contentstore.tests.utils import CourseTestCase
from contentstore.video_utils import (
download_youtube_video_thumbnail,
scrape_youtube_thumbnail,
validate_video_image,
YOUTUBE_THUMBNAIL_SIZES
)
class ValidateVideoImageTestCase(TestCase):
    """
    Tests for `validate_video_image` method.
    """

    def test_invalid_image_file_info(self):
        """
        Test that when no file information is provided to validate_video_image, it gives proper error message.
        """
        # A plain dict exposes none of the `name`, `content_type` or `size` attributes.
        error = validate_video_image({})
        self.assertEqual(error, 'The image must have name, content type, and size information.')

    def test_corrupt_image_file(self):
        """
        Test that when corrupt file is provided to validate_video_image, it gives proper error message.
        """
        # Imported locally so this test does not rely on a module-level import being added.
        import tempfile

        # An empty temporary file stands in for a corrupt PNG. Unlike a file created
        # under MEDIA_ROOT, it is deleted automatically when the `with` block exits,
        # so the test leaves nothing behind on disk.
        with tempfile.NamedTemporaryFile(suffix='.png') as image_file:
            uploaded_image_file = UploadedFile(
                image_file,
                content_type='image/png',
                # Report the minimum allowed size so validation gets past the size
                # checks and reaches the image dimension check.
                size=settings.VIDEO_IMAGE_SETTINGS['VIDEO_IMAGE_MIN_BYTES']
            )
            error = validate_video_image(uploaded_image_file)
            self.assertEqual(error, 'There is a problem with this image file. Try to upload a different file.')
@ddt.ddt
class ScrapeVideoThumbnailsTestCase(CourseTestCase):
    """
    Test cases for scraping video thumbnails from youtube.
    """

    def setUp(self):
        """
        Creates the `youtube` profile and three course videos; only
        `test-youtube-video-2` gets a video image attached.
        """
        super(ScrapeVideoThumbnailsTestCase, self).setUp()
        course_ids = [unicode(self.course.id)]
        profiles = ['youtube']
        created = datetime.now(pytz.utc)
        previous_uploads = [
            {
                'edx_video_id': 'test1',
                'client_video_id': 'test1.mp4',
                'duration': 42.0,
                'status': 'upload',
                'courses': course_ids,
                'encoded_videos': [],
                'created': created
            },
            {
                'edx_video_id': 'test-youtube-video-1',
                'client_video_id': 'test-youtube-id.mp4',
                'duration': 128.0,
                'status': 'file_complete',
                'courses': course_ids,
                'created': created,
                'encoded_videos': [
                    {
                        'profile': 'youtube',
                        'url': '3_yD_cEKoCk',
                        'file_size': 1600,
                        'bitrate': 100,
                    }
                ],
            },
            {
                'edx_video_id': 'test-youtube-video-2',
                'client_video_id': 'test-youtube-id.mp4',
                'image': 'image2.jpg',
                'duration': 128.0,
                'status': 'file_complete',
                'courses': course_ids,
                'created': created,
                'encoded_videos': [
                    {
                        'profile': 'youtube',
                        'url': '3_yD_cEKoCk',
                        'file_size': 1600,
                        'bitrate': 100,
                    }
                ],
            },
        ]
        for profile in profiles:
            create_profile(profile)

        for video in previous_uploads:
            create_video(video)

        # Create video images.
        with make_image_file() as image_file:
            update_video_image(
                'test-youtube-video-2', unicode(self.course.id), image_file, 'image.jpg'
            )

    def mocked_youtube_thumbnail_response(
            self,
            mocked_content=None,
            error_response=False,
            image_width=settings.VIDEO_IMAGE_MIN_WIDTH,
            image_height=settings.VIDEO_IMAGE_MIN_HEIGHT
    ):
        """
        Returns a mocked youtube thumbnail response.

        Arguments:
            mocked_content: Response body to use instead of a generated image.
            error_response: When True, `mocked_content` is used as the body even if it is empty.
            image_width: Width of the generated image.
            image_height: Height of the generated image.

        Returns:
            requests.Response: status is `ok` when the body is non-empty, `not_found` otherwise.
        """
        with make_image_file(dimensions=(image_width, image_height), ) as image_file:
            image_content = image_file.read()

        # Explicit content (or an explicit error) overrides the generated image.
        if mocked_content or error_response:
            image_content = mocked_content

        mocked_response = requests.Response()
        mocked_response.status_code = requests.codes.ok if image_content else requests.codes.not_found  # pylint: disable=no-member
        mocked_response._content = image_content  # pylint: disable=protected-access
        mocked_response.headers = {'content-type': 'image/jpeg'}
        return mocked_response

    @override_settings(AWS_ACCESS_KEY_ID='test_key_id', AWS_SECRET_ACCESS_KEY='test_secret')
    @patch('requests.get')
    @ddt.data(
        (
            {
                'maxresdefault': 'maxresdefault-result-image-content',
                'sddefault': 'sddefault-result-image-content',
                'hqdefault': 'hqdefault-result-image-content',
                'mqdefault': 'mqdefault-result-image-content',
                'default': 'default-result-image-content'
            },
            'maxresdefault-result-image-content'
        ),
        (
            {
                'maxresdefault': '',
                'sddefault': 'sddefault-result-image-content',
                'hqdefault': 'hqdefault-result-image-content',
                'mqdefault': 'mqdefault-result-image-content',
                'default': 'default-result-image-content'
            },
            'sddefault-result-image-content'
        ),
        (
            {
                'maxresdefault': '',
                'sddefault': '',
                'hqdefault': 'hqdefault-result-image-content',
                'mqdefault': 'mqdefault-result-image-content',
                'default': 'default-result-image-content'
            },
            'hqdefault-result-image-content'
        ),
        (
            {
                'maxresdefault': '',
                'sddefault': '',
                'hqdefault': '',
                'mqdefault': 'mqdefault-result-image-content',
                'default': 'default-result-image-content'
            },
            'mqdefault-result-image-content'
        ),
        (
            {
                'maxresdefault': '',
                'sddefault': '',
                'hqdefault': '',
                'mqdefault': '',
                'default': 'default-result-image-content'
            },
            'default-result-image-content'
        ),
    )
    @ddt.unpack
    def test_youtube_video_thumbnail_download(
            self,
            thumbnail_content_data,
            expected_thumbnail_content,
            mocked_request
    ):
        """
        Test that we get highest resolution video thumbnail available from youtube.
        """
        # Mock get youtube thumbnail responses.
        def mocked_youtube_thumbnail_responses(resolutions):
            """
            Returns a list of mocked responses containing youtube thumbnails.
            """
            mocked_responses = []
            for resolution in YOUTUBE_THUMBNAIL_SIZES:
                mocked_content = resolutions.get(resolution, '')
                # Empty content for a resolution means that thumbnail is unavailable.
                error_response = not mocked_content
                mocked_responses.append(self.mocked_youtube_thumbnail_response(mocked_content, error_response))
            return mocked_responses

        mocked_request.side_effect = mocked_youtube_thumbnail_responses(thumbnail_content_data)

        thumbnail_content, thumbnail_content_type = download_youtube_video_thumbnail('test-yt-id')

        # Verify that we get the expected thumbnail content.
        self.assertEqual(thumbnail_content, expected_thumbnail_content)
        self.assertEqual(thumbnail_content_type, 'image/jpeg')

    @override_settings(AWS_ACCESS_KEY_ID='test_key_id', AWS_SECRET_ACCESS_KEY='test_secret')
    @patch('requests.get')
    def test_scrape_youtube_thumbnail(self, mocked_request):
        """
        Test that youtube thumbnails are correctly scraped.
        """
        course_id = unicode(self.course.id)
        video1_edx_video_id = 'test-youtube-video-1'
        video2_edx_video_id = 'test-youtube-video-2'

        # Mock get youtube thumbnail responses.
        mocked_request.side_effect = [self.mocked_youtube_thumbnail_response()]

        # Verify that video1 has no image attached.
        video1_image_url = get_course_video_image_url(course_id=course_id, edx_video_id=video1_edx_video_id)
        self.assertIsNone(video1_image_url)

        # Verify that video2 has already image attached.
        video2_image_url = get_course_video_image_url(course_id=course_id, edx_video_id=video2_edx_video_id)
        self.assertIsNotNone(video2_image_url)

        # Scrape video thumbnails.
        scrape_youtube_thumbnail(course_id, video1_edx_video_id, 'test-yt-id')
        scrape_youtube_thumbnail(course_id, video2_edx_video_id, 'test-yt-id2')

        # Verify that now video1 image is attached.
        video1_image_url = get_course_video_image_url(course_id=course_id, edx_video_id=video1_edx_video_id)
        self.assertIsNotNone(video1_image_url)

        # Also verify that video2's image is not updated.
        video2_image_url_latest = get_course_video_image_url(course_id=course_id, edx_video_id=video2_edx_video_id)
        self.assertEqual(video2_image_url, video2_image_url_latest)

    @ddt.data(
        (
            100,
            100,
            False
        ),
        (
            640,
            360,
            True
        )
    )
    @override_settings(AWS_ACCESS_KEY_ID='test_key_id', AWS_SECRET_ACCESS_KEY='test_secret')
    @patch('contentstore.video_utils.LOGGER')
    @patch('requests.get')
    @ddt.unpack
    def test_scrape_youtube_thumbnail_logging(
            self,
            image_width,
            image_height,
            is_success,
            mocked_request,
            mock_logger
    ):
        """
        Test that we get correct logs in case of failure as well as success.
        """
        course_id = unicode(self.course.id)
        video1_edx_video_id = 'test-youtube-video-1'
        mocked_request.side_effect = [
            self.mocked_youtube_thumbnail_response(
                image_width=image_width,
                image_height=image_height
            )
        ]

        scrape_youtube_thumbnail(course_id, video1_edx_video_id, 'test-yt-id')

        if is_success:
            mock_logger.info.assert_called_with(
                'VIDEOS: Scraping youtube video thumbnail for edx_video_id [%s] in course [%s]',
                video1_edx_video_id,
                course_id
            )
        else:
            # Build the expected message from settings, as the other tests in this
            # class do, instead of hard-coding the configured minimum size.
            mock_logger.info.assert_called_with(
                'VIDEOS: Scraping youtube video thumbnail failed for edx_video_id [%s] in course [%s] with error: %s',
                video1_edx_video_id,
                course_id,
                'This image file must be larger than {image_min_size}.'.format(
                    image_min_size=settings.VIDEO_IMAGE_MIN_FILE_SIZE_KB
                )
            )

    @ddt.data(
        (
            None,
            'image/jpeg',
            'This image file must be larger than {image_min_size}.'.format(
                image_min_size=settings.VIDEO_IMAGE_MIN_FILE_SIZE_KB
            )
        ),
        (
            'dummy-content',
            None,
            'This image file type is not supported. Supported file types are {supported_file_formats}.'.format(
                supported_file_formats=settings.VIDEO_IMAGE_SUPPORTED_FILE_FORMATS.keys()
            )
        ),
        (
            None,
            None,
            'This image file type is not supported. Supported file types are {supported_file_formats}.'.format(
                supported_file_formats=settings.VIDEO_IMAGE_SUPPORTED_FILE_FORMATS.keys()
            )
        ),
    )
    @patch('contentstore.video_utils.LOGGER')
    @patch('contentstore.video_utils.download_youtube_video_thumbnail')
    @ddt.unpack
    def test_no_video_thumbnail_downloaded(
            self,
            image_content,
            image_content_type,
            error_message,
            mock_download_youtube_thumbnail,
            mock_logger
    ):
        """
        Test that when no thumbnail is downloaded, video image is not updated.
        """
        mock_download_youtube_thumbnail.return_value = image_content, image_content_type
        course_id = unicode(self.course.id)
        video1_edx_video_id = 'test-youtube-video-1'

        # Verify that video1 has no image attached.
        video1_image_url = get_course_video_image_url(course_id=course_id, edx_video_id=video1_edx_video_id)
        self.assertIsNone(video1_image_url)

        # Scrape video thumbnail.
        scrape_youtube_thumbnail(course_id, video1_edx_video_id, 'test-yt-id')

        mock_logger.info.assert_called_with(
            'VIDEOS: Scraping youtube video thumbnail failed for edx_video_id [%s] in course [%s] with error: %s',
            video1_edx_video_id,
            course_id,
            error_message
        )

        # Verify that no image is attached to video1.
        video1_image_url = get_course_video_image_url(course_id=course_id, edx_video_id=video1_edx_video_id)
        self.assertIsNone(video1_image_url)

View File

@@ -0,0 +1,137 @@
#-*- coding: utf-8 -*-
"""
Utils related to the videos.
"""
import logging
from urlparse import urljoin
import requests
from django.conf import settings
from django.core.files.images import get_image_dimensions
from django.core.files.uploadedfile import SimpleUploadedFile
from django.utils.translation import ugettext as _
from edxval.api import get_course_video_image_url, update_video_image
# YouTube thumbnail qualities, listed from highest to lowest resolution;
# download_youtube_video_thumbnail() tries them in this order.
# URL pattern: https://img.youtube.com/vi/{youtube_id}/{thumbnail_quality}.jpg
#   Maximum Resolution Thumbnail - maxresdefault (1920x1080 pixels)
#   Standard Definition Thumbnail - sddefault (640x480 pixels)
#   High Quality Thumbnail - hqdefault (480x360 pixels)
#   Medium Quality Thumbnail - mqdefault (320x180 pixels)
#   Normal Quality Thumbnail - default (120x90 pixels)
# The maxresdefault and sddefault thumbnails may or may not exist for a given
# video; for HQ videos they exist.
YOUTUBE_THUMBNAIL_SIZES = ['maxresdefault', 'sddefault', 'hqdefault', 'mqdefault', 'default']
# Module-level logger used by the video helpers in this module.
LOGGER = logging.getLogger(__name__)
def validate_video_image(image_file, skip_aspect_ratio=False):
    """
    Validates video image file.

    Checks run in order and the first failure is reported: required file
    attributes, content type, maximum size, minimum size, readable image
    dimensions, minimum resolution, aspect ratio (optional) and an ASCII-only
    file name.

    Arguments:
        image_file: The selected image file.
        skip_aspect_ratio (bool): When True, the aspect ratio check is skipped.

    Returns:
        error (String or None): If there is error returns error message otherwise None.
    """
    if not all(hasattr(image_file, attr) for attr in ['name', 'content_type', 'size']):
        return _('The image must have name, content type, and size information.')

    if image_file.content_type not in settings.VIDEO_IMAGE_SUPPORTED_FILE_FORMATS.values():
        return _('This image file type is not supported. Supported file types are {supported_file_formats}.').format(
            supported_file_formats=settings.VIDEO_IMAGE_SUPPORTED_FILE_FORMATS.keys()
        )

    if image_file.size > settings.VIDEO_IMAGE_SETTINGS['VIDEO_IMAGE_MAX_BYTES']:
        return _('This image file must be smaller than {image_max_size}.').format(
            image_max_size=settings.VIDEO_IMAGE_MAX_FILE_SIZE_MB
        )

    if image_file.size < settings.VIDEO_IMAGE_SETTINGS['VIDEO_IMAGE_MIN_BYTES']:
        return _('This image file must be larger than {image_min_size}.').format(
            image_min_size=settings.VIDEO_IMAGE_MIN_FILE_SIZE_KB
        )

    try:
        image_file_width, image_file_height = get_image_dimensions(image_file)
    except TypeError:
        # Treated the same as dimensions that could not be determined.
        image_file_width = image_file_height = None

    if image_file_width is None or image_file_height is None:
        return _('There is a problem with this image file. Try to upload a different file.')

    if image_file_width < settings.VIDEO_IMAGE_MIN_WIDTH or image_file_height < settings.VIDEO_IMAGE_MIN_HEIGHT:
        return _('Recommended image resolution is {image_file_max_width}x{image_file_max_height}. '
                 'The minimum resolution is {image_file_min_width}x{image_file_min_height}.').format(
                     image_file_max_width=settings.VIDEO_IMAGE_MAX_WIDTH,
                     image_file_max_height=settings.VIDEO_IMAGE_MAX_HEIGHT,
                     image_file_min_width=settings.VIDEO_IMAGE_MIN_WIDTH,
                     image_file_min_height=settings.VIDEO_IMAGE_MIN_HEIGHT
                 )

    if not skip_aspect_ratio:
        # The ratio is computed only after the minimum-resolution check has passed,
        # so an image reporting a height of zero is rejected above instead of
        # raising ZeroDivisionError here (given a positive minimum height setting).
        image_file_aspect_ratio = abs(image_file_width / float(image_file_height) - settings.VIDEO_IMAGE_ASPECT_RATIO)
        if image_file_aspect_ratio > settings.VIDEO_IMAGE_ASPECT_RATIO_ERROR_MARGIN:
            return _('This image file must have an aspect ratio of {video_image_aspect_ratio_text}.').format(
                video_image_aspect_ratio_text=settings.VIDEO_IMAGE_ASPECT_RATIO_TEXT
            )

    try:
        image_file.name.encode('ascii')
    except UnicodeError:
        # UnicodeError covers UnicodeEncodeError and also the UnicodeDecodeError that
        # Python 2 raises when a non-ASCII byte string is implicitly decoded first.
        return _('The image file name can only contain letters, numbers, hyphens (-), and underscores (_).')

    return None
def download_youtube_video_thumbnail(youtube_id, timeout=10):
    """
    Download highest resolution video thumbnail available from youtube.

    The qualities in YOUTUBE_THUMBNAIL_SIZES are requested in order and the
    first one answered with an OK status is used.

    Arguments:
        youtube_id: Youtube id of the video whose thumbnail is requested.
        timeout: Seconds to wait for each thumbnail request; without a timeout
            an unresponsive server would block the caller indefinitely.

    Returns:
        tuple: (thumbnail_content, thumbnail_content_type). Both are None when
        no quality returned an OK response; the content type is None when the
        response carries no `content-type` header.

    Raises:
        Whatever `requests.get` raises (connection errors, timeouts) propagates
        to the caller.
    """
    thumbnail_content = thumbnail_content_type = None
    # Download highest resolution thumbnail available.
    for thumbnail_quality in YOUTUBE_THUMBNAIL_SIZES:
        thumbnail_url = urljoin('https://img.youtube.com', '/vi/{youtube_id}/{thumbnail_quality}.jpg'.format(
            youtube_id=youtube_id, thumbnail_quality=thumbnail_quality
        ))
        response = requests.get(thumbnail_url, timeout=timeout)
        if response.status_code == requests.codes.ok:   # pylint: disable=no-member
            thumbnail_content = response.content
            # `.get` avoids a KeyError if the server omits the header.
            thumbnail_content_type = response.headers.get('content-type')
            # If best available resolution is found, skip looking for lower resolutions.
            break
    return thumbnail_content, thumbnail_content_type
def validate_and_update_video_image(course_key_string, edx_video_id, image_file, image_filename):
    """
    Validates image content and updates video image.

    The image is validated with the aspect ratio check skipped. A validation
    error is logged and the video image is left untouched; otherwise the image
    is stored for the video and the success is logged.
    """
    validation_error = validate_video_image(image_file, skip_aspect_ratio=True)
    if not validation_error:
        update_video_image(edx_video_id, course_key_string, image_file, image_filename)
        LOGGER.info(
            'VIDEOS: Scraping youtube video thumbnail for edx_video_id [%s] in course [%s]',
            edx_video_id,
            course_key_string
        )
    else:
        LOGGER.info(
            'VIDEOS: Scraping youtube video thumbnail failed for edx_video_id [%s] in course [%s] with error: %s',
            edx_video_id,
            course_key_string,
            validation_error
        )
def scrape_youtube_thumbnail(course_id, edx_video_id, youtube_id):
    """
    Scrapes youtube thumbnail for a given video.

    Nothing is done when the course video already has an image. Otherwise the
    thumbnail is downloaded, wrapped in an uploaded-file object named after the
    youtube id, and handed over for validation and storage.
    """
    # Only scrape when the course video has no image yet.
    if get_course_video_image_url(course_id, edx_video_id):
        return

    content, content_type = download_youtube_video_thumbnail(youtube_id)

    # Invert the extension -> content type setting to look up by content type.
    extension_by_content_type = {
        mime_type: extension
        for extension, mime_type in settings.VIDEO_IMAGE_SUPPORTED_FILE_FORMATS.iteritems()
    }
    # Unknown or missing content types fall back to the JPEG extension.
    extension = extension_by_content_type.get(content_type, extension_by_content_type['image/jpeg'])
    image_filename = '{youtube_id}{image_extention}'.format(youtube_id=youtube_id, image_extention=extension)

    thumbnail_file = SimpleUploadedFile(image_filename, content, content_type)
    validate_and_update_video_image(course_id, edx_video_id, thumbnail_file, image_filename)

View File

@@ -29,7 +29,6 @@ from contentstore.tests.utils import CourseTestCase
from contentstore.utils import reverse_course_url
from contentstore.views.videos import (
_get_default_video_image_url,
validate_video_image,
VIDEO_IMAGE_UPLOAD_ENABLED,
WAFFLE_SWITCHES,
TranscriptProvider
@@ -784,26 +783,6 @@ class VideoImageTestCase(VideoUploadTestBase, CourseTestCase):
response = self.client.post(video_image_upload_url, {})
self.verify_error_message(response, 'An image file is required.')
def test_invalid_image_file_info(self):
    """
    Test that when no file information is provided to validate_video_image, it gives proper error message.
    """
    # A plain dict exposes none of the `name`, `content_type` or `size` attributes.
    error = validate_video_image({})
    # `assertEqual` replaces the deprecated `assertEquals` alias.
    self.assertEqual(error, 'The image must have name, content type, and size information.')
def test_corrupt_image_file(self):
    """
    Test that when corrupt file is provided to validate_video_image, it gives proper error message.
    """
    # Imported locally so this test does not rely on a module-level import being added.
    import tempfile

    # An empty temporary file stands in for a corrupt PNG. It is removed when the
    # `with` block exits, so nothing is left behind under MEDIA_ROOT, and the
    # handle no longer shadows the `file` builtin.
    with tempfile.NamedTemporaryFile(suffix='.png') as corrupt_file:
        image_file = UploadedFile(
            corrupt_file,
            content_type='image/png',
            # Report the minimum allowed size so validation reaches the dimension check.
            size=settings.VIDEO_IMAGE_SETTINGS['VIDEO_IMAGE_MIN_BYTES']
        )
        error = validate_video_image(image_file)
        self.assertEqual(error, 'There is a problem with this image file. Try to upload a different file.')
@override_switch(VIDEO_IMAGE_UPLOAD_ENABLED, True)
def test_no_video_image(self):
"""

View File

@@ -14,7 +14,6 @@ from boto import s3
from django.conf import settings
from django.contrib.auth.decorators import login_required
from django.contrib.staticfiles.storage import staticfiles_storage
from django.core.files.images import get_image_dimensions
from django.urls import reverse
from django.http import HttpResponse, HttpResponseNotFound
from django.utils.translation import ugettext as _
@@ -40,6 +39,7 @@ from xmodule.video_module.transcripts_utils import Transcript
from contentstore.models import VideoUploadConfig
from contentstore.utils import reverse_course_url
from contentstore.video_utils import validate_video_image
from edxmako.shortcuts import render_to_response
from openedx.core.djangoapps.video_config.models import VideoTranscriptEnabledFlag
from openedx.core.djangoapps.waffle_utils import WaffleSwitchNamespace
@@ -187,60 +187,6 @@ def videos_handler(request, course_key_string, edx_video_id=None):
return videos_post(course, request)
def validate_video_image(image_file):
    """
    Validates video image file.

    Checks run in order and the first failure is reported: required file
    attributes, content type, maximum size, minimum size, readable image
    dimensions, minimum resolution, aspect ratio and an ASCII-only file name.

    Arguments:
        image_file: The selected image file.

    Returns:
        error (String or None): If there is error returns error message otherwise None.
    """
    if not all(hasattr(image_file, attr) for attr in ['name', 'content_type', 'size']):
        return _('The image must have name, content type, and size information.')

    if image_file.content_type not in settings.VIDEO_IMAGE_SUPPORTED_FILE_FORMATS.values():
        return _('This image file type is not supported. Supported file types are {supported_file_formats}.').format(
            supported_file_formats=settings.VIDEO_IMAGE_SUPPORTED_FILE_FORMATS.keys()
        )

    if image_file.size > settings.VIDEO_IMAGE_SETTINGS['VIDEO_IMAGE_MAX_BYTES']:
        return _('This image file must be smaller than {image_max_size}.').format(
            image_max_size=settings.VIDEO_IMAGE_MAX_FILE_SIZE_MB
        )

    if image_file.size < settings.VIDEO_IMAGE_SETTINGS['VIDEO_IMAGE_MIN_BYTES']:
        return _('This image file must be larger than {image_min_size}.').format(
            image_min_size=settings.VIDEO_IMAGE_MIN_FILE_SIZE_KB
        )

    try:
        image_file_width, image_file_height = get_image_dimensions(image_file)
    except TypeError:
        # Treated the same as dimensions that could not be determined.
        image_file_width = image_file_height = None

    if image_file_width is None or image_file_height is None:
        return _('There is a problem with this image file. Try to upload a different file.')

    if image_file_width < settings.VIDEO_IMAGE_MIN_WIDTH or image_file_height < settings.VIDEO_IMAGE_MIN_HEIGHT:
        return _('Recommended image resolution is {image_file_max_width}x{image_file_max_height}. '
                 'The minimum resolution is {image_file_min_width}x{image_file_min_height}.').format(
                     image_file_max_width=settings.VIDEO_IMAGE_MAX_WIDTH,
                     image_file_max_height=settings.VIDEO_IMAGE_MAX_HEIGHT,
                     image_file_min_width=settings.VIDEO_IMAGE_MIN_WIDTH,
                     image_file_min_height=settings.VIDEO_IMAGE_MIN_HEIGHT
                 )

    # The ratio is computed only after the minimum-resolution check has passed,
    # so an image reporting a height of zero is rejected above instead of
    # raising ZeroDivisionError here (given a positive minimum height setting).
    image_file_aspect_ratio = abs(image_file_width / float(image_file_height) - settings.VIDEO_IMAGE_ASPECT_RATIO)
    if image_file_aspect_ratio > settings.VIDEO_IMAGE_ASPECT_RATIO_ERROR_MARGIN:
        return _('This image file must have an aspect ratio of {video_image_aspect_ratio_text}.').format(
            video_image_aspect_ratio_text=settings.VIDEO_IMAGE_ASPECT_RATIO_TEXT
        )

    try:
        image_file.name.encode('ascii')
    except UnicodeError:
        # UnicodeError covers UnicodeEncodeError and also the UnicodeDecodeError that
        # Python 2 raises when a non-ASCII byte string is implicitly decoded first.
        return _('The image file name can only contain letters, numbers, hyphens (-), and underscores (_).')

    return None
@expect_json
@login_required
@require_POST