262 lines
7.8 KiB
Python
262 lines
7.8 KiB
Python
"""
|
|
This module contains functions and classes used to evaluate whether images are acceptable (do not show improper
content, etc.) and to send them to S3.
|
|
"""
|
|
|
|
# PIL/Pillow is optional: record whether it could be imported so the rest of
# the deployment can tell if image inspection is available.
try:
    from PIL import Image
    ENABLE_PIL = True
except ImportError:
    # Only a missing/unimportable package is expected here; anything else
    # should surface instead of being silently swallowed.
    ENABLE_PIL = False
|
|
|
|
from urlparse import urlparse
|
|
import requests
|
|
from boto.s3.connection import S3Connection
|
|
from boto.s3.key import Key
|
|
from django.conf import settings
|
|
import pickle
|
|
import logging
|
|
import re
|
|
|
|
log = logging.getLogger(__name__)

# Domains where any image linked to can be trusted to have acceptable content.
# Entries are matched as substrings, so short labels such as 'wikipedia' cover
# every language subdomain.
TRUSTED_IMAGE_DOMAINS = [
    'wikipedia',
    'edxuploads.s3.amazonaws.com',
    'wikimedia',
]

# Suffixes that are allowed in image urls
ALLOWABLE_IMAGE_SUFFIXES = [
    'jpg',
    'png',
    'gif',
    'jpeg'
]

# Maximum allowed dimensions (x and y) for an uploaded image
MAX_ALLOWED_IMAGE_DIM = 1000

# Dimensions to which image is resized before it is evaluated for color count, etc
MAX_IMAGE_DIM = 150

# Maximum number of colors that should be counted in ImageProperties
MAX_COLORS_TO_COUNT = 16

# Maximum number of colors allowed in an uploaded image
# NOTE(review): ImageProperties.count_colors never counts more than
# MAX_COLORS_TO_COUNT colors, so with the current values this limit can never
# be exceeded. Confirm whether that is intended before changing either value.
MAX_COLORS = 400
|
|
|
|
class ImageProperties(object):
    """
    Class to check properties of an image and to validate if they are allowed.

    The image is opened once in the constructor; the check methods then work
    on the (possibly downsized) copy stored in self.image.
    """

    def __init__(self, image_data):
        """
        Opens the image and records whether it exceeds the allowed dimensions.

        @param image_data: Whatever PIL's Image.open accepts for the image
        @return: None
        """
        self.image = Image.open(image_data)
        width, height = self.image.size
        # The size limit is judged on the original dimensions, before any resize.
        self.image_too_large = width > MAX_ALLOWED_IMAGE_DIM or height > MAX_ALLOWED_IMAGE_DIM
        if width > MAX_IMAGE_DIM or height > MAX_IMAGE_DIM:
            # Shrink to a fixed square (aspect ratio is not kept) so the color
            # checks below work on a bounded number of pixels.
            self.image = self.image.resize((MAX_IMAGE_DIM, MAX_IMAGE_DIM))
        self.image_size = self.image.size

    def count_colors(self):
        """
        Counts the number of colors in an image, and matches them to the max allowed

        @return: boolean true if color count is acceptable, false otherwise
        """
        colors = self.image.getcolors(MAX_COLORS_TO_COUNT)
        if colors is None:
            # getcolors gave no list (more colors than it was asked to count),
            # so the count is capped at the counting limit.
            color_count = MAX_COLORS_TO_COUNT
        else:
            color_count = len(colors)

        # NOTE(review): color_count can never exceed MAX_COLORS_TO_COUNT, so
        # this only rejects anything if MAX_COLORS is set below that limit.
        return color_count <= MAX_COLORS

    def check_if_rgb_is_skin(self, rgb):
        """
        Checks if a given input rgb tuple/list is a skin tone

        @param rgb: RGB tuple
        @return: Boolean true if the color falls in the skin tone range, false
                 otherwise (including when rgb is not a usable RGB sequence)
        """
        try:
            r = rgb[0]
            g = rgb[1]
            b = rgb[2]
            check_r = (r > 60)
            check_g = (r * 0.4) < g < (r * 0.85)
            check_b = (r * 0.2) < b < (r * 0.7)
            return check_r and check_b and check_g
        except (TypeError, LookupError):
            # Not an indexable sequence of at least three comparable channels
            # (e.g. a single integer or a two-item tuple): treat as "not skin".
            return False

    def get_skin_ratio(self):
        """
        Gets the ratio of skin tone colors in an image

        @return: True if the ratio is low enough to be acceptable, false otherwise
        """
        colors = self.image.getcolors(MAX_COLORS_TO_COUNT)
        if colors is None:
            # No color list available, so there is nothing to measure.
            return True

        total_colored_pixels = sum(count for count, rgb in colors)
        if total_colored_pixels == 0:
            # Avoid dividing by zero. run_tests already reported such an image
            # as not okay (via the caught exception), so keep failing closed.
            return False

        skin = sum(count for count, rgb in colors if self.check_if_rgb_is_skin(rgb))
        bad_color_val = float(skin) / total_colored_pixels
        return not bad_color_val > .4

    def run_tests(self):
        """
        Does all available checks on an image to ensure that it is okay (size, skin ratio, colors)

        @return: Boolean indicating whether or not image passes all checks
        """
        image_is_okay = False
        try:
            image_is_okay = self.count_colors() and self.get_skin_ratio() and not self.image_too_large
        except Exception:
            # Any failure while inspecting the image counts as "not okay".
            log.exception("Could not run image tests.")

        return image_is_okay
|
|
|
|
|
|
class URLProperties(object):
    """
    Checks to see if a URL points to acceptable content.  Added to check if students are submitting reasonable
    links to the peer grading image functionality of the external grading service.
    """

    def __init__(self, url_string):
        """
        @param url_string: The URL to check, in string format
        """
        self.url_string = url_string

    def check_if_parses(self):
        """
        Check to see if a URL parses properly.  On success the parse result is
        stored in self.parsed_url.

        @return: success (True if parses, false if not)
        """
        try:
            self.parsed_url = urlparse(self.url_string)
        except (ValueError, TypeError, AttributeError):
            # Malformed URL or a value that is not a string at all.
            return False
        return True

    def check_suffix(self):
        """
        Checks the suffix of a url to make sure that it is allowed.  The
        comparison ignores case, so "photo.JPG" is accepted like "photo.jpg".

        @return: True if suffix is okay, false if not
        """
        return self.url_string.lower().endswith(tuple(ALLOWABLE_IMAGE_SUFFIXES))

    def run_tests(self):
        """
        Runs all available url tests

        @return: True if URL passes tests, false if not.
        """
        url_is_okay = self.check_suffix() and self.check_if_parses() and self.check_domain()
        return url_is_okay

    def _hostname(self):
        """
        Extracts the lower-cased host name of the URL.

        @return: The host name, or an empty string if none can be determined
        """
        try:
            host = urlparse(self.url_string).hostname
            if not host:
                # Without a "//" marker (e.g. "en.wikipedia.org/a.png") urlparse
                # leaves the host in the path; re-parse with the marker added so
                # scheme-less links keep working.
                host = urlparse('//' + self.url_string).hostname
        except (ValueError, TypeError, AttributeError):
            return ''
        return (host or '').lower()

    def check_domain(self):
        """
        Checks to see if url is from a trusted domain.  Only the host part of
        the URL is inspected, so a trusted name appearing in the path, query or
        user-info part (e.g. "http://evil.example.com/wikipedia.jpg") does not count.

        @return: True if the host contains a trusted domain, false if not
        """
        host = self._hostname()
        return any(domain in host for domain in TRUSTED_IMAGE_DOMAINS)
|
|
|
|
def run_url_tests(url_string):
    """
    Convenience wrapper that builds a URLProperties checker for the given URL
    and runs every check on it.

    @param url_string: A URL in string format
    @return: Boolean indicating whether or not URL has passed all tests
    """
    return URLProperties(url_string).run_tests()
|
|
|
|
|
|
def run_image_tests(image):
    """
    Runs all available image tests

    @param image: The image to check; it is handed unchanged to ImageProperties, which opens it
    @return: Boolean indicating whether or not all tests have been passed.  False is also
             returned when the image cannot be opened or inspected.
    """
    success = False
    try:
        image_properties = ImageProperties(image)
        success = image_properties.run_tests()
    except Exception:
        # Best effort: an unreadable image or a broken PIL install must not
        # crash the caller, it simply means the image is not accepted.
        log.exception("Cannot run image tests in combined open ended xmodule.  May be an issue with a particular image,"
                      "or an issue with the deployment configuration of PIL/Pillow")
    return success
|
|
|
|
|
|
def upload_to_s3(file_to_upload, keyname):
    '''
    Upload file to S3 using provided keyname.

    @param file_to_upload: Open file object to upload; its name attribute is stored as metadata
    @param keyname: Key under which the file is stored in the bucket

    Returns:
        (success, result) tuple.  On success, result is public_url, the URL to access
        the uploaded file; on failure it is an error message.
    '''

    #This commented out code is kept here in case we change the uploading method and require images to be
    #converted before they are sent to S3.
    #TODO: determine if commented code is needed and remove
    #im = Image.open(file_to_upload)
    #out_im = cStringIO.StringIO()
    #im.save(out_im, 'PNG')

    try:
        conn = S3Connection(settings.AWS_ACCESS_KEY_ID, settings.AWS_SECRET_ACCESS_KEY)
        bucketname = str(settings.AWS_STORAGE_BUCKET_NAME)
        bucket = conn.create_bucket(bucketname.lower())

        k = Key(bucket)
        k.key = keyname
        k.set_metadata('filename', file_to_upload.name)
        k.set_contents_from_file(file_to_upload)

        #This commented out code is kept here in case we change the uploading method and require images to be
        #converted before they are sent to S3.
        #k.set_contents_from_string(out_im.getvalue())
        #k.set_metadata("Content-Type", 'images/png')

        k.set_acl("public-read")
        public_url = k.generate_url(60 * 60 * 24 * 365)  # URL timeout in seconds.

        return True, public_url
    except Exception:
        # Callers only get a generic message, so keep the real cause in the log.
        log.exception("Could not upload file to S3.")
        return False, "Could not connect to S3."
|
|
|
|
|
|
def get_from_s3(s3_public_url):
    """
    Gets an image from a given S3 url

    @param s3_public_url: The URL where an image is located
    @return: The image data, as the raw bytes of the response body
    """
    r = requests.get(s3_public_url, timeout=2)
    # Images are binary: use the undecoded body.  r.text would run the bytes
    # through a guessed text encoding and corrupt them.
    data = r.content
    return data
|
|
|
|
|
|
|