# edx-platform/common/lib/xmodule/xmodule/open_ended_image_submission.py

"""
This contains functions and classes used to evaluate if images are acceptable (do not show improper content, etc), and
to send them to S3.
"""

# PIL/Pillow is an optional dependency; ENABLE_PIL records whether the import succeeded.
# Only ImportError is caught so that unrelated failures (and KeyboardInterrupt/SystemExit)
# are not silently swallowed.
try:
    from PIL import Image
    ENABLE_PIL = True
except ImportError:
    ENABLE_PIL = False

from urlparse import urlparse
import requests
from boto.s3.connection import S3Connection
from boto.s3.key import Key
from django.conf import settings
import pickle
import logging
import re

log = logging.getLogger(__name__)

# Domain fragments (matched as substrings) whose linked images are trusted to have acceptable content.
TRUSTED_IMAGE_DOMAINS = ['wikipedia', 'edxuploads.s3.amazonaws.com', 'wikimedia']

# Suffixes an image url is allowed to end with.
ALLOWABLE_IMAGE_SUFFIXES = ['jpg', 'png', 'gif', 'jpeg']

# Largest width or height an uploaded image may have before it is flagged as too large.
MAX_ALLOWED_IMAGE_DIM = 1000

# Side length the image is resized to before it is evaluated for color count, etc.
MAX_IMAGE_DIM = 150

# Upper bound on the number of distinct colors that ImageProperties asks PIL to count.
MAX_COLORS_TO_COUNT = 16

# Largest number of colors an uploaded image may contain.
MAX_COLORS = 400

class ImageProperties(object):
    """
    Class to check properties of an image and to validate if they are allowed.

    The image is opened with PIL.  If either dimension exceeds MAX_IMAGE_DIM it is resized to
    MAX_IMAGE_DIM x MAX_IMAGE_DIM, and the color-based checks run on that resized copy.
    """
    def __init__(self, image_data):
        """
        Opens the image, records whether it exceeds the allowed dimensions, and shrinks it for analysis
        @param image_data: Whatever PIL's Image.open accepts (a filename or a file-like object)
        @return: None
        """
        self.image = Image.open(image_data)
        width, height = self.image.size
        # The size limit is judged on the original dimensions, before any resizing.
        self.image_too_large = (width > MAX_ALLOWED_IMAGE_DIM or height > MAX_ALLOWED_IMAGE_DIM)
        if width > MAX_IMAGE_DIM or height > MAX_IMAGE_DIM:
            self.image = self.image.resize((MAX_IMAGE_DIM, MAX_IMAGE_DIM))
        # Always record the size of the image actually being analysed.  This attribute used to be set only
        # when a resize happened, leaving it undefined for small images.
        self.image_size = self.image.size

    def count_colors(self):
        """
        Counts the number of colors in an image, and matches them to the max allowed
        @return: boolean true if color count is acceptable, false otherwise
        """
        colors = self.image.getcolors(MAX_COLORS_TO_COUNT)
        if colors is None:
            # getcolors gives None once the image holds more than MAX_COLORS_TO_COUNT colors, so the
            # count is capped at that value.
            color_count = MAX_COLORS_TO_COUNT
        else:
            color_count = len(colors)

        # NOTE(review): because the count is capped at MAX_COLORS_TO_COUNT, this check can only fail when
        # MAX_COLORS_TO_COUNT > MAX_COLORS.  Confirm whether that is the intended strictness.
        color_count_acceptable = (color_count <= MAX_COLORS)
        return color_count_acceptable

    def check_if_rgb_is_skin(self, rgb):
        """
        Checks if a given input rgb tuple/list is a skin tone
        @param rgb: RGB tuple (only the first three channels are read)
        @return: Boolean true if the color looks like a skin tone; false otherwise, including when the
                 input is not an indexable sequence of at least three numbers
        """
        try:
            r = rgb[0]
            g = rgb[1]
            b = rgb[2]
            check_r = (r > 60)
            check_g = (r * 0.4) < g < (r * 0.85)
            check_b = (r * 0.2) < b < (r * 0.7)
            return check_r and check_b and check_g
        except (TypeError, IndexError, KeyError):
            # Non-RGB color values (e.g. a bare integer for palette/greyscale images, or a tuple with
            # fewer than three channels) are simply not treated as skin.
            return False

    def get_skin_ratio(self):
        """
        Gets the ratio of skin tone colors in an image
        @return: True if the ratio is low enough to be acceptable, false otherwise
        """
        colors = self.image.getcolors(MAX_COLORS_TO_COUNT)
        if colors is None:
            # More than MAX_COLORS_TO_COUNT colors: the ratio is not computed and the image is accepted.
            return True

        skin = sum(count for count, rgb in colors if self.check_if_rgb_is_skin(rgb))
        total_colored_pixels = sum(count for count, rgb in colors)
        bad_color_val = float(skin) / total_colored_pixels
        return not bad_color_val > .4

    def run_tests(self):
        """
        Does all available checks on an image to ensure that it is okay (size, skin ratio, colors)
        @return: Boolean indicating whether or not image passes all checks.  False is also returned (and
                 the error logged) if any check raises.
        """
        try:
            return self.count_colors() and self.get_skin_ratio() and not self.image_too_large
        except Exception:
            log.exception("Could not run image tests.")
            return False


class URLProperties(object):
    """
    Checks to see if a URL points to acceptable content.  Added to check if students are submitting reasonable
    links to the peer grading image functionality of the external grading service.
    """
    def __init__(self, url_string):
        self.url_string = url_string

    def _parse(self):
        """
        Parses self.url_string
        @return: The urlparse result, or None if the URL cannot be parsed
        """
        try:
            return urlparse(self.url_string)
        except (ValueError, TypeError, AttributeError):
            return None

    def _hostname(self):
        """
        Extracts the lower-cased host part of the URL
        @return: Hostname string, or '' if there is none or the URL cannot be parsed
        """
        parsed = self._parse()
        if parsed is None:
            return ''
        try:
            if not parsed.scheme and not parsed.netloc:
                # A scheme-less URL such as "en.wikipedia.org/a.png" is parsed entirely as a path.
                # Re-parse it as a network location so the host can still be recognised.
                parsed = urlparse('//' + self.url_string)
            return (parsed.hostname or '').lower()
        except (ValueError, TypeError, AttributeError):
            return ''

    def check_if_parses(self):
        """
        Check to see if a URL parses properly.  On success the result is stored in self.parsed_url.
        @return: success (True if parses, false if not)
        """
        parsed = self._parse()
        if parsed is None:
            return False
        self.parsed_url = parsed
        return True

    def check_suffix(self, allowed_suffixes=None):
        """
        Checks the suffix of a url to make sure that it is allowed.  The path component of the URL must end
        with "." followed by an allowed suffix; the comparison ignores case, and any query string or
        fragment is not considered.
        @param allowed_suffixes: Optional iterable of suffixes (without the dot).  Defaults to
                                 ALLOWABLE_IMAGE_SUFFIXES.
        @return: True if suffix is okay, false if not
        """
        if allowed_suffixes is None:
            allowed_suffixes = ALLOWABLE_IMAGE_SUFFIXES
        parsed = self._parse()
        if parsed is None:
            return False
        dotted_suffixes = tuple('.' + suffix.lower() for suffix in allowed_suffixes if suffix)
        return parsed.path.lower().endswith(dotted_suffixes)

    def run_tests(self):
        """
        Runs all available url tests
        @return: True if URL passes tests, false if not.
        """
        url_is_okay = self.check_suffix() and self.check_if_parses() and self.check_domain()
        return url_is_okay

    def check_domain(self, trusted_domains=None):
        """
        Checks to see if url is from a trusted domain.  Each trusted entry is looked for inside the URL's
        hostname only, so a trusted name appearing in the path or query string does not make a URL trusted.
        @param trusted_domains: Optional iterable of trusted domain fragments.  Defaults to
                                TRUSTED_IMAGE_DOMAINS.
        @return: True if the hostname contains a trusted entry, false if not
        """
        if trusted_domains is None:
            trusted_domains = TRUSTED_IMAGE_DOMAINS
        hostname = self._hostname()
        if not hostname:
            return False
        return any(domain.lower() in hostname for domain in trusted_domains if domain)

def run_url_tests(url_string):
    """
    Convenience wrapper: wraps the URL in a URLProperties object and runs every URL check on it
    @param url_string: A URL in string format
    @return: Boolean indicating whether or not URL has passed all tests
    """
    return URLProperties(url_string).run_tests()


def run_image_tests(image):
    """
    Runs all available image tests
    @param image: Image source handed to ImageProperties, which opens it with PIL's Image.open
    @return: Boolean indicating whether or not all tests have been passed.  False is also returned (and
             the error logged) if the image cannot be opened or the tests cannot be run.
    """
    try:
        return ImageProperties(image).run_tests()
    except Exception:
        # Deliberately best-effort: an unreadable image or a missing/broken PIL install must not
        # propagate to the caller; the image is simply reported as not okay.
        log.exception(
            "Cannot run image tests in combined open ended xmodule.  May be an issue with a particular image, "
            "or an issue with the deployment configuration of PIL/Pillow"
        )
        return False


def upload_to_s3(file_to_upload, keyname):
    '''
    Upload file to S3 using provided keyname.

    The AWS credentials and bucket name are read from the Django settings.  The uploaded key is made
    public-read.

    Args:
        file_to_upload: Open file-like object to upload; its `name` attribute is stored as metadata.
        keyname: Name of the S3 key to store the file under.

    Returns:
        (success, value) tuple.  On success this is (True, public_url), where public_url is a URL to
        access the uploaded file.  On any failure it is (False, "Could not connect to S3.").
    '''

    #This commented out code is kept here in case we change the uploading method and require images to be
    #converted before they are sent to S3.
    #TODO: determine if commented code is needed and remove
    #im = Image.open(file_to_upload)
    #out_im = cStringIO.StringIO()
    #im.save(out_im, 'PNG')

    try:
        conn = S3Connection(settings.AWS_ACCESS_KEY_ID, settings.AWS_SECRET_ACCESS_KEY)
        bucketname = str(settings.AWS_STORAGE_BUCKET_NAME)
        # NOTE(review): create_bucket is called on every upload; presumably this relies on it handing
        # back the existing bucket when it is already owned by this account -- confirm.
        bucket = conn.create_bucket(bucketname.lower())

        k = Key(bucket)
        k.key = keyname
        k.set_metadata('filename', file_to_upload.name)
        k.set_contents_from_file(file_to_upload)

        #This commented out code is kept here in case we change the uploading method and require images to be
        #converted before they are sent to S3.
        #k.set_contents_from_string(out_im.getvalue())
        #k.set_metadata("Content-Type", 'images/png')

        k.set_acl("public-read")
        public_url = k.generate_url(60 * 60 * 24 * 365)  # URL timeout in seconds.

        return True, public_url
    except Exception:
        # Keep the best-effort contract (never raise to the caller), but record the real cause:
        # the returned message is generic and the failure may not be a connection problem at all.
        log.exception("Could not upload file to S3 under key %s", keyname)
        return False, "Could not connect to S3."


def get_from_s3(s3_public_url, timeout=2):
    """
    Gets an image from a given S3 url
    @param s3_public_url: The URL where an image is located
    @param timeout: Seconds to wait for the server before giving up (defaults to 2)
    @return: The image data, as the raw bytes of the response body
    """
    response = requests.get(s3_public_url, timeout=timeout)
    # Image data is binary: use the undecoded body.  `response.text` would decode the bytes with a
    # guessed text encoding and corrupt the image.
    return response.content