Merge pull request #15510 from edx/ri/EDUCATOR-810-unicode-decode-error
EDUCATOR-810 fix UnicodeDecodeError
This commit is contained in:
@@ -932,6 +932,9 @@ INSTALLED_APPS = (
|
||||
# Site configuration for theming and behavioral modification
|
||||
'openedx.core.djangoapps.site_configuration',
|
||||
|
||||
# Ability to detect and special-case crawler behavior
|
||||
'openedx.core.djangoapps.crawlers',
|
||||
|
||||
# comment common
|
||||
'django_comment_common',
|
||||
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
This module handles the detection of crawlers, so that we can handle them
|
||||
appropriately in other parts of the code.
|
||||
"""
|
||||
import six
|
||||
from config_models.models import ConfigurationModel
|
||||
from django.db import models
|
||||
|
||||
@@ -39,6 +40,14 @@ class CrawlersConfig(ConfigurationModel):
|
||||
if (not req_user_agent) or (not crawler_agents):
|
||||
return False
|
||||
|
||||
# The crawler_agents list we pull from our model always has unicode objects, but the
|
||||
# req_user_agent we get from HTTP headers ultimately comes to us via WSGI. That
|
||||
# value is an ISO-8859-1 encoded byte string in Python 2.7 (and in the HTTP spec), but
|
||||
# it will be a unicode str when we move to Python 3.x. This code should work under
|
||||
# either version.
|
||||
if isinstance(req_user_agent, six.binary_type):
|
||||
crawler_agents = [crawler_agent.encode('iso-8859-1') for crawler_agent in crawler_agents]
|
||||
|
||||
# We perform prefix matching of the crawler agent here so that we don't
|
||||
# have to worry about version bumps.
|
||||
return any(
|
||||
|
||||
0
openedx/core/djangoapps/crawlers/tests/__init__.py
Normal file
0
openedx/core/djangoapps/crawlers/tests/__init__.py
Normal file
41
openedx/core/djangoapps/crawlers/tests/test_models.py
Normal file
41
openedx/core/djangoapps/crawlers/tests/test_models.py
Normal file
@@ -0,0 +1,41 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Tests verifying whether a given request is detected as coming from a crawler.
|
||||
"""
|
||||
import ddt
|
||||
from django.test import TestCase
|
||||
from django.http import HttpRequest
|
||||
from ..models import CrawlersConfig
|
||||
|
||||
|
||||
@ddt.ddt
class CrawlersConfigTest(TestCase):
    """
    Exercise CrawlersConfig.is_crawler against unicode, non-ASCII, and
    byte-string user agents.
    """

    def setUp(self):
        super(CrawlersConfigTest, self).setUp()
        # Activate a config listing two known crawler user-agent prefixes.
        CrawlersConfig(known_user_agents='edX-downloader,crawler_foo', enabled=True).save()

    @staticmethod
    def _request_with_agent(user_agent):
        # Build a minimal request carrying the given User-Agent header.
        request = HttpRequest()
        request.META['HTTP_USER_AGENT'] = user_agent
        return request

    @ddt.data(
        "Mozilla/5.0 (Linux; Android 5.1; Nexus 5 Build/LMY47I; wv) AppleWebKit/537.36 (KHTML, like Gecko) "
        "Version/4.0 Chrome/47.0.2526.100 Mobile Safari/537.36 edX/org.edx.mobile/2.0.0",
        "Le Héros des Deux Mondes",
    )
    def test_req_user_agent_is_not_crawler(self, req_user_agent):
        """
        verify that the request did not come from a crawler.
        """
        # Non-crawler agents (including non-ASCII text) must not match.
        self.assertFalse(CrawlersConfig.is_crawler(self._request_with_agent(req_user_agent)))

    @ddt.data(
        u"edX-downloader",
        "crawler_foo".encode("utf-8"),
    )
    def test_req_user_agent_is_crawler(self, req_user_agent):
        """
        verify that the request came from a crawler.
        """
        # Both unicode and byte-string agents matching a known prefix count.
        self.assertTrue(CrawlersConfig.is_crawler(self._request_with_agent(req_user_agent)))
|
||||
Reference in New Issue
Block a user