Merge pull request #21750 from edx/aj/prevent-unicode-error

Prevent unicode error
This commit is contained in:
Awais Jibran
2019-09-27 13:25:10 +05:00
committed by GitHub
2 changed files with 45 additions and 7 deletions

View File

@@ -1,3 +1,4 @@
# coding=utf-8
"""
Tests capa util
"""
@@ -5,14 +6,23 @@ from __future__ import absolute_import
import unittest
import ddt
from lxml import etree
from capa.tests.helpers import test_capa_system
from capa.util import compare_with_tolerance, get_inner_html_from_xpath, remove_markup, sanitize_html
from capa.util import (
compare_with_tolerance,
contextualize_text,
get_inner_html_from_xpath,
remove_markup,
sanitize_html
)
@ddt.ddt
class UtilTest(unittest.TestCase):
"""Tests for util"""
def setUp(self):
    """Run the base-class setup, then attach a capa system built by ``test_capa_system``."""
    super(UtilTest, self).setUp()
    capa_system = test_capa_system()
    self.system = capa_system
@@ -138,3 +148,24 @@ class UtilTest(unittest.TestCase):
remove_markup("The <mark>Truth</mark> is <em>Out There</em> & you need to <strong>find</strong> it"),
"The Truth is Out There &amp; you need to find it"
)
@ddt.data(
    'When the root level failš the whole hierarchy wont work anymore.',
    'あなたあなたあなた'
)
def test_contextualize_text(self, context_value):
    """
    A template consisting only of ``$answer0`` must come back as the
    context value itself, including when that value is non-ascii.
    """
    variable = 'answer0'
    template = '$answer0'
    # The whole template is the placeholder, so the substituted result
    # should equal the value stored under ``answer0``.
    substituted = contextualize_text(template, {variable: context_value})
    self.assertEqual(context_value, substituted)
def test_contextualize_text_with_non_ascii_context(self):
    """
    Substitution must work when the template text surrounding the
    variables and the context values are both non-ascii.
    """
    # Template is '$' followed by non-ascii text containing the variables $a and $b.
    template_tail = u'あなた$a $b'
    template = '$' + template_tail
    replacements = {'a': u'あなたあなたあなた', 'b': u'あなたhi'}
    # The leading '$' is not followed by a known variable name, so it stays in place.
    expected_text = '$あなたあなたあなたあなた あなたhi'
    actual_text = contextualize_text(template, replacements)
    self.assertEqual(expected_text, actual_text)

View File

@@ -100,20 +100,27 @@ def contextualize_text(text, context): # private
Takes a string containing variables, e.g. $a+$b, and replaces each
$name with the value stored under that name in the context.
"""
def convert_to_str(value):
    """
    Return ``str(value)``; if that raises ``UnicodeEncodeError``, return
    ``value`` encoded as UTF-8 instead.
    """
    try:
        converted = str(value)
    except UnicodeEncodeError:
        # str() could not represent the value, so fall back to an explicit
        # UTF-8 encode; errors='ignore' drops anything that cannot be encoded.
        converted = value.encode('utf8', errors='ignore')
    return converted
if not text:
return text
for key in sorted(context, key=len, reverse=True):
# TODO (vshnayder): This whole replacement thing is a big hack
# right now--context contains not just the vars defined in the
# program, but also e.g. a reference to the numpy module.
# Should be a separate dict of variables that should be
# replaced.
if '$' + key in text:
try:
s = str(context[key])
except UnicodeEncodeError:
s = context[key].encode('utf8', errors='ignore')
text = text.replace('$' + key, s)
context_key = '$' + key
if context_key in text:
text = convert_to_str(text)
context_value = convert_to_str(context[key])
text = text.replace(context_key, context_value)
return text