Prevent unicode error
This PR prevents a `UnicodeDecodeError` when replacing unicode with a byte string. Example: PROD-680
This commit is contained in:
@@ -1,3 +1,4 @@
|
||||
# coding=utf-8
|
||||
"""
|
||||
Tests capa util
|
||||
"""
|
||||
@@ -5,14 +6,23 @@ from __future__ import absolute_import
|
||||
|
||||
import unittest
|
||||
|
||||
import ddt
|
||||
from lxml import etree
|
||||
|
||||
from capa.tests.helpers import test_capa_system
|
||||
from capa.util import compare_with_tolerance, get_inner_html_from_xpath, remove_markup, sanitize_html
|
||||
from capa.util import (
|
||||
compare_with_tolerance,
|
||||
contextualize_text,
|
||||
get_inner_html_from_xpath,
|
||||
remove_markup,
|
||||
sanitize_html
|
||||
)
|
||||
|
||||
|
||||
@ddt.ddt
|
||||
class UtilTest(unittest.TestCase):
|
||||
"""Tests for util"""
|
||||
|
||||
def setUp(self):
    """Run the base ``setUp``, then store a test capa system on ``self.system``."""
    super(UtilTest, self).setUp()
    self.system = test_capa_system()
|
||||
@@ -138,3 +148,24 @@ class UtilTest(unittest.TestCase):
|
||||
remove_markup("The <mark>Truth</mark> is <em>Out There</em> & you need to <strong>find</strong> it"),
|
||||
"The Truth is Out There & you need to find it"
|
||||
)
|
||||
|
||||
@ddt.data(
    'When the root level failš the whole hierarchy won’t work anymore.',
    'あなたあなたあなた'
)
def test_contextualize_text(self, context_value):
    """
    Check that a ``$answer0`` placeholder is replaced by a non-ascii context value.

    The template consists solely of the placeholder, so the substituted
    result must equal the context value itself.
    """
    variable_name = 'answer0'
    template = '$answer0'
    substituted = contextualize_text(template, {variable_name: context_value})
    self.assertEqual(context_value, substituted)
|
||||
|
||||
def test_contextualize_text_with_non_ascii_context(self):
    """
    Check substitution when the template text and the context values are all non-ascii.

    The template is ``$あなた$a $b``: ``$a`` and ``$b`` are replaced from the
    context, while the leading ``$あなた`` has no matching context entry and
    is expected to stay in the output.
    """
    template_tail = u'あなた$a $b'
    template = '$' + template_tail
    substitutions = {'a': u'あなたあなたあなた', 'b': u'あなたhi'}
    expected_text = '$あなたあなたあなたあなた あなたhi'
    self.assertEqual(expected_text, contextualize_text(template, substitutions))
|
||||
|
||||
@@ -100,20 +100,27 @@ def contextualize_text(text, context): # private
|
||||
Takes a string with variables. E.g. $a+$b.
|
||||
Does a substitution of those variables from the context
|
||||
"""
|
||||
def convert_to_str(value):
    """
    Coerce ``value`` to the interpreter's native ``str`` type.

    Arguments:
        value: any object; text, byte strings and arbitrary objects are accepted.

    Returns:
        str: ``str(value)`` where that succeeds. On Python 2, a unicode value
        that cannot be ascii-encoded is returned utf8-encoded instead. On
        Python 3, a ``bytes`` value is returned utf8-decoded.
    """
    # On Python 3, str(b'abc') yields the repr "b'abc'" rather than the text,
    # so bytes must be decoded. The `str is not bytes` guard keeps Python 2
    # (where bytes is an alias of str) on its original code path.
    if str is not bytes and isinstance(value, bytes):
        return value.decode('utf8', errors='ignore')
    try:
        return str(value)
    except UnicodeEncodeError:
        # Raised on Python 2 when str() tries to ascii-encode a unicode
        # object holding non-ascii characters; encode explicitly instead.
        return value.encode('utf8', errors='ignore')
|
||||
|
||||
if not text:
|
||||
return text
|
||||
|
||||
for key in sorted(context, key=len, reverse=True):
|
||||
# TODO (vshnayder): This whole replacement thing is a big hack
|
||||
# right now--context contains not just the vars defined in the
|
||||
# program, but also e.g. a reference to the numpy module.
|
||||
# Should be a separate dict of variables that should be
|
||||
# replaced.
|
||||
if '$' + key in text:
|
||||
try:
|
||||
s = str(context[key])
|
||||
except UnicodeEncodeError:
|
||||
s = context[key].encode('utf8', errors='ignore')
|
||||
text = text.replace('$' + key, s)
|
||||
context_key = '$' + key
|
||||
if context_key in text:
|
||||
text = convert_to_str(text)
|
||||
context_value = convert_to_str(context[key])
|
||||
text = text.replace(context_key, context_value)
|
||||
return text
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user