diff --git a/common/lib/capa/capa/tests/test_util.py b/common/lib/capa/capa/tests/test_util.py
index ed910146ef..3ebd40e80f 100644
--- a/common/lib/capa/capa/tests/test_util.py
+++ b/common/lib/capa/capa/tests/test_util.py
@@ -1,3 +1,4 @@
+# coding=utf-8
"""
Tests capa util
"""
@@ -5,14 +6,23 @@ from __future__ import absolute_import
import unittest
+import ddt
from lxml import etree
from capa.tests.helpers import test_capa_system
-from capa.util import compare_with_tolerance, get_inner_html_from_xpath, remove_markup, sanitize_html
+from capa.util import (
+ compare_with_tolerance,
+ contextualize_text,
+ get_inner_html_from_xpath,
+ remove_markup,
+ sanitize_html
+)
+@ddt.ddt
class UtilTest(unittest.TestCase):
"""Tests for util"""
+
def setUp(self):
super(UtilTest, self).setUp()
self.system = test_capa_system()
@@ -138,3 +148,24 @@ class UtilTest(unittest.TestCase):
remove_markup("The Truth is Out There & you need to find it"),
"The Truth is Out There & you need to find it"
)
+
+ @ddt.data(
+ 'When the root level failš the whole hierarchy won’t work anymore.',
+ 'あなたあなたあなた'
+ )
+ def test_contextualize_text(self, context_value):
+ """Verify that variable substitution works as intended with non-ascii characters."""
+ key = 'answer0'
+ text = '$answer0'
+ context = {key: context_value}
+ contextual_text = contextualize_text(text, context)
+ self.assertEqual(context_value, contextual_text)
+
+ def test_contextualize_text_with_non_ascii_context(self):
+ """Verify that substitution works when both the text and the context values contain non-ascii characters."""
+ key = u'あなた$a $b'
+ text = '$' + key
+ context = {'a': u'あなたあなたあなた', 'b': u'あなたhi'}
+ expected_text = '$あなたあなたあなたあなた あなたhi'
+ contextual_text = contextualize_text(text, context)
+ self.assertEqual(expected_text, contextual_text)
diff --git a/common/lib/capa/capa/util.py b/common/lib/capa/capa/util.py
index f7d8f5b466..2b700e4c18 100644
--- a/common/lib/capa/capa/util.py
+++ b/common/lib/capa/capa/util.py
@@ -100,20 +100,27 @@ def contextualize_text(text, context): # private
Takes a string with variables. E.g. $a+$b.
Does a substitution of those variables from the context
"""
+ def convert_to_str(value):
+ """Return str(value); if that raises UnicodeEncodeError, return the value encoded as UTF-8 instead."""
+ try:
+ return str(value)
+ except UnicodeEncodeError:
+ return value.encode('utf8', errors='ignore')
+
if not text:
return text
+
for key in sorted(context, key=len, reverse=True):
# TODO (vshnayder): This whole replacement thing is a big hack
# right now--context contains not just the vars defined in the
# program, but also e.g. a reference to the numpy module.
# Should be a separate dict of variables that should be
# replaced.
- if '$' + key in text:
- try:
- s = str(context[key])
- except UnicodeEncodeError:
- s = context[key].encode('utf8', errors='ignore')
- text = text.replace('$' + key, s)
+ context_key = '$' + key
+ if context_key in text:
+ text = convert_to_str(text)
+ context_value = convert_to_str(context[key])
+ text = text.replace(context_key, context_value)
return text