fix _flatten_to_ascii for str inputs

This commit is contained in:
Jason Bau
2013-09-20 09:59:21 -07:00
parent 29710661ec
commit 11ecd162d4
2 changed files with 15 additions and 4 deletions

View File

@@ -514,8 +514,16 @@ class ShibUtilFnTest(TestCase):
"""
def test__flatten_to_ascii(self):
DIACRITIC = u"àèìòùÀÈÌÒÙáéíóúýÁÉÍÓÚÝâêîôûÂÊÎÔÛãñõÃÑÕäëïöüÿÄËÏÖÜŸåÅçÇ" # pylint: disable=C0103
STR_DIACRI = "àèìòùÀÈÌÒÙáéíóúýÁÉÍÓÚÝâêîôûÂÊÎÔÛãñõÃÑÕäëïöüÿÄËÏÖÜŸåÅçÇ" # pylint: disable=C0103
FLATTENED = u"aeiouAEIOUaeiouyAEIOUYaeiouAEIOUanoANOaeiouyAEIOUYaAcC" # pylint: disable=C0103
self.assertEqual(_flatten_to_ascii(u'jas\xf6n'), u'jason') # umlaut
self.assertEqual(_flatten_to_ascii(u'Jason\u5305'), u'Jason') # mandarin, so it just gets dropped
self.assertEqual(_flatten_to_ascii(u'abc'), u'abc') # pass through
self.assertEqual(_flatten_to_ascii(DIACRITIC), FLATTENED)
unicode_test = _flatten_to_ascii(DIACRITIC)
self.assertEqual(unicode_test, FLATTENED)
self.assertIsInstance(unicode_test, unicode)
str_test = _flatten_to_ascii(STR_DIACRI)
self.assertEqual(str_test, FLATTENED)
self.assertIsInstance(str_test, str)

View File

@@ -233,10 +233,13 @@ def _flatten_to_ascii(txt):
"""
Flattens possibly unicode txt to ascii (django username limitation)
@param txt: the text to flatten; either a unicode object or a UTF-8 encoded str
@return:
@return: the flattened txt (in the same type as was originally passed in)
"""
return unicodedata.normalize('NFKD', txt).encode('ASCII', 'ignore')
if isinstance(txt, str):
txt = txt.decode('utf-8')
return unicodedata.normalize('NFKD', txt).encode('ASCII', 'ignore')
else:
return unicode(unicodedata.normalize('NFKD', txt).encode('ASCII', 'ignore'))
@ensure_csrf_cookie
@cache_if_anonymous