diff --git a/CHANGES.txt b/CHANGES.txt index 9f0c0860b84dfd09114d5ec6f231e31d4a57ab65_Q0hBTkdFUy50eHQ=..b8745da03913c8cc8f24977c7ca2f1476bbe70e9_Q0hBTkdFUy50eHQ= 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,5 +1,7 @@ Version 2.1.0 released XXXX-XX-XX +* Fixed str/unicode mismatches when using ensure_ascii=False + http://code.google.com/p/simplejson/issues/detail?id=48 * Fixed error message when parsing an array with trailing comma with speedups http://code.google.com/p/simplejson/issues/detail?id=46 * Refactor decoder errors to raise JSONDecodeError instead of ValueError diff --git a/simplejson/encoder.py b/simplejson/encoder.py index 9f0c0860b84dfd09114d5ec6f231e31d4a57ab65_c2ltcGxlanNvbi9lbmNvZGVyLnB5..b8745da03913c8cc8f24977c7ca2f1476bbe70e9_c2ltcGxlanNvbi9lbmNvZGVyLnB5 100644 --- a/simplejson/encoder.py +++ b/simplejson/encoder.py @@ -36,5 +36,7 @@ """Return a JSON representation of a Python string """ + if isinstance(s, str) and HAS_UTF8.search(s) is not None: + s = s.decode('utf-8') def replace(match): return ESCAPE_DCT[match.group(0)] @@ -39,6 +41,6 @@ def replace(match): return ESCAPE_DCT[match.group(0)] - return '"' + ESCAPE.sub(replace, s) + '"' + return u'"' + ESCAPE.sub(replace, s) + u'"' def py_encode_basestring_ascii(s): @@ -202,7 +204,10 @@ chunks = self.iterencode(o, _one_shot=True) if not isinstance(chunks, (list, tuple)): chunks = list(chunks) - return ''.join(chunks) + if self.ensure_ascii: + return ''.join(chunks) + else: + return u''.join(chunks) def iterencode(self, o, _one_shot=False): """Encode the given object and yield each string diff --git a/simplejson/tests/test_unicode.py b/simplejson/tests/test_unicode.py index 9f0c0860b84dfd09114d5ec6f231e31d4a57ab65_c2ltcGxlanNvbi90ZXN0cy90ZXN0X3VuaWNvZGUucHk=..b8745da03913c8cc8f24977c7ca2f1476bbe70e9_c2ltcGxlanNvbi90ZXN0cy90ZXN0X3VuaWNvZGUucHk= 100644 --- a/simplejson/tests/test_unicode.py +++ b/simplejson/tests/test_unicode.py @@ -78,4 +78,22 @@ def test_unicode_preservation(self): self.assertEquals(type(json.loads(u'""')), unicode) self.assertEquals(type(json.loads(u'"a"')), unicode) - self.assertEquals(type(json.loads(u'["a"]')[0]), unicode) \ No newline at end of file + self.assertEquals(type(json.loads(u'["a"]')[0]), unicode) + + def test_ensure_ascii_false_returns_unicode(self): + # http://code.google.com/p/simplejson/issues/detail?id=48 + self.assertEquals(type(json.dumps([], ensure_ascii=False)), unicode) + self.assertEquals(type(json.dumps(0, ensure_ascii=False)), unicode) + self.assertEquals(type(json.dumps({}, ensure_ascii=False)), unicode) + self.assertEquals(type(json.dumps("", ensure_ascii=False)), unicode) + + def test_ensure_ascii_false_bytestring_encoding(self): + # http://code.google.com/p/simplejson/issues/detail?id=48 + doc1 = {u'quux': 'Arr\xc3\xaat sur images'} + doc2 = {u'quux': u'Arr\xeat sur images'} + doc_ascii = '{"quux": "Arr\\u00eat sur images"}' + doc_unicode = u'{"quux": "Arr\xeat sur images"}' + self.assertEquals(json.dumps(doc1), doc_ascii) + self.assertEquals(json.dumps(doc2), doc_ascii) + self.assertEquals(json.dumps(doc1, ensure_ascii=False), doc_unicode) + self.assertEquals(json.dumps(doc2, ensure_ascii=False), doc_unicode)