# HG changeset patch
# User Bob Ippolito <bob@redivi.com>
# Date 1305512632 25200
#      Sun May 15 19:23:52 2011 -0700
# Node ID 31f4f0fb52d83e3c4e414840469610809dc7e2ff
# Parent  94d8f87763fc1bf0796d88918d9b1586e564e246
Force unicode linebreak characters to be escaped (U+2028 and U+2029)

diff --git a/CHANGES.txt b/CHANGES.txt
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,5 +1,7 @@
 Version 2.1.7 released 2011-XX-XX
 
+* Force unicode linebreak characters to be escaped (U+2028 and U+2029)
+  http://timelessrepo.com/json-isnt-a-javascript-subset
 * Moved documentation from a git submodule to
   http://simplejson.readthedocs.org/
 
diff --git a/simplejson/encoder.py b/simplejson/encoder.py
--- a/simplejson/encoder.py
+++ b/simplejson/encoder.py
@@ -13,7 +13,7 @@
 
 from simplejson.decoder import PosInf
 
-ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
+ESCAPE = re.compile(ur'[\x00-\x1f\\"\b\f\n\r\t\u2028\u2029]')
 ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
 HAS_UTF8 = re.compile(r'[\x80-\xff]')
 ESCAPE_DCT = {
@@ -24,6 +24,8 @@
     '\n': '\\n',
     '\r': '\\r',
     '\t': '\\t',
+    u'\u2028': '\\u2028',
+    u'\u2029': '\\u2029',
 }
 for i in range(0x20):
     #ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i))
diff --git a/simplejson/tests/test_unicode.py b/simplejson/tests/test_unicode.py
--- a/simplejson/tests/test_unicode.py
+++ b/simplejson/tests/test_unicode.py
@@ -97,3 +97,13 @@
         self.assertEquals(json.dumps(doc2), doc_ascii)
         self.assertEquals(json.dumps(doc1, ensure_ascii=False), doc_unicode)
         self.assertEquals(json.dumps(doc2, ensure_ascii=False), doc_unicode)
+
+    def test_ensure_ascii_linebreak_encoding(self):
+        # http://timelessrepo.com/json-isnt-a-javascript-subset
+        s1 = u'\u2029\u2028'
+        s2 = s1.encode('utf8')
+        expect = '"\\u2029\\u2028"'
+        self.assertEquals(json.dumps(s1), expect)
+        self.assertEquals(json.dumps(s2), expect)
+        self.assertEquals(json.dumps(s1, ensure_ascii=False), expect)
+        self.assertEquals(json.dumps(s2, ensure_ascii=False), expect)