# HG changeset patch # User Bob Ippolito <bob@redivi.com> # Date 1209864075 0 # Sun May 04 01:21:15 2008 +0000 # Node ID 8fa1e5d581ca2a3a4646c816bd9984ea44514b2d # Parent 895f171bfaec9d08d4afe2c973d45bda826b8902 backport fixes from py2.6 issue2750 branch git-svn-id: http://simplejson.googlecode.com/svn/trunk@93 a4795897-2c25-0410-b006-0d3caba88fa1 diff --git a/simplejson/decoder.py b/simplejson/decoder.py --- a/simplejson/decoder.py +++ b/simplejson/decoder.py @@ -6,9 +6,9 @@ from simplejson.scanner import Scanner, pattern try: - from simplejson import _speedups -except: - _speedups = None + from simplejson._speedups import scanstring as c_scanstring +except ImportError: + pass FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL @@ -83,7 +83,7 @@ DEFAULT_ENCODING = "utf-8" -def scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHUNK.match): +def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHUNK.match): if encoding is None: encoding = DEFAULT_ENCODING chunks = [] @@ -147,8 +147,10 @@ # Use speedup -if _speedups is not None: - scanstring = _speedups.scanstring +try: + scanstring = c_scanstring +except NameError: + scanstring = py_scanstring def JSONString(match, context): encoding = getattr(context, 'encoding', None) diff --git a/simplejson/encoder.py b/simplejson/encoder.py --- a/simplejson/encoder.py +++ b/simplejson/encoder.py @@ -4,12 +4,13 @@ import re try: - from simplejson import _speedups + from simplejson._speedups import encode_basestring_ascii as c_encode_basestring_ascii except ImportError: - _speedups = None + pass ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]') -ESCAPE_ASCII = re.compile(r'([\\"/]|[^\ -~])') +ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])') +HAS_UTF8 = re.compile(r'[\x80-\xff]') ESCAPE_DCT = { '\\': '\\\\', '"': '\\"', @@ -55,7 +56,9 @@ return '"' + ESCAPE.sub(replace, s) + '"' -def encode_basestring_ascii(s): +def py_encode_basestring_ascii(s): + if isinstance(s, str) and HAS_UTF8.search(s) is not None: + s = s.decode('utf-8') def replace(match): s = match.group(0) try: @@ -74,10 +77,9 @@ try: - encode_basestring_ascii = _speedups.encode_basestring_ascii - _need_utf8 = True -except AttributeError: - _need_utf8 = False + encode_basestring_ascii = c_encode_basestring_ascii +except NameError: + encode_basestring_ascii = py_encode_basestring_ascii class JSONEncoder(object): @@ -240,7 +242,7 @@ items = dct.iteritems() _encoding = self.encoding _do_decode = (_encoding is not None - and not (_need_utf8 and _encoding == 'utf-8')) + and not (_encoding == 'utf-8')) for key, value in items: if isinstance(key, str): if _do_decode: @@ -286,7 +288,7 @@ encoder = encode_basestring _encoding = self.encoding if (_encoding is not None and isinstance(o, str) - and not (_need_utf8 and _encoding == 'utf-8')): + and not (_encoding == 'utf-8')): o = o.decode(_encoding) yield encoder(o) elif o is None: @@ -352,7 +354,7 @@ if isinstance(o, str): _encoding = self.encoding if (_encoding is not None - and not (_encoding == 'utf-8' and _need_utf8)): + and not (_encoding == 'utf-8')): o = o.decode(_encoding) if self.ensure_ascii: return encode_basestring_ascii(o) diff --git a/simplejson/tests/test_encode_basestring_ascii.py b/simplejson/tests/test_encode_basestring_ascii.py new file mode 100644 --- /dev/null +++ b/simplejson/tests/test_encode_basestring_ascii.py @@ -0,0 +1,36 @@ +from unittest import TestCase + +import simplejson.encoder + +CASES = [ + (u'/\\"\ucafe\ubabe\uab98\ufcde\ubcda\uef4a\x08\x0c\n\r\t`1~!@#$%^&*()_+-=[]{}|;:\',./<>?', '"/\\\\\\"\\ucafe\\ubabe\\uab98\\ufcde\\ubcda\\uef4a\\b\\f\\n\\r\\t`1~!@#$%^&*()_+-=[]{}|;:\',./<>?"'), + (u'\u0123\u4567\u89ab\ucdef\uabcd\uef4a', '"\\u0123\\u4567\\u89ab\\ucdef\\uabcd\\uef4a"'), + (u'controls', '"controls"'), + (u'\x08\x0c\n\r\t', '"\\b\\f\\n\\r\\t"'), + (u'{"object with 1 member":["array with 1 element"]}', '"{\\"object with 1 member\\":[\\"array with 1 element\\"]}"'), + (u' s p a c e d ', '" s p a c e d "'), + (u'\U0001d120', '"\\ud834\\udd20"'), + (u'\u03b1\u03a9', '"\\u03b1\\u03a9"'), + ('\xce\xb1\xce\xa9', '"\\u03b1\\u03a9"'), + (u'\u03b1\u03a9', '"\\u03b1\\u03a9"'), + ('\xce\xb1\xce\xa9', '"\\u03b1\\u03a9"'), + (u'\u03b1\u03a9', '"\\u03b1\\u03a9"'), + (u'\u03b1\u03a9', '"\\u03b1\\u03a9"'), + (u"`1~!@#$%^&*()_+-={':[,]}|;.</>?", '"`1~!@#$%^&*()_+-={\':[,]}|;.</>?"'), + (u'\x08\x0c\n\r\t', '"\\b\\f\\n\\r\\t"'), + (u'\u0123\u4567\u89ab\ucdef\uabcd\uef4a', '"\\u0123\\u4567\\u89ab\\ucdef\\uabcd\\uef4a"'), +] + +class TestEncodeBaseStringAscii(TestCase): + def test_py_encode_basestring_ascii(self): + self._test_encode_basestring_ascii(simplejson.encoder.py_encode_basestring_ascii) + + def test_c_encode_basestring_ascii(self): + self._test_encode_basestring_ascii(simplejson.encoder.c_encode_basestring_ascii) + + def _test_encode_basestring_ascii(self, encode_basestring_ascii): + fname = encode_basestring_ascii.__name__ + for input_string, expect in CASES: + result = encode_basestring_ascii(input_string) + self.assertEquals(result, expect, + '%r != %r for %s(%r)' % (result, expect, fname, input_string)) diff --git a/simplejson/tests/test_scanstring.py b/simplejson/tests/test_scanstring.py new file mode 100644 --- /dev/null +++ b/simplejson/tests/test_scanstring.py @@ -0,0 +1,102 @@ +import sys +import decimal +from unittest import TestCase + +import simplejson.decoder + +class TestScanString(TestCase): + def test_py_scanstring(self): + self._test_scanstring(simplejson.decoder.py_scanstring) + + def test_c_scanstring(self): + self._test_scanstring(simplejson.decoder.c_scanstring) + + def _test_scanstring(self, scanstring): + self.assertEquals( + scanstring('"z\\ud834\\udd20x"', 1, None, True), + (u'z\U0001d120x', 16)) + + if sys.maxunicode == 65535: + self.assertEquals( + scanstring(u'"z\U0001d120x"', 1, None, True), + (u'z\U0001d120x', 6)) + else: + self.assertEquals( + scanstring(u'"z\U0001d120x"', 1, None, True), + (u'z\U0001d120x', 5)) + + self.assertEquals( + scanstring('"\\u007b"', 1, None, True), + (u'{', 8)) + + self.assertEquals( + scanstring('"A JSON payload should be an object or array, not a string."', 1, None, True), + (u'A JSON payload should be an object or array, not a string.', 60)) + + self.assertEquals( + scanstring('["Unclosed array"', 2, None, True), + (u'Unclosed array', 17)) + + self.assertEquals( + scanstring('["extra comma",]', 2, None, True), + (u'extra comma', 14)) + + self.assertEquals( + scanstring('["double extra comma",,]', 2, None, True), + (u'double extra comma', 21)) + + self.assertEquals( + scanstring('["Comma after the close"],', 2, None, True), + (u'Comma after the close', 24)) + + self.assertEquals( + scanstring('["Extra close"]]', 2, None, True), + (u'Extra close', 14)) + + self.assertEquals( + scanstring('{"Extra comma": true,}', 2, None, True), + (u'Extra comma', 14)) + + self.assertEquals( + scanstring('{"Extra value after close": true} "misplaced quoted value"', 2, None, True), + (u'Extra value after close', 26)) + + self.assertEquals( + scanstring('{"Illegal expression": 1 + 2}', 2, None, True), + (u'Illegal expression', 21)) + + self.assertEquals( + scanstring('{"Illegal invocation": alert()}', 2, None, True), + (u'Illegal invocation', 21)) + + self.assertEquals( + scanstring('{"Numbers cannot have leading zeroes": 013}', 2, None, True), + (u'Numbers cannot have leading zeroes', 37)) + + self.assertEquals( + scanstring('{"Numbers cannot be hex": 0x14}', 2, None, True), + (u'Numbers cannot be hex', 24)) + + self.assertEquals( + scanstring('[[[[[[[[[[[[[[[[[[[["Too deep"]]]]]]]]]]]]]]]]]]]]', 21, None, True), + (u'Too deep', 30)) + + self.assertEquals( + scanstring('{"Missing colon" null}', 2, None, True), + (u'Missing colon', 16)) + + self.assertEquals( + scanstring('{"Double colon":: null}', 2, None, True), + (u'Double colon', 15)) + + self.assertEquals( + scanstring('{"Comma instead of colon", null}', 2, None, True), + (u'Comma instead of colon', 25)) + + self.assertEquals( + scanstring('["Colon instead of comma": false]', 2, None, True), + (u'Colon instead of comma', 25)) + + self.assertEquals( + scanstring('["Bad value", truth]', 2, None, True), + (u'Bad value', 12))