# Patch against simplejson: simplejson/decoder.py and
# simplejson/tests/test_unicode.py.
#
# NOTE(review): the patch text below has its hunks run together on a few
# physical lines (original line breaks and indentation are not present), so it
# presumably cannot be applied by patch tools as-is -- confirm against the
# original changeset before applying.
#
# simplejson/decoder.py (handling of \uXXXX escapes):
#   * int(esc, 16) is now wrapped in try/except ValueError; the handler raises
#     JSONDecodeError with the message literal "Invalid \\uXXXX escape".
#   * The surrogate check is now entered whenever _maxunicode > 65535 and
#     classifies the code unit with unimask = uni & 0xfc00.
#   * unimask == 0xd800 (high surrogate): the text must continue with '\\u' and
#     four characters that int(..., 16) accepts, otherwise JSONDecodeError
#     "Invalid \\uXXXX\\uXXXX surrogate pair" is raised; the second value must
#     satisfy uni2 & 0xfc00 == 0xdc00, otherwise JSONDecodeError
#     "Unpaired high surrogate" is raised.  The removed code combined uni and
#     uni2 without that second check.
#   * unimask == 0xdc00 (low surrogate with no preceding high surrogate):
#     JSONDecodeError "Unpaired low surrogate" is raised.
#   * NOTE(review): int(x, 16) also accepts forms such as ' 123', '+123' and
#     '0x12', so the ValueError guard alone may not reject every non-hex
#     four-character escape -- verify whether stricter validation is wanted.
#
# simplejson/tests/test_unicode.py:
#   * Adds `import sys` and test_invalid_escape_sequences, which asserts that
#     json.loads raises json.JSONDecodeError for truncated \u escapes, for
#     escapes containing a non-hex character and, when sys.maxunicode > 65535,
#     for unpaired low/high surrogates and malformed or out-of-range low
#     surrogate escapes.
diff --git a/simplejson/decoder.py b/simplejson/decoder.py index d6fe30941ff24b8900f39d6789d3eb16a3e6ce95_c2ltcGxlanNvbi9kZWNvZGVyLnB5..d5cb52eeda7e68e75131fc897620fd1872d6b4fe_c2ltcGxlanNvbi9kZWNvZGVyLnB5 100644 --- a/simplejson/decoder.py +++ b/simplejson/decoder.py @@ -154,5 +154,9 @@ if len(esc) != 4: msg = "Invalid \\uXXXX escape" raise JSONDecodeError(msg, s, end) - uni = int(esc, 16) + try: + uni = int(esc, 16) + except ValueError: + msg = "Invalid \\uXXXX escape" + raise JSONDecodeError(msg, s, end) # Check for surrogate pair on UCS-4 systems @@ -158,5 +162,22 @@ # Check for surrogate pair on UCS-4 systems - if 0xd800 <= uni <= 0xdbff and _maxunicode > 65535: - msg = "Invalid \\uXXXX\\uXXXX surrogate pair" - if not s[end + 5:end + 7] == '\\u': + if _maxunicode > 65535: + unimask = uni & 0xfc00 + if unimask == 0xd800: + msg = "Invalid \\uXXXX\\uXXXX surrogate pair" + if not s[end + 5:end + 7] == '\\u': + raise JSONDecodeError(msg, s, end) + esc2 = s[end + 7:end + 11] + if len(esc2) != 4: + raise JSONDecodeError(msg, s, end) + try: + uni2 = int(esc2, 16) + except ValueError: + raise JSONDecodeError(msg, s, end) + if uni2 & 0xfc00 != 0xdc00: + msg = "Unpaired high surrogate" + raise JSONDecodeError(msg, s, end) + uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00)) + next_end += 6 + elif unimask == 0xdc00: + msg = "Unpaired low surrogate" raise JSONDecodeError(msg, s, end) @@ -162,10 +183,4 @@ raise JSONDecodeError(msg, s, end) - esc2 = s[end + 7:end + 11] - if len(esc2) != 4: - raise JSONDecodeError(msg, s, end) - uni2 = int(esc2, 16) - uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00)) - next_end += 6 char = unichr(uni) end = next_end # Append the unescaped character diff --git a/simplejson/tests/test_unicode.py b/simplejson/tests/test_unicode.py index d6fe30941ff24b8900f39d6789d3eb16a3e6ce95_c2ltcGxlanNvbi90ZXN0cy90ZXN0X3VuaWNvZGUucHk=..d5cb52eeda7e68e75131fc897620fd1872d6b4fe_c2ltcGxlanNvbi90ZXN0cy90ZXN0X3VuaWNvZGUucHk= 100644 --- 
a/simplejson/tests/test_unicode.py +++ b/simplejson/tests/test_unicode.py @@ -1,3 +1,4 @@ +import sys from unittest import TestCase import simplejson as json @@ -108,3 +109,37 @@ self.assertEquals(json.dumps(s2), expect) self.assertEquals(json.dumps(s1, ensure_ascii=False), expect) self.assertEquals(json.dumps(s2, ensure_ascii=False), expect) + + def test_invalid_escape_sequences(self): + # incomplete escape sequence + self.assertRaises(json.JSONDecodeError, json.loads, '"\\u') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\u1') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\u12') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\u123') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\u1234') + # invalid escape sequence + self.assertRaises(json.JSONDecodeError, json.loads, '"\\u123x"') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\u12x4"') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\u1x34"') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\ux234"') + if sys.maxunicode > 65535: + # unpaired low surrogate + self.assertRaises(json.JSONDecodeError, json.loads, '"\\udc00"') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\udcff"') + # unpaired high surrogate + self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800"') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800x"') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800xx"') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800xxxxxx"') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\u"') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\u0"') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\u00"') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\u000"') + # invalid escape sequence for low surrogate + self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\u000x"') + self.assertRaises(json.JSONDecodeError, json.loads, 
'"\\ud800\\u00x0"') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\u0x00"') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\ux000"') + # invalid value for low surrogate + self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\u0000"') + self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\ufc00"') \ No newline at end of file