Skip to content
Snippets Groups Projects
Commit 4d9b1fc3c56a authored by Bob Ippolito's avatar Bob Ippolito
Browse files

improve truncated input error messages, use JSONDecodeError instead of StopIteration (#61)

parent 08df7633a87b
No related branches found
No related tags found
No related merge requests found
Version 3.1.0 released XXXX-XX-XX
* Improve error messages for certain kinds of truncated input
http://bugs.python.org/issue16009
* Moved JSONDecodeError to json.scanner (still available for import
from json.decoder)
* Changed scanner to use JSONDecodeError directly rather than
StopIteration to improve error messages
Version 3.0.9 released 2013-02-21
* Fix an off-by-one error in the colno property of JSONDecodeError
......
......@@ -36,9 +36,9 @@
# General substitutions.
project = 'simplejson'
copyright = '2012, Bob Ippolito'
copyright = '2013, Bob Ippolito'
# The default replacements for |version| and |release|, also used in various
# other places throughout the built documents.
#
# The short X.Y version.
......@@ -40,7 +40,7 @@
# The default replacements for |version| and |release|, also used in various
# other places throughout the built documents.
#
# The short X.Y version.
version = '3.0'
version = '3.1'
# The full version, including alpha/beta/rc tags.
......@@ -46,5 +46,5 @@
# The full version, including alpha/beta/rc tags.
release = '3.0.9'
release = '3.1'
# There are two options for replacing |today|: either, you set today to some
# non-false value, then it is used:
......
......@@ -8,7 +8,7 @@
DistutilsPlatformError
IS_PYPY = hasattr(sys, 'pypy_translation_info')
VERSION = '3.0.9'
VERSION = '3.1.0'
DESCRIPTION = "Simple, fast, extensible JSON encoder/decoder for Python"
with open('README.rst', 'r') as f:
......
......@@ -99,7 +99,7 @@
Expecting property name: line 1 column 3 (char 2)
"""
from __future__ import absolute_import
__version__ = '3.0.9'
__version__ = '3.1.0'
__all__ = [
'dump', 'dumps', 'load', 'loads',
'JSONDecoder', 'JSONDecodeError', 'JSONEncoder',
......@@ -110,7 +110,8 @@
from decimal import Decimal
from .decoder import JSONDecoder, JSONDecodeError
from .scanner import JSONDecodeError
from .decoder import JSONDecoder
from .encoder import JSONEncoder, JSONEncoderForHTML
def _import_OrderedDict():
import collections
......
This diff is collapsed.
......@@ -5,7 +5,8 @@
import sys
import struct
from .compat import fromhex, b, u, text_type, binary_type, PY3, unichr
from .scanner import make_scanner
from .scanner import make_scanner, JSONDecodeError
def _import_c_scanstring():
try:
from ._speedups import scanstring
......@@ -14,6 +15,8 @@
return None
c_scanstring = _import_c_scanstring()
# NOTE (3.1.0): JSONDecodeError may still be imported from this module for
# compatibility, but it was never in the __all__
__all__ = ['JSONDecoder']
FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL
......@@ -29,57 +32,6 @@
NaN, PosInf, NegInf = _floatconstants()
class JSONDecodeError(ValueError):
"""Subclass of ValueError with the following additional properties:
msg: The unformatted error message
doc: The JSON document being parsed
pos: The start index of doc where parsing failed
end: The end index of doc where parsing failed (may be None)
lineno: The line corresponding to pos
colno: The column corresponding to pos
endlineno: The line corresponding to end (may be None)
endcolno: The column corresponding to end (may be None)
"""
def __init__(self, msg, doc, pos, end=None):
ValueError.__init__(self, errmsg(msg, doc, pos, end=end))
self.msg = msg
self.doc = doc
self.pos = pos
self.end = end
self.lineno, self.colno = linecol(doc, pos)
if end is not None:
self.endlineno, self.endcolno = linecol(doc, end)
else:
self.endlineno, self.endcolno = None, None
def linecol(doc, pos):
lineno = doc.count('\n', 0, pos) + 1
if lineno == 1:
colno = pos + 1
else:
colno = pos - doc.rindex('\n', 0, pos)
return lineno, colno
def errmsg(msg, doc, pos, end=None):
# Note that this function is called from _speedups
lineno, colno = linecol(doc, pos)
if end is None:
#fmt = '{0}: line {1} column {2} (char {3})'
#return fmt.format(msg, lineno, colno, pos)
fmt = '%s: line %d column %d (char %d)'
return fmt % (msg, lineno, colno, pos)
endlineno, endcolno = linecol(doc, end)
#fmt = '{0}: line {1} column {2} - line {3} column {4} (char {5} - {6})'
#return fmt.format(msg, lineno, colno, endlineno, endcolno, pos, end)
fmt = '%s: line %d column %d - line %d column %d (char %d - %d)'
return fmt % (msg, lineno, colno, endlineno, endcolno, pos, end)
_CONSTANTS = {
'-Infinity': NegInf,
'Infinity': PosInf,
......@@ -128,8 +80,7 @@
break
elif terminator != '\\':
if strict:
msg = "Invalid control character %r at" % (terminator,)
#msg = "Invalid control character {0!r} at".format(terminator)
msg = "Invalid control character %r at"
raise JSONDecodeError(msg, s, end)
else:
_append(terminator)
......@@ -144,8 +95,8 @@
try:
char = _b[esc]
except KeyError:
msg = "Invalid \\escape: " + repr(esc)
msg = "Invalid \\X escape sequence %r"
raise JSONDecodeError(msg, s, end)
end += 1
else:
# Unicode escape sequence
......@@ -148,7 +99,8 @@
raise JSONDecodeError(msg, s, end)
end += 1
else:
# Unicode escape sequence
msg = "Invalid \\uXXXX escape sequence"
esc = s[end + 1:end + 5]
next_end = end + 5
if len(esc) != 4:
......@@ -152,8 +104,7 @@
esc = s[end + 1:end + 5]
next_end = end + 5
if len(esc) != 4:
msg = "Invalid \\uXXXX escape"
raise JSONDecodeError(msg, s, end)
try:
uni = int(esc, 16)
except ValueError:
......@@ -156,10 +107,9 @@
raise JSONDecodeError(msg, s, end)
try:
uni = int(esc, 16)
except ValueError:
msg = "Invalid \\uXXXX escape"
raise JSONDecodeError(msg, s, end)
# Check for surrogate pair on UCS-4 systems
if _maxunicode > 65535:
unimask = uni & 0xfc00
if unimask == 0xd800:
......@@ -161,9 +111,9 @@
raise JSONDecodeError(msg, s, end)
# Check for surrogate pair on UCS-4 systems
if _maxunicode > 65535:
unimask = uni & 0xfc00
if unimask == 0xd800:
msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
msg = "Unpaired high surrogate"
if not s[end + 5:end + 7] == '\\u':
raise JSONDecodeError(msg, s, end)
esc2 = s[end + 7:end + 11]
......@@ -174,7 +124,6 @@
except ValueError:
raise JSONDecodeError(msg, s, end)
if uni2 & 0xfc00 != 0xdc00:
msg = "Unpaired high surrogate"
raise JSONDecodeError(msg, s, end)
uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
next_end += 6
......@@ -246,5 +195,4 @@
except IndexError:
pass
try:
value, end = scan_once(s, end)
......@@ -250,6 +198,4 @@
value, end = scan_once(s, end)
except StopIteration:
raise JSONDecodeError("Expecting object", s, end)
pairs.append((key, value))
try:
......@@ -264,7 +210,7 @@
if nextchar == '}':
break
elif nextchar != ',':
raise JSONDecodeError("Expecting ',' delimiter", s, end - 1)
raise JSONDecodeError("Expecting ',' delimiter or '}'", s, end - 1)
try:
nextchar = s[end]
......@@ -301,5 +247,7 @@
# Look-ahead for trivial empty array
if nextchar == ']':
return values, end + 1
elif nextchar == '':
raise JSONDecodeError("Expecting value or ']'", s, end)
_append = values.append
while True:
......@@ -304,4 +252,3 @@
_append = values.append
while True:
try:
value, end = scan_once(s, end)
......@@ -307,6 +254,4 @@
value, end = scan_once(s, end)
except StopIteration:
raise JSONDecodeError("Expecting object", s, end)
_append(value)
nextchar = s[end:end + 1]
if nextchar in _ws:
......@@ -316,7 +261,7 @@
if nextchar == ']':
break
elif nextchar != ',':
raise JSONDecodeError("Expecting ',' delimiter", s, end)
raise JSONDecodeError("Expecting ',' delimiter or ']'", s, end - 1)
try:
if s[end] in _ws:
......@@ -445,8 +390,4 @@
"""
if _PY3 and not isinstance(s, text_type):
raise TypeError("Input string must be text, not bytes")
try:
obj, end = self.scan_once(s, idx=_w(s, idx).end())
except StopIteration:
raise JSONDecodeError("No JSON object could be decoded", s, idx)
return obj, end
return self.scan_once(s, idx=_w(s, idx).end())
......@@ -9,9 +9,9 @@
return None
c_make_scanner = _import_c_make_scanner()
__all__ = ['make_scanner']
__all__ = ['make_scanner', 'JSONDecodeError']
NUMBER_RE = re.compile(
r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?',
(re.VERBOSE | re.MULTILINE | re.DOTALL))
......@@ -13,8 +13,55 @@
NUMBER_RE = re.compile(
r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?',
(re.VERBOSE | re.MULTILINE | re.DOTALL))
class JSONDecodeError(ValueError):
"""Subclass of ValueError with the following additional properties:
msg: The unformatted error message
doc: The JSON document being parsed
pos: The start index of doc where parsing failed
end: The end index of doc where parsing failed (may be None)
lineno: The line corresponding to pos
colno: The column corresponding to pos
endlineno: The line corresponding to end (may be None)
endcolno: The column corresponding to end (may be None)
"""
# Note that this exception is used from _speedups
def __init__(self, msg, doc, pos, end=None):
ValueError.__init__(self, errmsg(msg, doc, pos, end=end))
self.msg = msg
self.doc = doc
self.pos = pos
self.end = end
self.lineno, self.colno = linecol(doc, pos)
if end is not None:
self.endlineno, self.endcolno = linecol(doc, end)
else:
self.endlineno, self.endcolno = None, None
def linecol(doc, pos):
lineno = doc.count('\n', 0, pos) + 1
if lineno == 1:
colno = pos + 1
else:
colno = pos - doc.rindex('\n', 0, pos)
return lineno, colno
def errmsg(msg, doc, pos, end=None):
lineno, colno = linecol(doc, pos)
msg = msg.replace('%r', repr(doc[pos:pos + 1]))
if end is None:
fmt = '%s: line %d column %d (char %d)'
return fmt % (msg, lineno, colno, pos)
endlineno, endcolno = linecol(doc, end)
fmt = '%s: line %d column %d - line %d column %d (char %d - %d)'
return fmt % (msg, lineno, colno, endlineno, endcolno, pos, end)
def py_make_scanner(context):
parse_object = context.parse_object
parse_array = context.parse_array
......@@ -30,6 +77,7 @@
memo = context.memo
def _scan_once(string, idx):
errmsg = 'Expecting value'
try:
nextchar = string[idx]
except IndexError:
......@@ -33,7 +81,7 @@
try:
nextchar = string[idx]
except IndexError:
raise StopIteration
raise JSONDecodeError(errmsg, string, idx)
if nextchar == '"':
return parse_string(string, idx + 1, encoding, strict)
......@@ -64,7 +112,7 @@
elif nextchar == '-' and string[idx:idx + 9] == '-Infinity':
return parse_constant('-Infinity'), idx + 9
else:
raise StopIteration
raise JSONDecodeError(errmsg, string, idx)
def scan_once(string, idx):
try:
......
......@@ -99,7 +99,6 @@
except json.JSONDecodeError:
pass
else:
#self.fail("Expected failure for fail{0}.json: {1!r}".format(idx, doc))
self.fail("Expected failure for fail%d.json: %r" % (idx, doc))
def test_array_decoder_issue46(self):
......@@ -117,3 +116,43 @@
self.fail("Unexpected exception raised %r %s" % (e, e))
else:
self.fail("Unexpected success parsing '[,]'")
def test_truncated_input(self):
test_cases = [
('', 'Expecting value', 0),
('[', "Expecting value or ']'", 1),
('[42', "Expecting ',' delimiter", 3),
('[42,', 'Expecting value', 4),
('["', 'Unterminated string starting at', 1),
('["spam', 'Unterminated string starting at', 1),
('["spam"', "Expecting ',' delimiter", 7),
('["spam",', 'Expecting value', 8),
('{', 'Expecting property name enclosed in double quotes', 1),
('{"', 'Unterminated string starting at', 1),
('{"spam', 'Unterminated string starting at', 1),
('{"spam"', "Expecting ':' delimiter", 7),
('{"spam":', 'Expecting value', 8),
('{"spam":42', "Expecting ',' delimiter", 10),
('{"spam":42,', 'Expecting property name enclosed in double quotes',
11),
('"', 'Unterminated string starting at', 0),
('"spam', 'Unterminated string starting at', 0),
('[,', "Expecting value", 1),
]
for data, msg, idx in test_cases:
try:
json.loads(data)
except json.JSONDecodeError:
e = sys.exc_info()[1]
self.assertEqual(
e.msg[:len(msg)],
msg,
"%r doesn't start with %r for %r" % (e.msg, msg, data))
self.assertEqual(
e.pos, idx,
"pos %r != %r for %r" % (e.pos, idx, data))
except Exception:
e = sys.exc_info()[1]
self.fail("Unexpected exception raised %r %s" % (e, e))
else:
self.fail("Unexpected success parsing '%r'" % (data,))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment