diff --git a/simplejson/decoder.py b/simplejson/decoder.py index 78543dd76290bfd36ae18700f314682b715b4ea1_c2ltcGxlanNvbi9kZWNvZGVyLnB5..376baf4b841e9b3909ffe036a5482ac769dacd99_c2ltcGxlanNvbi9kZWNvZGVyLnB5 100644 --- a/simplejson/decoder.py +++ b/simplejson/decoder.py @@ -4,7 +4,7 @@ import re import sys -from simplejson.scanner import Scanner, pattern +from simplejson.scanner import make_scanner, pattern try: from simplejson._speedups import scanstring as c_scanstring except ImportError: @@ -58,8 +58,8 @@ rval = c[s] else: rval = fn(s) - return rval, None -pattern('(-?Infinity|NaN|true|false|null)')(JSONConstant) + return rval, match.end() +pattern(r'(-?Infinity|NaN|true|false|null)')(JSONConstant) def JSONNumber(match, context): @@ -63,11 +63,12 @@ def JSONNumber(match, context): - match = JSONNumber.regex.match(match.string, *match.span()) - integer, frac, exp = match.groups() + # m1 = JSONNumber.regex.match(match.string, *match.span()) + # assert m1.groups()[:3] == match.groups()[:3] + integer, frac, exp = match.groups()[:3] if frac or exp: fn = getattr(context, 'parse_float', None) or float res = fn(integer + (frac or '') + (exp or '')) else: fn = getattr(context, 'parse_int', None) or int res = fn(integer) @@ -68,10 +69,10 @@ if frac or exp: fn = getattr(context, 'parse_float', None) or float res = fn(integer + (frac or '') + (exp or '')) else: fn = getattr(context, 'parse_int', None) or int res = fn(integer) - return res, None + return res, match.end() pattern(r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?')(JSONNumber) @@ -149,6 +150,6 @@ # Use speedup if available scanstring = c_scanstring or py_scanstring -def JSONString(match, context): +def JSONString((string, end), context): encoding = getattr(context, 'encoding', None) strict = getattr(context, 'strict', True) @@ -153,6 +154,6 @@ encoding = getattr(context, 'encoding', None) strict = getattr(context, 'strict', True) - return scanstring(match.string, match.end(), encoding, strict) + return scanstring(string, end, encoding, strict) pattern(r'"')(JSONString) @@ -156,5 +157,6 @@ pattern(r'"')(JSONString) -WHITESPACE = re.compile(r'\s*', FLAGS) +WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS) +WHITESPACE_STR = ' \t\n\r' @@ -160,3 +162,3 @@ -def JSONObject(match, context, _w=WHITESPACE.match): +def JSONObject((s, end), context, _w=WHITESPACE.match, _ws=WHITESPACE_STR): pairs = {} @@ -162,4 +164,2 @@ pairs = {} - s = match.string - end = _w(s, match.end()).end() nextchar = s[end:end + 1] @@ -165,5 +165,3 @@ nextchar = s[end:end + 1] - # Trivial empty object - if nextchar == '}': - return pairs, end + 1 + # Normally we expect nextchar == '"' if nextchar != '"': @@ -169,5 +167,12 @@ if nextchar != '"': - raise ValueError(errmsg("Expecting property name", s, end)) + if nextchar in _ws: + end = _w(s, end).end() + nextchar = s[end:end + 1] + # Trivial empty object + if nextchar == '}': + return pairs, end + 1 + elif nextchar != '"': + raise ValueError(errmsg("Expecting property name", s, end)) end += 1 encoding = getattr(context, 'encoding', None) strict = getattr(context, 'strict', True) @@ -171,6 +176,6 @@ end += 1 encoding = getattr(context, 'encoding', None) strict = getattr(context, 'strict', True) - iterscan = JSONScanner.iterscan + scan_once = JSONScanner while True: key, end = scanstring(s, end, encoding, strict) @@ -175,4 +180,6 @@ while True: key, end = scanstring(s, end, encoding, strict) - end = _w(s, end).end() + + # To skip some function call overhead we optimize the fast paths where + # the JSON key separator is ": " or just ":". if s[end:end + 1] != ':': @@ -178,4 +185,8 @@ if s[end:end + 1] != ':': - raise ValueError(errmsg("Expecting : delimiter", s, end)) - end = _w(s, end + 1).end() + end = _w(s, end).end() + if s[end:end + 1] != ':': + raise ValueError(errmsg("Expecting : delimiter", s, end)) + + end += 1 + try: @@ -181,5 +192,13 @@ try: - value, end = iterscan(s, idx=end, context=context).next() + if s[end] in _ws: + end += 1 + if s[end] in _ws: + end = _w(s, end).end() + except IndexError: + pass + + try: + value, end = scan_once(s, end, context) except StopIteration: raise ValueError(errmsg("Expecting object", s, end)) pairs[key] = value @@ -183,5 +202,4 @@ except StopIteration: raise ValueError(errmsg("Expecting object", s, end)) pairs[key] = value - end = _w(s, end).end() nextchar = s[end:end + 1] @@ -187,2 +205,5 @@ nextchar = s[end:end + 1] + if nextchar in _ws: + end = _w(s, end).end() + nextchar = s[end:end + 1] end += 1 @@ -188,3 +209,4 @@ end += 1 + if nextchar == '}': break @@ -189,4 +211,4 @@ if nextchar == '}': break - if nextchar != ',': + elif nextchar != ',': raise ValueError(errmsg("Expecting , delimiter", s, end - 1)) @@ -192,6 +214,14 @@ raise ValueError(errmsg("Expecting , delimiter", s, end - 1)) - end = _w(s, end).end() + + try: + if s[end] in _ws: + end += 1 + if s[end] in _ws: + end = _w(s, end).end() + except IndexError: + pass + nextchar = s[end:end + 1] end += 1 if nextchar != '"': raise ValueError(errmsg("Expecting property name", s, end - 1)) @@ -194,7 +224,8 @@ nextchar = s[end:end + 1] end += 1 if nextchar != '"': raise ValueError(errmsg("Expecting property name", s, end - 1)) + object_hook = getattr(context, 'object_hook', None) if object_hook is not None: pairs = object_hook(pairs) @@ -202,5 +233,5 @@ pattern(r'{')(JSONObject) -def JSONArray(match, context, _w=WHITESPACE.match): +def JSONArray((s, end), context, _w=WHITESPACE.match, _ws=WHITESPACE_STR): values = [] @@ -206,4 +237,6 @@ values = [] - s = match.string - end = _w(s, match.end()).end() + nextchar = s[end:end + 1] + if nextchar in _ws: + end = _w(s, end).end() + nextchar = s[end:end + 1] # Look-ahead for trivial empty array @@ -209,4 +242,3 @@ # Look-ahead for trivial empty array - nextchar = s[end:end + 1] if nextchar == ']': return values, end + 1 @@ -211,5 +243,5 @@ if nextchar == ']': return values, end + 1 - iterscan = JSONScanner.iterscan + scan_once = JSONScanner while True: try: @@ -214,6 +246,6 @@ while True: try: - value, end = iterscan(s, idx=end, context=context).next() + value, end = scan_once(s, end, context) except StopIteration: raise ValueError(errmsg("Expecting object", s, end)) values.append(value) @@ -217,5 +249,4 @@ except StopIteration: raise ValueError(errmsg("Expecting object", s, end)) values.append(value) - end = _w(s, end).end() nextchar = s[end:end + 1] @@ -221,6 +252,9 @@ nextchar = s[end:end + 1] + if nextchar in _ws: + end = _w(s, end).end() + nextchar = s[end:end + 1] end += 1 if nextchar == ']': break if nextchar != ',': raise ValueError(errmsg("Expecting , delimiter", s, end)) @@ -222,9 +256,17 @@ end += 1 if nextchar == ']': break if nextchar != ',': raise ValueError(errmsg("Expecting , delimiter", s, end)) - end = _w(s, end).end() + + try: + if s[end] in _ws: + end += 1 + if s[end] in _ws: + end = _w(s, end).end() + except IndexError: + pass + return values, end pattern(r'\[')(JSONArray) @@ -237,7 +279,7 @@ JSONNumber, ] -JSONScanner = Scanner(ANYTHING) +JSONScanner = make_scanner(ANYTHING) class JSONDecoder(object): @@ -270,7 +312,6 @@ their corresponding ``float`` values, which is outside the JSON spec. """ - _scanner = Scanner(ANYTHING) __all__ = ['__init__', 'decode', 'raw_decode'] def __init__(self, encoding=None, object_hook=None, parse_float=None, @@ -330,5 +371,6 @@ This can be used to decode a JSON document from a string that may have extraneous data at the end. """ - kw.setdefault('context', self) + idx = kw.get('idx', 0) + context = kw.get('context', self) try: @@ -334,5 +376,5 @@ try: - obj, end = self._scanner.iterscan(s, **kw).next() + obj, end = JSONScanner(s, idx, context) except StopIteration: raise ValueError("No JSON object could be decoded") return obj, end diff --git a/simplejson/scanner.py b/simplejson/scanner.py index 78543dd76290bfd36ae18700f314682b715b4ea1_c2ltcGxlanNvbi9zY2FubmVyLnB5..376baf4b841e9b3909ffe036a5482ac769dacd99_c2ltcGxlanNvbi9zY2FubmVyLnB5 100644 --- a/simplejson/scanner.py +++ b/simplejson/scanner.py @@ -8,7 +8,7 @@ import sre_constants from sre_constants import BRANCH, SUBPATTERN -__all__ = ['Scanner', 'pattern'] +__all__ = ['make_scanner', 'pattern'] FLAGS = (VERBOSE | MULTILINE | DOTALL) @@ -12,24 +12,24 @@ FLAGS = (VERBOSE | MULTILINE | DOTALL) -class Scanner(object): - def __init__(self, lexicon, flags=FLAGS): - self.actions = [None] - # Combine phrases into a compound pattern - s = sre_parse.Pattern() - s.flags = flags - p = [] - for idx, token in enumerate(lexicon): - phrase = token.pattern - try: - subpattern = sre_parse.SubPattern(s, - [(SUBPATTERN, (idx + 1, sre_parse.parse(phrase, flags)))]) - except sre_constants.error: - raise - p.append(subpattern) - self.actions.append(token) - - s.groups = len(p) + 1 # NOTE(guido): Added to make SRE validation work - p = sre_parse.SubPattern(s, [(BRANCH, (None, p))]) - self.scanner = sre_compile.compile(p) +def make_scanner(lexicon, flags=FLAGS): + actions = [None] + # Combine phrases into a compound pattern + s = sre_parse.Pattern() + s.flags = flags + charpatterns = {} + p = [] + idx = 0 + for token in lexicon: + if token.pattern in (r'\[', r'{', r'"'): + charpatterns[token.pattern[-1]] = token + idx += 1 + phrase = token.pattern + try: + subpattern = sre_parse.SubPattern(s, + [(SUBPATTERN, (idx, sre_parse.parse(phrase, flags)))]) + except sre_constants.error: + raise + p.append(subpattern) + actions.append(token) @@ -35,26 +35,5 @@ - def iterscan(self, string, idx=0, context=None): - """ - Yield match, end_idx for each match - """ - match = self.scanner.scanner(string, idx).match - actions = self.actions - lastend = idx - end = len(string) - while True: - m = match() - if m is None: - break - matchbegin, matchend = m.span() - if lastend == matchend: - break - action = actions[m.lastindex] - if action is not None: - rval, next_pos = action(m, context) - if next_pos is not None and next_pos != matchend: - # "fast forward" the scanner - matchend = next_pos - match = self.scanner.scanner(string, matchend).match - yield rval, matchend - lastend = matchend + s.groups = len(p) + 1 # NOTE(guido): Added to make SRE validation work + p = sre_parse.SubPattern(s, [(BRANCH, (None, p))]) + scanner = sre_compile.compile(p).scanner @@ -60,4 +39,20 @@ + def _scan_once(string, idx=0, context=None): + try: + action = charpatterns[string[idx]] + except KeyError: + pass + except IndexError: + raise StopIteration + else: + return action((string, idx + 1), context) + + m = scanner(string, idx).match() + if m is None or m.end() == idx: + raise StopIteration + return actions[m.lastindex](m, context) + + return _scan_once def pattern(pattern, flags=FLAGS): def decorator(fn): diff --git a/simplejson/tests/test_decode.py b/simplejson/tests/test_decode.py index 78543dd76290bfd36ae18700f314682b715b4ea1_c2ltcGxlanNvbi90ZXN0cy90ZXN0X2RlY29kZS5weQ==..376baf4b841e9b3909ffe036a5482ac769dacd99_c2ltcGxlanNvbi90ZXN0cy90ZXN0X2RlY29kZS5weQ== 100644 --- a/simplejson/tests/test_decode.py +++ b/simplejson/tests/test_decode.py @@ -13,3 +13,10 @@ rval = S.loads('1', parse_int=float) self.assert_(isinstance(rval, float)) self.assertEquals(rval, 1.0) + + def test_decoder_optimizations(self): + # Several optimizations were made that skip over calls to + # the whitespace regex, so this test is designed to try and + # exercise the uncommon cases. The array cases are already covered. + rval = S.loads('{ "key" : "value" , "k":"v" }') + self.assertEquals(rval, {"key":"value", "k":"v"})