Skip to content
Snippets Groups Projects
Commit 376baf4b841e authored by Bob Ippolito
Browse files

ugly hacks to decoder to avoid dispatch for roughly 2x speedup

git-svn-id: http://simplejson.googlecode.com/svn/trunk@100 a4795897-2c25-0410-b006-0d3caba88fa1
parent 78543dd76290
No related branches found
No related tags found
No related merge requests found
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
import re import re
import sys import sys
from simplejson.scanner import Scanner, pattern from simplejson.scanner import make_scanner, pattern
try: try:
from simplejson._speedups import scanstring as c_scanstring from simplejson._speedups import scanstring as c_scanstring
except ImportError: except ImportError:
...@@ -58,8 +58,8 @@ ...@@ -58,8 +58,8 @@
rval = c[s] rval = c[s]
else: else:
rval = fn(s) rval = fn(s)
return rval, None return rval, match.end()
pattern('(-?Infinity|NaN|true|false|null)')(JSONConstant) pattern(r'(-?Infinity|NaN|true|false|null)')(JSONConstant)
def JSONNumber(match, context): def JSONNumber(match, context):
...@@ -63,11 +63,12 @@ ...@@ -63,11 +63,12 @@
def JSONNumber(match, context): def JSONNumber(match, context):
match = JSONNumber.regex.match(match.string, *match.span()) # m1 = JSONNumber.regex.match(match.string, *match.span())
integer, frac, exp = match.groups() # assert m1.groups()[:3] == match.groups()[:3]
integer, frac, exp = match.groups()[:3]
if frac or exp: if frac or exp:
fn = getattr(context, 'parse_float', None) or float fn = getattr(context, 'parse_float', None) or float
res = fn(integer + (frac or '') + (exp or '')) res = fn(integer + (frac or '') + (exp or ''))
else: else:
fn = getattr(context, 'parse_int', None) or int fn = getattr(context, 'parse_int', None) or int
res = fn(integer) res = fn(integer)
...@@ -68,10 +69,10 @@ ...@@ -68,10 +69,10 @@
if frac or exp: if frac or exp:
fn = getattr(context, 'parse_float', None) or float fn = getattr(context, 'parse_float', None) or float
res = fn(integer + (frac or '') + (exp or '')) res = fn(integer + (frac or '') + (exp or ''))
else: else:
fn = getattr(context, 'parse_int', None) or int fn = getattr(context, 'parse_int', None) or int
res = fn(integer) res = fn(integer)
return res, None return res, match.end()
pattern(r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?')(JSONNumber) pattern(r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?')(JSONNumber)
...@@ -149,6 +150,6 @@ ...@@ -149,6 +150,6 @@
# Use speedup if available # Use speedup if available
scanstring = c_scanstring or py_scanstring scanstring = c_scanstring or py_scanstring
def JSONString(match, context): def JSONString((string, end), context):
encoding = getattr(context, 'encoding', None) encoding = getattr(context, 'encoding', None)
strict = getattr(context, 'strict', True) strict = getattr(context, 'strict', True)
...@@ -153,6 +154,6 @@ ...@@ -153,6 +154,6 @@
encoding = getattr(context, 'encoding', None) encoding = getattr(context, 'encoding', None)
strict = getattr(context, 'strict', True) strict = getattr(context, 'strict', True)
return scanstring(match.string, match.end(), encoding, strict) return scanstring(string, end, encoding, strict)
pattern(r'"')(JSONString) pattern(r'"')(JSONString)
...@@ -156,5 +157,6 @@ ...@@ -156,5 +157,6 @@
pattern(r'"')(JSONString) pattern(r'"')(JSONString)
WHITESPACE = re.compile(r'\s*', FLAGS) WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
WHITESPACE_STR = ' \t\n\r'
...@@ -160,3 +162,3 @@ ...@@ -160,3 +162,3 @@
def JSONObject(match, context, _w=WHITESPACE.match): def JSONObject((s, end), context, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
pairs = {} pairs = {}
...@@ -162,7 +164,10 @@ ...@@ -162,7 +164,10 @@
pairs = {} pairs = {}
s = match.string nextchar = s[end:end + 1]
end = _w(s, match.end()).end() # Normally we expect nextchar == '"'
if nextchar != '"':
if nextchar in _ws:
end = _w(s, end).end()
nextchar = s[end:end + 1] nextchar = s[end:end + 1]
# Trivial empty object # Trivial empty object
if nextchar == '}': if nextchar == '}':
return pairs, end + 1 return pairs, end + 1
...@@ -165,9 +170,9 @@ ...@@ -165,9 +170,9 @@
nextchar = s[end:end + 1] nextchar = s[end:end + 1]
# Trivial empty object # Trivial empty object
if nextchar == '}': if nextchar == '}':
return pairs, end + 1 return pairs, end + 1
if nextchar != '"': elif nextchar != '"':
raise ValueError(errmsg("Expecting property name", s, end)) raise ValueError(errmsg("Expecting property name", s, end))
end += 1 end += 1
encoding = getattr(context, 'encoding', None) encoding = getattr(context, 'encoding', None)
strict = getattr(context, 'strict', True) strict = getattr(context, 'strict', True)
...@@ -170,7 +175,7 @@ ...@@ -170,7 +175,7 @@
raise ValueError(errmsg("Expecting property name", s, end)) raise ValueError(errmsg("Expecting property name", s, end))
end += 1 end += 1
encoding = getattr(context, 'encoding', None) encoding = getattr(context, 'encoding', None)
strict = getattr(context, 'strict', True) strict = getattr(context, 'strict', True)
iterscan = JSONScanner.iterscan scan_once = JSONScanner
while True: while True:
key, end = scanstring(s, end, encoding, strict) key, end = scanstring(s, end, encoding, strict)
...@@ -175,5 +180,9 @@ ...@@ -175,5 +180,9 @@
while True: while True:
key, end = scanstring(s, end, encoding, strict) key, end = scanstring(s, end, encoding, strict)
# To skip some function call overhead we optimize the fast paths where
# the JSON key separator is ": " or just ":".
if s[end:end + 1] != ':':
end = _w(s, end).end() end = _w(s, end).end()
if s[end:end + 1] != ':': if s[end:end + 1] != ':':
raise ValueError(errmsg("Expecting : delimiter", s, end)) raise ValueError(errmsg("Expecting : delimiter", s, end))
...@@ -177,5 +186,7 @@ ...@@ -177,5 +186,7 @@
end = _w(s, end).end() end = _w(s, end).end()
if s[end:end + 1] != ':': if s[end:end + 1] != ':':
raise ValueError(errmsg("Expecting : delimiter", s, end)) raise ValueError(errmsg("Expecting : delimiter", s, end))
end = _w(s, end + 1).end()
end += 1
try: try:
...@@ -181,5 +192,13 @@ ...@@ -181,5 +192,13 @@
try: try:
value, end = iterscan(s, idx=end, context=context).next() if s[end] in _ws:
end += 1
if s[end] in _ws:
end = _w(s, end).end()
except IndexError:
pass
try:
value, end = scan_once(s, end, context)
except StopIteration: except StopIteration:
raise ValueError(errmsg("Expecting object", s, end)) raise ValueError(errmsg("Expecting object", s, end))
pairs[key] = value pairs[key] = value
...@@ -183,6 +202,8 @@ ...@@ -183,6 +202,8 @@
except StopIteration: except StopIteration:
raise ValueError(errmsg("Expecting object", s, end)) raise ValueError(errmsg("Expecting object", s, end))
pairs[key] = value pairs[key] = value
nextchar = s[end:end + 1]
if nextchar in _ws:
end = _w(s, end).end() end = _w(s, end).end()
nextchar = s[end:end + 1] nextchar = s[end:end + 1]
end += 1 end += 1
...@@ -186,5 +207,6 @@ ...@@ -186,5 +207,6 @@
end = _w(s, end).end() end = _w(s, end).end()
nextchar = s[end:end + 1] nextchar = s[end:end + 1]
end += 1 end += 1
if nextchar == '}': if nextchar == '}':
break break
...@@ -189,4 +211,4 @@ ...@@ -189,4 +211,4 @@
if nextchar == '}': if nextchar == '}':
break break
if nextchar != ',': elif nextchar != ',':
raise ValueError(errmsg("Expecting , delimiter", s, end - 1)) raise ValueError(errmsg("Expecting , delimiter", s, end - 1))
...@@ -192,2 +214,7 @@ ...@@ -192,2 +214,7 @@
raise ValueError(errmsg("Expecting , delimiter", s, end - 1)) raise ValueError(errmsg("Expecting , delimiter", s, end - 1))
try:
if s[end] in _ws:
end += 1
if s[end] in _ws:
end = _w(s, end).end() end = _w(s, end).end()
...@@ -193,5 +220,8 @@ ...@@ -193,5 +220,8 @@
end = _w(s, end).end() end = _w(s, end).end()
except IndexError:
pass
nextchar = s[end:end + 1] nextchar = s[end:end + 1]
end += 1 end += 1
if nextchar != '"': if nextchar != '"':
raise ValueError(errmsg("Expecting property name", s, end - 1)) raise ValueError(errmsg("Expecting property name", s, end - 1))
...@@ -194,7 +224,8 @@ ...@@ -194,7 +224,8 @@
nextchar = s[end:end + 1] nextchar = s[end:end + 1]
end += 1 end += 1
if nextchar != '"': if nextchar != '"':
raise ValueError(errmsg("Expecting property name", s, end - 1)) raise ValueError(errmsg("Expecting property name", s, end - 1))
object_hook = getattr(context, 'object_hook', None) object_hook = getattr(context, 'object_hook', None)
if object_hook is not None: if object_hook is not None:
pairs = object_hook(pairs) pairs = object_hook(pairs)
...@@ -202,5 +233,5 @@ ...@@ -202,5 +233,5 @@
pattern(r'{')(JSONObject) pattern(r'{')(JSONObject)
def JSONArray(match, context, _w=WHITESPACE.match): def JSONArray((s, end), context, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
values = [] values = []
...@@ -206,4 +237,6 @@ ...@@ -206,4 +237,6 @@
values = [] values = []
s = match.string nextchar = s[end:end + 1]
end = _w(s, match.end()).end() if nextchar in _ws:
end = _w(s, end).end()
nextchar = s[end:end + 1]
# Look-ahead for trivial empty array # Look-ahead for trivial empty array
...@@ -209,4 +242,3 @@ ...@@ -209,4 +242,3 @@
# Look-ahead for trivial empty array # Look-ahead for trivial empty array
nextchar = s[end:end + 1]
if nextchar == ']': if nextchar == ']':
return values, end + 1 return values, end + 1
...@@ -211,5 +243,5 @@ ...@@ -211,5 +243,5 @@
if nextchar == ']': if nextchar == ']':
return values, end + 1 return values, end + 1
iterscan = JSONScanner.iterscan scan_once = JSONScanner
while True: while True:
try: try:
...@@ -214,6 +246,6 @@ ...@@ -214,6 +246,6 @@
while True: while True:
try: try:
value, end = iterscan(s, idx=end, context=context).next() value, end = scan_once(s, end, context)
except StopIteration: except StopIteration:
raise ValueError(errmsg("Expecting object", s, end)) raise ValueError(errmsg("Expecting object", s, end))
values.append(value) values.append(value)
...@@ -217,6 +249,8 @@ ...@@ -217,6 +249,8 @@
except StopIteration: except StopIteration:
raise ValueError(errmsg("Expecting object", s, end)) raise ValueError(errmsg("Expecting object", s, end))
values.append(value) values.append(value)
nextchar = s[end:end + 1]
if nextchar in _ws:
end = _w(s, end).end() end = _w(s, end).end()
nextchar = s[end:end + 1] nextchar = s[end:end + 1]
end += 1 end += 1
...@@ -224,4 +258,9 @@ ...@@ -224,4 +258,9 @@
break break
if nextchar != ',': if nextchar != ',':
raise ValueError(errmsg("Expecting , delimiter", s, end)) raise ValueError(errmsg("Expecting , delimiter", s, end))
try:
if s[end] in _ws:
end += 1
if s[end] in _ws:
end = _w(s, end).end() end = _w(s, end).end()
...@@ -227,4 +266,7 @@ ...@@ -227,4 +266,7 @@
end = _w(s, end).end() end = _w(s, end).end()
except IndexError:
pass
return values, end return values, end
pattern(r'\[')(JSONArray) pattern(r'\[')(JSONArray)
...@@ -237,7 +279,7 @@ ...@@ -237,7 +279,7 @@
JSONNumber, JSONNumber,
] ]
JSONScanner = Scanner(ANYTHING) JSONScanner = make_scanner(ANYTHING)
class JSONDecoder(object): class JSONDecoder(object):
...@@ -270,7 +312,6 @@ ...@@ -270,7 +312,6 @@
their corresponding ``float`` values, which is outside the JSON spec. their corresponding ``float`` values, which is outside the JSON spec.
""" """
_scanner = Scanner(ANYTHING)
__all__ = ['__init__', 'decode', 'raw_decode'] __all__ = ['__init__', 'decode', 'raw_decode']
def __init__(self, encoding=None, object_hook=None, parse_float=None, def __init__(self, encoding=None, object_hook=None, parse_float=None,
...@@ -330,5 +371,6 @@ ...@@ -330,5 +371,6 @@
This can be used to decode a JSON document from a string that may This can be used to decode a JSON document from a string that may
have extraneous data at the end. have extraneous data at the end.
""" """
kw.setdefault('context', self) idx = kw.get('idx', 0)
context = kw.get('context', self)
try: try:
...@@ -334,5 +376,5 @@ ...@@ -334,5 +376,5 @@
try: try:
obj, end = self._scanner.iterscan(s, **kw).next() obj, end = JSONScanner(s, idx, context)
except StopIteration: except StopIteration:
raise ValueError("No JSON object could be decoded") raise ValueError("No JSON object could be decoded")
return obj, end return obj, end
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
import sre_constants import sre_constants
from sre_constants import BRANCH, SUBPATTERN from sre_constants import BRANCH, SUBPATTERN
__all__ = ['Scanner', 'pattern'] __all__ = ['make_scanner', 'pattern']
FLAGS = (VERBOSE | MULTILINE | DOTALL) FLAGS = (VERBOSE | MULTILINE | DOTALL)
...@@ -12,9 +12,8 @@ ...@@ -12,9 +12,8 @@
FLAGS = (VERBOSE | MULTILINE | DOTALL) FLAGS = (VERBOSE | MULTILINE | DOTALL)
class Scanner(object): def make_scanner(lexicon, flags=FLAGS):
def __init__(self, lexicon, flags=FLAGS): actions = [None]
self.actions = [None]
# Combine phrases into a compound pattern # Combine phrases into a compound pattern
s = sre_parse.Pattern() s = sre_parse.Pattern()
s.flags = flags s.flags = flags
...@@ -18,4 +17,5 @@ ...@@ -18,4 +17,5 @@
# Combine phrases into a compound pattern # Combine phrases into a compound pattern
s = sre_parse.Pattern() s = sre_parse.Pattern()
s.flags = flags s.flags = flags
charpatterns = {}
p = [] p = []
...@@ -21,5 +21,9 @@ ...@@ -21,5 +21,9 @@
p = [] p = []
for idx, token in enumerate(lexicon): idx = 0
for token in lexicon:
if token.pattern in (r'\[', r'{', r'"'):
charpatterns[token.pattern[-1]] = token
idx += 1
phrase = token.pattern phrase = token.pattern
try: try:
subpattern = sre_parse.SubPattern(s, subpattern = sre_parse.SubPattern(s,
...@@ -23,7 +27,7 @@ ...@@ -23,7 +27,7 @@
phrase = token.pattern phrase = token.pattern
try: try:
subpattern = sre_parse.SubPattern(s, subpattern = sre_parse.SubPattern(s,
[(SUBPATTERN, (idx + 1, sre_parse.parse(phrase, flags)))]) [(SUBPATTERN, (idx, sre_parse.parse(phrase, flags)))])
except sre_constants.error: except sre_constants.error:
raise raise
p.append(subpattern) p.append(subpattern)
...@@ -27,7 +31,7 @@ ...@@ -27,7 +31,7 @@
except sre_constants.error: except sre_constants.error:
raise raise
p.append(subpattern) p.append(subpattern)
self.actions.append(token) actions.append(token)
s.groups = len(p) + 1 # NOTE(guido): Added to make SRE validation work s.groups = len(p) + 1 # NOTE(guido): Added to make SRE validation work
p = sre_parse.SubPattern(s, [(BRANCH, (None, p))]) p = sre_parse.SubPattern(s, [(BRANCH, (None, p))])
...@@ -31,5 +35,5 @@ ...@@ -31,5 +35,5 @@
s.groups = len(p) + 1 # NOTE(guido): Added to make SRE validation work s.groups = len(p) + 1 # NOTE(guido): Added to make SRE validation work
p = sre_parse.SubPattern(s, [(BRANCH, (None, p))]) p = sre_parse.SubPattern(s, [(BRANCH, (None, p))])
self.scanner = sre_compile.compile(p) scanner = sre_compile.compile(p).scanner
...@@ -35,26 +39,11 @@ ...@@ -35,26 +39,11 @@
def iterscan(self, string, idx=0, context=None): def _scan_once(string, idx=0, context=None):
""" try:
Yield match, end_idx for each match action = charpatterns[string[idx]]
""" except KeyError:
match = self.scanner.scanner(string, idx).match pass
actions = self.actions except IndexError:
lastend = idx raise StopIteration
end = len(string) else:
while True: return action((string, idx + 1), context)
m = match()
if m is None:
break
matchbegin, matchend = m.span()
if lastend == matchend:
break
action = actions[m.lastindex]
if action is not None:
rval, next_pos = action(m, context)
if next_pos is not None and next_pos != matchend:
# "fast forward" the scanner
matchend = next_pos
match = self.scanner.scanner(string, matchend).match
yield rval, matchend
lastend = matchend
...@@ -60,4 +49,10 @@ ...@@ -60,4 +49,10 @@
m = scanner(string, idx).match()
if m is None or m.end() == idx:
raise StopIteration
return actions[m.lastindex](m, context)
return _scan_once
def pattern(pattern, flags=FLAGS): def pattern(pattern, flags=FLAGS):
def decorator(fn): def decorator(fn):
......
...@@ -13,3 +13,10 @@ ...@@ -13,3 +13,10 @@
rval = S.loads('1', parse_int=float) rval = S.loads('1', parse_int=float)
self.assert_(isinstance(rval, float)) self.assert_(isinstance(rval, float))
self.assertEquals(rval, 1.0) self.assertEquals(rval, 1.0)
def test_decoder_optimizations(self):
# Several optimizations were made that skip over calls to
# the whitespace regex, so this test is designed to try and
# exercise the uncommon cases. The array cases are already covered.
rval = S.loads('{ "key" : "value" , "k":"v" }')
self.assertEquals(rval, {"key":"value", "k":"v"})
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment