# HG changeset patch
# User Bob Ippolito <bob@redivi.com>
# Date 1222137286 0
#      Tue Sep 23 02:34:46 2008 +0000
# Node ID 376baf4b841e9b3909ffe036a5482ac769dacd99
# Parent 78543dd76290bfd36ae18700f314682b715b4ea1
ugly hacks to decoder to avoid dispatch for roughly 2x speedup

git-svn-id: http://simplejson.googlecode.com/svn/trunk@100 a4795897-2c25-0410-b006-0d3caba88fa1

diff --git a/simplejson/decoder.py b/simplejson/decoder.py
--- a/simplejson/decoder.py
+++ b/simplejson/decoder.py
@@ -4,7 +4,7 @@
 import re
 import sys
 
-from simplejson.scanner import Scanner, pattern
+from simplejson.scanner import make_scanner, pattern
 try:
     from simplejson._speedups import scanstring as c_scanstring
 except ImportError:
@@ -58,20 +58,21 @@
         rval = c[s]
     else:
         rval = fn(s)
-    return rval, None
-pattern('(-?Infinity|NaN|true|false|null)')(JSONConstant)
+    return rval, match.end()
+pattern(r'(-?Infinity|NaN|true|false|null)')(JSONConstant)
 
 
 def JSONNumber(match, context):
-    match = JSONNumber.regex.match(match.string, *match.span())
-    integer, frac, exp = match.groups()
+    # m1 = JSONNumber.regex.match(match.string, *match.span())
+    # assert m1.groups()[:3] == match.groups()[:3]
+    integer, frac, exp = match.groups()[:3]
     if frac or exp:
         fn = getattr(context, 'parse_float', None) or float
         res = fn(integer + (frac or '') + (exp or ''))
     else:
         fn = getattr(context, 'parse_int', None) or int
         res = fn(integer)
-    return res, None
+    return res, match.end()
 pattern(r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?')(JSONNumber)
 
 
@@ -149,52 +150,82 @@
 # Use speedup if available
 scanstring = c_scanstring or py_scanstring
 
-def JSONString(match, context):
+def JSONString((string, end), context):
     encoding = getattr(context, 'encoding', None)
     strict = getattr(context, 'strict', True)
-    return scanstring(match.string, match.end(), encoding, strict)
+    return scanstring(string, end, encoding, strict)
 pattern(r'"')(JSONString)
 
-WHITESPACE = re.compile(r'\s*', FLAGS)
+WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
+WHITESPACE_STR = ' \t\n\r'
 
 
-def JSONObject(match, context, _w=WHITESPACE.match):
+def JSONObject((s, end), context, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
     pairs = {}
-    s = match.string
-    end = _w(s, match.end()).end()
     nextchar = s[end:end + 1]
-    # Trivial empty object
-    if nextchar == '}':
-        return pairs, end + 1
+    # Normally we expect nextchar == '"'
     if nextchar != '"':
-        raise ValueError(errmsg("Expecting property name", s, end))
+        if nextchar in _ws:
+            end = _w(s, end).end()
+            nextchar = s[end:end + 1]
+        # Trivial empty object
+        if nextchar == '}':
+            return pairs, end + 1
+        elif nextchar != '"':
+            raise ValueError(errmsg("Expecting property name", s, end))
     end += 1
     encoding = getattr(context, 'encoding', None)
     strict = getattr(context, 'strict', True)
-    iterscan = JSONScanner.iterscan
+    scan_once = JSONScanner
     while True:
         key, end = scanstring(s, end, encoding, strict)
-        end = _w(s, end).end()
+
+        # To skip some function call overhead we optimize the fast paths where
+        # the JSON key separator is ": " or just ":".
         if s[end:end + 1] != ':':
-            raise ValueError(errmsg("Expecting : delimiter", s, end))
-        end = _w(s, end + 1).end()
+            end = _w(s, end).end()
+            if s[end:end + 1] != ':':
+                raise ValueError(errmsg("Expecting : delimiter", s, end))
+
+        end += 1
+
         try:
-            value, end = iterscan(s, idx=end, context=context).next()
+            if s[end] in _ws:
+                end += 1
+                if s[end] in _ws:
+                    end = _w(s, end).end()
+        except IndexError:
+            pass
+
+        try:
+            value, end = scan_once(s, end, context)
         except StopIteration:
             raise ValueError(errmsg("Expecting object", s, end))
         pairs[key] = value
-        end = _w(s, end).end()
         nextchar = s[end:end + 1]
+        if nextchar in _ws:
+            end = _w(s, end).end()
+            nextchar = s[end:end + 1]
         end += 1
+
         if nextchar == '}':
             break
-        if nextchar != ',':
+        elif nextchar != ',':
             raise ValueError(errmsg("Expecting , delimiter", s, end - 1))
-        end = _w(s, end).end()
+
+        try:
+            if s[end] in _ws:
+                end += 1
+                if s[end] in _ws:
+                    end = _w(s, end).end()
+        except IndexError:
+            pass
+
         nextchar = s[end:end + 1]
         end += 1
         if nextchar != '"':
             raise ValueError(errmsg("Expecting property name", s, end - 1))
+
     object_hook = getattr(context, 'object_hook', None)
     if object_hook is not None:
         pairs = object_hook(pairs)
@@ -202,29 +233,40 @@
 pattern(r'{')(JSONObject)
 
 
-def JSONArray(match, context, _w=WHITESPACE.match):
+def JSONArray((s, end), context, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
     values = []
-    s = match.string
-    end = _w(s, match.end()).end()
+    nextchar = s[end:end + 1]
+    if nextchar in _ws:
+        end = _w(s, end).end()
+        nextchar = s[end:end + 1]
     # Look-ahead for trivial empty array
-    nextchar = s[end:end + 1]
     if nextchar == ']':
         return values, end + 1
-    iterscan = JSONScanner.iterscan
+    scan_once = JSONScanner
     while True:
         try:
-            value, end = iterscan(s, idx=end, context=context).next()
+            value, end = scan_once(s, end, context)
         except StopIteration:
             raise ValueError(errmsg("Expecting object", s, end))
         values.append(value)
-        end = _w(s, end).end()
         nextchar = s[end:end + 1]
+        if nextchar in _ws:
+            end = _w(s, end).end()
+            nextchar = s[end:end + 1]
         end += 1
         if nextchar == ']':
             break
         if nextchar != ',':
             raise ValueError(errmsg("Expecting , delimiter", s, end))
-        end = _w(s, end).end()
+
+        try:
+            if s[end] in _ws:
+                end += 1
+                if s[end] in _ws:
+                    end = _w(s, end).end()
+        except IndexError:
+            pass
+
     return values, end
 pattern(r'\[')(JSONArray)
 
@@ -237,7 +279,7 @@
     JSONNumber,
 ]
 
-JSONScanner = Scanner(ANYTHING)
+JSONScanner = make_scanner(ANYTHING)
 
 
 class JSONDecoder(object):
@@ -270,7 +312,6 @@
     their corresponding ``float`` values, which is outside the JSON spec.
 
     """
-    _scanner = Scanner(ANYTHING)
     __all__ = ['__init__', 'decode', 'raw_decode']
 
     def __init__(self, encoding=None, object_hook=None, parse_float=None,
@@ -330,9 +371,10 @@
         This can be used to decode a JSON document from a string that may
         have extraneous data at the end.
""" - kw.setdefault('context', self) + idx = kw.get('idx', 0) + context = kw.get('context', self) try: - obj, end = self._scanner.iterscan(s, **kw).next() + obj, end = JSONScanner(s, idx, context) except StopIteration: raise ValueError("No JSON object could be decoded") return obj, end diff --git a/simplejson/scanner.py b/simplejson/scanner.py --- a/simplejson/scanner.py +++ b/simplejson/scanner.py @@ -8,56 +8,51 @@ import sre_constants from sre_constants import BRANCH, SUBPATTERN -__all__ = ['Scanner', 'pattern'] +__all__ = ['make_scanner', 'pattern'] FLAGS = (VERBOSE | MULTILINE | DOTALL) -class Scanner(object): - def __init__(self, lexicon, flags=FLAGS): - self.actions = [None] - # Combine phrases into a compound pattern - s = sre_parse.Pattern() - s.flags = flags - p = [] - for idx, token in enumerate(lexicon): - phrase = token.pattern - try: - subpattern = sre_parse.SubPattern(s, - [(SUBPATTERN, (idx + 1, sre_parse.parse(phrase, flags)))]) - except sre_constants.error: - raise - p.append(subpattern) - self.actions.append(token) - - s.groups = len(p) + 1 # NOTE(guido): Added to make SRE validation work - p = sre_parse.SubPattern(s, [(BRANCH, (None, p))]) - self.scanner = sre_compile.compile(p) +def make_scanner(lexicon, flags=FLAGS): + actions = [None] + # Combine phrases into a compound pattern + s = sre_parse.Pattern() + s.flags = flags + charpatterns = {} + p = [] + idx = 0 + for token in lexicon: + if token.pattern in (r'\[', r'{', r'"'): + charpatterns[token.pattern[-1]] = token + idx += 1 + phrase = token.pattern + try: + subpattern = sre_parse.SubPattern(s, + [(SUBPATTERN, (idx, sre_parse.parse(phrase, flags)))]) + except sre_constants.error: + raise + p.append(subpattern) + actions.append(token) - def iterscan(self, string, idx=0, context=None): - """ - Yield match, end_idx for each match - """ - match = self.scanner.scanner(string, idx).match - actions = self.actions - lastend = idx - end = len(string) - while True: - m = match() - if m is None: - break - matchbegin, matchend = m.span() - if lastend == matchend: - break - action = actions[m.lastindex] - if action is not None: - rval, next_pos = action(m, context) - if next_pos is not None and next_pos != matchend: - # "fast forward" the scanner - matchend = next_pos - match = self.scanner.scanner(string, matchend).match - yield rval, matchend - lastend = matchend + s.groups = len(p) + 1 # NOTE(guido): Added to make SRE validation work + p = sre_parse.SubPattern(s, [(BRANCH, (None, p))]) + scanner = sre_compile.compile(p).scanner + def _scan_once(string, idx=0, context=None): + try: + action = charpatterns[string[idx]] + except KeyError: + pass + except IndexError: + raise StopIteration + else: + return action((string, idx + 1), context) + + m = scanner(string, idx).match() + if m is None or m.end() == idx: + raise StopIteration + return actions[m.lastindex](m, context) + + return _scan_once def pattern(pattern, flags=FLAGS): def decorator(fn): diff --git a/simplejson/tests/test_decode.py b/simplejson/tests/test_decode.py --- a/simplejson/tests/test_decode.py +++ b/simplejson/tests/test_decode.py @@ -13,3 +13,10 @@ rval = S.loads('1', parse_int=float) self.assert_(isinstance(rval, float)) self.assertEquals(rval, 1.0) + + def test_decoder_optimizations(self): + # Several optimizations were made that skip over calls to + # the whitespace regex, so this test is designed to try and + # exercise the uncommon cases. The array cases are already covered. 
+        rval = S.loads('{ "key" : "value" , "k":"v" }')
+        self.assertEquals(rval, {"key":"value", "k":"v"})
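
Note (not part of the patch): the core of the speedup is the scanner.py rewrite above. The old Scanner class combined every token pattern into one branching regex, and its iterscan() generator looked up the action through m.lastindex for every token; the new make_scanner() closure peeks at the first character and, for '{', '[' and '"', calls the matching parser directly, falling back to the compiled regex only for numbers and constants. The following self-contained sketch illustrates that dispatch idea only; it is not code from this changeset, the names (DISPATCH, parse_array, parse_string, scan_once) are invented for the example, and the string handling is deliberately toy.

    import re

    NUMBER_RE = re.compile(r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?')
    STRING_RE = re.compile(r'"([^"\\]*)"')   # toy strings: no escape handling
    WHITESPACE = re.compile(r'[ \t\n\r]*')

    def parse_string(s, idx):
        # idx points at the opening quote
        m = STRING_RE.match(s, idx)
        if m is None:
            raise ValueError('invalid string at %d' % idx)
        return m.group(1), m.end()

    def parse_array(s, idx):
        # idx points at the opening bracket
        values = []
        idx = WHITESPACE.match(s, idx + 1).end()
        if s[idx:idx + 1] == ']':
            return values, idx + 1
        while True:
            value, idx = scan_once(s, idx)
            values.append(value)
            idx = WHITESPACE.match(s, idx).end()
            nextchar = s[idx:idx + 1]
            idx += 1
            if nextchar == ']':
                return values, idx
            if nextchar != ',':
                raise ValueError('expected , or ] at %d' % (idx - 1))

    # One dict lookup on the first character replaces the branch-regex dispatch.
    DISPATCH = {'"': parse_string, '[': parse_array}

    def scan_once(s, idx):
        idx = WHITESPACE.match(s, idx).end()
        try:
            action = DISPATCH[s[idx]]
        except KeyError:
            pass          # not a string or array: fall through to the number regex
        except IndexError:
            raise ValueError('nothing to decode at %d' % idx)
        else:
            return action(s, idx)
        m = NUMBER_RE.match(s, idx)
        if m is None:
            raise ValueError('unexpected character at %d' % idx)
        integer, frac, exp = m.groups()
        return (float(m.group()) if frac or exp else int(integer)), m.end()

    print(scan_once(' [1, "two", 3.5]', 0))   # -> ([1, 'two', 3.5], 16)

A dict lookup plus a direct function call per token is what removes the per-token generator resumption and group bookkeeping that iterscan() paid for.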
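
The decoder.py hunks apply a second, smaller trick in JSONObject and JSONArray: each end = _w(s, end).end() between tokens is replaced by an inline s[end] in ' \t\n\r' test, so documents with no whitespace or a single space between tokens never reach the regex engine. A hypothetical helper showing the same logic (the patch inlines it rather than calling a function, to avoid the very call overhead it is trying to remove):

    import re

    WHITESPACE = re.compile(r'[ \t\n\r]*')
    WHITESPACE_STR = ' \t\n\r'

    def skip_whitespace(s, end, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
        # Most JSON has zero or one whitespace character between tokens, so
        # test a single character with `in` before paying for a regex match.
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end).end()
        except IndexError:
            pass
        return end

    print(skip_whitespace('{"a": 1}', 5))      # single space, no regex call -> 6
    print(skip_whitespace('{"a":   1}', 5))    # run of spaces, falls back to the regex -> 8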
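
Finally, the raw_decode() docstring kept as context in the last decoder.py hunk describes the contract the rewrite preserves: parse one document starting at the given index and report where it stopped. A short usage sketch against the public simplejson API; the asserted values are what that contract implies, not output captured from this revision:

    import simplejson

    decoder = simplejson.JSONDecoder()
    # raw_decode() parses a single document and reports where it stopped,
    # leaving any trailing data to the caller.
    obj, end = decoder.raw_decode('{"key": "value"} trailing text')
    assert obj == {'key': 'value'}
    assert end == 16   # index just past the closing brace

    # The new test case exercises the same ":" and "," whitespace fast paths via loads().
    assert simplejson.loads('{ "key" : "value" , "k":"v" }') == {'key': 'value', 'k': 'v'}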