Skip to content
Snippets Groups Projects
Commit 1f749686d39e authored by Bob Ippolito
Browse files
git-svn-id: http://simplejson.googlecode.com/svn/trunk@206 a4795897-2c25-0410-b006-0d3caba88fa1
parent ed9859bb6169
Branches
No related tags found
No related merge requests found
Version 2.1.0 released XXXX-XX-XX
* Decoding performance and memory utilization enhancements
http://bugs.python.org/issue7451
* JSONEncoderForHTML class for escaping &, <, >
http://code.google.com/p/simplejson/issues/detail?id=66
* Memoization of object keys during encoding (when using speedups)
......
......@@ -54,6 +54,7 @@
PyObject *parse_float;
PyObject *parse_int;
PyObject *parse_constant;
PyObject *memo;
} PyScannerObject;
static PyMemberDef scanner_members[] = {
......@@ -441,6 +442,21 @@
return tpl;
}
/*
 * APPEND_OLD_CHUNK -- flush the pending `chunk` into the `chunks` list.
 *
 * Requirements on the enclosing function:
 *   - a local `PyObject *chunk`  (the pending piece, or NULL if none),
 *   - a local `PyObject *chunks` (the list of earlier pieces, or NULL),
 *   - a `bail:` label that handles the error exit.
 *
 * Behaviour:
 *   - Does nothing when `chunk` is NULL.
 *   - Creates `chunks` lazily, so a string made of a single piece never
 *     allocates a list at all.
 *   - On success the local reference to `chunk` is released and `chunk`
 *     is reset to NULL (the list keeps its own reference; PyList_Append
 *     does not steal one).
 *   - On failure `chunk` is released and reset to NULL *before* jumping
 *     to `bail`.  Resetting it matters: a `bail:` handler that runs
 *     Py_XDECREF(chunk) would otherwise drop the same reference a second
 *     time.
 *
 * The macro expands to a bare `if` statement and is written at call sites
 * without a trailing semicolon; do not place it directly before an `else`.
 */
#define APPEND_OLD_CHUNK \
    if (chunk != NULL) { \
        if (chunks == NULL) { \
            chunks = PyList_New(0); \
            if (chunks == NULL) { \
                goto bail; \
            } \
        } \
        if (PyList_Append(chunks, chunk)) { \
            Py_CLEAR(chunk); \
            goto bail; \
        } \
        Py_CLEAR(chunk); \
    }
static PyObject *
scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr)
{
......@@ -459,10 +475,8 @@
Py_ssize_t next = begin;
int has_unicode = 0;
char *buf = PyString_AS_STRING(pystr);
PyObject *chunks = PyList_New(0);
if (chunks == NULL) {
goto bail;
}
PyObject *chunks = NULL;
PyObject *chunk = NULL;
if (end < 0 || len <= end) {
PyErr_SetString(PyExc_ValueError, "end is out of bounds");
goto bail;
......@@ -470,7 +484,6 @@
while (1) {
/* Find the end of the string or the next escape */
Py_UNICODE c = 0;
PyObject *chunk = NULL;
for (next = end; next < len; next++) {
c = (unsigned char)buf[next];
if (c == '"' || c == '\\') {
......@@ -490,6 +503,7 @@
}
/* Pick up this chunk if it's not zero length */
if (next != end) {
APPEND_OLD_CHUNK
PyObject *strchunk = PyString_FromStringAndSize(&buf[end], next - end);
if (strchunk == NULL) {
goto bail;
......@@ -504,11 +518,6 @@
else {
chunk = strchunk;
}
if (PyList_Append(chunks, chunk)) {
Py_DECREF(chunk);
goto bail;
}
Py_DECREF(chunk);
}
next++;
if (c == '"') {
......@@ -613,6 +622,7 @@
if (c > 0x7f) {
has_unicode = 1;
}
APPEND_OLD_CHUNK
if (has_unicode) {
chunk = PyUnicode_FromUnicode(&c, 1);
if (chunk == NULL) {
......@@ -626,10 +636,5 @@
goto bail;
}
}
if (PyList_Append(chunks, chunk)) {
Py_DECREF(chunk);
goto bail;
}
Py_DECREF(chunk);
}
......@@ -634,7 +639,15 @@
}
if (chunks == NULL) {
if (chunk != NULL)
rval = chunk;
else
rval = PyString_FromStringAndSize("", 0);
}
else {
APPEND_OLD_CHUNK
rval = join_list_string(chunks);
if (rval == NULL) {
goto bail;
}
Py_CLEAR(chunks);
......@@ -636,9 +649,11 @@
rval = join_list_string(chunks);
if (rval == NULL) {
goto bail;
}
Py_CLEAR(chunks);
}
*next_end_ptr = end;
return rval;
bail:
*next_end_ptr = -1;
......@@ -641,7 +656,8 @@
*next_end_ptr = end;
return rval;
bail:
*next_end_ptr = -1;
Py_XDECREF(chunk);
Py_XDECREF(chunks);
return NULL;
}
......@@ -663,10 +679,9 @@
Py_ssize_t begin = end - 1;
Py_ssize_t next = begin;
const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr);
PyObject *chunks = PyList_New(0);
if (chunks == NULL) {
goto bail;
}
PyObject *chunks = NULL;
PyObject *chunk = NULL;
if (end < 0 || len <= end) {
PyErr_SetString(PyExc_ValueError, "end is out of bounds");
goto bail;
......@@ -674,7 +689,6 @@
while (1) {
/* Find the end of the string or the next escape */
Py_UNICODE c = 0;
PyObject *chunk = NULL;
for (next = end; next < len; next++) {
c = buf[next];
if (c == '"' || c == '\\') {
......@@ -691,7 +705,8 @@
}
/* Pick up this chunk if it's not zero length */
if (next != end) {
APPEND_OLD_CHUNK
chunk = PyUnicode_FromUnicode(&buf[end], next - end);
if (chunk == NULL) {
goto bail;
}
......@@ -694,12 +709,7 @@
chunk = PyUnicode_FromUnicode(&buf[end], next - end);
if (chunk == NULL) {
goto bail;
}
if (PyList_Append(chunks, chunk)) {
Py_DECREF(chunk);
goto bail;
}
Py_DECREF(chunk);
}
next++;
if (c == '"') {
......@@ -801,7 +811,8 @@
}
#endif
}
APPEND_OLD_CHUNK
chunk = PyUnicode_FromUnicode(&c, 1);
if (chunk == NULL) {
goto bail;
}
......@@ -804,11 +815,6 @@
chunk = PyUnicode_FromUnicode(&c, 1);
if (chunk == NULL) {
goto bail;
}
if (PyList_Append(chunks, chunk)) {
Py_DECREF(chunk);
goto bail;
}
Py_DECREF(chunk);
}
......@@ -813,6 +819,14 @@
}
if (chunks == NULL) {
if (chunk != NULL)
rval = chunk;
else
rval = PyUnicode_FromStringAndSize("", 0);
}
else {
APPEND_OLD_CHUNK
rval = join_list_unicode(chunks);
if (rval == NULL) {
goto bail;
}
......@@ -815,9 +829,10 @@
rval = join_list_unicode(chunks);
if (rval == NULL) {
goto bail;
}
Py_DECREF(chunks);
Py_CLEAR(chunks);
}
*next_end_ptr = end;
return rval;
bail:
*next_end_ptr = -1;
......@@ -820,7 +835,8 @@
*next_end_ptr = end;
return rval;
bail:
*next_end_ptr = -1;
Py_XDECREF(chunk);
Py_XDECREF(chunks);
return NULL;
}
......@@ -914,6 +930,7 @@
Py_VISIT(s->parse_float);
Py_VISIT(s->parse_int);
Py_VISIT(s->parse_constant);
Py_VISIT(s->memo);
return 0;
}
......@@ -930,6 +947,7 @@
Py_CLEAR(s->parse_float);
Py_CLEAR(s->parse_int);
Py_CLEAR(s->parse_constant);
Py_CLEAR(s->memo);
return 0;
}
......@@ -945,10 +963,10 @@
*/
char *str = PyString_AS_STRING(pystr);
Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
PyObject *rval;
PyObject *pairs;
PyObject *rval = NULL;
PyObject *pairs = NULL;
PyObject *item;
PyObject *key = NULL;
PyObject *val = NULL;
char *encoding = PyString_AS_STRING(s->encoding);
int strict = PyObject_IsTrue(s->strict);
......@@ -950,6 +968,7 @@
PyObject *item;
PyObject *key = NULL;
PyObject *val = NULL;
char *encoding = PyString_AS_STRING(s->encoding);
int strict = PyObject_IsTrue(s->strict);
int has_pairs_hook = (s->pairs_hook != Py_None);
Py_ssize_t next_idx;
......@@ -955,4 +974,5 @@
Py_ssize_t next_idx;
if (has_pairs_hook) {
pairs = PyList_New(0);
if (pairs == NULL)
return NULL;
......@@ -956,6 +976,12 @@
pairs = PyList_New(0);
if (pairs == NULL)
return NULL;
}
else {
rval = PyDict_New();
if (rval == NULL)
return NULL;
}
/* skip whitespace after { */
while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
......@@ -963,6 +989,8 @@
/* only loop if the object is non-empty */
if (idx <= end_idx && str[idx] != '}') {
while (idx <= end_idx) {
PyObject *memokey;
/* read key */
if (str[idx] != '"') {
raise_errmsg("Expecting property name", pystr, idx);
......@@ -971,6 +999,16 @@
key = scanstring_str(pystr, idx + 1, encoding, strict, &next_idx);
if (key == NULL)
goto bail;
memokey = PyDict_GetItem(s->memo, key);
if (memokey != NULL) {
Py_INCREF(memokey);
Py_DECREF(key);
key = memokey;
}
else {
if (PyDict_SetItem(s->memo, key, key) < 0)
goto bail;
}
idx = next_idx;
/* skip whitespace between key and : delimiter, read :, skip whitespace */
......@@ -987,6 +1025,7 @@
if (val == NULL)
goto bail;
if (has_pairs_hook) {
item = PyTuple_Pack(2, key, val);
if (item == NULL)
goto bail;
......@@ -997,6 +1036,13 @@
goto bail;
}
Py_DECREF(item);
}
else {
if (PyDict_SetItem(rval, key, val) < 0)
goto bail;
Py_CLEAR(key);
Py_CLEAR(val);
}
idx = next_idx;
/* skip whitespace before } or , */
......@@ -1033,12 +1079,6 @@
return val;
}
rval = PyObject_CallFunctionObjArgs((PyObject *)(&PyDict_Type),
pairs, NULL);
if (rval == NULL)
goto bail;
Py_CLEAR(pairs);
/* if object_hook is not None: rval = object_hook(rval) */
if (s->object_hook != Py_None) {
val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
......@@ -1051,6 +1091,7 @@
*next_idx_ptr = idx + 1;
return rval;
bail:
Py_XDECREF(rval);
Py_XDECREF(key);
Py_XDECREF(val);
Py_XDECREF(pairs);
......@@ -1068,9 +1109,9 @@
*/
Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
PyObject *rval;
PyObject *pairs;
PyObject *rval = NULL;
PyObject *pairs = NULL;
PyObject *item;
PyObject *key = NULL;
PyObject *val = NULL;
int strict = PyObject_IsTrue(s->strict);
......@@ -1073,6 +1114,7 @@
PyObject *item;
PyObject *key = NULL;
PyObject *val = NULL;
int strict = PyObject_IsTrue(s->strict);
int has_pairs_hook = (s->pairs_hook != Py_None);
Py_ssize_t next_idx;
......@@ -1077,5 +1119,6 @@
Py_ssize_t next_idx;
if (has_pairs_hook) {
pairs = PyList_New(0);
if (pairs == NULL)
return NULL;
......@@ -1079,6 +1122,12 @@
pairs = PyList_New(0);
if (pairs == NULL)
return NULL;
}
else {
rval = PyDict_New();
if (rval == NULL)
return NULL;
}
/* skip whitespace after { */
while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
......@@ -1086,6 +1135,8 @@
/* only loop if the object is non-empty */
if (idx <= end_idx && str[idx] != '}') {
while (idx <= end_idx) {
PyObject *memokey;
/* read key */
if (str[idx] != '"') {
raise_errmsg("Expecting property name", pystr, idx);
......@@ -1094,6 +1145,16 @@
key = scanstring_unicode(pystr, idx + 1, strict, &next_idx);
if (key == NULL)
goto bail;
memokey = PyDict_GetItem(s->memo, key);
if (memokey != NULL) {
Py_INCREF(memokey);
Py_DECREF(key);
key = memokey;
}
else {
if (PyDict_SetItem(s->memo, key, key) < 0)
goto bail;
}
idx = next_idx;
/* skip whitespace between key and : delimiter, read :, skip whitespace */
......@@ -1110,6 +1171,7 @@
if (val == NULL)
goto bail;
if (has_pairs_hook) {
item = PyTuple_Pack(2, key, val);
if (item == NULL)
goto bail;
......@@ -1120,6 +1182,13 @@
goto bail;
}
Py_DECREF(item);
}
else {
if (PyDict_SetItem(rval, key, val) < 0)
goto bail;
Py_CLEAR(key);
Py_CLEAR(val);
}
idx = next_idx;
/* skip whitespace before } or , */
......@@ -1157,12 +1226,6 @@
return val;
}
rval = PyObject_CallFunctionObjArgs((PyObject *)(&PyDict_Type),
pairs, NULL);
if (rval == NULL)
goto bail;
Py_CLEAR(pairs);
/* if object_hook is not None: rval = object_hook(rval) */
if (s->object_hook != Py_None) {
val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
......@@ -1175,6 +1238,7 @@
*next_idx_ptr = idx + 1;
return rval;
bail:
Py_XDECREF(rval);
Py_XDECREF(key);
Py_XDECREF(val);
Py_XDECREF(pairs);
......@@ -1723,6 +1787,7 @@
Py_TYPE(pystr)->tp_name);
return NULL;
}
PyDict_Clear(s->memo);
return _build_rval_index_tuple(rval, next_idx);
}
......@@ -1757,6 +1822,12 @@
if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
return -1;
if (s->memo == NULL) {
s->memo = PyDict_New();
if (s->memo == NULL)
goto bail;
}
/* PyString_AS_STRING is used on encoding */
s->encoding = PyObject_GetAttrString(ctx, "encoding");
if (s->encoding == NULL)
......
......@@ -175,7 +175,12 @@
WHITESPACE_STR = ' \t\n\r'
def JSONObject((s, end), encoding, strict, scan_once, object_hook,
object_pairs_hook, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
object_pairs_hook, memo=None,
_w=WHITESPACE.match, _ws=WHITESPACE_STR):
# Backwards compatibility
if memo is None:
memo = {}
memo_get = memo.setdefault
pairs = []
# Use a slice to prevent IndexError from being raised, the following
# check will raise a more specific ValueError if the string is empty
......@@ -199,6 +204,7 @@
end += 1
while True:
key, end = scanstring(s, end, encoding, strict)
key = memo_get(key, key)
# To skip some function call overhead we optimize the fast paths where
# the JSON key separator is ": " or just ":".
......@@ -382,6 +388,7 @@
self.parse_object = JSONObject
self.parse_array = JSONArray
self.parse_string = scanstring
self.memo = {}
self.scan_once = make_scanner(self)
def decode(self, s, _w=WHITESPACE.match):
......
......@@ -24,6 +24,7 @@
parse_constant = context.parse_constant
object_hook = context.object_hook
object_pairs_hook = context.object_pairs_hook
memo = context.memo
def _scan_once(string, idx):
try:
......@@ -35,7 +36,7 @@
return parse_string(string, idx + 1, encoding, strict)
elif nextchar == '{':
return parse_object((string, idx + 1), encoding, strict,
_scan_once, object_hook, object_pairs_hook)
_scan_once, object_hook, object_pairs_hook, memo)
elif nextchar == '[':
return parse_array((string, idx + 1), _scan_once)
elif nextchar == 'n' and string[idx:idx + 4] == 'null':
......@@ -62,6 +63,12 @@
else:
raise StopIteration
return _scan_once
def scan_once(string, idx):
try:
return _scan_once(string, idx)
finally:
memo.clear()
return scan_once
make_scanner = c_make_scanner or py_make_scanner
......@@ -6,6 +6,10 @@
from simplejson import OrderedDict
class TestDecode(TestCase):
if not hasattr(TestCase, 'assertIs'):
def assertIs(self, a, b):
self.assert_(a is b, '%r is %r' % (a, b))
def test_decimal(self):
rval = json.loads('1.1', parse_float=decimal.Decimal)
self.assert_(isinstance(rval, decimal.Decimal))
......@@ -47,3 +51,18 @@
object_pairs_hook=OrderedDict,
object_hook=lambda x: None),
OrderedDict(p))
def check_keys_reuse(self, source, loads):
rval = loads(source)
(a, b), (c, d) = sorted(rval[0]), sorted(rval[1])
self.assertIs(a, c)
self.assertIs(b, d)
def test_keys_reuse_str(self):
s = u'[{"a_key": 1, "b_\xe9": 2}, {"a_key": 3, "b_\xe9": 4}]'.encode('utf8')
self.check_keys_reuse(s, json.loads)
def test_keys_reuse_unicode(self):
s = u'[{"a_key": 1, "b_\xe9": 2}, {"a_key": 3, "b_\xe9": 4}]'
self.check_keys_reuse(s, json.loads)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment