diff --git a/CHANGES.txt b/CHANGES.txt index a2215e13a50cb17860f03b8907bd79011752da4e_Q0hBTkdFUy50eHQ=..f22c02e562846ff23d9b4654ea90d94775b6c905_Q0hBTkdFUy50eHQ= 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,3 +1,9 @@ +Version 3.0.1 released 2013-01-01 + +* Add accumulator optimization to encoder, equivalent to the usage of + `_Py_Accu` in the Python 3.3 json library. Only relevant if encoding + very large JSON documents. + Version 3.0.0 released 2012-12-30 * Python 3.3 is now supported, thanks to Vinay Sajip #8 diff --git a/conf.py b/conf.py index a2215e13a50cb17860f03b8907bd79011752da4e_Y29uZi5weQ==..f22c02e562846ff23d9b4654ea90d94775b6c905_Y29uZi5weQ== 100644 --- a/conf.py +++ b/conf.py @@ -44,7 +44,7 @@ # The short X.Y version. version = '3.0' # The full version, including alpha/beta/rc tags. -release = '3.0.0' +release = '3.0.1' # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: diff --git a/setup.py b/setup.py index a2215e13a50cb17860f03b8907bd79011752da4e_c2V0dXAucHk=..f22c02e562846ff23d9b4654ea90d94775b6c905_c2V0dXAucHk= 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ DistutilsPlatformError IS_PYPY = hasattr(sys, 'pypy_translation_info') -VERSION = '3.0.0' +VERSION = '3.0.1' DESCRIPTION = "Simple, fast, extensible JSON encoder/decoder for Python" LONG_DESCRIPTION = open('README.rst', 'r').read() diff --git a/simplejson/__init__.py b/simplejson/__init__.py index a2215e13a50cb17860f03b8907bd79011752da4e_c2ltcGxlanNvbi9fX2luaXRfXy5weQ==..f22c02e562846ff23d9b4654ea90d94775b6c905_c2ltcGxlanNvbi9fX2luaXRfXy5weQ== 100644 --- a/simplejson/__init__.py +++ b/simplejson/__init__.py @@ -99,7 +99,7 @@ Expecting property name: line 1 column 2 (char 2) """ from __future__ import absolute_import -__version__ = '3.0.0' +__version__ = '3.0.1' __all__ = [ 'dump', 'dumps', 'load', 'loads', 'JSONDecoder', 'JSONDecodeError', 'JSONEncoder', diff --git a/simplejson/_speedups.c b/simplejson/_speedups.c index 
a2215e13a50cb17860f03b8907bd79011752da4e_c2ltcGxlanNvbi9fc3BlZWR1cHMuYw==..f22c02e562846ff23d9b4654ea90d94775b6c905_c2ltcGxlanNvbi9fc3BlZWR1cHMuYw== 100644 --- a/simplejson/_speedups.c +++ b/simplejson/_speedups.c @@ -95,6 +95,24 @@ static PyTypeObject PyScannerType; static PyTypeObject PyEncoderType; +#undef small /* defined by some Windows headers */ + +typedef PyObject *(*joinerfunc)(PyObject *); +typedef struct { + PyObject *large; /* A list of previously accumulated large strings */ + PyObject *small; /* Pending small strings */ + joinerfunc joiner; +} JSON_Accu; + +static int +JSON_Accu_Init(JSON_Accu *acc); +static int +JSON_Accu_Accumulate(JSON_Accu *acc, PyObject *unicode); +static PyObject * +JSON_Accu_FinishAsList(JSON_Accu *acc); +static void +JSON_Accu_Destroy(JSON_Accu *acc); + typedef struct _PyScannerObject { PyObject_HEAD PyObject *encoding; @@ -157,6 +175,8 @@ }; static PyObject * +join_list_unicode(PyObject *lst); +static PyObject * JSON_ParseEncoding(PyObject *encoding); static PyObject * JSON_UnicodeFromChar(JSON_UNICHR c); @@ -174,6 +194,8 @@ py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr); #if PY_MAJOR_VERSION < 3 static PyObject * +join_list_string(PyObject *lst); +static PyObject * scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr); static PyObject * scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr); @@ -205,5 +227,5 @@ static PyObject * encoder_stringify_key(PyEncoderObject *s, PyObject *key); static int -encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level); +encoder_listencode_list(PyEncoderObject *s, JSON_Accu *rval, PyObject *seq, Py_ssize_t indent_level); static int @@ -209,3 +231,3 @@ static int -encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level); +encoder_listencode_obj(PyEncoderObject *s, JSON_Accu *rval, PyObject *obj, 
Py_ssize_t indent_level); static int @@ -211,5 +233,5 @@ static int -encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level); +encoder_listencode_dict(PyEncoderObject *s, JSON_Accu *rval, PyObject *dct, Py_ssize_t indent_level); static PyObject * _encoded_const(PyObject *obj); static void @@ -233,6 +255,97 @@ #define MIN_EXPANSION 6 static int +JSON_Accu_Init(JSON_Accu *acc) +{ + /* Lazily allocated */ + acc->large = NULL; + acc->small = PyList_New(0); + if (acc->small == NULL) + return -1; +#if PY_MAJOR_VERSION >= 3 + acc->joiner = join_list_unicode; +#else /* PY_MAJOR_VERSION >= 3 */ + acc->joiner = join_list_string; +#endif /* PY_MAJOR_VERSION < 3 */ + return 0; +} + +static int +flush_accumulator(JSON_Accu *acc) +{ + Py_ssize_t nsmall = PyList_GET_SIZE(acc->small); + if (nsmall) { + int ret; + PyObject *joined; + if (acc->large == NULL) { + acc->large = PyList_New(0); + if (acc->large == NULL) + return -1; + } + joined = acc->joiner(acc->small); + if (joined == NULL) + return -1; + if (PyList_SetSlice(acc->small, 0, nsmall, NULL)) { + Py_DECREF(joined); + return -1; + } + ret = PyList_Append(acc->large, joined); + Py_DECREF(joined); + return ret; + } + return 0; +} + +static int +JSON_Accu_Accumulate(JSON_Accu *acc, PyObject *unicode) +{ + Py_ssize_t nsmall; +#if PY_MAJOR_VERSION >= 3 + assert(PyUnicode_Check(unicode)); +#else /* PY_MAJOR_VERSION >= 3 */ + assert(JSON_ASCII_Check(unicode) || PyUnicode_Check(unicode)); +#endif /* PY_MAJOR_VERSION < 3 */ + + if (PyList_Append(acc->small, unicode)) + return -1; + nsmall = PyList_GET_SIZE(acc->small); + /* Each item in a list of unicode objects has an overhead (in 64-bit + * builds) of: + * - 8 bytes for the list slot + * - 56 bytes for the header of the unicode object + * that is, 64 bytes. 100000 such objects waste more than 6MB + * compared to a single concatenated string. 
+ */ + if (nsmall < 100000) + return 0; + return flush_accumulator(acc); +} + +static PyObject * +JSON_Accu_FinishAsList(JSON_Accu *acc) +{ + int ret; + PyObject *res; + + ret = flush_accumulator(acc); + Py_CLEAR(acc->small); + if (ret) { + Py_CLEAR(acc->large); + return NULL; + } + res = acc->large; + acc->large = NULL; + return res; +} + +static void +JSON_Accu_Destroy(JSON_Accu *acc) +{ + Py_CLEAR(acc->small); + Py_CLEAR(acc->large); +} + +static int IS_DIGIT(JSON_UNICHR c) { return c >= '0' && c <= '9'; @@ -1983,8 +2096,6 @@ PyErr_SetNone(PyExc_StopIteration); return NULL; } - if (Py_EnterRecursiveCall(" while decoding a JSON document")) - return NULL; switch (str[idx]) { case '"': /* string */ @@ -1995,4 +2106,7 @@ break; case '{': /* object */ + if (Py_EnterRecursiveCall(" while decoding a JSON object " + "from a string")) + return NULL; rval = _parse_object_str(s, pystr, idx + 1, next_idx_ptr); @@ -1998,4 +2112,5 @@ rval = _parse_object_str(s, pystr, idx + 1, next_idx_ptr); + Py_LeaveRecursiveCall(); break; case '[': /* array */ @@ -1999,4 +2114,7 @@ break; case '[': /* array */ + if (Py_EnterRecursiveCall(" while decoding a JSON array " + "from a string")) + return NULL; rval = _parse_array_str(s, pystr, idx + 1, next_idx_ptr); @@ -2002,4 +2120,5 @@ rval = _parse_array_str(s, pystr, idx + 1, next_idx_ptr); + Py_LeaveRecursiveCall(); break; case 'n': /* null */ @@ -2061,7 +2180,6 @@ /* Didn't find a string, object, array, or named constant. Look for a number. 
*/ if (fallthrough) rval = _match_number_str(s, pystr, idx, next_idx_ptr); - Py_LeaveRecursiveCall(); return rval; } #endif /* PY_MAJOR_VERSION < 3 */ @@ -2086,8 +2204,6 @@ PyErr_SetNone(PyExc_StopIteration); return NULL; } - if (Py_EnterRecursiveCall(" while decoding a JSON document")) - return NULL; switch (PyUnicode_READ(kind, str, idx)) { case '"': /* string */ @@ -2097,4 +2213,7 @@ break; case '{': /* object */ + if (Py_EnterRecursiveCall(" while decoding a JSON object " + "from a unicode string")) + return NULL; rval = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr); @@ -2100,4 +2219,5 @@ rval = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr); + Py_LeaveRecursiveCall(); break; case '[': /* array */ @@ -2101,4 +2221,7 @@ break; case '[': /* array */ + if (Py_EnterRecursiveCall(" while decoding a JSON array " + "from a unicode string")) + return NULL; rval = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr); @@ -2104,4 +2227,5 @@ rval = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr); + Py_LeaveRecursiveCall(); break; case 'n': /* null */ @@ -2481,6 +2605,5 @@ /* Python callable interface to encode_listencode_obj */ static char *kwlist[] = {"obj", "_current_indent_level", NULL}; PyObject *obj; - PyObject *rval; Py_ssize_t indent_level; PyEncoderObject *s; @@ -2485,7 +2608,8 @@ Py_ssize_t indent_level; PyEncoderObject *s; + JSON_Accu rval; assert(PyEncoder_Check(self)); s = (PyEncoderObject *)self; if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:_iterencode", kwlist, &obj, _convertPyInt_AsSsize_t, &indent_level)) return NULL; @@ -2487,8 +2611,7 @@ assert(PyEncoder_Check(self)); s = (PyEncoderObject *)self; if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:_iterencode", kwlist, &obj, _convertPyInt_AsSsize_t, &indent_level)) return NULL; - rval = PyList_New(0); - if (rval == NULL) + if (JSON_Accu_Init(&rval)) return NULL; @@ -2494,5 +2617,5 @@ return NULL; - if (encoder_listencode_obj(s, rval, obj, indent_level)) { - Py_DECREF(rval); + if 
(encoder_listencode_obj(s, 
&rval, obj, indent_level)) { + JSON_Accu_Destroy(&rval); return NULL; } @@ -2497,6 +2620,6 @@ return NULL; } - return rval; + return JSON_Accu_FinishAsList(&rval); } static PyObject * @@ -2583,6 +2706,6 @@ } static int -_steal_list_append(PyObject *lst, PyObject *stolen) +_steal_accumulate(JSON_Accu *lst, PyObject *stolen) { /* Append stolen and then decrement its reference count */ @@ -2587,8 +2710,8 @@ { /* Append stolen and then decrement its reference count */ - int rval = PyList_Append(lst, stolen); + int rval = JSON_Accu_Accumulate(lst, stolen); Py_DECREF(stolen); return rval; } static int @@ -2590,9 +2713,9 @@ Py_DECREF(stolen); return rval; } static int -encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level) +encoder_listencode_obj(PyEncoderObject *s, JSON_Accu *rval, PyObject *obj, Py_ssize_t indent_level) { - /* Encode Python object obj to a JSON term, rval is a PyList */ + /* Encode Python object obj to a JSON term */ int rv = -1; @@ -2602,9 +2725,9 @@ if (obj == Py_None || obj == Py_True || obj == Py_False) { PyObject *cstr = _encoded_const(obj); if (cstr != NULL) - rv = _steal_list_append(rval, cstr); + rv = _steal_accumulate(rval, cstr); } else if (PyString_Check(obj) || PyUnicode_Check(obj)) { PyObject *encoded = encoder_encode_string(s, obj); if (encoded != NULL) @@ -2606,9 +2729,9 @@ } else if (PyString_Check(obj) || PyUnicode_Check(obj)) { PyObject *encoded = encoder_encode_string(s, obj); if (encoded != NULL) - rv = _steal_list_append(rval, encoded); + rv = _steal_accumulate(rval, encoded); } else if (PyInt_Check(obj) || PyLong_Check(obj)) { PyObject *encoded = PyObject_Str(obj); @@ -2618,9 +2741,9 @@ if (encoded == NULL) break; } - rv = _steal_list_append(rval, encoded); + rv = _steal_accumulate(rval, encoded); } } else if (PyFloat_Check(obj)) { PyObject *encoded = encoder_encode_float(s, obj); if (encoded != NULL) @@ -2622,9 +2745,9 @@ } } else if (PyFloat_Check(obj)) { PyObject *encoded = 
encoder_encode_float(s, obj); if (encoded != NULL) - rv = 
_steal_list_append(rval, encoded); + rv = _steal_accumulate(rval, encoded); } else if (s->namedtuple_as_object && _is_namedtuple(obj)) { PyObject *newobj = PyObject_CallMethod(obj, "_asdict", NULL); @@ -2642,7 +2765,7 @@ else if (s->use_decimal && PyObject_TypeCheck(obj, (PyTypeObject *)s->Decimal)) { PyObject *encoded = PyObject_Str(obj); if (encoded != NULL) - rv = _steal_list_append(rval, encoded); + rv = _steal_accumulate(rval, encoded); } else { PyObject *ident = NULL; @@ -2689,5 +2812,5 @@ } static int -encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level) +encoder_listencode_dict(PyEncoderObject *s, JSON_Accu *rval, PyObject *dct, Py_ssize_t indent_level) { @@ -2693,5 +2816,5 @@ { - /* Encode Python dict dct a JSON term, rval is a PyList */ + /* Encode Python dict dct a JSON term */ static PyObject *open_dict = NULL; static PyObject *close_dict = NULL; static PyObject *empty_dict = NULL; @@ -2711,7 +2834,7 @@ return -1; } if (PyDict_Size(dct) == 0) - return PyList_Append(rval, empty_dict); + return JSON_Accu_Accumulate(rval, empty_dict); if (s->markers != Py_None) { int has_key; @@ -2729,7 +2852,7 @@ } } - if (PyList_Append(rval, open_dict)) + if (JSON_Accu_Accumulate(rval, open_dict)) goto bail; if (s->indent != Py_None) { @@ -2775,7 +2898,7 @@ } } if (idx) { - if (PyList_Append(rval, s->item_separator)) + if (JSON_Accu_Accumulate(rval, s->item_separator)) goto bail; } if (encoded == NULL) { @@ -2786,7 +2909,7 @@ if (PyDict_SetItem(s->key_memo, key, encoded)) goto bail; } - if (PyList_Append(rval, encoded)) { + if (JSON_Accu_Accumulate(rval, encoded)) { goto bail; } Py_CLEAR(encoded); @@ -2790,7 +2913,7 @@ goto bail; } Py_CLEAR(encoded); - if (PyList_Append(rval, s->key_separator)) + if (JSON_Accu_Accumulate(rval, s->key_separator)) goto bail; if (encoder_listencode_obj(s, rval, value, indent_level)) goto bail; @@ -2812,7 +2935,7 @@ yield '\n' + (_indent * _current_indent_level) */ } - if (PyList_Append(rval, 
close_dict)) + if (JSON_Accu_Accumulate(rval, close_dict)) goto bail; return 0; @@ -2827,5 +2950,5 @@ static int -encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level) +encoder_listencode_list(PyEncoderObject *s, JSON_Accu *rval, PyObject *seq, Py_ssize_t indent_level) { @@ -2831,5 +2954,5 @@ { - /* Encode Python list seq to a JSON term, rval is a PyList */ + /* Encode Python list seq to a JSON term */ static PyObject *open_array = NULL; static PyObject *close_array = NULL; static PyObject *empty_array = NULL; @@ -2851,7 +2974,7 @@ if (is_true == -1) return -1; else if (is_true == 0) - return PyList_Append(rval, empty_array); + return JSON_Accu_Accumulate(rval, empty_array); if (s->markers != Py_None) { int has_key; @@ -2873,7 +2996,7 @@ if (iter == NULL) goto bail; - if (PyList_Append(rval, open_array)) + if (JSON_Accu_Accumulate(rval, open_array)) goto bail; if (s->indent != Py_None) { /* TODO: DOES NOT RUN */ @@ -2886,7 +3009,7 @@ } while ((obj = PyIter_Next(iter))) { if (i) { - if (PyList_Append(rval, s->item_separator)) + if (JSON_Accu_Accumulate(rval, s->item_separator)) goto bail; } if (encoder_listencode_obj(s, rval, obj, indent_level)) @@ -2909,7 +3032,7 @@ yield '\n' + (_indent * _current_indent_level) */ } - if (PyList_Append(rval, close_array)) + if (JSON_Accu_Accumulate(rval, close_array)) goto bail; return 0; diff --git a/simplejson/tests/test_dump.py b/simplejson/tests/test_dump.py index a2215e13a50cb17860f03b8907bd79011752da4e_c2ltcGxlanNvbi90ZXN0cy90ZXN0X2R1bXAucHk=..f22c02e562846ff23d9b4654ea90d94775b6c905_c2ltcGxlanNvbi90ZXN0cy90ZXN0X2R1bXAucHk= 100644 --- a/simplejson/tests/test_dump.py +++ b/simplejson/tests/test_dump.py @@ -114,3 +114,8 @@ s = json.dumps([0, 1, 2], indent=AwesomeInt(3)) self.assertEqual(s, '[\n 0,\n 1,\n 2\n]') + + def test_accumulator(self): + # the C API uses an accumulator that collects after 100,000 appends + lst = [0] * 100000 + self.assertEqual(json.loads(json.dumps(lst)), 
lst)