diff --git a/CHANGES.txt b/CHANGES.txt index 0b84b0936d8f57a13ffa43e97924859b207ec148_Q0hBTkdFUy50eHQ=..aa540f06d36087017eafcfb33ea89ff9d294ec08_Q0hBTkdFUy50eHQ= 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,5 +1,7 @@ Version 2.1.6 released 2011-XX-XX +* Prevent segfaults with deeply nested JSON documents + https://github.com/simplejson/simplejson/issues/11 * Fix compatibility with Python 2.5 https://github.com/simplejson/simplejson/issues/5 diff --git a/simplejson/_speedups.c b/simplejson/_speedups.c index 0b84b0936d8f57a13ffa43e97924859b207ec148_c2ltcGxlanNvbi9fc3BlZWR1cHMuYw==..aa540f06d36087017eafcfb33ea89ff9d294ec08_c2ltcGxlanNvbi9fc3BlZWR1cHMuYw== 100644 --- a/simplejson/_speedups.c +++ b/simplejson/_speedups.c @@ -1628,7 +1628,9 @@ */ char *str = PyString_AS_STRING(pystr); Py_ssize_t length = PyString_GET_SIZE(pystr); + PyObject *rval = NULL; + int fallthrough = 0; if (idx >= length) { PyErr_SetNone(PyExc_StopIteration); return NULL; } @@ -1631,7 +1633,9 @@ if (idx >= length) { PyErr_SetNone(PyExc_StopIteration); return NULL; } + if (Py_EnterRecursiveCall(" while decoding a JSON document")) + return NULL; switch (str[idx]) { case '"': /* string */ @@ -1635,7 +1639,7 @@ switch (str[idx]) { case '"': /* string */ - return scanstring_str(pystr, idx + 1, + rval = scanstring_str(pystr, idx + 1, PyString_AS_STRING(s->encoding), PyObject_IsTrue(s->strict), next_idx_ptr); @@ -1639,5 +1643,6 @@ PyString_AS_STRING(s->encoding), PyObject_IsTrue(s->strict), next_idx_ptr); + break; case '{': /* object */ @@ -1642,5 +1647,6 @@ case '{': /* object */ - return _parse_object_str(s, pystr, idx + 1, next_idx_ptr); + rval = _parse_object_str(s, pystr, idx + 1, next_idx_ptr); + break; case '[': /* array */ @@ -1645,8 +1651,9 @@ case '[': /* array */ - return _parse_array_str(s, pystr, idx + 1, next_idx_ptr); + rval = _parse_array_str(s, pystr, idx + 1, next_idx_ptr); + break; case 'n': /* null */ if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') { Py_INCREF(Py_None); *next_idx_ptr = idx + 4; @@ -1648,7 +1655,7 @@ case 'n': /* null */ if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') { Py_INCREF(Py_None); *next_idx_ptr = idx + 4; - return Py_None; + rval = Py_None; } @@ -1654,7 +1661,9 @@ } + else + fallthrough = 1; break; case 't': /* true */ if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') { Py_INCREF(Py_True); *next_idx_ptr = idx + 4; @@ -1655,8 +1664,8 @@ break; case 't': /* true */ if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') { Py_INCREF(Py_True); *next_idx_ptr = idx + 4; - return Py_True; + rval = Py_True; } @@ -1662,7 +1671,9 @@ } + else + fallthrough = 1; break; case 'f': /* false */ if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') { Py_INCREF(Py_False); *next_idx_ptr = idx + 5; @@ -1663,8 +1674,8 @@ break; case 'f': /* false */ if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') { Py_INCREF(Py_False); *next_idx_ptr = idx + 5; - return Py_False; + rval = Py_False; } @@ -1670,5 +1681,7 @@ } + else + fallthrough = 1; break; case 'N': /* NaN */ if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') { @@ -1671,6 +1684,6 @@ break; case 'N': /* NaN */ if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') { - return _parse_constant(s, "NaN", idx, next_idx_ptr); + rval = _parse_constant(s, "NaN", idx, next_idx_ptr); } @@ -1676,5 +1689,7 @@ } + else + fallthrough = 1; break; case 'I': /* Infinity */ if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') { @@ -1677,6 +1692,6 @@ break; case 'I': /* Infinity */ if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') { - return _parse_constant(s, "Infinity", idx, next_idx_ptr); + rval = _parse_constant(s, "Infinity", idx, next_idx_ptr); } @@ -1682,5 +1697,7 @@ } + else + fallthrough = 1; break; case '-': /* -Infinity */ if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') { @@ -1683,6 +1700,6 @@ break; case '-': /* -Infinity */ if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') { - return _parse_constant(s, "-Infinity", idx, next_idx_ptr); + rval = _parse_constant(s, "-Infinity", idx, next_idx_ptr); } @@ -1688,2 +1705,4 @@ } + else + fallthrough = 1; break; @@ -1689,3 +1708,5 @@ break; + default: + fallthrough = 1; } /* Didn't find a string, object, array, or named constant. Look for a number. */ @@ -1690,6 +1711,9 @@ } /* Didn't find a string, object, array, or named constant. Look for a number. */ - return _match_number_str(s, pystr, idx, next_idx_ptr); + if (fallthrough) + rval = _match_number_str(s, pystr, idx, next_idx_ptr); + Py_LeaveRecursiveCall(); + return rval; } static PyObject * @@ -1704,7 +1728,9 @@ */ Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); Py_ssize_t length = PyUnicode_GET_SIZE(pystr); + PyObject *rval = NULL; + int fallthrough = 0; if (idx >= length) { PyErr_SetNone(PyExc_StopIteration); return NULL; } @@ -1707,7 +1733,9 @@ if (idx >= length) { PyErr_SetNone(PyExc_StopIteration); return NULL; } + if (Py_EnterRecursiveCall(" while decoding a JSON document")) + return NULL; switch (str[idx]) { case '"': /* string */ @@ -1711,6 +1739,6 @@ switch (str[idx]) { case '"': /* string */ - return scanstring_unicode(pystr, idx + 1, + rval = scanstring_unicode(pystr, idx + 1, PyObject_IsTrue(s->strict), next_idx_ptr); @@ -1715,4 +1743,5 @@ PyObject_IsTrue(s->strict), next_idx_ptr); + break; case '{': /* object */ @@ -1717,5 +1746,6 @@ case '{': /* object */ - return _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr); + rval = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr); + break; case '[': /* array */ @@ -1720,8 +1750,9 @@ case '[': /* array */ - return _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr); + rval = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr); + break; case 'n': /* null */ if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') { Py_INCREF(Py_None); *next_idx_ptr = idx + 4; @@ -1723,7 +1754,7 @@ case 'n': /* null */ if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') { Py_INCREF(Py_None); *next_idx_ptr = idx + 4; - return Py_None; + rval = Py_None; } @@ -1729,7 +1760,9 @@ } + else + fallthrough = 1; break; case 't': /* true */ if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') { Py_INCREF(Py_True); *next_idx_ptr = idx + 4; @@ -1730,8 +1763,8 @@ break; case 't': /* true */ if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') { Py_INCREF(Py_True); *next_idx_ptr = idx + 4; - return Py_True; + rval = Py_True; } @@ -1737,7 +1770,9 @@ } + else + fallthrough = 1; break; case 'f': /* false */ if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') { Py_INCREF(Py_False); *next_idx_ptr = idx + 5; @@ -1738,8 +1773,8 @@ break; case 'f': /* false */ if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') { Py_INCREF(Py_False); *next_idx_ptr = idx + 5; - return Py_False; + rval = Py_False; } @@ -1745,5 +1780,7 @@ } + else + fallthrough = 1; break; case 'N': /* NaN */ if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') { @@ -1746,6 +1783,6 @@ break; case 'N': /* NaN */ if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') { - return _parse_constant(s, "NaN", idx, next_idx_ptr); + rval = _parse_constant(s, "NaN", idx, next_idx_ptr); } @@ -1751,5 +1788,7 @@ } + else + fallthrough = 1; break; case 'I': /* Infinity */ if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') { @@ -1752,6 +1791,6 @@ break; case 'I': /* Infinity */ if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') { - return _parse_constant(s, "Infinity", idx, next_idx_ptr); + rval = _parse_constant(s, "Infinity", idx, next_idx_ptr); } @@ -1757,5 +1796,7 @@ } + else + fallthrough = 1; break; case '-': /* -Infinity */ if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') { @@ -1758,6 +1799,6 @@ break; case '-': /* -Infinity */ if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') { - return _parse_constant(s, "-Infinity", idx, next_idx_ptr); + rval = _parse_constant(s, "-Infinity", idx, next_idx_ptr); } @@ -1763,2 +1804,4 @@ } + else + fallthrough = 1; break; @@ -1764,3 +1807,5 @@ break; + default: + fallthrough = 1; } /* Didn't find a string, object, array, or named constant. Look for a number. */ @@ -1765,6 +1810,9 @@ } /* Didn't find a string, object, array, or named constant. Look for a number. */ - return _match_number_unicode(s, pystr, idx, next_idx_ptr); + if (fallthrough) + rval = _match_number_unicode(s, pystr, idx, next_idx_ptr); + Py_LeaveRecursiveCall(); + return rval; } static PyObject * @@ -2097,57 +2145,59 @@ encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level) { /* Encode Python object obj to a JSON term, rval is a PyList */ - PyObject *newobj; - int rv; - - if (obj == Py_None || obj == Py_True || obj == Py_False) { - PyObject *cstr = _encoded_const(obj); - if (cstr == NULL) - return -1; - return _steal_list_append(rval, cstr); - } - else if (PyString_Check(obj) || PyUnicode_Check(obj)) - { - PyObject *encoded = encoder_encode_string(s, obj); - if (encoded == NULL) - return -1; - return _steal_list_append(rval, encoded); - } - else if (PyInt_Check(obj) || PyLong_Check(obj)) { - PyObject *encoded = PyObject_Str(obj); - if (encoded == NULL) - return -1; - return _steal_list_append(rval, encoded); - } - else if (PyFloat_Check(obj)) { - PyObject *encoded = encoder_encode_float(s, obj); - if (encoded == NULL) - return -1; - return _steal_list_append(rval, encoded); - } - else if (PyList_Check(obj) || PyTuple_Check(obj)) { - return encoder_listencode_list(s, rval, obj, indent_level); - } - else if (PyDict_Check(obj)) { - return encoder_listencode_dict(s, rval, obj, indent_level); - } - else if (s->use_decimal && Decimal_Check(obj)) { - PyObject *encoded = PyObject_Str(obj); - if (encoded == NULL) - return -1; - return _steal_list_append(rval, encoded); - } - else { - PyObject *ident = NULL; - if (s->markers != Py_None) { - int has_key; - ident = PyLong_FromVoidPtr(obj); - if (ident == NULL) - return -1; - has_key = PyDict_Contains(s->markers, ident); - if (has_key) { - if (has_key != -1) - PyErr_SetString(PyExc_ValueError, "Circular reference detected"); - Py_DECREF(ident); - return -1; + int rv = -1; + if (Py_EnterRecursiveCall(" while encoding a JSON document")) + return rv; + do { + if (obj == Py_None || obj == Py_True || obj == Py_False) { + PyObject *cstr = _encoded_const(obj); + if (cstr != NULL) + rv = _steal_list_append(rval, cstr); + } + else if (PyString_Check(obj) || PyUnicode_Check(obj)) + { + PyObject *encoded = encoder_encode_string(s, obj); + if (encoded != NULL) + rv = _steal_list_append(rval, encoded); + } + else if (PyInt_Check(obj) || PyLong_Check(obj)) { + PyObject *encoded = PyObject_Str(obj); + if (encoded != NULL) + rv = _steal_list_append(rval, encoded); + } + else if (PyFloat_Check(obj)) { + PyObject *encoded = encoder_encode_float(s, obj); + if (encoded != NULL) + rv = _steal_list_append(rval, encoded); + } + else if (PyList_Check(obj) || PyTuple_Check(obj)) { + rv = encoder_listencode_list(s, rval, obj, indent_level); + } + else if (PyDict_Check(obj)) { + rv = encoder_listencode_dict(s, rval, obj, indent_level); + } + else if (s->use_decimal && Decimal_Check(obj)) { + PyObject *encoded = PyObject_Str(obj); + if (encoded != NULL) + rv = _steal_list_append(rval, encoded); + } + else { + PyObject *ident = NULL; + PyObject *newobj; + if (s->markers != Py_None) { + int has_key; + ident = PyLong_FromVoidPtr(obj); + if (ident == NULL) + break; + has_key = PyDict_Contains(s->markers, ident); + if (has_key) { + if (has_key != -1) + PyErr_SetString(PyExc_ValueError, "Circular reference detected"); + Py_DECREF(ident); + break; + } + if (PyDict_SetItem(s->markers, ident, obj)) { + Py_DECREF(ident); + break; + } } @@ -2153,6 +2203,20 @@ } - if (PyDict_SetItem(s->markers, ident, obj)) { - Py_DECREF(ident); - return -1; + newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL); + if (newobj == NULL) { + Py_XDECREF(ident); + break; + } + rv = encoder_listencode_obj(s, rval, newobj, indent_level); + Py_DECREF(newobj); + if (rv) { + Py_XDECREF(ident); + rv = -1; + } + else if (ident != NULL) { + if (PyDict_DelItem(s->markers, ident)) { + Py_XDECREF(ident); + rv = -1; + } + Py_XDECREF(ident); } } @@ -2157,25 +2221,8 @@ } } - newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL); - if (newobj == NULL) { - Py_XDECREF(ident); - return -1; - } - rv = encoder_listencode_obj(s, rval, newobj, indent_level); - Py_DECREF(newobj); - if (rv) { - Py_XDECREF(ident); - return -1; - } - if (ident != NULL) { - if (PyDict_DelItem(s->markers, ident)) { - Py_XDECREF(ident); - return -1; - } - Py_XDECREF(ident); - } - return rv; - } + } while (0); + Py_LeaveRecursiveCall(); + return rv; } static int