Skip to content
Snippets Groups Projects
Commit 83db8324a4cb authored by ijl's avatar ijl
Browse files

Compare correctness

parent 3376f44bf758
Branches
No related tags found
No related merge requests found
...@@ -888,6 +888,25 @@ ...@@ -888,6 +888,25 @@
workers) and when workers) and when
multithreaded. It also uses some tests from the ultrajson library. multithreaded. It also uses some tests from the ultrajson library.
orjson is the most correct of the compared libraries. This table shows how each
library handles a combined 342 JSON fixtures from the
[JSONTestSuite](https://github.com/nst/JSONTestSuite) and
[nativejson-benchmark](https://github.com/miloyip/nativejson-benchmark) tests:
| Library | Invalid JSON fixtures not rejected | Valid JSON fixtures not deserialized |
|------------|--------------------------------------|----------------------------------------|
| orjson | 0 | 0 |
| ujson | 38 | 0 |
| rapidjson | 6 | 0 |
| simplejson | 13 | 0 |
| json | 17 | 0 |
This shows that all libraries deserialize valid JSON but only orjson
correctly rejects the given invalid JSON fixtures. Errors are largely due to
accepting invalid strings and numbers.
The table above can be reproduced using the `pycorrectness` script.
## Performance ## Performance
Serialization and deserialization performance of orjson is better than Serialization and deserialization performance of orjson is better than
......
...@@ -3,6 +3,6 @@ ...@@ -3,6 +3,6 @@
set -eou pipefail set -eou pipefail
autoflake --in-place --recursive --remove-all-unused-imports --ignore-init-module-imports . autoflake --in-place --recursive --remove-all-unused-imports --ignore-init-module-imports .
isort ./bench/*.py ./orjson.pyi ./test/*.py pydataclass pymem pysort pynumpy pynonstr isort ./bench/*.py ./orjson.pyi ./test/*.py pydataclass pymem pysort pynumpy pynonstr pycorrectness
black ./bench/*.py ./orjson.pyi ./test/*.py pydataclass pymem pysort pynumpy pynonstr black ./bench/*.py ./orjson.pyi ./test/*.py pydataclass pymem pysort pynumpy pynonstr pycorrectness
mypy --ignore-missing-imports ./bench/*.py ./orjson.pyi ./test/*.py mypy --ignore-missing-imports ./bench/*.py ./orjson.pyi ./test/*.py
#!/usr/bin/env python3
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
import collections
import io
import lzma
import os
from pathlib import Path
from tabulate import tabulate
import orjson
# Directory containing the fixture files: the "data" folder next to this script.
dirname = os.path.join(os.path.dirname(__file__), "data")
# Names of the JSON libraries under comparison; each is imported by name, and
# the list order is also the column/row order of the printed tables.
LIBRARIES = ["orjson", "ujson", "rapidjson", "simplejson", "json"]
def read_fixture_bytes(filename, subdir=None):
    """Return the raw bytes of a fixture file stored under ``dirname``.

    ``filename`` is looked up directly in ``dirname``, or in
    ``dirname/subdir`` when ``subdir`` is given. Files whose suffix is
    ``.xz`` are transparently LZMA-decompressed before being returned.
    """
    base = Path(dirname) if subdir is None else Path(dirname, subdir)
    path = base / filename
    raw = path.read_bytes()
    # Compressed fixtures are identified purely by their file extension.
    return lzma.decompress(raw) if path.suffix == ".xz" else raw
# Fixture contents keyed by filename, one mapping per fixture collection.
# The directories are listed relative to ``dirname`` (the same base that
# read_fixture_bytes() reads from) rather than the current working directory,
# so the script behaves the same no matter where it is launched from.
PARSING = {
    filename: read_fixture_bytes(filename, "parsing")
    for filename in os.listdir(os.path.join(dirname, "parsing"))
}
JSONCHECKER = {
    filename: read_fixture_bytes(filename, "jsonchecker")
    for filename in os.listdir(os.path.join(dirname, "jsonchecker"))
}
# RESULTS[filename][libname] -> bool outcome of the check for that pair.
RESULTS = collections.defaultdict(dict)
# Lazily filled cache of fixture contents used by read_fixture().
# NOTE: entries are keyed by filename only, so two fixtures with the same name
# in different subdirectories would share one cache slot.
BYTES_CACHE = {}


def read_fixture(filename, subdir=None):
    """Return the bytes of a fixture, reading it from disk at most once.

    The first request for ``filename`` loads it through
    ``read_fixture_bytes(filename, subdir)`` and stores the result in
    ``BYTES_CACHE``; later requests for the same filename are served from
    the cache.
    """
    if filename not in BYTES_CACHE:
        BYTES_CACHE[filename] = read_fixture_bytes(filename, subdir)
    return BYTES_CACHE[filename]
def test_passed(library, fixture):
    """Return True if ``library`` deserializes a valid fixture correctly.

    The fixture (bytes) is loaded twice, once as bytes and once decoded as
    UTF-8 text, and each result must equal what ``orjson.loads`` produces for
    the same input. Any exception raised along the way counts as a failure.
    """
    try:
        bytes_ok = library.loads(fixture) == orjson.loads(fixture)
        str_ok = library.loads(fixture.decode("utf-8")) == orjson.loads(
            fixture.decode("utf-8")
        )
    except Exception:
        # A library that cannot load valid input in either form has failed.
        return False
    return all((bytes_ok, str_ok))
def test_failed(library, fixture):
rejected_as_bytes = False
try:
library.loads(fixture)
except Exception:
rejected_as_bytes = True
rejected_as_str = False
try:
library.loads(fixture.decode("utf-8"))
except Exception:
rejected_as_str = True
return rejected_as_bytes and rejected_as_str
# Per-library tally of fixtures that should have passed but did not.
MISTAKEN_PASSES = dict.fromkeys(LIBRARIES, 0)
# Per-library tally of fixtures that should have been rejected but were not.
MISTAKEN_FAILS = dict.fromkeys(LIBRARIES, 0)
# Fixtures whose names start with "fail" but which are nevertheless treated
# as valid JSON that must deserialize.
PASS_WHITELIST = ("fail01.json", "fail18.json")


def should_pass(filename):
    """Return True if the fixture named ``filename`` is expected to load.

    A fixture is expected to pass when it is explicitly whitelisted or when
    its name starts with ``y_`` or ``pass``.
    """
    if filename in PASS_WHITELIST:
        return True
    return filename.startswith(("y_", "pass"))
def should_fail(filename):
    """Return True if the fixture named ``filename`` is expected to be rejected.

    That is the case when the name starts with ``n_``, ``i_string``,
    ``i_object`` or ``fail`` and the fixture is not listed in
    ``PASS_WHITELIST``.
    """
    rejected_prefixes = ("n_", "i_string", "i_object", "fail")
    return filename.startswith(rejected_prefixes) and filename not in PASS_WHITELIST
# Run every fixture through every library. Each outcome is stored in
# RESULTS[filename][libname]; a wrong outcome also bumps the library's
# MISTAKEN_PASSES / MISTAKEN_FAILS counter.
for libname in LIBRARIES:
    library = __import__(libname)
    for fixtures in (PARSING, JSONCHECKER):
        for filename, fixture in fixtures.items():
            # Pick the check to run and the tally it feeds from the filename.
            if should_pass(filename):
                check, tally = test_passed, MISTAKEN_PASSES
            elif should_fail(filename):
                check, tally = test_failed, MISTAKEN_FAILS
            elif filename.startswith("i_"):
                # Remaining "i_" fixtures are neither expected to pass nor to
                # fail, so they are not scored.
                continue
            else:
                raise NotImplementedError
            res = check(library, fixture)
            RESULTS[filename][libname] = res
            if not res:
                tally[libname] += 1
# Per-fixture report: one row per fixture with an "ok"/"fail" cell for each
# library that was scored on it. Unscored fixtures yield a row holding only
# the filename.
FILENAMES = sorted([*PARSING, *JSONCHECKER])
tab_results = []
for filename in FILENAMES:
    outcomes = RESULTS[filename]
    entry = [filename]
    entry.extend(
        "ok" if outcomes[libname] else "fail"
        for libname in LIBRARIES
        if libname in outcomes
    )
    tab_results.append(entry)

# The explicit trailing newline plus print()'s own leaves a blank line after
# the table.
print(tabulate(tab_results, ["Fixture"] + LIBRARIES, tablefmt="github") + "\n")
# Summary report: one row per library with its two error tallies.
failure_results = []
for libname in LIBRARIES:
    failure_results.append(
        [libname, MISTAKEN_FAILS[libname], MISTAKEN_PASSES[libname]]
    )

summary_headers = [
    "Library",
    "Invalid JSON fixtures not rejected",
    "Valid JSON fixtures not deserialized",
]
# The explicit trailing newline plus print()'s own leaves a blank line after
# the table.
print(tabulate(failure_results, summary_headers, tablefmt="github") + "\n")

# Only rows that carry at least one library result count as tested fixtures.
num_results = sum(1 for each in tab_results if len(each) > 1)
print(f"{num_results} fixtures tested")
...@@ -22,8 +22,6 @@ ...@@ -22,8 +22,6 @@
@dataclass @dataclass
class EmptyDataclassSlots: class EmptyDataclassSlots:
__slots__ = () __slots__ = ()
pass
@dataclass @dataclass
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment