#!/usr/bin/env python3
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
"""Benchmark compact and pretty-printed JSON serialization across libraries.

Usage: pass the fixture name as the first command-line argument; the script
loads ``data/<name>.json.xz`` from the directory next to this file, times
each library serializing it, and prints a table of per-call latencies in
milliseconds together with the pretty-print latency relative to orjson.
"""

import io
import json
import lzma
import sys
import os
from pathlib import Path
from timeit import timeit

import orjson
import simplejson
import ujson
import rapidjson
from tabulate import tabulate

# Restrict this process to CPUs 0 and 1 (presumably to reduce scheduling
# noise in the timings). NOTE(review): os.sched_setaffinity is not available
# on every platform -- confirm the benchmark only targets Linux.
os.sched_setaffinity(os.getpid(), {0, 1})

dirname = os.path.join(os.path.dirname(__file__), "data")


def read_fixture_obj(filename):
    """Load a JSON fixture from the data directory and return the parsed object.

    Files whose suffix is ``.xz`` are LZMA-decompressed before parsing;
    anything else is parsed as-is.
    """
    path = Path(dirname, filename)
    if path.suffix == ".xz":
        contents = lzma.decompress(path.read_bytes())
    else:
        contents = path.read_bytes()
    return orjson.loads(contents)


# sys.argv always contains the script name, so a fixture argument is present
# only when there are at least two entries.
if len(sys.argv) < 2:
    sys.exit(f"usage: {sys.argv[0]} FIXTURE_NAME")
filename = sys.argv[1]

data = read_fixture_obj(f"{filename}.json.xz")

headers = ("Library", "compact (ms)", "pretty (ms)", "vs. orjson")

# orjson must stay first: its pretty latency is the baseline the other
# libraries are compared against.
LIBRARIES = ("orjson", "ujson", "rapidjson", "simplejson", "json")

output_in_kib_compact = len(orjson.dumps(data)) / 1024
output_in_kib_pretty = len(orjson.dumps(data, option=orjson.OPT_INDENT_2)) / 1024

# minimum 2s runtime for orjson compact; clamp to one iteration so a fixture
# that takes longer than 2s per dump cannot yield zero iterations (which
# would make per_iter_latency divide by zero).
ITERATIONS = max(1, int(2 / (timeit(lambda: orjson.dumps(data), number=20) / 20)))

print(
    f"{output_in_kib_compact:,.0f}KiB compact, "
    f"{output_in_kib_pretty:,.0f}KiB pretty, {ITERATIONS} iterations"
)


def per_iter_latency(val):
    """Convert a total time in seconds over ITERATIONS calls to ms per call.

    Returns None when *val* is None (no measurement available).
    """
    if val is None:
        return None
    return (val * 1000) / ITERATIONS


def test_correctness(serialized):
    """Return True if *serialized* parses back (via orjson) to the fixture."""
    return orjson.loads(serialized) == data


def _format_cell(value, spec):
    """Format *value* with *spec*, or return an empty cell when it is None."""
    return "" if value is None else format(value, spec)


# Maps each library to (compact serializer, pretty serializer or None).
# Every non-orjson serializer encodes its str result to UTF-8 so that all
# libraries are timed producing bytes, as orjson does natively.
SERIALIZERS = {
    "orjson": (
        lambda: orjson.dumps(data),
        lambda: orjson.dumps(data, None, orjson.OPT_INDENT_2),
    ),
    "ujson": (
        lambda: ujson.dumps(data).encode("utf-8"),
        lambda: ujson.dumps(data, indent=2).encode("utf-8"),
    ),
    # No pretty-printing measurement is taken for rapidjson.
    "rapidjson": (
        lambda: rapidjson.dumps(data).encode("utf-8"),
        None,
    ),
    "simplejson": (
        lambda: simplejson.dumps(data).encode("utf-8"),
        lambda: simplejson.dumps(data, indent=2).encode("utf-8"),
    ),
    "json": (
        lambda: json.dumps(data).encode("utf-8"),
        lambda: json.dumps(data, indent=2).encode("utf-8"),
    ),
}

table = []
orjson_time_pretty = None
for lib_name in LIBRARIES:
    print(f"{lib_name}...")
    if lib_name not in SERIALIZERS:
        raise NotImplementedError(lib_name)
    dumps_compact, dumps_pretty = SERIALIZERS[lib_name]

    time_compact = per_iter_latency(timeit(dumps_compact, number=ITERATIONS))

    if dumps_pretty is None:
        time_pretty = None
    else:
        time_pretty = per_iter_latency(timeit(dumps_pretty, number=ITERATIONS))
        if lib_name == "orjson":
            orjson_time_pretty = time_pretty
        # A library whose pretty output does not round-trip to the fixture
        # gets no pretty timing reported.
        if not test_correctness(dumps_pretty()):
            time_pretty = None

    if lib_name == "orjson":
        compared_to_orjson = 1
    elif time_pretty is not None:
        compared_to_orjson = time_pretty / orjson_time_pretty
    else:
        compared_to_orjson = None

    table.append(
        (
            lib_name,
            _format_cell(time_compact, ",.2f"),
            _format_cell(time_pretty, ",.2f"),
            _format_cell(compared_to_orjson, ",.1f"),
        )
    )

buf = io.StringIO()
buf.write(tabulate(table, headers, tablefmt="grid") + "\n")

# Rewrite tabulate's grid output into a pipe-delimited table: dropping every
# "-" reduces the horizontal rules to runs of "+", the "=" header rule becomes
# a "-" rule, "+" becomes "|", and the now-empty five-pipe rule lines (four
# columns) are removed along with the blank lines they leave behind.
# Note that the first replace also strips any "-" occurring in cell text.
print(
    buf.getvalue()
    .replace("-", "")
    .replace("*", "-")
    .replace("=", "-")
    .replace("+", "|")
    .replace("|||||", "")
    .replace("\n\n", "\n")
)