Skip to content
Snippets Groups Projects
pyindent 3.96 KiB
Newer Older
  • Learn to ignore specific revisions
  • ijl's avatar
    ijl committed
    #!/usr/bin/env python3
    # SPDX-License-Identifier: (Apache-2.0 OR MIT)
    
    import io
    import json
    import lzma
    import sys
    import os
    from pathlib import Path
    from timeit import timeit
    
    import orjson
    import simplejson
    import ujson
    import rapidjson
    from tabulate import tabulate
    
    # Pin this process to CPUs 0 and 1 (presumably to reduce scheduling noise in
    # the timings). os.sched_setaffinity only exists on some Unix platforms, and
    # it raises OSError when none of the requested CPUs is available to the
    # process, so restrict the request to the CPUs we are actually allowed to use
    # and skip pinning when that is not possible.
    if hasattr(os, "sched_setaffinity"):
        _pinned_cpus = {0, 1} & os.sched_getaffinity(os.getpid())
        if _pinned_cpus:
            os.sched_setaffinity(os.getpid(), _pinned_cpus)


    # Directory holding the benchmark fixtures: "data" next to this script.
    dirname = os.path.join(os.path.dirname(__file__), "data")
    
    
    def read_fixture_obj(filename):
        """Load a JSON fixture from the data directory and return the parsed object.

        The file is looked up under ``dirname``. When its suffix is ``.xz`` the
        bytes are LZMA-decompressed before being parsed with ``orjson.loads``.
        """
        fixture = Path(dirname, filename)
        raw = fixture.read_bytes()
        if fixture.suffix == ".xz":
            raw = lzma.decompress(raw)
        return orjson.loads(raw)
    
    # The fixture name (without the ".json.xz" suffix) is the first command-line
    # argument. sys.argv[0] is the script path, so an argument is present only
    # when len(sys.argv) >= 2; exit with a usage message rather than an IndexError.
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} FIXTURE_NAME")
    filename = sys.argv[1]

    data = read_fixture_obj(f"{filename}.json.xz")

    # Column headings of the results table.
    headers = ("Library", "compact (ms)", "pretty (ms)", "vs. orjson")

    # Libraries to benchmark, in run order. "orjson" stays first because the
    # other rows are expressed relative to its pretty-printing time.
    LIBRARIES = ("orjson", "ujson", "rapidjson", "simplejson", "json")

    # Size of the serialized document in KiB, compact and 2-space indented.
    output_in_kib_compact = len(orjson.dumps(data)) / 1024
    output_in_kib_pretty = len(orjson.dumps(data, option=orjson.OPT_INDENT_2)) / 1024

    # minimum 2s runtime for orjson compact: estimate the per-call time from 20
    # calls, then derive how many calls fit in 2 seconds. Clamp to at least one
    # iteration so a very slow fixture cannot yield 0 and cause a division by
    # zero when per-iteration latency is computed.
    ITERATIONS = max(1, int(2 / (timeit(lambda: orjson.dumps(data), number=20) / 20)))

    print(f"{output_in_kib_compact:,.0f}KiB compact, {output_in_kib_pretty:,.0f}KiB pretty, {ITERATIONS} iterations")
    
    def per_iter_latency(val):
        """Convert a total elapsed time in seconds into milliseconds per iteration.

        ``val`` is divided across ``ITERATIONS`` runs; ``None`` is passed through
        unchanged so that a missing measurement stays missing.
        """
        return None if val is None else (val * 1000) / ITERATIONS
    
    def test_correctness(serialized):
        """Return whether ``serialized`` parses back (via orjson) to the fixture ``data``."""
        round_tripped = orjson.loads(serialized)
        return round_tripped == data
    
    # Serializer pair per library: (compact, pretty). Each callable produces the
    # full serialized document. A pretty entry of None means the pretty benchmark
    # is not run for that library and its pretty columns are left blank.
    serializers = {
        "json": (
            lambda: json.dumps(data).encode("utf-8"),
            lambda: json.dumps(data, indent=2).encode("utf-8"),
        ),
        "simplejson": (
            lambda: simplejson.dumps(data).encode("utf-8"),
            lambda: simplejson.dumps(data, indent=2).encode("utf-8"),
        ),
        "ujson": (
            lambda: ujson.dumps(data).encode("utf-8"),
            lambda: ujson.dumps(data, indent=2).encode("utf-8"),
        ),
        "rapidjson": (
            lambda: rapidjson.dumps(data),
            None,
        ),
        "orjson": (
            lambda: orjson.dumps(data),
            lambda: orjson.dumps(data, None, orjson.OPT_INDENT_2),
        ),
    }

    table = []
    for lib_name in LIBRARIES:
        print(f"{lib_name}...")
        if lib_name not in serializers:
            raise NotImplementedError
        dump_compact, dump_pretty = serializers[lib_name]

        # Time the compact run first, then the pretty run, then verify that the
        # pretty output round-trips to the original data.
        elapsed_compact = timeit(dump_compact, number=ITERATIONS)
        if dump_pretty is None:
            elapsed_pretty = None
            is_correct = False
        else:
            elapsed_pretty = timeit(dump_pretty, number=ITERATIONS)
            is_correct = test_correctness(dump_pretty())

        # The orjson pretty latency is the baseline for the "vs. orjson" column.
        if lib_name == "orjson":
            orjson_time_pretty = per_iter_latency(elapsed_pretty)

        compact_ms = per_iter_latency(elapsed_compact)
        # A pretty time is only reported when the output was verified correct.
        pretty_ms = per_iter_latency(elapsed_pretty) if is_correct else None

        if lib_name == "orjson":
            ratio = 1
        elif pretty_ms:
            ratio = pretty_ms / orjson_time_pretty
        else:
            ratio = None

        row = (
            lib_name,
            f"{compact_ms:,.2f}" if compact_ms else "",
            f"{pretty_ms:,.2f}" if pretty_ms else "",
            f"{ratio:,.1f}" if ratio else "",
        )
        table.append(row)
    
    # Render the results with tabulate's "grid" format, then rewrite the border
    # characters with an ordered series of substitutions: drop every "-", turn
    # "*" and "=" into "-", turn "+" into "|", delete runs of five "|" (what a
    # row separator of a four-column grid has become by then), and collapse the
    # resulting empty lines. Presumably this yields a Markdown-style pipe table.
    rendered = tabulate(table, headers, tablefmt="grid") + "\n"
    substitutions = (
        ("-", ""),
        ("*", "-"),
        ("=", "-"),
        ("+", "|"),
        ("|||||", ""),
        ("\n\n", "\n"),
    )
    for old, new in substitutions:
        rendered = rendered.replace(old, new)

    print(rendered)