diff --git a/Cargo.lock b/Cargo.lock index a9eff9f72d1e62ae90f6510c38eff135fdd64790_Q2FyZ28ubG9jaw==..bfde36265adde4fee511cd82e2ffe6e1420db72a_Q2FyZ28ubG9jaw== 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -60,5 +60,5 @@ [[package]] name = "instant" -version = "0.1.5" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -64,5 +64,5 @@ source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69da7ce1490173c2bf4d26bc8be429aaeeaf4cce6c4b970b7949651fa17655fe" +checksum = "5b141fdc7836c525d4d594027d318c84161ca17aaf8113ab1f81ab93ae897485" [[package]] name = "itoa" @@ -85,5 +85,5 @@ [[package]] name = "libc" -version = "0.2.71" +version = "0.2.72" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -89,5 +89,5 @@ source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9457b06509d27052635f90d6466700c65095fdf75409b3fbdd903e988b886f49" +checksum = "a9f8082297d534141b30c8d39e9b1773713ab50fdbe4ff30f750d063b3bfd701" [[package]] name = "lock_api" @@ -91,5 +91,5 @@ [[package]] name = "lock_api" -version = "0.4.0" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -95,5 +95,5 @@ source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de302ce1fe7482db13738fbaf2e21cfb06a986b89c0bf38d88abf16681aada4e" +checksum = "28247cc5a5be2f05fbcd76dd0cf2c7d3b5400cb978a28042abcd4fa0b3f8261c" dependencies = [ "scopeguard", ] @@ -169,5 +169,5 @@ [[package]] name = "rand_core" -version = "0.4.2" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -173,5 +173,5 @@ source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c33a3c44ca05fa6f1807d8e6743f3824e8509beca625669633be0acbdf509dc" +checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19" [[package]] name = "redox_syscall" @@ -175,5 +175,5 @@ [[package]] name = "redox_syscall" -version = "0.1.56" +version = "0.1.57" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -179,5 +179,5 @@ source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2439c63f3f6139d1b57529d16bc3b8bb855230c8efcc5d3a896c8bea7c3b1e84" +checksum = "41cc0f7e4d5d4544e8861606a285bb08d3e70712ccc7d2b84d7c0ccfaf4b05ce" [[package]] name = "ryu" @@ -209,5 +209,5 @@ [[package]] name = "smallvec" -version = "1.4.0" +version = "1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -213,5 +213,5 @@ source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7cb5678e1615754284ec264d9bb5b4c27d2018577fd90ac0ceb578591ed5ee4" +checksum = "3757cb9d89161a2f24e1cf78efa0c1fcff485d18e3f55e0aa3480824ddaa0f3f" [[package]] name = "static_assertions" @@ -243,5 +243,5 @@ [[package]] name = "wyhash" -version = "0.3.0" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -247,5 +247,5 @@ source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "782a50f48ac4336916227cd199c61c7b42f38d0ad705421b49eb12c74c53ae00" +checksum = "0fe26121db27575e4fb30ceded9806fbfe0edb489f170a17506d9ad0b1aca41c" dependencies = [ "rand_core", ] diff --git a/Cargo.toml b/Cargo.toml index a9eff9f72d1e62ae90f6510c38eff135fdd64790_Q2FyZ28udG9tbA==..bfde36265adde4fee511cd82e2ffe6e1420db72a_Q2FyZ28udG9tbA== 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -60,7 +60,7 @@ serde = { version = "1", default_features = false } serde_json = { path = "./json", default_features = false, features = ["std"] } smallvec = { version = "1", default_features = false, features = ["const_generics", "union", "specialization", "write"] } -wyhash = { version = "0.3" } +wyhash = { version = "0.4" } [profile.release] codegen-units = 1 diff --git a/bench/benchmark_dumps.py b/bench/benchmark_dumps.py index a9eff9f72d1e62ae90f6510c38eff135fdd64790_YmVuY2gvYmVuY2htYXJrX2R1bXBzLnB5..bfde36265adde4fee511cd82e2ffe6e1420db72a_YmVuY2gvYmVuY2htYXJrX2R1bXBzLnB5 100644 --- a/bench/benchmark_dumps.py +++ b/bench/benchmark_dumps.py @@ -4,8 +4,7 @@ from json import dumps as _json_dumps from json import loads as json_loads -from orjson import dumps as _orjson_dumps from rapidjson import dumps as _rapidjson_dumps from simplejson import dumps as _simplejson_dumps from ujson import dumps as _ujson_dumps @@ -8,7 +7,9 @@ from rapidjson import dumps as _rapidjson_dumps from simplejson import dumps as _simplejson_dumps from ujson import dumps as _ujson_dumps +from orjson import dumps as _orjson_dumps + from .util import read_fixture_obj diff --git a/bench/benchmark_loads.py b/bench/benchmark_loads.py index a9eff9f72d1e62ae90f6510c38eff135fdd64790_YmVuY2gvYmVuY2htYXJrX2xvYWRzLnB5..bfde36265adde4fee511cd82e2ffe6e1420db72a_YmVuY2gvYmVuY2htYXJrX2xvYWRzLnB5 100644 --- a/bench/benchmark_loads.py +++ b/bench/benchmark_loads.py @@ -4,8 +4,6 @@ from json import dumps as json_dumps from json import loads as json_loads -from orjson import dumps as orjson_dumps -from orjson import loads as orjson_loads from rapidjson import dumps as rapidjson_dumps from rapidjson import loads as rapidjson_loads from simplejson import dumps as simplejson_dumps @@ -13,6 +11,9 @@ from ujson import dumps as ujson_dumps from ujson import loads as ujson_loads +from orjson import dumps as orjson_dumps +from orjson import loads as orjson_loads + from .util import read_fixture_str diff --git a/pydataclass b/pydataclass index a9eff9f72d1e62ae90f6510c38eff135fdd64790_cHlkYXRhY2xhc3M=..bfde36265adde4fee511cd82e2ffe6e1420db72a_cHlkYXRhY2xhc3M= 100755 --- a/pydataclass +++ b/pydataclass @@ -8,9 +8,8 @@ from timeit import timeit from typing import List -import orjson import rapidjson import simplejson import ujson from tabulate import tabulate @@ -12,8 +11,10 @@ import rapidjson import simplejson import ujson from tabulate import tabulate +import orjson + os.sched_setaffinity(os.getpid(), {0, 1}) diff --git a/pynonstr b/pynonstr index a9eff9f72d1e62ae90f6510c38eff135fdd64790_cHlub25zdHI=..bfde36265adde4fee511cd82e2ffe6e1420db72a_cHlub25zdHI= 100755 --- a/pynonstr +++ b/pynonstr @@ -9,9 +9,8 @@ from time import mktime from timeit import timeit -import orjson import rapidjson import simplejson import ujson from tabulate import tabulate @@ -13,8 +12,10 @@ import rapidjson import simplejson import ujson from tabulate import tabulate +import orjson + os.sched_setaffinity(os.getpid(), {0, 1}) data_as_obj = [] diff --git a/pynumpy b/pynumpy index a9eff9f72d1e62ae90f6510c38eff135fdd64790_cHludW1weQ==..bfde36265adde4fee511cd82e2ffe6e1420db72a_cHludW1weQ== 100755 --- a/pynumpy +++ b/pynumpy @@ -10,10 +10,9 @@ from timeit import timeit import numpy -import orjson import psutil import rapidjson import simplejson from memory_profiler import memory_usage from tabulate import tabulate @@ -14,9 +13,11 @@ import psutil import rapidjson import simplejson from memory_profiler import memory_usage from tabulate import tabulate +import orjson + os.sched_setaffinity(os.getpid(), {0, 1}) diff --git a/pysort b/pysort index a9eff9f72d1e62ae90f6510c38eff135fdd64790_cHlzb3J0..bfde36265adde4fee511cd82e2ffe6e1420db72a_cHlzb3J0 100755 --- a/pysort +++ b/pysort @@ -8,9 +8,8 @@ from pathlib import Path from timeit import timeit -import orjson import rapidjson import simplejson import ujson from tabulate import tabulate @@ -12,8 +11,10 @@ import rapidjson import simplejson import ujson from tabulate import tabulate +import orjson + os.sched_setaffinity(os.getpid(), {0, 1}) diff --git a/src/unicode.rs b/src/unicode.rs index a9eff9f72d1e62ae90f6510c38eff135fdd64790_c3JjL3VuaWNvZGUucnM=..bfde36265adde4fee511cd82e2ffe6e1420db72a_c3JjL3VuaWNvZGUucnM= 100644 --- a/src/unicode.rs +++ b/src/unicode.rs @@ -33,6 +33,15 @@ const STATE_COMPACT_ASCII: u32 = 0b00000000000000000000000001100000; const STATE_COMPACT: u32 = 0b00000000000000000000000000100000; +fn is_four_byte(buf: &str) -> bool { + for &each in buf.as_bytes() { + if unlikely!(each >= 240) { + return true; + } + } + false +} + enum PyUnicodeKind { Ascii, OneByte, @@ -40,10 +49,9 @@ FourByte, } -fn find_str_kind(buf: &str) -> PyUnicodeKind { - if encoding_rs::mem::is_ascii(buf.as_bytes()) { - // needed to optimize ASCII case +fn find_str_kind(buf: &str, num_chars: usize) -> PyUnicodeKind { + if buf.len() == num_chars { PyUnicodeKind::Ascii } else if unlikely!(encoding_rs::mem::is_str_latin1(buf)) { // fails fast, no obvious effect on CJK PyUnicodeKind::OneByte @@ -46,8 +54,8 @@ PyUnicodeKind::Ascii } else if unlikely!(encoding_rs::mem::is_str_latin1(buf)) { // fails fast, no obvious effect on CJK PyUnicodeKind::OneByte - } else if *buf.as_bytes().iter().max().unwrap() >= 240 { + } else if is_four_byte(buf) { PyUnicodeKind::FourByte } else { PyUnicodeKind::TwoByte @@ -60,7 +68,8 @@ ffi!(Py_INCREF(EMPTY_UNICODE)); unsafe { EMPTY_UNICODE } } else { - match find_str_kind(buf) { + let num_chars = bytecount::num_chars(buf.as_bytes()) as isize; + match find_str_kind(buf, num_chars as usize) { PyUnicodeKind::Ascii => unsafe { let ptr = ffi!(PyUnicode_New(len as isize, 127)); let data_ptr = ptr.cast::<PyASCIIObject>().offset(1) as *mut u8; @@ -69,11 +78,11 @@ ptr }, PyUnicodeKind::OneByte => unsafe { - let num_chars = bytecount::num_chars(buf.as_bytes()) as isize; - let ptr = ffi!(PyUnicode_New(num_chars as isize, 255)); + let ptr = ffi!(PyUnicode_New(num_chars, 255)); + (*ptr.cast::<PyCompactUnicodeObject>()).length = num_chars; let mut data_ptr = ptr.cast::<PyCompactUnicodeObject>().offset(1) as *mut u8; for each in buf.chars() { core::ptr::write(data_ptr, each as u8); data_ptr = data_ptr.offset(1); } core::ptr::write(data_ptr, 0); @@ -74,10 +83,9 @@ let mut data_ptr = ptr.cast::<PyCompactUnicodeObject>().offset(1) as *mut u8; for each in buf.chars() { core::ptr::write(data_ptr, each as u8); data_ptr = data_ptr.offset(1); } core::ptr::write(data_ptr, 0); - (*ptr.cast::<PyCompactUnicodeObject>()).length = num_chars; ptr }, PyUnicodeKind::TwoByte => unsafe { @@ -81,5 +89,4 @@ ptr }, PyUnicodeKind::TwoByte => unsafe { - let num_chars = bytecount::num_chars(buf.as_bytes()) as isize; let ptr = ffi!(PyUnicode_New(num_chars, 65535)); @@ -85,7 +92,8 @@ let ptr = ffi!(PyUnicode_New(num_chars, 65535)); + (*ptr.cast::<PyCompactUnicodeObject>()).length = num_chars; let mut data_ptr = ptr.cast::<PyCompactUnicodeObject>().offset(1) as *mut u16; for each in buf.chars() { core::ptr::write(data_ptr, each as u16); data_ptr = data_ptr.offset(1); } core::ptr::write(data_ptr, 0); @@ -86,10 +94,9 @@ let mut data_ptr = ptr.cast::<PyCompactUnicodeObject>().offset(1) as *mut u16; for each in buf.chars() { core::ptr::write(data_ptr, each as u16); data_ptr = data_ptr.offset(1); } core::ptr::write(data_ptr, 0); - (*ptr.cast::<PyCompactUnicodeObject>()).length = num_chars; ptr }, PyUnicodeKind::FourByte => unsafe { @@ -93,5 +100,4 @@ ptr }, PyUnicodeKind::FourByte => unsafe { - let num_chars = bytecount::num_chars(buf.as_bytes()) as isize; let ptr = ffi!(PyUnicode_New(num_chars, 1114111)); @@ -97,7 +103,8 @@ let ptr = ffi!(PyUnicode_New(num_chars, 1114111)); + (*ptr.cast::<PyCompactUnicodeObject>()).length = num_chars; let mut data_ptr = ptr.cast::<PyCompactUnicodeObject>().offset(1) as *mut u32; for each in buf.chars() { core::ptr::write(data_ptr, each as u32); data_ptr = data_ptr.offset(1); } core::ptr::write(data_ptr, 0); @@ -98,10 +105,9 @@ let mut data_ptr = ptr.cast::<PyCompactUnicodeObject>().offset(1) as *mut u32; for each in buf.chars() { core::ptr::write(data_ptr, each as u32); data_ptr = data_ptr.offset(1); } core::ptr::write(data_ptr, 0); - (*ptr.cast::<PyCompactUnicodeObject>()).length = num_chars; ptr }, } diff --git a/test/test_datetime.py b/test/test_datetime.py index a9eff9f72d1e62ae90f6510c38eff135fdd64790_dGVzdC90ZXN0X2RhdGV0aW1lLnB5..bfde36265adde4fee511cd82e2ffe6e1420db72a_dGVzdC90ZXN0X2RhdGV0aW1lLnB5 100644 --- a/test/test_datetime.py +++ b/test/test_datetime.py @@ -3,8 +3,7 @@ import datetime import unittest -import orjson import pytest import pytz from dateutil import tz @@ -7,7 +6,9 @@ import pytest import pytz from dateutil import tz +import orjson + try: import pendulum except ImportError: diff --git a/test/test_memory.py b/test/test_memory.py index a9eff9f72d1e62ae90f6510c38eff135fdd64790_dGVzdC90ZXN0X21lbW9yeS5weQ==..bfde36265adde4fee511cd82e2ffe6e1420db72a_dGVzdC90ZXN0X21lbW9yeS5weQ== 100644 --- a/test/test_memory.py +++ b/test/test_memory.py @@ -7,7 +7,6 @@ import unittest from typing import List -import orjson import psutil import pytest @@ -11,6 +10,8 @@ import psutil import pytest +import orjson + try: import numpy except ImportError: diff --git a/test/test_non_str_keys.py b/test/test_non_str_keys.py index a9eff9f72d1e62ae90f6510c38eff135fdd64790_dGVzdC90ZXN0X25vbl9zdHJfa2V5cy5weQ==..bfde36265adde4fee511cd82e2ffe6e1420db72a_dGVzdC90ZXN0X25vbl9zdHJfa2V5cy5weQ== 100644 --- a/test/test_non_str_keys.py +++ b/test/test_non_str_keys.py @@ -5,7 +5,6 @@ import unittest import uuid -import orjson import pytest import pytz @@ -9,6 +8,8 @@ import pytest import pytz +import orjson + try: import numpy except ImportError: diff --git a/test/test_numpy.py b/test/test_numpy.py index a9eff9f72d1e62ae90f6510c38eff135fdd64790_dGVzdC90ZXN0X251bXB5LnB5..bfde36265adde4fee511cd82e2ffe6e1420db72a_dGVzdC90ZXN0X251bXB5LnB5 100644 --- a/test/test_numpy.py +++ b/test/test_numpy.py @@ -2,4 +2,6 @@ import unittest +import pytest + import orjson @@ -5,5 +7,4 @@ import orjson -import pytest try: import numpy diff --git a/test/test_type.py b/test/test_type.py index a9eff9f72d1e62ae90f6510c38eff135fdd64790_dGVzdC90ZXN0X3R5cGUucHk=..bfde36265adde4fee511cd82e2ffe6e1420db72a_dGVzdC90ZXN0X3R5cGUucHk= 100644 --- a/test/test_type.py +++ b/test/test_type.py @@ -2,4 +2,6 @@ import unittest +import xxhash + import orjson @@ -5,5 +7,4 @@ import orjson -import xxhash class TypeTests(unittest.TestCase):