diff --git a/Cargo.lock b/Cargo.lock index bc06ded407d452ae09d2e349a7bb9c77647e42dc_Q2FyZ28ubG9jaw==..8a8e4a42d063aef375dd9ff07a551d7278ec6dc7_Q2FyZ28ubG9jaw== 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,5 +2,5 @@ # It is not intended for manual editing. [[package]] name = "arrayvec" -version = "0.5.1" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -6,5 +6,5 @@ source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cff77d8686867eceff3105329d4698d96c2391c176d5d03adc90c7389162b5b8" +checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b" [[package]] name = "associative-cache" @@ -124,7 +124,7 @@ "serde", "serde_json", "smallvec", - "wyhash", + "wy", ] [[package]] @@ -174,12 +174,6 @@ ] [[package]] -name = "rand_core" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19" - -[[package]] name = "redox_syscall" version = "0.1.57" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -248,6 +242,6 @@ checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" [[package]] -name = "wyhash" -version = "0.4.1" +name = "wy" +version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -253,5 +247,2 @@ source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fe26121db27575e4fb30ceded9806fbfe0edb489f170a17506d9ad0b1aca41c" -dependencies = [ - "rand_core", -] +checksum = "f3dede6c845dff0b268e7b0c12bc92c97d1e9aafdce335e8e1d0eadbd52c7357" diff --git a/Cargo.toml b/Cargo.toml index bc06ded407d452ae09d2e349a7bb9c77647e42dc_Q2FyZ28udG9tbA==..8a8e4a42d063aef375dd9ff07a551d7278ec6dc7_Q2FyZ28udG9tbA== 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -60,7 +60,7 @@ serde = { version = "1", default_features = false } serde_json = { path = "./json", default_features = false, features = ["std"] } smallvec = { version = "1", default_features = false, features = ["const_generics", "union", "specialization", "write"] } -wyhash = { version = "0.4" } +wy = { version = "1" } [target.'cfg(not(any(target_arch = "x86_64", target_arch = "aarch64", target_arch = "i686", target_arch = "armv7")))'.dependencies] encoding_rs = { path = "./encoding_rs", default_features = false } diff --git a/README.md b/README.md index bc06ded407d452ae09d2e349a7bb9c77647e42dc_UkVBRE1FLm1k..8a8e4a42d063aef375dd9ff07a551d7278ec6dc7_UkVBRE1FLm1k 100644 --- a/README.md +++ b/README.md @@ -590,7 +590,7 @@ orjson maintains a cache of map keys for the duration of the process. This causes a net reduction in memory usage by avoiding duplicate strings. The -keys must be at most 64 chars to be cached and 512 entries are stored. +keys must be at most 64 bytes to be cached and 512 entries are stored. It raises `JSONDecodeError` if given an invalid type or invalid JSON. This includes if the input contains `NaN`, `Infinity`, or `-Infinity`, diff --git a/src/deserialize/cache.rs b/src/deserialize/cache.rs index bc06ded407d452ae09d2e349a7bb9c77647e42dc_c3JjL2Rlc2VyaWFsaXplL2NhY2hlLnJz..8a8e4a42d063aef375dd9ff07a551d7278ec6dc7_c3JjL2Rlc2VyaWFsaXplL2NhY2hlLnJz 100644 --- a/src/deserialize/cache.rs +++ b/src/deserialize/cache.rs @@ -5,6 +5,6 @@ use once_cell::unsync::OnceCell; use std::os::raw::c_void; -#[derive(Clone)] +#[repr(transparent)] pub struct CachedKey { ptr: *mut c_void, @@ -9,9 +9,8 @@ pub struct CachedKey { ptr: *mut c_void, - hash: pyo3::ffi::Py_hash_t, } unsafe impl Send for CachedKey {} unsafe impl Sync for CachedKey {} impl CachedKey { @@ -12,9 +11,9 @@ } unsafe impl Send for CachedKey {} unsafe impl Sync for CachedKey {} impl CachedKey { - pub fn new(ptr: *mut pyo3::ffi::PyObject, hash: pyo3::ffi::Py_hash_t) -> CachedKey { + pub fn new(ptr: *mut pyo3::ffi::PyObject) -> CachedKey { CachedKey { ptr: ptr as *mut c_void, @@ -19,6 +18,5 @@ CachedKey { ptr: ptr as *mut c_void, - hash: hash, } } @@ -22,6 +20,6 @@ } } - pub fn get(&mut self) -> (*mut pyo3::ffi::PyObject, pyo3::ffi::Py_hash_t) { + pub fn get(&mut self) -> *mut pyo3::ffi::PyObject { let ptr = self.ptr as *mut pyo3::ffi::PyObject; ffi!(Py_INCREF(ptr)); @@ -26,6 +24,6 @@ let ptr = self.ptr as *mut pyo3::ffi::PyObject; ffi!(Py_INCREF(ptr)); - (ptr, self.hash) + ptr } } @@ -36,6 +34,6 @@ } pub type KeyMap = - AssociativeCache<u64, CachedKey, Capacity512, HashDirectMapped, RoundRobinReplacement>; + AssociativeCache<u32, CachedKey, Capacity512, HashDirectMapped, RoundRobinReplacement>; pub static mut KEY_MAP: OnceCell<KeyMap> = OnceCell::new(); diff --git a/src/deserialize/decode.rs b/src/deserialize/decode.rs index bc06ded407d452ae09d2e349a7bb9c77647e42dc_c3JjL2Rlc2VyaWFsaXplL2RlY29kZS5ycw==..8a8e4a42d063aef375dd9ff07a551d7278ec6dc7_c3JjL2Rlc2VyaWFsaXplL2RlY29kZS5ycw== 100644 --- a/src/deserialize/decode.rs +++ b/src/deserialize/decode.rs @@ -10,7 +10,7 @@ use std::borrow::Cow; use std::fmt; use std::ptr::NonNull; -use wyhash::wyhash; +use wy::hash32; pub fn deserialize( ptr: *mut pyo3::ffi::PyObject, @@ -167,4 +167,5 @@ while let Some(key) = map.next_key::<Cow<str>>()? { let pykey: *mut pyo3::ffi::PyObject; let pyhash: pyo3::ffi::Py_hash_t; + let value = map.next_value_seed(self)?; if likely!(key.len() <= 64) { @@ -170,5 +171,5 @@ if likely!(key.len() <= 64) { - let hash = unsafe { wyhash(key.as_bytes(), HASH_SEED) }; + let hash = unsafe { hash32(key.as_bytes(), HASH_SEED) }; { let map = unsafe { KEY_MAP @@ -179,6 +180,7 @@ || hash, || { let pyob = unicode_from_str(&key); - CachedKey::new(pyob, hash_str(pyob)) + hash_str(pyob); + CachedKey::new(pyob) }, ); @@ -183,10 +185,9 @@ }, ); - let tmp = entry.get(); - pykey = tmp.0; - pyhash = tmp.1; + pykey = entry.get(); + pyhash = unsafe { (*pykey.cast::<PyASCIIObject>()).hash } } } else { pykey = unicode_from_str(&key); pyhash = hash_str(pykey); } @@ -188,9 +189,8 @@ } } else { pykey = unicode_from_str(&key); pyhash = hash_str(pykey); } - let value = map.next_value_seed(self)?; let _ = ffi!(_PyDict_SetItem_KnownHash( dict_ptr, pykey, diff --git a/src/typeref.rs b/src/typeref.rs index bc06ded407d452ae09d2e349a7bb9c77647e42dc_c3JjL3R5cGVyZWYucnM=..8a8e4a42d063aef375dd9ff07a551d7278ec6dc7_c3JjL3R5cGVyZWYucnM= 100644 --- a/src/typeref.rs +++ b/src/typeref.rs @@ -17,7 +17,7 @@ pub uint32: *mut PyTypeObject, pub uint8: *mut PyTypeObject, } -pub static mut HASH_SEED: u64 = 0; +pub static mut HASH_SEED: u32 = 0; pub static mut NONE: *mut PyObject = 0 as *mut PyObject; pub static mut TRUE: *mut PyObject = 0 as *mut PyObject; @@ -102,7 +102,7 @@ ARRAY_STRUCT_STR = pyo3::ffi::PyUnicode_InternFromString("__array_struct__\0".as_ptr() as *const c_char); VALUE_STR = pyo3::ffi::PyUnicode_InternFromString("value\0".as_ptr() as *const c_char); - HASH_SEED = (VALUE_STR as u64).wrapping_mul(DICT_TYPE as u64); + HASH_SEED = ((VALUE_STR as u64).wrapping_mul(DICT_TYPE as u64)) as u32; DEFAULT = PyUnicode_InternFromString("default\0".as_ptr() as *const c_char); OPTION = PyUnicode_InternFromString("option\0".as_ptr() as *const c_char); JsonEncodeError = pyo3::ffi::PyExc_TypeError;