# HG changeset patch # User ijl <ijl@mailbox.org> # Date 1603926019 0 # Wed Oct 28 23:00:19 2020 +0000 # Node ID 8a8e4a42d063aef375dd9ff07a551d7278ec6dc7 # Parent bc06ded407d452ae09d2e349a7bb9c77647e42dc Reduce work in cache diff --git a/Cargo.lock b/Cargo.lock --- a/Cargo.lock +++ b/Cargo.lock @@ -2,9 +2,9 @@ # It is not intended for manual editing. [[package]] name = "arrayvec" -version = "0.5.1" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cff77d8686867eceff3105329d4698d96c2391c176d5d03adc90c7389162b5b8" +checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b" [[package]] name = "associative-cache" @@ -124,7 +124,7 @@ "serde", "serde_json", "smallvec", - "wyhash", + "wy", ] [[package]] @@ -174,12 +174,6 @@ ] [[package]] -name = "rand_core" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19" - -[[package]] name = "redox_syscall" version = "0.1.57" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -248,10 +242,7 @@ checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" [[package]] -name = "wyhash" -version = "0.4.1" +name = "wy" +version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fe26121db27575e4fb30ceded9806fbfe0edb489f170a17506d9ad0b1aca41c" -dependencies = [ - "rand_core", -] +checksum = "f3dede6c845dff0b268e7b0c12bc92c97d1e9aafdce335e8e1d0eadbd52c7357" diff --git a/Cargo.toml b/Cargo.toml --- a/Cargo.toml +++ b/Cargo.toml @@ -60,7 +60,7 @@ serde = { version = "1", default_features = false } serde_json = { path = "./json", default_features = false, features = ["std"] } smallvec = { version = "1", default_features = false, features = ["const_generics", "union", "specialization", "write"] } -wyhash = { version = "0.4" } +wy = { version = "1" } [target.'cfg(not(any(target_arch = "x86_64", target_arch = "aarch64", target_arch = "i686", target_arch = "armv7")))'.dependencies] encoding_rs = { path = "./encoding_rs", default_features = false } diff --git a/README.md b/README.md --- a/README.md +++ b/README.md @@ -590,7 +590,7 @@ orjson maintains a cache of map keys for the duration of the process. This causes a net reduction in memory usage by avoiding duplicate strings. The -keys must be at most 64 chars to be cached and 512 entries are stored. +keys must be at most 64 bytes to be cached and 512 entries are stored. It raises `JSONDecodeError` if given an invalid type or invalid JSON. This includes if the input contains `NaN`, `Infinity`, or `-Infinity`, diff --git a/src/deserialize/cache.rs b/src/deserialize/cache.rs --- a/src/deserialize/cache.rs +++ b/src/deserialize/cache.rs @@ -5,27 +5,25 @@ use once_cell::unsync::OnceCell; use std::os::raw::c_void; -#[derive(Clone)] +#[repr(transparent)] pub struct CachedKey { ptr: *mut c_void, - hash: pyo3::ffi::Py_hash_t, } unsafe impl Send for CachedKey {} unsafe impl Sync for CachedKey {} impl CachedKey { - pub fn new(ptr: *mut pyo3::ffi::PyObject, hash: pyo3::ffi::Py_hash_t) -> CachedKey { + pub fn new(ptr: *mut pyo3::ffi::PyObject) -> CachedKey { CachedKey { ptr: ptr as *mut c_void, - hash: hash, } } - pub fn get(&mut self) -> (*mut pyo3::ffi::PyObject, pyo3::ffi::Py_hash_t) { + pub fn get(&mut self) -> *mut pyo3::ffi::PyObject { let ptr = self.ptr as *mut pyo3::ffi::PyObject; ffi!(Py_INCREF(ptr)); - (ptr, self.hash) + ptr } } @@ -36,6 +34,6 @@ } pub type KeyMap = - AssociativeCache<u64, CachedKey, Capacity512, HashDirectMapped, RoundRobinReplacement>; + AssociativeCache<u32, CachedKey, Capacity512, HashDirectMapped, RoundRobinReplacement>; pub static mut KEY_MAP: OnceCell<KeyMap> = OnceCell::new(); diff --git a/src/deserialize/decode.rs b/src/deserialize/decode.rs --- a/src/deserialize/decode.rs +++ b/src/deserialize/decode.rs @@ -10,7 +10,7 @@ use std::borrow::Cow; use std::fmt; use std::ptr::NonNull; -use wyhash::wyhash; +use wy::hash32; pub fn deserialize( ptr: *mut pyo3::ffi::PyObject, @@ -167,8 +167,9 @@ while let Some(key) = map.next_key::<Cow<str>>()? { let pykey: *mut pyo3::ffi::PyObject; let pyhash: pyo3::ffi::Py_hash_t; + let value = map.next_value_seed(self)?; if likely!(key.len() <= 64) { - let hash = unsafe { wyhash(key.as_bytes(), HASH_SEED) }; + let hash = unsafe { hash32(key.as_bytes(), HASH_SEED) }; { let map = unsafe { KEY_MAP @@ -179,18 +180,17 @@ || hash, || { let pyob = unicode_from_str(&key); - CachedKey::new(pyob, hash_str(pyob)) + hash_str(pyob); + CachedKey::new(pyob) }, ); - let tmp = entry.get(); - pykey = tmp.0; - pyhash = tmp.1; + pykey = entry.get(); + pyhash = unsafe { (*pykey.cast::<PyASCIIObject>()).hash } } } else { pykey = unicode_from_str(&key); pyhash = hash_str(pykey); } - let value = map.next_value_seed(self)?; let _ = ffi!(_PyDict_SetItem_KnownHash( dict_ptr, pykey, diff --git a/src/typeref.rs b/src/typeref.rs --- a/src/typeref.rs +++ b/src/typeref.rs @@ -17,7 +17,7 @@ pub uint32: *mut PyTypeObject, pub uint8: *mut PyTypeObject, } -pub static mut HASH_SEED: u64 = 0; +pub static mut HASH_SEED: u32 = 0; pub static mut NONE: *mut PyObject = 0 as *mut PyObject; pub static mut TRUE: *mut PyObject = 0 as *mut PyObject; @@ -102,7 +102,7 @@ ARRAY_STRUCT_STR = pyo3::ffi::PyUnicode_InternFromString("__array_struct__\0".as_ptr() as *const c_char); VALUE_STR = pyo3::ffi::PyUnicode_InternFromString("value\0".as_ptr() as *const c_char); - HASH_SEED = (VALUE_STR as u64).wrapping_mul(DICT_TYPE as u64); + HASH_SEED = ((VALUE_STR as u64).wrapping_mul(DICT_TYPE as u64)) as u32; DEFAULT = PyUnicode_InternFromString("default\0".as_ptr() as *const c_char); OPTION = PyUnicode_InternFromString("option\0".as_ptr() as *const c_char); JsonEncodeError = pyo3::ffi::PyExc_TypeError;