diff --git a/README.md b/README.md index 003c12335774cec4816bdefc6d38b7c4ec126665_UkVBRE1FLm1k..d0519d5809b751939f67cf4688dc6349ab17cead_UkVBRE1FLm1k 100644 --- a/README.md +++ b/README.md @@ -784,8 +784,8 @@ ### numpy orjson natively serializes `numpy.ndarray` and individual `numpy.float64`, -`numpy.float32`, `numpy.int64`, `numpy.int32`, `numpy.uint64`, and -`numpy.uint32` instances. Arrays may have a +`numpy.float32`, `numpy.int64`, `numpy.int32`, `numpy.int8`, `numpy.uint64`, +`numpy.uint32`, and `numpy.uint8` instances. Arrays may have a `dtype` of `numpy.bool`, `numpy.float32`, `numpy.float64`, `numpy.int32`, `numpy.int64`, `numpy.uint32`, `numpy.uint64`, `numpy.uintp`, or `numpy.intp`. orjson is faster than all compared libraries at serializing diff --git a/pynumpy b/pynumpy index 003c12335774cec4816bdefc6d38b7c4ec126665_cHludW1weQ==..d0519d5809b751939f67cf4688dc6349ab17cead_cHludW1weQ== 100755 --- a/pynumpy +++ b/pynumpy @@ -30,4 +30,8 @@ assert array.dtype == numpy.float64 elif kind == "bool": array = numpy.random.choice((True, False), size=(100000, 200)) +elif kind == "int8": + array = numpy.random.randint(((2 ** 7) - 1), size=(100000, 100), dtype=numpy.int8) +elif kind == "uint8": + array = numpy.random.randint(((2 ** 8) - 1), size=(100000, 100), dtype=numpy.uint8) else: @@ -33,5 +37,5 @@ else: - print("usage: pynumpy (bool|int32|float64)") + print("usage: pynumpy (bool|int32|float64|int8|uint8)") sys.exit(1) proc = psutil.Process() diff --git a/src/serialize/numpy.rs b/src/serialize/numpy.rs index 003c12335774cec4816bdefc6d38b7c4ec126665_c3JjL3NlcmlhbGl6ZS9udW1weS5ycw==..d0519d5809b751939f67cf4688dc6349ab17cead_c3JjL3NlcmlhbGl6ZS9udW1weS5ycw== 100644 --- a/src/serialize/numpy.rs +++ b/src/serialize/numpy.rs @@ -19,5 +19,6 @@ || ob_type == scalar_types.float32 || ob_type == scalar_types.int64 || ob_type == scalar_types.int32 + || ob_type == scalar_types.int8 || ob_type == scalar_types.uint64 || ob_type == scalar_types.uint32 @@ -22,5 +23,6 @@ || ob_type 
== scalar_types.uint64 || ob_type == scalar_types.uint32 + || ob_type == scalar_types.uint8 } } @@ -61,5 +63,6 @@ BOOL, F32, F64, + I8, I32, I64, @@ -64,5 +67,6 @@ I32, I64, + U8, U32, U64, } @@ -73,44 +77,6 @@ UnsupportedDataType, } -#[repr(transparent)] -pub struct NumpyScalar { - pub ptr: *mut pyo3::ffi::PyObject, -} - -impl NumpyScalar { - pub fn new(ptr: *mut PyObject) -> Self { - NumpyScalar { ptr } - } -} - -impl<'p> Serialize for NumpyScalar { - fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> - where - S: Serializer, - { - unsafe { - let ob_type = ob_type!(self.ptr); - let scalar_types = NUMPY_TYPES.deref_mut().as_ref().unwrap(); - if ob_type == scalar_types.float64 { - (*(self.ptr as *mut NumpyFloat64)).serialize(serializer) - } else if ob_type == scalar_types.float32 { - (*(self.ptr as *mut NumpyFloat32)).serialize(serializer) - } else if ob_type == scalar_types.int64 { - (*(self.ptr as *mut NumpyInt64)).serialize(serializer) - } else if ob_type == scalar_types.int32 { - (*(self.ptr as *mut NumpyInt32)).serialize(serializer) - } else if ob_type == scalar_types.uint64 { - (*(self.ptr as *mut NumpyUint64)).serialize(serializer) - } else if ob_type == scalar_types.uint32 { - (*(self.ptr as *mut NumpyUint32)).serialize(serializer) - } else { - unreachable!() - } - } - } -} - // >>> arr = numpy.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]], numpy.int32) // >>> arr.ndim // 3 @@ -127,6 +93,7 @@ } impl<'a> NumpyArray { + #[inline(never)] pub fn new(ptr: *mut PyObject) -> Result<Self, PyArrayError> { let capsule = ffi!(PyObject_GetAttr(ptr, ARRAY_STRUCT_STR)); let array = unsafe { (*(capsule as *mut PyCapsule)).pointer as *mut PyArrayInterface }; @@ -176,5 +143,6 @@ (098, 1) => Some(ItemType::BOOL), (102, 4) => Some(ItemType::F32), (102, 8) => Some(ItemType::F64), + (105, 1) => Some(ItemType::I8), (105, 4) => Some(ItemType::I32), (105, 8) => Some(ItemType::I64), @@ -179,5 +147,6 @@ (105, 4) => Some(ItemType::I32), (105, 8) => Some(ItemType::I64), + 
(117, 1) => Some(ItemType::U8), (117, 4) => Some(ItemType::U32), (117, 8) => Some(ItemType::U64), _ => None, @@ -237,6 +206,7 @@ } impl<'p> Serialize for NumpyArray { + #[inline(never)] fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> where S: Serializer, @@ -248,7 +218,6 @@ for child in &self.children { seq.serialize_element(child).unwrap(); } - } else { let data_ptr = self.data(); let num_items = self.num_items(); @@ -277,6 +246,6 @@ seq.serialize_element(&DataTypeI32 { obj: each }).unwrap(); } } - ItemType::U64 => { - let slice: &[u64] = slice!(data_ptr as *const u64, num_items); + ItemType::I8 => { + let slice: &[i8] = slice!(data_ptr as *const i8, num_items); for &each in slice.iter() { @@ -282,5 +251,11 @@ for &each in slice.iter() { - seq.serialize_element(&DataTypeU64 { obj: each }).unwrap(); + seq.serialize_element(&DataTypeI8 { obj: each }).unwrap(); + } + } + ItemType::U8 => { + let slice: &[u8] = slice!(data_ptr as *const u8, num_items); + for &each in slice.iter() { + seq.serialize_element(&DataTypeU8 { obj: each }).unwrap(); } } ItemType::U32 => { @@ -289,6 +264,12 @@ seq.serialize_element(&DataTypeU32 { obj: each }).unwrap(); } } + ItemType::U64 => { + let slice: &[u64] = slice!(data_ptr as *const u64, num_items); + for &each in slice.iter() { + seq.serialize_element(&DataTypeU64 { obj: each }).unwrap(); + } + } ItemType::BOOL => { let slice: &[u8] = slice!(data_ptr as *const u8, num_items); for &each in slice.iter() { @@ -331,6 +312,20 @@ } #[repr(transparent)] +pub struct DataTypeI8 { + pub obj: i8, +} + +impl<'p> Serialize for DataTypeI8 { + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> + where + S: Serializer, + { + serializer.serialize_i8(self.obj) + } +} + +#[repr(transparent)] pub struct DataTypeI32 { pub obj: i32, } @@ -359,6 +354,20 @@ } #[repr(transparent)] +pub struct DataTypeU8 { + pub obj: u8, +} + +impl<'p> Serialize for DataTypeU8 { + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> + 
where + S: Serializer, + { + serializer.serialize_u8(self.obj) + } +} + +#[repr(transparent)] pub struct DataTypeU32 { pub obj: u32, } @@ -400,6 +409,64 @@ } } +#[repr(transparent)] +pub struct NumpyScalar { + pub ptr: *mut pyo3::ffi::PyObject, +} + +impl NumpyScalar { + pub fn new(ptr: *mut PyObject) -> Self { + NumpyScalar { ptr } + } +} + +impl<'p> Serialize for NumpyScalar { + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> + where + S: Serializer, + { + unsafe { + let ob_type = ob_type!(self.ptr); + let scalar_types = NUMPY_TYPES.deref_mut().as_ref().unwrap(); + if ob_type == scalar_types.float64 { + (*(self.ptr as *mut NumpyFloat64)).serialize(serializer) + } else if ob_type == scalar_types.float32 { + (*(self.ptr as *mut NumpyFloat32)).serialize(serializer) + } else if ob_type == scalar_types.int64 { + (*(self.ptr as *mut NumpyInt64)).serialize(serializer) + } else if ob_type == scalar_types.int32 { + (*(self.ptr as *mut NumpyInt32)).serialize(serializer) + } else if ob_type == scalar_types.int8 { + (*(self.ptr as *mut NumpyInt8)).serialize(serializer) + } else if ob_type == scalar_types.uint64 { + (*(self.ptr as *mut NumpyUint64)).serialize(serializer) + } else if ob_type == scalar_types.uint32 { + (*(self.ptr as *mut NumpyUint32)).serialize(serializer) + } else if ob_type == scalar_types.uint8 { + (*(self.ptr as *mut NumpyUint8)).serialize(serializer) + } else { + unreachable!() + } + } + } +} + +#[repr(C)] +pub struct NumpyInt8 { + pub ob_refcnt: Py_ssize_t, + pub ob_type: *mut PyTypeObject, + pub value: i8, +} + +impl<'p> Serialize for NumpyInt8 { + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> + where + S: Serializer, + { + serializer.serialize_i8(self.value) + } +} + #[repr(C)] pub struct NumpyInt32 { pub ob_refcnt: Py_ssize_t, @@ -433,6 +500,22 @@ } #[repr(C)] +pub struct NumpyUint8 { + pub ob_refcnt: Py_ssize_t, + pub ob_type: *mut PyTypeObject, + pub value: u8, +} + +impl<'p> Serialize for NumpyUint8 { + fn 
serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> + where + S: Serializer, + { + serializer.serialize_u8(self.value) + } +} + +#[repr(C)] pub struct NumpyUint32 { pub ob_refcnt: Py_ssize_t, pub ob_type: *mut PyTypeObject, diff --git a/src/typeref.rs b/src/typeref.rs index 003c12335774cec4816bdefc6d38b7c4ec126665_c3JjL3R5cGVyZWYucnM=..d0519d5809b751939f67cf4688dc6349ab17cead_c3JjL3R5cGVyZWYucnM= 100644 --- a/src/typeref.rs +++ b/src/typeref.rs @@ -7,7 +7,8 @@ use std::sync::Once; pub struct NumpyTypes { + pub array: *mut PyTypeObject, pub float64: *mut PyTypeObject, pub float32: *mut PyTypeObject, pub int64: *mut PyTypeObject, pub int32: *mut PyTypeObject, @@ -10,6 +11,7 @@ pub float64: *mut PyTypeObject, pub float32: *mut PyTypeObject, pub int64: *mut PyTypeObject, pub int32: *mut PyTypeObject, + pub int8: *mut PyTypeObject, pub uint64: *mut PyTypeObject, pub uint32: *mut PyTypeObject, @@ -14,6 +16,6 @@ pub uint64: *mut PyTypeObject, pub uint32: *mut PyTypeObject, - pub array: *mut PyTypeObject, + pub uint8: *mut PyTypeObject, } pub static mut HASH_SEED: u64 = 0; @@ -146,7 +148,8 @@ array: look_up_numpy_type(numpy, "ndarray\0")?.as_ptr(), float32: look_up_numpy_type(numpy, "float32\0")?.as_ptr(), float64: look_up_numpy_type(numpy, "float64\0")?.as_ptr(), + int8: look_up_numpy_type(numpy, "int8\0")?.as_ptr(), int32: look_up_numpy_type(numpy, "int32\0")?.as_ptr(), int64: look_up_numpy_type(numpy, "int64\0")?.as_ptr(), uint32: look_up_numpy_type(numpy, "uint32\0")?.as_ptr(), uint64: look_up_numpy_type(numpy, "uint64\0")?.as_ptr(), @@ -149,7 +152,8 @@ int32: look_up_numpy_type(numpy, "int32\0")?.as_ptr(), int64: look_up_numpy_type(numpy, "int64\0")?.as_ptr(), uint32: look_up_numpy_type(numpy, "uint32\0")?.as_ptr(), uint64: look_up_numpy_type(numpy, "uint64\0")?.as_ptr(), + uint8: look_up_numpy_type(numpy, "uint8\0")?.as_ptr(), }); Py_XDECREF(numpy); types diff --git a/test/test_numpy.py b/test/test_numpy.py index 
003c12335774cec4816bdefc6d38b7c4ec126665_dGVzdC90ZXN0X251bXB5LnB5..d0519d5809b751939f67cf4688dc6349ab17cead_dGVzdC90ZXN0X251bXB5LnB5 100644 --- a/test/test_numpy.py +++ b/test/test_numpy.py @@ -54,6 +54,24 @@ b"[0,18446744073709551615]", ) + def test_numpy_array_d1_i8(self): + self.assertEqual( + orjson.dumps( + numpy.array([-128, 127], numpy.int8), + option=orjson.OPT_SERIALIZE_NUMPY, + ), + b"[-128,127]", + ) + + def test_numpy_array_d1_u8(self): + self.assertEqual( + orjson.dumps( + numpy.array([0, 255], numpy.uint8), + option=orjson.OPT_SERIALIZE_NUMPY, + ), + b"[0,255]", + ) + def test_numpy_array_d1_i32(self): self.assertEqual( orjson.dumps( @@ -117,6 +135,24 @@ b"[[1.0,2.0,3.0],[4.0,5.0,6.0]]", ) + def test_numpy_array_d3_i8(self): + self.assertEqual( + orjson.dumps( + numpy.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]], numpy.int8), + option=orjson.OPT_SERIALIZE_NUMPY, + ), + b"[[[1,2],[3,4]],[[5,6],[7,8]]]", + ) + + def test_numpy_array_d3_u8(self): + self.assertEqual( + orjson.dumps( + numpy.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]], numpy.uint8), + option=orjson.OPT_SERIALIZE_NUMPY, + ), + b"[[[1,2],[3,4]],[[5,6],[7,8]]]", + ) + def test_numpy_array_d3_i32(self): self.assertEqual( orjson.dumps( @@ -159,7 +195,7 @@ ) def test_numpy_array_unsupported_dtype(self): - array = numpy.array([[1, 2], [3, 4]], numpy.int8) + array = numpy.array([[1, 2], [3, 4]], numpy.float16) with self.assertRaises(orjson.JSONEncodeError): orjson.dumps(array, option=orjson.OPT_SERIALIZE_NUMPY) self.assertEqual( @@ -314,8 +350,20 @@ array.tolist(), ) - def test_numpy_primitives(self): - # int32 + def test_numpy_scalar_int8(self): + self.assertEqual( + orjson.dumps(numpy.int8(0), option=orjson.OPT_SERIALIZE_NUMPY), b"0" + ) + self.assertEqual( + orjson.dumps(numpy.int8(127), option=orjson.OPT_SERIALIZE_NUMPY), + b"127", + ) + self.assertEqual( + orjson.dumps(numpy.int8(-128), option=orjson.OPT_SERIALIZE_NUMPY), + b"-128", + ) + + def test_numpy_scalar_int32(self): self.assertEqual( 
orjson.dumps(numpy.int32(1), option=orjson.OPT_SERIALIZE_NUMPY), b"1" ) @@ -327,7 +375,8 @@ orjson.dumps(numpy.int32(-2147483648), option=orjson.OPT_SERIALIZE_NUMPY), b"-2147483648", ) - # int 64 + + def test_numpy_scalar_int64(self): self.assertEqual( orjson.dumps( numpy.int64(-9223372036854775808), option=orjson.OPT_SERIALIZE_NUMPY @@ -340,7 +389,17 @@ ), b"9223372036854775807", ) - # uint32 + + def test_numpy_scalar_uint8(self): + self.assertEqual( + orjson.dumps(numpy.uint8(0), option=orjson.OPT_SERIALIZE_NUMPY), b"0" + ) + self.assertEqual( + orjson.dumps(numpy.uint8(255), option=orjson.OPT_SERIALIZE_NUMPY), + b"255", + ) + + def test_numpy_scalar_uint32(self): self.assertEqual( orjson.dumps(numpy.uint32(0), option=orjson.OPT_SERIALIZE_NUMPY), b"0" ) @@ -348,7 +407,8 @@ orjson.dumps(numpy.uint32(4294967295), option=orjson.OPT_SERIALIZE_NUMPY), b"4294967295", ) - # uint64 + + def test_numpy_scalar_uint64(self): self.assertEqual( orjson.dumps(numpy.uint64(0), option=orjson.OPT_SERIALIZE_NUMPY), b"0" ) @@ -359,8 +419,8 @@ b"18446744073709551615", ) - # float32 + def test_numpy_scalar_float32(self): self.assertEqual( orjson.dumps(numpy.float32(1.0), option=orjson.OPT_SERIALIZE_NUMPY), b"1.0" ) @@ -363,8 +423,8 @@ self.assertEqual( orjson.dumps(numpy.float32(1.0), option=orjson.OPT_SERIALIZE_NUMPY), b"1.0" ) - # float64 + def test_numpy_scalar_float64(self): self.assertEqual( orjson.dumps(numpy.float64(123.123), option=orjson.OPT_SERIALIZE_NUMPY), b"123.123",