diff --git a/Cargo.lock b/Cargo.lock index 25d0d125f5d28c737c3310adb54dcf4ea3b6639c_Q2FyZ28ubG9jaw==..3376f44bf758ba01710f18c40227c9e23f9756cd_Q2FyZ28ubG9jaw== 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -85,5 +85,5 @@ [[package]] name = "libc" -version = "0.2.72" +version = "0.2.73" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -89,5 +89,5 @@ source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9f8082297d534141b30c8d39e9b1773713ab50fdbe4ff30f750d063b3bfd701" +checksum = "bd7d4bd64732af4bf3a67f367c27df8520ad7e230c5817b8ff485864d80242b9" [[package]] name = "lock_api" diff --git a/ci/azure-pipelines.yml b/ci/azure-pipelines.yml index 25d0d125f5d28c737c3310adb54dcf4ea3b6639c_Y2kvYXp1cmUtcGlwZWxpbmVzLnltbA==..3376f44bf758ba01710f18c40227c9e23f9756cd_Y2kvYXp1cmUtcGlwZWxpbmVzLnltbA== 100644 --- a/ci/azure-pipelines.yml +++ b/ci/azure-pipelines.yml @@ -4,7 +4,7 @@ - '*' variables: - toolchain: nightly + toolchain: nightly-2020-07-01 jobs: diff --git a/ci/drone.yml b/ci/drone.yml index 25d0d125f5d28c737c3310adb54dcf4ea3b6639c_Y2kvZHJvbmUueW1s..3376f44bf758ba01710f18c40227c9e23f9756cd_Y2kvZHJvbmUueW1s 100644 --- a/ci/drone.yml +++ b/ci/drone.yml @@ -14,7 +14,7 @@ TWINE_PASSWORD: from_secret: twine_password commands: - - curl https://sh.rustup.rs -sSf | sh -s -- --default-toolchain nightly --profile minimal -y + - curl https://sh.rustup.rs -sSf | sh -s -- --default-toolchain nightly-2020-07-01 --profile minimal -y - python3.9 -m pip install --upgrade pip maturin - maturin build --no-sdist --release --strip --manylinux 2014 --interpreter python3.9 - python3.9 -m pip install target/wheels/orjson*.whl @@ -38,7 +38,7 @@ TWINE_PASSWORD: from_secret: twine_password commands: - - curl https://sh.rustup.rs -sSf | sh -s -- --default-toolchain nightly --profile minimal -y + - curl https://sh.rustup.rs -sSf | sh -s -- --default-toolchain nightly-2020-07-01 --profile minimal -y - python3.8 -m pip install --upgrade pip maturin - maturin build --no-sdist --release --strip --manylinux 2014 --interpreter python3.8 - python3.8 -m pip install target/wheels/orjson*.whl @@ -62,7 +62,7 @@ TWINE_PASSWORD: from_secret: twine_password commands: - - curl https://sh.rustup.rs -sSf | sh -s -- --default-toolchain nightly --profile minimal -y + - curl https://sh.rustup.rs -sSf | sh -s -- --default-toolchain nightly-2020-07-01 --profile minimal -y - python3.7 -m pip install --upgrade pip maturin - maturin build --no-sdist --release --strip --manylinux 2014 --interpreter python3.7 - python3.7 -m pip install target/wheels/orjson*.whl @@ -86,7 +86,7 @@ TWINE_PASSWORD: from_secret: twine_password commands: - - curl https://sh.rustup.rs -sSf | sh -s -- --default-toolchain nightly --profile minimal -y + - curl https://sh.rustup.rs -sSf | sh -s -- --default-toolchain nightly-2020-07-01 --profile minimal -y - python3.6 -m pip install --upgrade pip maturin - maturin build --no-sdist --release --strip --manylinux 2014 --interpreter python3.6 - python3.6 -m pip install target/wheels/orjson*.whl diff --git a/src/array.rs b/src/array.rs deleted file mode 100644 index 25d0d125f5d28c737c3310adb54dcf4ea3b6639c_c3JjL2FycmF5LnJz..0000000000000000000000000000000000000000 --- a/src/array.rs +++ /dev/null @@ -1,340 +0,0 @@ -// SPDX-License-Identifier: (Apache-2.0 OR MIT) - -use crate::typeref::ARRAY_STRUCT_STR; -use pyo3::ffi::*; -use serde::ser::{Serialize, SerializeSeq, Serializer}; -use std::os::raw::{c_char, c_int, c_void}; - -macro_rules! slice { - ($ptr:expr, $size:expr) => { - unsafe { std::slice::from_raw_parts($ptr, $size) } - }; -} - -#[repr(C)] -pub struct PyCapsule { - pub ob_refcnt: Py_ssize_t, - pub ob_type: *mut PyTypeObject, - pub pointer: *mut c_void, - pub name: *const c_char, - pub context: *mut c_void, - pub destructor: *mut c_void, // should be typedef void (*PyCapsule_Destructor)(PyObject *); -} - -// https://docs.scipy.org/doc/numpy/reference/arrays.interface.html#c.__array_struct__ - -#[repr(C)] -pub struct PyArrayInterface { - pub two: c_int, - pub nd: c_int, - pub typekind: c_char, - pub itemsize: c_int, - pub flags: c_int, - pub shape: *mut Py_intptr_t, - pub strides: *mut Py_intptr_t, - pub data: *mut c_void, - pub descr: *mut PyObject, -} - -#[derive(Copy, Clone, PartialEq)] -pub enum ItemType { - BOOL, - F32, - F64, - I32, - I64, - U32, - U64, -} - -pub enum PyArrayError { - Malformed, - NotContiguous, - UnsupportedDataType, -} - -// >>> arr = numpy.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]], numpy.int32) -// >>> arr.ndim -// 3 -// >>> arr.shape -// (2, 2, 2) -// >>> arr.strides -// (16, 8, 4) -pub struct PyArray { - array: *mut PyArrayInterface, - position: Vec<isize>, - children: Vec<PyArray>, - depth: usize, - capsule: *mut PyCapsule, -} - -impl<'a> PyArray { - #[cold] - pub fn new(ptr: *mut PyObject) -> Result<Self, PyArrayError> { - let capsule = ffi!(PyObject_GetAttr(ptr, ARRAY_STRUCT_STR)); - let array = unsafe { (*(capsule as *mut PyCapsule)).pointer as *mut PyArrayInterface }; - if unsafe { (*array).two != 2 } { - ffi!(Py_DECREF(capsule)); - Err(PyArrayError::Malformed) - } else if unsafe { (*array).flags } & 0x1 != 0x1 { - ffi!(Py_DECREF(capsule)); - Err(PyArrayError::NotContiguous) - } else { - let num_dimensions = unsafe { (*array).nd as usize }; - if num_dimensions == 0 { - return Err(PyArrayError::UnsupportedDataType); - } - let mut pyarray = PyArray { - array: array, - position: vec![0; num_dimensions], - children: Vec::with_capacity(num_dimensions), - depth: 0, - capsule: capsule as *mut PyCapsule, - }; - if pyarray.kind().is_none() { - Err(PyArrayError::UnsupportedDataType) - } else { - if pyarray.dimensions() > 1 { - pyarray.build(); - } - Ok(pyarray) - } - } - } - - fn from_parent(&self, position: Vec<isize>, num_children: usize) -> Self { - let mut arr = PyArray { - array: self.array, - position: position, - children: Vec::with_capacity(num_children), - depth: self.depth + 1, - capsule: self.capsule, - }; - arr.build(); - arr - } - - fn kind(&self) -> Option<ItemType> { - match unsafe { ((*self.array).typekind, (*self.array).itemsize) } { - (098, 1) => Some(ItemType::BOOL), - (102, 4) => Some(ItemType::F32), - (102, 8) => Some(ItemType::F64), - (105, 4) => Some(ItemType::I32), - (105, 8) => Some(ItemType::I64), - (117, 4) => Some(ItemType::U32), - (117, 8) => Some(ItemType::U64), - _ => None, - } - } - - fn build(&mut self) { - if self.depth < self.dimensions() - 1 { - for i in 0..=self.shape()[self.depth] - 1 { - let mut position: Vec<isize> = self.position.iter().copied().collect(); - position[self.depth] = i; - let num_children: usize; - if self.depth < self.dimensions() - 2 { - num_children = self.shape()[self.depth + 1] as usize; - } else { - num_children = 0; - } - self.children.push(self.from_parent(position, num_children)) - } - } - } - - fn data(&self) -> *mut c_void { - let offset = self - .strides() - .iter() - .zip(self.position.iter().copied()) - .take(self.depth) - .map(|(a, b)| a * b) - .sum::<isize>(); - unsafe { (*self.array).data.offset(offset) } - } - - fn num_items(&self) -> usize { - self.shape()[self.shape().len() - 1] as usize - } - - fn dimensions(&self) -> usize { - unsafe { (*self.array).nd as usize } - } - - fn shape(&self) -> &[isize] { - slice!((*self.array).shape as *const isize, self.dimensions()) - } - - fn strides(&self) -> &[isize] { - slice!((*self.array).strides as *const isize, self.dimensions()) - } -} - -impl Drop for PyArray { - fn drop(&mut self) { - if self.depth == 0 { - ffi!(Py_XDECREF(self.capsule as *mut pyo3::ffi::PyObject)) - } - } -} - -impl<'p> Serialize for PyArray { - fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> - where - S: Serializer, - { - let mut seq = serializer.serialize_seq(None).unwrap(); - if !self.children.is_empty() { - for child in &self.children { - seq.serialize_element(child).unwrap(); - } - } else { - let data_ptr = self.data(); - let num_items = self.num_items(); - match self.kind().unwrap() { - ItemType::F64 => { - let slice: &[f64] = slice!(data_ptr as *const f64, num_items); - for &each in slice.iter() { - seq.serialize_element(&DataTypeF64 { obj: each }).unwrap(); - } - } - ItemType::F32 => { - let slice: &[f32] = slice!(data_ptr as *const f32, num_items); - for &each in slice.iter() { - seq.serialize_element(&DataTypeF32 { obj: each }).unwrap(); - } - } - ItemType::I64 => { - let slice: &[i64] = slice!(data_ptr as *const i64, num_items); - for &each in slice.iter() { - seq.serialize_element(&DataTypeI64 { obj: each }).unwrap(); - } - } - ItemType::I32 => { - let slice: &[i32] = slice!(data_ptr as *const i32, num_items); - for &each in slice.iter() { - seq.serialize_element(&DataTypeI32 { obj: each }).unwrap(); - } - } - ItemType::U64 => { - let slice: &[u64] = slice!(data_ptr as *const u64, num_items); - for &each in slice.iter() { - seq.serialize_element(&DataTypeU64 { obj: each }).unwrap(); - } - } - ItemType::U32 => { - let slice: &[u32] = slice!(data_ptr as *const u32, num_items); - for &each in slice.iter() { - seq.serialize_element(&DataTypeU32 { obj: each }).unwrap(); - } - } - ItemType::BOOL => { - let slice: &[u8] = slice!(data_ptr as *const u8, num_items); - for &each in slice.iter() { - seq.serialize_element(&DataTypeBOOL { obj: each }).unwrap(); - } - } - } - } - seq.end() - } -} - -#[repr(transparent)] -struct DataTypeF32 { - pub obj: f32, -} - -impl<'p> Serialize for DataTypeF32 { - fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> - where - S: Serializer, - { - serializer.serialize_f32(self.obj) - } -} - -#[repr(transparent)] -struct DataTypeF64 { - pub obj: f64, -} - -impl<'p> Serialize for DataTypeF64 { - fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> - where - S: Serializer, - { - serializer.serialize_f64(self.obj) - } -} - -#[repr(transparent)] -struct DataTypeI32 { - pub obj: i32, -} - -impl<'p> Serialize for DataTypeI32 { - fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> - where - S: Serializer, - { - serializer.serialize_i32(self.obj) - } -} - -#[repr(transparent)] -struct DataTypeI64 { - pub obj: i64, -} - -impl<'p> Serialize for DataTypeI64 { - fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> - where - S: Serializer, - { - serializer.serialize_i64(self.obj) - } -} - -#[repr(transparent)] -struct DataTypeU32 { - pub obj: u32, -} - -impl<'p> Serialize for DataTypeU32 { - fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> - where - S: Serializer, - { - serializer.serialize_u32(self.obj) - } -} - -#[repr(transparent)] -struct DataTypeU64 { - pub obj: u64, -} - -impl<'p> Serialize for DataTypeU64 { - fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> - where - S: Serializer, - { - serializer.serialize_u64(self.obj) - } -} - -#[repr(transparent)] -struct DataTypeBOOL { - pub obj: u8, -} - -impl<'p> Serialize for DataTypeBOOL { - fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> - where - S: Serializer, - { - serializer.serialize_bool(self.obj == 1) - } -} diff --git a/src/bytes.rs b/src/bytes.rs deleted file mode 100644 index 25d0d125f5d28c737c3310adb54dcf4ea3b6639c_c3JjL2J5dGVzLnJz..0000000000000000000000000000000000000000 --- a/src/bytes.rs +++ /dev/null @@ -1,25 +0,0 @@ -// SPDX-License-Identifier: (Apache-2.0 OR MIT) - -use pyo3::ffi::*; -use std::os::raw::c_char; - -#[repr(C)] -pub struct PyBytesObject { - pub ob_refcnt: Py_ssize_t, - pub ob_type: *mut PyTypeObject, - pub ob_size: Py_ssize_t, - pub ob_shash: Py_hash_t, - pub ob_sval: [c_char; 1], -} - -#[allow(non_snake_case)] -#[inline(always)] -pub unsafe fn PyBytes_AS_STRING(op: *mut PyObject) -> *const c_char { - &(*op.cast::<PyBytesObject>()).ob_sval as *const c_char -} - -#[allow(non_snake_case)] -#[inline(always)] -pub unsafe fn PyBytes_GET_SIZE(op: *mut PyObject) -> Py_ssize_t { - (*op.cast::<PyBytesObject>()).ob_size -} diff --git a/src/dataclass.rs b/src/dataclass.rs deleted file mode 100644 index 25d0d125f5d28c737c3310adb54dcf4ea3b6639c_c3JjL2RhdGFjbGFzcy5ycw==..0000000000000000000000000000000000000000 --- a/src/dataclass.rs +++ /dev/null @@ -1,95 +0,0 @@ -// SPDX-License-Identifier: (Apache-2.0 OR MIT) - -use crate::dict::*; -use crate::encode::*; -use crate::exc::*; -use crate::opt::*; -use crate::typeref::*; -use crate::unicode::*; - -use serde::ser::{Serialize, SerializeMap, Serializer}; - -use std::ptr::NonNull; - -pub struct DataclassSerializer { - ptr: *mut pyo3::ffi::PyObject, - opts: Opt, - default_calls: u8, - recursion: u8, - default: Option<NonNull<pyo3::ffi::PyObject>>, -} - -impl DataclassSerializer { - pub fn new( - ptr: *mut pyo3::ffi::PyObject, - opts: Opt, - default_calls: u8, - recursion: u8, - default: Option<NonNull<pyo3::ffi::PyObject>>, - ) -> Self { - DataclassSerializer { - ptr: ptr, - opts: opts, - default_calls: default_calls, - recursion: recursion, - default: default, - } - } -} - -impl<'p> Serialize for DataclassSerializer { - #[inline(never)] - fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> - where - S: Serializer, - { - let fields = ffi!(PyObject_GetAttr(self.ptr, DATACLASS_FIELDS_STR)); - ffi!(Py_DECREF(fields)); - let len = unsafe { PyDict_GET_SIZE(fields) as usize }; - if unlikely!(len == 0) { - return serializer.serialize_map(Some(0)).unwrap().end(); - } - let mut map = serializer.serialize_map(None).unwrap(); - let mut pos = 0isize; - let mut str_size: pyo3::ffi::Py_ssize_t = 0; - let mut attr: *mut pyo3::ffi::PyObject = std::ptr::null_mut(); - let mut field: *mut pyo3::ffi::PyObject = std::ptr::null_mut(); - for _ in 0..=len - 1 { - unsafe { - pyo3::ffi::_PyDict_Next( - fields, - &mut pos, - &mut attr, - &mut field, - std::ptr::null_mut(), - ) - }; - if unsafe { ffi!(PyObject_GetAttr(field, FIELD_TYPE_STR)) != FIELD_TYPE.as_ptr() } { - continue; - } - { - let data = read_utf8_from_str(attr, &mut str_size); - if unlikely!(data.is_null()) { - err!(INVALID_STR); - } - let key_as_str = str_from_slice!(data, str_size); - if key_as_str.as_bytes()[0] == b'_' { - continue; - } - map.serialize_key(key_as_str).unwrap(); - } - - let value = ffi!(PyObject_GetAttr(self.ptr, attr)); - ffi!(Py_DECREF(value)); - - map.serialize_value(&SerializePyObject::new( - value, - self.opts, - self.default_calls, - self.recursion + 1, - self.default, - ))? - } - map.end() - } -} diff --git a/src/datetime.rs b/src/datetime.rs deleted file mode 100644 index 25d0d125f5d28c737c3310adb54dcf4ea3b6639c_c3JjL2RhdGV0aW1lLnJz..0000000000000000000000000000000000000000 --- a/src/datetime.rs +++ /dev/null @@ -1,279 +0,0 @@ -// SPDX-License-Identifier: (Apache-2.0 OR MIT) - -use crate::exc::*; -use crate::opt::*; -use crate::typeref::*; -use serde::ser::{Serialize, Serializer}; - -pub type DateTimeBuffer = smallvec::SmallVec<[u8; 32]>; - -macro_rules! write_double_digit { - ($buf:ident, $value:ident) => { - if $value < 10 { - $buf.push(b'0'); - } - $buf.extend_from_slice(itoa::Buffer::new().format($value).as_bytes()); - }; -} - -macro_rules! write_microsecond { - ($buf:ident, $microsecond:ident) => { - if $microsecond != 0 { - let mut buf = itoa::Buffer::new(); - let formatted = buf.format($microsecond); - $buf.extend_from_slice( - &[b'.', b'0', b'0', b'0', b'0', b'0', b'0'][..(7 - formatted.len())], - ); - $buf.extend_from_slice(formatted.as_bytes()); - } - }; -} - -#[repr(transparent)] -pub struct Date { - ptr: *mut pyo3::ffi::PyObject, -} - -impl Date { - pub fn new(ptr: *mut pyo3::ffi::PyObject) -> Self { - Date { ptr: ptr } - } - pub fn write_buf(&self, buf: &mut DateTimeBuffer) { - { - let year = ffi!(PyDateTime_GET_YEAR(self.ptr)) as i32; - buf.extend_from_slice(itoa::Buffer::new().format(year).as_bytes()); - } - buf.push(b'-'); - { - let month = ffi!(PyDateTime_GET_MONTH(self.ptr)) as u32; - write_double_digit!(buf, month); - } - buf.push(b'-'); - { - let day = ffi!(PyDateTime_GET_DAY(self.ptr)) as u32; - write_double_digit!(buf, day); - } - } -} -impl<'p> Serialize for Date { - fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> - where - S: Serializer, - { - let mut buf: DateTimeBuffer = smallvec::SmallVec::with_capacity(32); - self.write_buf(&mut buf); - serializer.serialize_str(str_from_slice!(buf.as_ptr(), buf.len())) - } -} - -pub enum TimeError { - HasTimezone, -} - -pub struct Time { - ptr: *mut pyo3::ffi::PyObject, - opts: Opt, -} - -impl Time { - pub fn new(ptr: *mut pyo3::ffi::PyObject, opts: Opt) -> Result<Self, TimeError> { - if unsafe { (*(ptr as *mut pyo3::ffi::PyDateTime_Time)).hastzinfo == 1 } { - return Err(TimeError::HasTimezone); - } - Ok(Time { - ptr: ptr, - opts: opts, - }) - } - pub fn write_buf(&self, buf: &mut DateTimeBuffer) { - { - let hour = ffi!(PyDateTime_TIME_GET_HOUR(self.ptr)) as u8; - write_double_digit!(buf, hour); - } - buf.push(b':'); - { - let minute = ffi!(PyDateTime_TIME_GET_MINUTE(self.ptr)) as u8; - write_double_digit!(buf, minute); - } - buf.push(b':'); - { - let second = ffi!(PyDateTime_TIME_GET_SECOND(self.ptr)) as u8; - write_double_digit!(buf, second); - } - if self.opts & OMIT_MICROSECONDS == 0 { - let microsecond = ffi!(PyDateTime_TIME_GET_MICROSECOND(self.ptr)) as u32; - write_microsecond!(buf, microsecond); - } - } -} - -impl<'p> Serialize for Time { - fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> - where - S: Serializer, - { - let mut buf: DateTimeBuffer = smallvec::SmallVec::with_capacity(32); - self.write_buf(&mut buf); - serializer.serialize_str(str_from_slice!(buf.as_ptr(), buf.len())) - } -} - -pub enum DateTimeError { - LibraryUnsupported, -} - -pub struct DateTime { - ptr: *mut pyo3::ffi::PyObject, - opts: Opt, -} - -impl DateTime { - pub fn new(ptr: *mut pyo3::ffi::PyObject, opts: Opt) -> Self { - DateTime { - ptr: ptr, - opts: opts, - } - } - pub fn write_buf(&self, buf: &mut DateTimeBuffer) -> Result<(), DateTimeError> { - let has_tz = unsafe { (*(self.ptr as *mut pyo3::ffi::PyDateTime_DateTime)).hastzinfo == 1 }; - let offset_day: i32; - let mut offset_second: i32; - if !has_tz { - offset_second = 0; - offset_day = 0; - } else { - let tzinfo = ffi!(PyDateTime_DATE_GET_TZINFO(self.ptr)); - if ffi!(PyObject_HasAttr(tzinfo, CONVERT_METHOD_STR)) == 1 { - // pendulum - let offset = ffi!(PyObject_CallMethodObjArgs( - self.ptr, - UTCOFFSET_METHOD_STR, - std::ptr::null_mut() as *mut pyo3::ffi::PyObject - )); - offset_second = ffi!(PyDateTime_DELTA_GET_SECONDS(offset)) as i32; - offset_day = ffi!(PyDateTime_DELTA_GET_DAYS(offset)); - } else if ffi!(PyObject_HasAttr(tzinfo, NORMALIZE_METHOD_STR)) == 1 { - // pytz - let method_ptr = ffi!(PyObject_CallMethodObjArgs( - tzinfo, - NORMALIZE_METHOD_STR, - self.ptr, - std::ptr::null_mut() as *mut pyo3::ffi::PyObject - )); - let offset = ffi!(PyObject_CallMethodObjArgs( - method_ptr, - UTCOFFSET_METHOD_STR, - std::ptr::null_mut() as *mut pyo3::ffi::PyObject - )); - offset_second = ffi!(PyDateTime_DELTA_GET_SECONDS(offset)) as i32; - offset_day = ffi!(PyDateTime_DELTA_GET_DAYS(offset)); - } else if ffi!(PyObject_HasAttr(tzinfo, DST_STR)) == 1 { - // dateutil/arrow, datetime.timezone.utc - let offset = ffi!(PyObject_CallMethodObjArgs( - tzinfo, - UTCOFFSET_METHOD_STR, - self.ptr, - std::ptr::null_mut() as *mut pyo3::ffi::PyObject - )); - offset_second = ffi!(PyDateTime_DELTA_GET_SECONDS(offset)) as i32; - offset_day = ffi!(PyDateTime_DELTA_GET_DAYS(offset)); - } else { - return Err(DateTimeError::LibraryUnsupported); - } - }; - - buf.extend_from_slice( - itoa::Buffer::new() - .format(ffi!(PyDateTime_GET_YEAR(self.ptr)) as i32) - .as_bytes(), - ); - buf.push(b'-'); - { - let month = ffi!(PyDateTime_GET_MONTH(self.ptr)) as u8; - write_double_digit!(buf, month); - } - buf.push(b'-'); - { - let day = ffi!(PyDateTime_GET_DAY(self.ptr)) as u8; - write_double_digit!(buf, day); - } - buf.push(b'T'); - { - let hour = ffi!(PyDateTime_DATE_GET_HOUR(self.ptr)) as u8; - write_double_digit!(buf, hour); - } - buf.push(b':'); - { - let minute = ffi!(PyDateTime_DATE_GET_MINUTE(self.ptr)) as u8; - write_double_digit!(buf, minute); - } - buf.push(b':'); - { - let second = ffi!(PyDateTime_DATE_GET_SECOND(self.ptr)) as u8; - write_double_digit!(buf, second); - } - if self.opts & OMIT_MICROSECONDS == 0 { - let microsecond = ffi!(PyDateTime_DATE_GET_MICROSECOND(self.ptr)) as u32; - write_microsecond!(buf, microsecond); - } - if has_tz || self.opts & NAIVE_UTC != 0 { - if offset_second == 0 { - if self.opts & UTC_Z != 0 { - buf.push(b'Z'); - } else { - buf.extend_from_slice(&[b'+', b'0', b'0', b':', b'0', b'0']); - } - } else { - if offset_day == -1 { - // datetime.timedelta(days=-1, seconds=68400) -> -05:00 - buf.push(b'-'); - offset_second = 86400 - offset_second; - } else { - // datetime.timedelta(seconds=37800) -> +10:30 - buf.push(b'+'); - } - { - let offset_minute = offset_second / 60; - let offset_hour = offset_minute / 60; - write_double_digit!(buf, offset_hour); - buf.push(b':'); - - let mut offset_minute_print = offset_minute % 60; - - { - // https://tools.ietf.org/html/rfc3339#section-5.8 - // "exactly 19 minutes and 32.13 seconds ahead of UTC" - // "closest representable UTC offset" - // "+20:00" - let offset_excess_second = - offset_second - (offset_minute_print * 60 + offset_hour * 3600); - if offset_excess_second >= 30 { - offset_minute_print += 1; - } - } - - if offset_minute_print < 10 { - buf.push(b'0'); - } - buf.extend_from_slice( - itoa::Buffer::new().format(offset_minute_print).as_bytes(), - ); - } - } - } - Ok(()) - } -} - -impl<'p> Serialize for DateTime { - fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> - where - S: Serializer, - { - let mut buf: DateTimeBuffer = smallvec::SmallVec::with_capacity(32); - if self.write_buf(&mut buf).is_err() { - err!(DATETIME_LIBRARY_UNSUPPORTED) - } - serializer.serialize_str(str_from_slice!(buf.as_ptr(), buf.len())) - } -} diff --git a/src/decode.rs b/src/decode.rs deleted file mode 100644 index 25d0d125f5d28c737c3310adb54dcf4ea3b6639c_c3JjL2RlY29kZS5ycw==..0000000000000000000000000000000000000000 --- a/src/decode.rs +++ /dev/null @@ -1,244 +0,0 @@ -// SPDX-License-Identifier: (Apache-2.0 OR MIT) - -use crate::bytes::*; -use crate::exc::*; -use crate::typeref::*; -use crate::unicode::*; -use associative_cache::replacement::RoundRobinReplacement; -use associative_cache::*; -use once_cell::unsync::OnceCell; -use serde::de::{self, DeserializeSeed, Deserializer, MapAccess, SeqAccess, Visitor}; -use smallvec::SmallVec; -use std::borrow::Cow; -use std::fmt; -use std::os::raw::c_void; -use std::ptr::NonNull; -use wyhash::wyhash; - -#[derive(Clone)] -pub struct CachedKey { - ptr: *mut c_void, - hash: pyo3::ffi::Py_hash_t, -} - -unsafe impl Send for CachedKey {} -unsafe impl Sync for CachedKey {} - -impl CachedKey { - fn new(ptr: *mut pyo3::ffi::PyObject, hash: pyo3::ffi::Py_hash_t) -> CachedKey { - CachedKey { - ptr: ptr as *mut c_void, - hash: hash, - } - } - - fn get(&mut self) -> (*mut pyo3::ffi::PyObject, pyo3::ffi::Py_hash_t) { - let ptr = self.ptr as *mut pyo3::ffi::PyObject; - ffi!(Py_INCREF(ptr)); - (ptr, self.hash) - } -} - -impl Drop for CachedKey { - fn drop(&mut self) { - ffi!(Py_DECREF(self.ptr as *mut pyo3::ffi::PyObject)); - } -} - -pub type KeyMap = - AssociativeCache<u64, CachedKey, Capacity512, HashDirectMapped, RoundRobinReplacement>; - -pub static mut KEY_MAP: OnceCell<KeyMap> = OnceCell::new(); - -pub fn deserialize( - ptr: *mut pyo3::ffi::PyObject, -) -> std::result::Result<NonNull<pyo3::ffi::PyObject>, String> { - let obj_type_ptr = ob_type!(ptr); - let contents: &[u8]; - if is_type!(obj_type_ptr, STR_TYPE) { - let mut str_size: pyo3::ffi::Py_ssize_t = 0; - let uni = read_utf8_from_str(ptr, &mut str_size); - if unlikely!(uni.is_null()) { - return Err(INVALID_STR.to_string()); - } - contents = unsafe { std::slice::from_raw_parts(uni, str_size as usize) }; - } else { - let buffer: *const u8; - let length: usize; - if is_type!(obj_type_ptr, BYTES_TYPE) { - buffer = unsafe { PyBytes_AS_STRING(ptr) as *const u8 }; - length = unsafe { PyBytes_GET_SIZE(ptr) as usize }; - } else if is_type!(obj_type_ptr, BYTEARRAY_TYPE) { - buffer = ffi!(PyByteArray_AsString(ptr)) as *const u8; - length = ffi!(PyByteArray_Size(ptr)) as usize; - } else { - return Err("Input must be bytes, bytearray, or str".to_string()); - } - contents = unsafe { std::slice::from_raw_parts(buffer, length) }; - if encoding_rs::Encoding::utf8_valid_up_to(contents) != length { - return Err(INVALID_STR.to_string()); - } - } - - let data = unsafe { std::str::from_utf8_unchecked(contents) }; - let mut deserializer = serde_json::Deserializer::from_str(data); - - let seed = JsonValue {}; - match seed.deserialize(&mut deserializer) { - Ok(obj) => { - deserializer.end().map_err(|e| e.to_string())?; - Ok(obj) - } - Err(e) => Err(e.to_string()), - } -} - -#[derive(Clone, Copy)] -struct JsonValue; - -impl<'de> DeserializeSeed<'de> for JsonValue { - type Value = NonNull<pyo3::ffi::PyObject>; - - fn deserialize<D>(self, deserializer: D) -> Result<Self::Value, D::Error> - where - D: Deserializer<'de>, - { - deserializer.deserialize_any(self) - } -} - -impl<'de> Visitor<'de> for JsonValue { - type Value = NonNull<pyo3::ffi::PyObject>; - - fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - formatter.write_str("JSON") - } - - fn visit_unit<E>(self) -> Result<Self::Value, E> { - ffi!(Py_INCREF(NONE)); - Ok(nonnull!(NONE)) - } - - fn visit_bool<E>(self, value: bool) -> Result<Self::Value, E> - where - E: de::Error, - { - if value { - ffi!(Py_INCREF(TRUE)); - Ok(nonnull!(TRUE)) - } else { - ffi!(Py_INCREF(FALSE)); - Ok(nonnull!(FALSE)) - } - } - - fn visit_i64<E>(self, value: i64) -> Result<Self::Value, E> - where - E: de::Error, - { - Ok(nonnull!(ffi!(PyLong_FromLongLong(value)))) - } - - fn visit_u64<E>(self, value: u64) -> Result<Self::Value, E> - where - E: de::Error, - { - Ok(nonnull!(ffi!(PyLong_FromUnsignedLongLong(value)))) - } - - fn visit_f64<E>(self, value: f64) -> Result<Self::Value, E> - where - E: de::Error, - { - Ok(nonnull!(ffi!(PyFloat_FromDouble(value)))) - } - - fn visit_string<E>(self, value: String) -> Result<Self::Value, E> - where - E: de::Error, - { - Ok(nonnull!(str_to_pyobject!(value.as_str()))) - } - - fn visit_borrowed_str<E>(self, value: &str) -> Result<Self::Value, E> - where - E: de::Error, - { - Ok(nonnull!(str_to_pyobject!(value))) - } - - fn visit_str<E>(self, value: &str) -> Result<Self::Value, E> - where - E: de::Error, - { - Ok(nonnull!(str_to_pyobject!(value))) - } - - fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error> - where - A: SeqAccess<'de>, - { - match seq.next_element_seed(self) { - Ok(None) => Ok(nonnull!(ffi!(PyList_New(0)))), - Ok(Some(elem)) => { - let mut elements: SmallVec<[*mut pyo3::ffi::PyObject; 8]> = - SmallVec::with_capacity(8); - elements.push(elem.as_ptr()); - while let Some(elem) = seq.next_element_seed(self)? { - elements.push(elem.as_ptr()); - } - let ptr = ffi!(PyList_New(elements.len() as pyo3::ffi::Py_ssize_t)); - for (i, &obj) in elements.iter().enumerate() { - ffi!(PyList_SET_ITEM(ptr, i as pyo3::ffi::Py_ssize_t, obj)); - } - Ok(nonnull!(ptr)) - } - Err(err) => std::result::Result::Err(err), - } - } - - fn visit_map<A>(self, mut map: A) -> Result<Self::Value, A::Error> - where - A: MapAccess<'de>, - { - let dict_ptr = ffi!(PyDict_New()); - while let Some(key) = map.next_key::<Cow<str>>()? { - let pykey: *mut pyo3::ffi::PyObject; - let pyhash: pyo3::ffi::Py_hash_t; - if likely!(key.len() <= 64) { - let hash = unsafe { wyhash(key.as_bytes(), HASH_SEED) }; - { - let map = unsafe { - KEY_MAP - .get_mut() - .unwrap_or_else(|| unsafe { std::hint::unreachable_unchecked() }) - }; - let entry = map.entry(&hash).or_insert_with( - || hash, - || { - let pyob = str_to_pyobject!(&key); - CachedKey::new(pyob, hash_str(pyob)) - }, - ); - let tmp = entry.get(); - pykey = tmp.0; - pyhash = tmp.1; - } - } else { - pykey = str_to_pyobject!(&key); - pyhash = hash_str(pykey); - } - let value = map.next_value_seed(self)?; - let _ = ffi!(_PyDict_SetItem_KnownHash( - dict_ptr, - pykey, - value.as_ptr(), - pyhash - )); - // counter Py_INCREF in insertdict - ffi!(Py_DECREF(pykey)); - ffi!(Py_DECREF(value.as_ptr())); - } - Ok(nonnull!(dict_ptr)) - } -} diff --git a/src/default.rs b/src/default.rs deleted file mode 100644 index 25d0d125f5d28c737c3310adb54dcf4ea3b6639c_c3JjL2RlZmF1bHQucnM=..0000000000000000000000000000000000000000 --- a/src/default.rs +++ /dev/null @@ -1,88 +0,0 @@ -// SPDX-License-Identifier: (Apache-2.0 OR MIT) - -use crate::encode::*; -use crate::opt::*; - -use serde::ser::{Serialize, Serializer}; -use std::ffi::CStr; - -use std::ptr::NonNull; - -macro_rules! obj_name { - ($obj:expr) => { - unsafe { CStr::from_ptr((*$obj).tp_name).to_string_lossy() } - }; -} - -pub struct DefaultSerializer { - ptr: *mut pyo3::ffi::PyObject, - opts: Opt, - default_calls: u8, - recursion: u8, - default: Option<NonNull<pyo3::ffi::PyObject>>, -} - -impl DefaultSerializer { - pub fn new( - ptr: *mut pyo3::ffi::PyObject, - opts: Opt, - default_calls: u8, - recursion: u8, - default: Option<NonNull<pyo3::ffi::PyObject>>, - ) -> Self { - DefaultSerializer { - ptr: ptr, - opts: opts, - default_calls: default_calls, - recursion: recursion, - default: default, - } - } -} - -impl<'p> Serialize for DefaultSerializer { - #[inline(never)] - fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> - where - S: Serializer, - { - match self.default { - Some(callable) => { - if unlikely!(self.default_calls == RECURSION_LIMIT) { - err!("default serializer exceeds recursion limit") - } - let default_obj = ffi!(PyObject_CallFunctionObjArgs( - callable.as_ptr(), - self.ptr, - std::ptr::null_mut() as *mut pyo3::ffi::PyObject - )); - if default_obj.is_null() { - err!(format_args!( - "Type is not JSON serializable: {}", - obj_name!(ob_type!(self.ptr)) - )) - } else if !ffi!(PyErr_Occurred()).is_null() { - err!(format_args!( - "Type raised exception in default function: {}", - obj_name!(ob_type!(self.ptr)) - )) - } else { - let res = SerializePyObject::new( - default_obj, - self.opts, - self.default_calls + 1, - self.recursion, - self.default, - ) - .serialize(serializer); - ffi!(Py_DECREF(default_obj)); - res - } - } - None => err!(format_args!( - "Type is not JSON serializable: {}", - obj_name!(ob_type!(self.ptr)) - )), - } - } -} diff --git a/src/deserialize/cache.rs b/src/deserialize/cache.rs new file mode 100644 index 0000000000000000000000000000000000000000..3376f44bf758ba01710f18c40227c9e23f9756cd_c3JjL2Rlc2VyaWFsaXplL2NhY2hlLnJz --- /dev/null +++ b/src/deserialize/cache.rs @@ -0,0 +1,41 @@ +// SPDX-License-Identifier: (Apache-2.0 OR MIT) + +use associative_cache::replacement::RoundRobinReplacement; +use associative_cache::*; +use once_cell::unsync::OnceCell; +use std::os::raw::c_void; + +#[derive(Clone)] +pub struct CachedKey { + ptr: *mut c_void, + hash: pyo3::ffi::Py_hash_t, +} + +unsafe impl Send for CachedKey {} +unsafe impl Sync for CachedKey {} + +impl CachedKey { + pub fn new(ptr: *mut pyo3::ffi::PyObject, hash: pyo3::ffi::Py_hash_t) -> CachedKey { + CachedKey { + ptr: ptr as *mut c_void, + hash: hash, + } + } + + pub fn get(&mut self) -> (*mut pyo3::ffi::PyObject, pyo3::ffi::Py_hash_t) { + let ptr = self.ptr as *mut pyo3::ffi::PyObject; + ffi!(Py_INCREF(ptr)); + (ptr, self.hash) + } +} + +impl Drop for CachedKey { + fn drop(&mut self) { + ffi!(Py_DECREF(self.ptr as *mut pyo3::ffi::PyObject)); + } +} + +pub type KeyMap = + AssociativeCache<u64, CachedKey, Capacity512, HashDirectMapped, RoundRobinReplacement>; + +pub static mut KEY_MAP: OnceCell<KeyMap> = OnceCell::new(); diff --git a/src/deserialize/decode.rs b/src/deserialize/decode.rs new file mode 100644 index 0000000000000000000000000000000000000000..3376f44bf758ba01710f18c40227c9e23f9756cd_c3JjL2Rlc2VyaWFsaXplL2RlY29kZS5ycw== --- /dev/null +++ b/src/deserialize/decode.rs @@ -0,0 +1,206 @@ +// SPDX-License-Identifier: (Apache-2.0 OR MIT) + +use crate::deserialize::cache::*; +use crate::exc::*; +use crate::ffi::*; +use crate::typeref::*; +use crate::unicode::*; +use serde::de::{self, DeserializeSeed, Deserializer, MapAccess, SeqAccess, Visitor}; +use smallvec::SmallVec; +use std::borrow::Cow; +use std::fmt; +use std::ptr::NonNull; +use wyhash::wyhash; + +pub fn deserialize( + ptr: *mut pyo3::ffi::PyObject, +) -> std::result::Result<NonNull<pyo3::ffi::PyObject>, String> { + let obj_type_ptr = ob_type!(ptr); + let contents: &[u8]; + if is_type!(obj_type_ptr, STR_TYPE) { + let mut str_size: pyo3::ffi::Py_ssize_t = 0; + let uni = read_utf8_from_str(ptr, &mut str_size); + if unlikely!(uni.is_null()) { + return Err(INVALID_STR.to_string()); + } + contents = unsafe { std::slice::from_raw_parts(uni, str_size as usize) }; + } else { + let buffer: *const u8; + let length: usize; + if is_type!(obj_type_ptr, BYTES_TYPE) { + buffer = unsafe { PyBytes_AS_STRING(ptr) as *const u8 }; + length = unsafe { PyBytes_GET_SIZE(ptr) as usize }; + } else if is_type!(obj_type_ptr, BYTEARRAY_TYPE) { + buffer = ffi!(PyByteArray_AsString(ptr)) as *const u8; + length = ffi!(PyByteArray_Size(ptr)) as usize; + } else { + return Err("Input must be bytes, bytearray, or str".to_string()); + } + contents = unsafe { std::slice::from_raw_parts(buffer, length) }; + if encoding_rs::Encoding::utf8_valid_up_to(contents) != length { + return Err(INVALID_STR.to_string()); + } + } + + let data = unsafe { std::str::from_utf8_unchecked(contents) }; + let mut deserializer = serde_json::Deserializer::from_str(data); + + let seed = JsonValue {}; + match seed.deserialize(&mut deserializer) { + Ok(obj) => { + deserializer.end().map_err(|e| e.to_string())?; + Ok(obj) + } + Err(e) => Err(e.to_string()), + } +} + +#[derive(Clone, Copy)] +struct JsonValue; + +impl<'de> DeserializeSeed<'de> for JsonValue { + type Value = NonNull<pyo3::ffi::PyObject>; + + fn deserialize<D>(self, deserializer: D) -> Result<Self::Value, D::Error> + where + D: Deserializer<'de>, + { + deserializer.deserialize_any(self) + } +} + +impl<'de> Visitor<'de> for JsonValue { + type Value = NonNull<pyo3::ffi::PyObject>; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("JSON") + } + + fn visit_unit<E>(self) -> Result<Self::Value, E> { + ffi!(Py_INCREF(NONE)); + Ok(nonnull!(NONE)) + } + + fn visit_bool<E>(self, value: bool) -> Result<Self::Value, E> + where + E: de::Error, + { + if value { + ffi!(Py_INCREF(TRUE)); + Ok(nonnull!(TRUE)) + } else { + ffi!(Py_INCREF(FALSE)); + Ok(nonnull!(FALSE)) + } + } + + fn visit_i64<E>(self, value: i64) -> Result<Self::Value, E> + where + E: de::Error, + { + Ok(nonnull!(ffi!(PyLong_FromLongLong(value)))) + } + + fn visit_u64<E>(self, value: u64) -> Result<Self::Value, E> + where + E: de::Error, + { + Ok(nonnull!(ffi!(PyLong_FromUnsignedLongLong(value)))) + } + + fn visit_f64<E>(self, value: f64) -> Result<Self::Value, E> + where + E: de::Error, + { + Ok(nonnull!(ffi!(PyFloat_FromDouble(value)))) + } + + fn visit_string<E>(self, value: String) -> Result<Self::Value, E> + where + E: de::Error, + { + Ok(nonnull!(str_to_pyobject!(value.as_str()))) + } + + fn visit_borrowed_str<E>(self, value: &str) -> Result<Self::Value, E> + where + E: de::Error, + { + Ok(nonnull!(str_to_pyobject!(value))) + } + + fn visit_str<E>(self, value: &str) -> Result<Self::Value, E> + where + E: de::Error, + { + Ok(nonnull!(str_to_pyobject!(value))) + } + + fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error> + where + A: SeqAccess<'de>, + { + match seq.next_element_seed(self) { + Ok(None) => Ok(nonnull!(ffi!(PyList_New(0)))), + Ok(Some(elem)) => { + let mut elements: SmallVec<[*mut pyo3::ffi::PyObject; 8]> = + SmallVec::with_capacity(8); + elements.push(elem.as_ptr()); + while let Some(elem) = seq.next_element_seed(self)? { + elements.push(elem.as_ptr()); + } + let ptr = ffi!(PyList_New(elements.len() as pyo3::ffi::Py_ssize_t)); + for (i, &obj) in elements.iter().enumerate() { + ffi!(PyList_SET_ITEM(ptr, i as pyo3::ffi::Py_ssize_t, obj)); + } + Ok(nonnull!(ptr)) + } + Err(err) => std::result::Result::Err(err), + } + } + + fn visit_map<A>(self, mut map: A) -> Result<Self::Value, A::Error> + where + A: MapAccess<'de>, + { + let dict_ptr = ffi!(PyDict_New()); + while let Some(key) = map.next_key::<Cow<str>>()? { + let pykey: *mut pyo3::ffi::PyObject; + let pyhash: pyo3::ffi::Py_hash_t; + if likely!(key.len() <= 64) { + let hash = unsafe { wyhash(key.as_bytes(), HASH_SEED) }; + { + let map = unsafe { + KEY_MAP + .get_mut() + .unwrap_or_else(|| unsafe { std::hint::unreachable_unchecked() }) + }; + let entry = map.entry(&hash).or_insert_with( + || hash, + || { + let pyob = str_to_pyobject!(&key); + CachedKey::new(pyob, hash_str(pyob)) + }, + ); + let tmp = entry.get(); + pykey = tmp.0; + pyhash = tmp.1; + } + } else { + pykey = str_to_pyobject!(&key); + pyhash = hash_str(pykey); + } + let value = map.next_value_seed(self)?; + let _ = ffi!(_PyDict_SetItem_KnownHash( + dict_ptr, + pykey, + value.as_ptr(), + pyhash + )); + // counter Py_INCREF in insertdict + ffi!(Py_DECREF(pykey)); + ffi!(Py_DECREF(value.as_ptr())); + } + Ok(nonnull!(dict_ptr)) + } +} diff --git a/src/deserialize/mod.rs b/src/deserialize/mod.rs new file mode 100644 index 0000000000000000000000000000000000000000..3376f44bf758ba01710f18c40227c9e23f9756cd_c3JjL2Rlc2VyaWFsaXplL21vZC5ycw== --- /dev/null +++ b/src/deserialize/mod.rs @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: (Apache-2.0 OR MIT) + +mod cache; +mod decode; + +pub use cache::KeyMap; +pub use cache::KEY_MAP; +pub use decode::deserialize; diff --git a/src/dict.rs b/src/dict.rs deleted file mode 100644 index 25d0d125f5d28c737c3310adb54dcf4ea3b6639c_c3JjL2RpY3QucnM=..0000000000000000000000000000000000000000 --- a/src/dict.rs +++ /dev/null @@ -1,313 +0,0 @@ -// SPDX-License-Identifier: (Apache-2.0 OR MIT) - -use crate::datetime::*; -use crate::encode::pyobject_to_obtype; -use crate::encode::*; -use crate::exc::*; -use crate::opt::*; -use crate::typeref::*; -use crate::unicode::*; -use crate::uuid::*; -use inlinable_string::InlinableString; -use pyo3::ffi::*; -use serde::ser::{Serialize, SerializeMap, Serializer}; -use smallvec::SmallVec; -use std::ptr::NonNull; - -#[repr(C)] -pub struct PyDictObject { - pub ob_refcnt: Py_ssize_t, - pub ob_type: *mut PyTypeObject, - pub ma_used: Py_ssize_t, - pub ma_version_tag: u64, - pub ma_keys: *mut pyo3::ffi::PyObject, - pub ma_values: *mut *mut pyo3::ffi::PyObject, -} - -#[allow(non_snake_case)] -#[inline(always)] -pub unsafe fn PyDict_GET_SIZE(op: *mut PyObject) -> Py_ssize_t { - (*op.cast::<PyDictObject>()).ma_used -} - -pub struct DictSortedKey { - ptr: *mut pyo3::ffi::PyObject, - opts: Opt, - default_calls: u8, - recursion: u8, - default: Option<NonNull<pyo3::ffi::PyObject>>, - len: usize, -} - -impl DictSortedKey { - pub fn new( - ptr: *mut pyo3::ffi::PyObject, - opts: Opt, - default_calls: u8, - recursion: u8, - default: Option<NonNull<pyo3::ffi::PyObject>>, - len: usize, - ) -> Self { - DictSortedKey { - ptr: ptr, - opts: opts, - default_calls: default_calls, - recursion: recursion, - default: default, - len: len, - } - } -} - -impl<'p> Serialize for DictSortedKey { - #[inline(never)] - fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> - where - S: Serializer, - { - let mut items: SmallVec<[(&str, *mut pyo3::ffi::PyObject); 8]> = - SmallVec::with_capacity(self.len); - let mut pos = 0isize; - let mut str_size: pyo3::ffi::Py_ssize_t = 0; - let mut key: *mut pyo3::ffi::PyObject = std::ptr::null_mut(); - let mut value: *mut pyo3::ffi::PyObject = std::ptr::null_mut(); - for _ in 0..=self.len - 1 { - unsafe { - pyo3::ffi::_PyDict_Next( - self.ptr, - &mut pos, - &mut key, - &mut value, - std::ptr::null_mut(), - ) - }; - if unlikely!(ob_type!(key) != STR_TYPE) { - err!("Dict key must be str") - } - let data = read_utf8_from_str(key, &mut str_size); - if unlikely!(data.is_null()) { - err!(INVALID_STR) - } - items.push((str_from_slice!(data, str_size), value)); - } - - items.sort_unstable_by(|a, b| a.0.cmp(b.0)); - - let mut map = serializer.serialize_map(None).unwrap(); - for (key, val) in items.iter() { - map.serialize_entry( - key, - &SerializePyObject::new( - *val, - self.opts, - self.default_calls, - self.recursion + 1, - self.default, - ), - )?; - } - map.end() - } -} - -enum NonStrError { - DatetimeLibraryUnsupported, - IntegerRange, - InvalidStr, - TimeTzinfo, - UnsupportedType, -} - -pub struct NonStrKey { - ptr: *mut pyo3::ffi::PyObject, - opts: Opt, - default_calls: u8, - recursion: u8, - default: Option<NonNull<pyo3::ffi::PyObject>>, - len: usize, -} - -impl NonStrKey { - pub fn new( - ptr: *mut pyo3::ffi::PyObject, - opts: Opt, - default_calls: u8, - recursion: u8, - default: Option<NonNull<pyo3::ffi::PyObject>>, - len: usize, - ) -> Self { - NonStrKey { - ptr: ptr, - opts: opts, - default_calls: default_calls, - recursion: recursion, - default: default, - len: len, - } - } - - fn pyobject_to_string( - &self, - key: *mut pyo3::ffi::PyObject, - opts: crate::opt::Opt, - ) -> Result<InlinableString, NonStrError> { - match pyobject_to_obtype(key, opts) { - ObType::None => Ok(InlinableString::from("null")), - ObType::Bool => { - let key_as_str: &str; - if unsafe { key == TRUE } { - key_as_str = "true"; - } else { - key_as_str = "false"; - } - Ok(InlinableString::from(key_as_str)) - } - ObType::Int => { - let val = ffi!(PyLong_AsLongLong(key)); - if unlikely!(val == -1 && !pyo3::ffi::PyErr_Occurred().is_null()) { - return Err(NonStrError::IntegerRange); - } - Ok(InlinableString::from(itoa::Buffer::new().format(val))) - } - ObType::Float => { - let val = ffi!(PyFloat_AS_DOUBLE(key)); - if !val.is_finite() { - Ok(InlinableString::from("null")) - } else { - Ok(InlinableString::from(ryu::Buffer::new().format_finite(val))) - } - } - ObType::Datetime => { - let mut buf: DateTimeBuffer = smallvec::SmallVec::with_capacity(32); - let dt = DateTime::new(key, opts); - if dt.write_buf(&mut buf).is_err() { - return Err(NonStrError::DatetimeLibraryUnsupported); - } - let key_as_str = str_from_slice!(buf.as_ptr(), buf.len()); - Ok(InlinableString::from(key_as_str)) - } - ObType::Date => { - let mut buf: DateTimeBuffer = smallvec::SmallVec::with_capacity(32); - Date::new(key).write_buf(&mut buf); - let key_as_str = str_from_slice!(buf.as_ptr(), buf.len()); - Ok(InlinableString::from(key_as_str)) - } - ObType::Time => match Time::new(key, opts) { - Ok(val) => { - let mut buf: DateTimeBuffer = smallvec::SmallVec::with_capacity(32); - val.write_buf(&mut buf); - let key_as_str = str_from_slice!(buf.as_ptr(), buf.len()); - Ok(InlinableString::from(key_as_str)) - } - Err(TimeError::HasTimezone) => Err(NonStrError::TimeTzinfo), - }, - ObType::Uuid => { - let mut buf: UUIDBuffer = smallvec::SmallVec::with_capacity(64); - UUID::new(key).write_buf(&mut buf); - let key_as_str = str_from_slice!(buf.as_ptr(), buf.len()); - Ok(InlinableString::from(key_as_str)) - } - ObType::Enum => { - let value = ffi!(PyObject_GetAttr(key, VALUE_STR)); - ffi!(Py_DECREF(value)); - self.pyobject_to_string(value, opts) - } - ObType::Str => { - // because of ObType::Enum - let mut str_size: pyo3::ffi::Py_ssize_t = 0; - let uni = read_utf8_from_str(key, &mut str_size); - if unlikely!(uni.is_null()) { - Err(NonStrError::InvalidStr) - } else { - Ok(InlinableString::from(str_from_slice!(uni, str_size))) - } - } - ObType::StrSubclass => { - let mut str_size: pyo3::ffi::Py_ssize_t = 0; - let uni = ffi!(PyUnicode_AsUTF8AndSize(key, &mut str_size)) as *const u8; - if unlikely!(uni.is_null()) { - Err(NonStrError::InvalidStr) - } else { - Ok(InlinableString::from(str_from_slice!(uni, str_size))) - } - } - ObType::Tuple - | ObType::Array - | ObType::Dict - | ObType::List - | ObType::Dataclass - | ObType::Unknown => Err(NonStrError::UnsupportedType), - } - } -} - -impl<'p> Serialize for NonStrKey { - #[inline(never)] - fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> - where - S: Serializer, - { - let mut items: SmallVec<[(InlinableString, *mut pyo3::ffi::PyObject); 8]> = - SmallVec::with_capacity(self.len); - let mut pos = 0isize; - let mut str_size: pyo3::ffi::Py_ssize_t = 0; - let mut key: *mut pyo3::ffi::PyObject = std::ptr::null_mut(); - let mut value: *mut pyo3::ffi::PyObject = std::ptr::null_mut(); - let opts = self.opts & NOT_PASSTHROUGH; - for _ in 0..=self.len - 1 { - unsafe { - pyo3::ffi::_PyDict_Next( - self.ptr, - &mut pos, - &mut key, - &mut value, - std::ptr::null_mut(), - ) - }; - if is_type!(ob_type!(key), STR_TYPE) { - let data = read_utf8_from_str(key, &mut str_size); - if unlikely!(data.is_null()) { - err!(INVALID_STR) - } - items.push(( - InlinableString::from(str_from_slice!(data, str_size)), - value, - )); - } else { - match self.pyobject_to_string(key, opts) { - Ok(key_as_str) => items.push((key_as_str, value)), - Err(NonStrError::TimeTzinfo) => err!(TIME_HAS_TZINFO), - Err(NonStrError::IntegerRange) => { - err!("Dict integer key must be within 64-bit range") - } - Err(NonStrError::DatetimeLibraryUnsupported) => { - err!(DATETIME_LIBRARY_UNSUPPORTED) - } - Err(NonStrError::InvalidStr) => err!(INVALID_STR), - Err(NonStrError::UnsupportedType) => { - err!("Dict key must a type serializable with OPT_NON_STR_KEYS") - } - } - } - } - - if opts & SORT_KEYS != 0 { - items.sort_unstable_by(|a, b| a.0.cmp(&b.0)); - } - - let mut map = serializer.serialize_map(None).unwrap(); - for (key, val) in items.iter() { - map.serialize_entry( - str_from_slice!(key.as_ptr(), key.len()), - &SerializePyObject::new( - *val, - self.opts, - self.default_calls, - self.recursion + 1, - self.default, - ), - )?; - } - map.end() - } -} diff --git a/src/encode.rs b/src/encode.rs deleted file mode 100644 index 25d0d125f5d28c737c3310adb54dcf4ea3b6639c_c3JjL2VuY29kZS5ycw==..0000000000000000000000000000000000000000 --- a/src/encode.rs +++ /dev/null @@ -1,459 +0,0 @@ -// SPDX-License-Identifier: (Apache-2.0 OR MIT) - -use crate::array::*; -use crate::dataclass::*; -use crate::datetime::*; -use crate::default::*; -use crate::dict::*; -use crate::exc::*; -use crate::ffi::*; -use crate::iter::*; -use crate::opt::*; -use crate::typeref::*; -use crate::unicode::*; -use crate::uuid::*; -use crate::writer::*; -use serde::ser::{Serialize, SerializeMap, SerializeSeq, Serializer}; -use std::io::Write; -use std::ptr::NonNull; - -// https://tools.ietf.org/html/rfc7159#section-6 -// "[-(2**53)+1, (2**53)-1]" -const STRICT_INT_MIN: i64 = -9007199254740991; -const STRICT_INT_MAX: i64 = 9007199254740991; - -pub const RECURSION_LIMIT: u8 = 255; - -pub fn serialize( - ptr: *mut pyo3::ffi::PyObject, - default: Option<NonNull<pyo3::ffi::PyObject>>, - opts: Opt, -) -> Result<NonNull<pyo3::ffi::PyObject>, String> { - let mut buf = BytesWriter::new(); - let obtype = pyobject_to_obtype(ptr, opts); - match obtype { - ObType::List | ObType::Dict | ObType::Dataclass | ObType::Array => { - buf.resize(1024); - } - _ => {} - } - buf.prefetch(); - let obj = SerializePyObject::with_obtype(ptr, obtype, opts, 0, 0, default); - let res; - if likely!(opts & INDENT_2 != INDENT_2) { - res = serde_json::to_writer(&mut buf, &obj); - } else { - res = serde_json::to_writer_pretty(&mut buf, &obj); - } - match res { - Ok(_) => { - if opts & APPEND_NEWLINE != 0 { - buf.write(b"\n").unwrap(); - } - Ok(buf.finish()) - } - Err(err) => { - ffi!(_Py_Dealloc(buf.finish().as_ptr())); - Err(err.to_string()) - } - } -} - -#[derive(Copy, Clone)] -pub enum ObType { - Str, - Int, - Bool, - None, - Float, - List, - Dict, - Datetime, - Date, - Time, - Tuple, - Uuid, - Dataclass, - Array, - Enum, - StrSubclass, - Unknown, -} - -#[inline] -pub fn pyobject_to_obtype(obj: *mut pyo3::ffi::PyObject, opts: Opt) -> ObType { - unsafe { - let ob_type = ob_type!(obj); - if ob_type == STR_TYPE { - ObType::Str - } else if ob_type == INT_TYPE { - ObType::Int - } else if ob_type == BOOL_TYPE { - ObType::Bool - } else if ob_type == NONE_TYPE { - ObType::None - } else if ob_type == FLOAT_TYPE { - ObType::Float - } else if ob_type == LIST_TYPE { - ObType::List - } else if ob_type == DICT_TYPE { - ObType::Dict - } else if ob_type == DATETIME_TYPE && opts & PASSTHROUGH_DATETIME == 0 { - ObType::Datetime - } else { - pyobject_to_obtype_unlikely(obj, opts) - } - } -} - -macro_rules! is_subclass { - ($ob_type:expr, $flag:ident) => { - unsafe { (((*$ob_type).tp_flags & pyo3::ffi::$flag) != 0) } - }; -} - -#[inline(never)] -pub fn pyobject_to_obtype_unlikely(obj: *mut pyo3::ffi::PyObject, opts: Opt) -> ObType { - unsafe { - let ob_type = ob_type!(obj); - if ob_type == DATE_TYPE && opts & PASSTHROUGH_DATETIME == 0 { - ObType::Date - } else if ob_type == TIME_TYPE && opts & PASSTHROUGH_DATETIME == 0 { - ObType::Time - } else if ob_type == TUPLE_TYPE { - ObType::Tuple - } else if ob_type == UUID_TYPE { - ObType::Uuid - } else if (*(ob_type as *mut LocalPyTypeObject)).ob_type == ENUM_TYPE { - ObType::Enum - } else if is_subclass!(ob_type, Py_TPFLAGS_UNICODE_SUBCLASS) - && opts & PASSTHROUGH_SUBCLASS == 0 - { - ObType::StrSubclass - } else if is_subclass!(ob_type, Py_TPFLAGS_LONG_SUBCLASS) - && opts & PASSTHROUGH_SUBCLASS == 0 - { - ObType::Int - } else if is_subclass!(ob_type, Py_TPFLAGS_LIST_SUBCLASS) - && opts & PASSTHROUGH_SUBCLASS == 0 - { - ObType::List - } else if is_subclass!(ob_type, Py_TPFLAGS_DICT_SUBCLASS) - && opts & PASSTHROUGH_SUBCLASS == 0 - { - ObType::Dict - } else if ffi!(PyDict_Contains((*ob_type).tp_dict, DATACLASS_FIELDS_STR)) == 1 { - ObType::Dataclass - } else if opts & SERIALIZE_NUMPY != 0 - && ARRAY_TYPE.is_some() - && ob_type == ARRAY_TYPE.unwrap().as_ptr() - { - ObType::Array - } else { - ObType::Unknown - } - } -} - -pub struct SerializePyObject { - ptr: *mut pyo3::ffi::PyObject, - obtype: ObType, - opts: Opt, - default_calls: u8, - recursion: u8, - default: Option<NonNull<pyo3::ffi::PyObject>>, -} - -impl SerializePyObject { - #[inline] - pub fn new( - ptr: *mut pyo3::ffi::PyObject, - opts: Opt, - default_calls: u8, - recursion: u8, - default: Option<NonNull<pyo3::ffi::PyObject>>, - ) -> Self { - SerializePyObject { - ptr: ptr, - obtype: pyobject_to_obtype(ptr, opts), - opts: opts, - default_calls: default_calls, - recursion: recursion, - default: default, - } - } - - #[inline] - pub fn with_obtype( - ptr: *mut pyo3::ffi::PyObject, - obtype: ObType, - opts: Opt, - default_calls: u8, - recursion: u8, - default: Option<NonNull<pyo3::ffi::PyObject>>, - ) -> Self { - SerializePyObject { - ptr: ptr, - obtype: obtype, - opts: opts, - default_calls: default_calls, - recursion: recursion, - default: default, - } - } -} - -impl<'p> Serialize for SerializePyObject { - fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> - where - S: Serializer, - { - match self.obtype { - ObType::Str => { - let mut str_size: pyo3::ffi::Py_ssize_t = 0; - let uni = read_utf8_from_str(self.ptr, &mut str_size); - if unlikely!(uni.is_null()) { - err!(INVALID_STR) - } - serializer.serialize_str(str_from_slice!(uni, str_size)) - } - ObType::StrSubclass => { - let mut str_size: pyo3::ffi::Py_ssize_t = 0; - let uni = ffi!(PyUnicode_AsUTF8AndSize(self.ptr, &mut str_size)) as *const u8; - if unlikely!(uni.is_null()) { - err!(INVALID_STR) - } - serializer.serialize_str(str_from_slice!(uni, str_size)) - } - ObType::Int => { - let val = ffi!(PyLong_AsLongLong(self.ptr)); - if unlikely!(val == -1) && !ffi!(PyErr_Occurred()).is_null() { - err!("Integer exceeds 64-bit range") - } else if unlikely!(self.opts & STRICT_INTEGER != 0) - && (val > STRICT_INT_MAX || val < STRICT_INT_MIN) - { - err!("Integer exceeds 53-bit range") - } - serializer.serialize_i64(val) - } - ObType::None => serializer.serialize_unit(), - ObType::Float => serializer.serialize_f64(ffi!(PyFloat_AS_DOUBLE(self.ptr))), - ObType::Bool => serializer.serialize_bool(unsafe { self.ptr == TRUE }), - ObType::Datetime => DateTime::new(self.ptr, self.opts).serialize(serializer), - ObType::Date => Date::new(self.ptr).serialize(serializer), - ObType::Time => match Time::new(self.ptr, self.opts) { - Ok(val) => val.serialize(serializer), - Err(TimeError::HasTimezone) => err!(TIME_HAS_TZINFO), - }, - ObType::Uuid => UUID::new(self.ptr).serialize(serializer), - ObType::Dict => { - if unlikely!(self.recursion == RECURSION_LIMIT) { - err!(RECURSION_LIMIT_REACHED) - } - let len = unsafe { PyDict_GET_SIZE(self.ptr) as usize }; - if unlikely!(len == 0) { - serializer.serialize_map(Some(0)).unwrap().end() - } else if likely!(self.opts & SORT_OR_NON_STR_KEYS == 0) { - let mut map = serializer.serialize_map(None).unwrap(); - let mut pos = 0isize; - let mut str_size: pyo3::ffi::Py_ssize_t = 0; - let mut key: *mut pyo3::ffi::PyObject = std::ptr::null_mut(); - let mut value: *mut pyo3::ffi::PyObject = std::ptr::null_mut(); - for _ in 0..=len - 1 { - unsafe { - pyo3::ffi::_PyDict_Next( - self.ptr, - &mut pos, - &mut key, - &mut value, - std::ptr::null_mut(), - ) - }; - if unlikely!(ob_type!(key) != STR_TYPE) { - err!(KEY_MUST_BE_STR) - } - { - let data = read_utf8_from_str(key, &mut str_size); - if unlikely!(data.is_null()) { - err!(INVALID_STR) - } - map.serialize_key(str_from_slice!(data, str_size)).unwrap(); - } - - map.serialize_value(&SerializePyObject::new( - value, - self.opts, - self.default_calls, - self.recursion + 1, - self.default, - ))?; - } - map.end() - } else if self.opts & NON_STR_KEYS != 0 { - NonStrKey::new( - self.ptr, - self.opts, - self.default_calls, - self.recursion, - self.default, - len, - ) - .serialize(serializer) - } else { - DictSortedKey::new( - self.ptr, - self.opts, - self.default_calls, - self.recursion, - self.default, - len, - ) - .serialize(serializer) - } - } - ObType::List => { - if unlikely!(self.recursion == RECURSION_LIMIT) { - err!(RECURSION_LIMIT_REACHED) - } - let len = ffi!(PyList_GET_SIZE(self.ptr)) as usize; - if len == 0 { - serializer.serialize_seq(Some(0)).unwrap().end() - } else { - let mut type_ptr = std::ptr::null_mut(); - let mut ob_type = ObType::Str; - - let mut seq = serializer.serialize_seq(None).unwrap(); - for i in 0..=len - 1 { - let elem = unsafe { - *(*(self.ptr as *mut pyo3::ffi::PyListObject)) - .ob_item - .offset(i as isize) - }; - if ob_type!(elem) != type_ptr { - type_ptr = ob_type!(elem); - ob_type = pyobject_to_obtype(elem, self.opts); - } - seq.serialize_element(&SerializePyObject::with_obtype( - elem, - ob_type, - self.opts, - self.default_calls, - self.recursion + 1, - self.default, - ))?; - } - seq.end() - } - } - ObType::Tuple => { - let mut seq = serializer.serialize_seq(None).unwrap(); - for elem in PyTupleIterator::new(self.ptr) { - seq.serialize_element(&SerializePyObject::new( - elem.as_ptr(), - self.opts, - self.default_calls, - self.recursion + 1, - self.default, - ))? - } - seq.end() - } - ObType::Dataclass => { - if unlikely!(self.recursion == RECURSION_LIMIT) { - err!(RECURSION_LIMIT_REACHED) - } - let dict = ffi!(PyObject_GetAttr(self.ptr, DICT_STR)); - if !dict.is_null() { - ffi!(Py_DECREF(dict)); - let len = unsafe { PyDict_GET_SIZE(dict) as usize }; - if unlikely!(len == 0) { - return serializer.serialize_map(Some(0)).unwrap().end(); - } - let mut map = serializer.serialize_map(None).unwrap(); - let mut pos = 0isize; - let mut str_size: pyo3::ffi::Py_ssize_t = 0; - let mut key: *mut pyo3::ffi::PyObject = std::ptr::null_mut(); - let mut value: *mut pyo3::ffi::PyObject = std::ptr::null_mut(); - for _ in 0..=len - 1 { - unsafe { - pyo3::ffi::_PyDict_Next( - dict, - &mut pos, - &mut key, - &mut value, - std::ptr::null_mut(), - ) - }; - if unlikely!(ob_type!(key) != STR_TYPE) { - err!(KEY_MUST_BE_STR) - } - { - let data = read_utf8_from_str(key, &mut str_size); - if unlikely!(data.is_null()) { - err!(INVALID_STR) - } - let key_as_str = str_from_slice!(data, str_size); - if unlikely!(key_as_str.as_bytes()[0] == b'_') { - continue; - } - map.serialize_key(key_as_str).unwrap(); - } - - map.serialize_value(&SerializePyObject::new( - value, - self.opts, - self.default_calls, - self.recursion + 1, - self.default, - ))?; - } - map.end() - } else { - unsafe { pyo3::ffi::PyErr_Clear() }; - DataclassSerializer::new( - self.ptr, - self.opts, - self.default_calls, - self.recursion, - self.default, - ) - .serialize(serializer) - } - } - ObType::Enum => { - let value = ffi!(PyObject_GetAttr(self.ptr, VALUE_STR)); - ffi!(Py_DECREF(value)); - SerializePyObject::new( - value, - self.opts, - self.default_calls, - self.recursion, - self.default, - ) - .serialize(serializer) - } - ObType::Array => match PyArray::new(self.ptr) { - Ok(val) => val.serialize(serializer), - Err(PyArrayError::Malformed) => err!("numpy array is malformed"), - Err(PyArrayError::NotContiguous) | Err(PyArrayError::UnsupportedDataType) => { - DefaultSerializer::new( - self.ptr, - self.opts, - self.default_calls, - self.recursion, - self.default, - ) - .serialize(serializer) - } - }, - ObType::Unknown => DefaultSerializer::new( - self.ptr, - self.opts, - self.default_calls, - self.recursion, - self.default, - ) - .serialize(serializer), - } - } -} diff --git a/src/ffi.rs b/src/ffi.rs index 25d0d125f5d28c737c3310adb54dcf4ea3b6639c_c3JjL2ZmaS5ycw==..3376f44bf758ba01710f18c40227c9e23f9756cd_c3JjL2ZmaS5ycw== 100644 --- a/src/ffi.rs +++ b/src/ffi.rs @@ -19,3 +19,40 @@ pub tp_name: *const c_char, // ... } + +#[repr(C)] +pub struct PyDictObject { + pub ob_refcnt: Py_ssize_t, + pub ob_type: *mut PyTypeObject, + pub ma_used: Py_ssize_t, + pub ma_version_tag: u64, + pub ma_keys: *mut pyo3::ffi::PyObject, + pub ma_values: *mut *mut pyo3::ffi::PyObject, +} + +#[allow(non_snake_case)] +#[inline(always)] +pub unsafe fn PyDict_GET_SIZE(op: *mut PyObject) -> Py_ssize_t { + (*op.cast::<PyDictObject>()).ma_used +} + +#[repr(C)] +pub struct PyBytesObject { + pub ob_refcnt: Py_ssize_t, + pub ob_type: *mut PyTypeObject, + pub ob_size: Py_ssize_t, + pub ob_shash: Py_hash_t, + pub ob_sval: [c_char; 1], +} + +#[allow(non_snake_case)] +#[inline(always)] +pub unsafe fn PyBytes_AS_STRING(op: *mut PyObject) -> *const c_char { + &(*op.cast::<PyBytesObject>()).ob_sval as *const c_char +} + +#[allow(non_snake_case)] +#[inline(always)] +pub unsafe fn PyBytes_GET_SIZE(op: *mut PyObject) -> Py_ssize_t { + (*op.cast::<PyBytesObject>()).ob_size +} diff --git a/src/iter.rs b/src/iter.rs deleted file mode 100644 index 25d0d125f5d28c737c3310adb54dcf4ea3b6639c_c3JjL2l0ZXIucnM=..0000000000000000000000000000000000000000 --- a/src/iter.rs +++ /dev/null @@ -1,34 +0,0 @@ -// SPDX-License-Identifier: (Apache-2.0 OR MIT) - -use std::ptr::NonNull; - -pub struct PyTupleIterator { - list: *mut pyo3::ffi::PyObject, - len: isize, - idx: isize, -} - -impl PyTupleIterator { - pub fn new(list: *mut pyo3::ffi::PyObject) -> Self { - PyTupleIterator { - list: list, - len: ffi!(PyTuple_GET_SIZE(list)), - idx: 0, - } - } -} - -impl Iterator for PyTupleIterator { - type Item = NonNull<pyo3::ffi::PyObject>; - - #[inline] - fn next(&mut self) -> Option<NonNull<pyo3::ffi::PyObject>> { - if self.len == self.idx { - None - } else { - let item = nonnull!(ffi!(PyTuple_GET_ITEM(self.list, self.idx as isize))); - self.idx += 1; - Some(item) - } - } -} diff --git a/src/lib.rs b/src/lib.rs index 25d0d125f5d28c737c3310adb54dcf4ea3b6639c_c3JjL2xpYi5ycw==..3376f44bf758ba01710f18c40227c9e23f9756cd_c3JjL2xpYi5ycw== 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -7,13 +7,6 @@ #[macro_use] mod util; -mod array; -mod bytes; -mod dataclass; -mod datetime; -mod decode; -mod default; -mod dict; -mod encode; +mod deserialize; mod exc; mod ffi; @@ -18,5 +11,3 @@ mod exc; mod ffi; -mod iter; -mod module; mod opt; @@ -22,3 +13,4 @@ mod opt; +mod serialize; mod typeref; mod unicode; @@ -23,5 +15,144 @@ mod typeref; mod unicode; -mod uuid; -mod writer; + +use pyo3::ffi::*; +use std::borrow::Cow; +use std::os::raw::c_char; +use std::ptr::NonNull; + +const DUMPS_DOC: &str = "dumps(obj, /, default, option)\n--\n\nSerialize Python objects to JSON.\0"; +const LOADS_DOC: &str = "loads(obj, /)\n--\n\nDeserialize JSON to Python objects.\0"; + +macro_rules! opt { + ($mptr:expr, $name:expr, $opt:expr) => { + unsafe { + #[cfg(not(target_os = "windows"))] + PyModule_AddIntConstant($mptr, $name.as_ptr() as *const c_char, $opt as i64); + #[cfg(target_os = "windows")] + PyModule_AddIntConstant($mptr, $name.as_ptr() as *const c_char, $opt as i32); + } + }; +} + +#[allow(non_snake_case)] +#[no_mangle] +pub unsafe extern "C" fn PyInit_orjson() -> *mut PyObject { + let mut init = PyModuleDef_INIT; + init.m_name = "orjson\0".as_ptr() as *const c_char; + let mptr = PyModule_Create(Box::into_raw(Box::new(init))); + + let version = env!("CARGO_PKG_VERSION"); + unsafe { + PyModule_AddObject( + mptr, + "__version__\0".as_ptr() as *const c_char, + PyUnicode_FromStringAndSize(version.as_ptr() as *const c_char, version.len() as isize), + ) + }; + + let wrapped_dumps: PyMethodDef; + + #[cfg(python37)] + { + wrapped_dumps = PyMethodDef { + ml_name: "dumps\0".as_ptr() as *const c_char, + ml_meth: Some(unsafe { + std::mem::transmute::<crate::ffi::_PyCFunctionFastWithKeywords, PyCFunction>(dumps) + }), + ml_flags: pyo3::ffi::METH_FASTCALL | METH_KEYWORDS, + ml_doc: DUMPS_DOC.as_ptr() as *const c_char, + }; + } + + #[cfg(not(python37))] + { + wrapped_dumps = PyMethodDef { + ml_name: "dumps\0".as_ptr() as *const c_char, + ml_meth: Some(unsafe { + std::mem::transmute::<PyCFunctionWithKeywords, PyCFunction>(dumps) + }), + ml_flags: METH_VARARGS | METH_KEYWORDS, + ml_doc: DUMPS_DOC.as_ptr() as *const c_char, + }; + } + + unsafe { + PyModule_AddObject( + mptr, + "dumps\0".as_ptr() as *const c_char, + PyCFunction_New(Box::into_raw(Box::new(wrapped_dumps)), std::ptr::null_mut()), + ) + }; + + let wrapped_loads = PyMethodDef { + ml_name: "loads\0".as_ptr() as *const c_char, + ml_meth: Some(loads), + ml_flags: METH_O, + ml_doc: LOADS_DOC.as_ptr() as *const c_char, + }; + + unsafe { + PyModule_AddObject( + mptr, + "loads\0".as_ptr() as *const c_char, + PyCFunction_New(Box::into_raw(Box::new(wrapped_loads)), std::ptr::null_mut()), + ) + }; + + opt!(mptr, "OPT_APPEND_NEWLINE\0", opt::APPEND_NEWLINE); + opt!(mptr, "OPT_INDENT_2\0", opt::INDENT_2); + opt!(mptr, "OPT_NAIVE_UTC\0", opt::NAIVE_UTC); + opt!(mptr, "OPT_NON_STR_KEYS\0", opt::NON_STR_KEYS); + opt!(mptr, "OPT_OMIT_MICROSECONDS\0", opt::OMIT_MICROSECONDS); + opt!( + mptr, + "OPT_PASSTHROUGH_DATETIME\0", + opt::PASSTHROUGH_DATETIME + ); + opt!( + mptr, + "OPT_PASSTHROUGH_SUBCLASS\0", + opt::PASSTHROUGH_SUBCLASS + ); + opt!(mptr, "OPT_SERIALIZE_DATACLASS\0", opt::SERIALIZE_DATACLASS); + opt!(mptr, "OPT_SERIALIZE_NUMPY\0", opt::SERIALIZE_NUMPY); + opt!(mptr, "OPT_SERIALIZE_UUID\0", opt::SERIALIZE_UUID); + opt!(mptr, "OPT_SORT_KEYS\0", opt::SORT_KEYS); + opt!(mptr, "OPT_STRICT_INTEGER\0", opt::STRICT_INTEGER); + opt!(mptr, "OPT_UTC_Z\0", opt::UTC_Z); + + typeref::init_typerefs(); + + unsafe { + PyModule_AddObject( + mptr, + "JSONDecodeError\0".as_ptr() as *const c_char, + typeref::JsonDecodeError, + ); + PyModule_AddObject( + mptr, + "JSONEncodeError\0".as_ptr() as *const c_char, + typeref::JsonEncodeError, + ) + }; + + mptr +} + +#[cold] +fn raise_loads_exception(msg: Cow<str>) -> *mut PyObject { + unsafe { + let err_msg = + PyUnicode_FromStringAndSize(msg.as_ptr() as *const c_char, msg.len() as isize); + let args = PyTuple_New(3); + let doc = PyUnicode_New(0, 255); + let pos = PyLong_FromLongLong(0); + PyTuple_SET_ITEM(args, 0, err_msg); + PyTuple_SET_ITEM(args, 1, doc); + PyTuple_SET_ITEM(args, 2, pos); + PyErr_SetObject(typeref::JsonDecodeError, args); + Py_DECREF(args); + }; + std::ptr::null_mut() +} @@ -27,4 +158,155 @@ -pub use module::dumps; -pub use module::loads; -pub use module::PyInit_orjson; +#[cold] +fn raise_dumps_exception(msg: Cow<str>) -> *mut PyObject { + unsafe { + let err_msg = + PyUnicode_FromStringAndSize(msg.as_ptr() as *const c_char, msg.len() as isize); + PyErr_SetObject(typeref::JsonEncodeError, err_msg); + Py_DECREF(err_msg); + }; + std::ptr::null_mut() +} + +#[no_mangle] +pub unsafe extern "C" fn loads(_self: *mut PyObject, obj: *mut PyObject) -> *mut PyObject { + match crate::deserialize::deserialize(obj) { + Ok(val) => val.as_ptr(), + Err(err) => raise_loads_exception(Cow::Owned(err)), + } +} + +#[cfg(python37)] +#[no_mangle] +pub unsafe extern "C" fn dumps( + _self: *mut PyObject, + args: *const *mut PyObject, + nargs: Py_ssize_t, + kwnames: *mut PyObject, +) -> *mut PyObject { + let mut default: Option<NonNull<PyObject>> = None; + let mut optsptr: Option<NonNull<PyObject>> = None; + + let num_args = pyo3::ffi::PyVectorcall_NARGS(nargs as isize); + if unlikely!(num_args == 0) { + return raise_dumps_exception(Cow::Borrowed( + "dumps() missing 1 required positional argument: 'obj'", + )); + } + if num_args & 2 == 2 { + default = Some(NonNull::new_unchecked(*args.offset(1))); + } + if num_args & 3 == 3 { + optsptr = Some(NonNull::new_unchecked(*args.offset(2))); + } + if !kwnames.is_null() { + for i in 0..=PyTuple_GET_SIZE(kwnames) - 1 { + let arg = PyTuple_GET_ITEM(kwnames, i as Py_ssize_t); + if arg == typeref::DEFAULT { + if unlikely!(num_args & 2 == 2) { + return raise_dumps_exception(Cow::Borrowed( + "dumps() got multiple values for argument: 'default'", + )); + } + default = Some(NonNull::new_unchecked(*args.offset(num_args + i))); + } else if arg == typeref::OPTION { + if unlikely!(num_args & 3 == 3) { + return raise_dumps_exception(Cow::Borrowed( + "dumps() got multiple values for argument: 'option'", + )); + } + optsptr = Some(NonNull::new_unchecked(*args.offset(num_args + i))); + } else { + return raise_dumps_exception(Cow::Borrowed( + "dumps() got an unexpected keyword argument", + )); + } + } + } + + let mut optsbits: i32 = 0; + if optsptr.is_some() { + if (*optsptr.unwrap().as_ptr()).ob_type != typeref::INT_TYPE { + return raise_dumps_exception(Cow::Borrowed("Invalid opts")); + } + optsbits = PyLong_AsLong(optsptr.unwrap().as_ptr()) as i32; + if optsbits < 0 || optsbits > opt::MAX_OPT { + return raise_dumps_exception(Cow::Borrowed("Invalid opts")); + } + } + + match crate::serialize::serialize(*args, default, optsbits as opt::Opt) { + Ok(val) => val.as_ptr(), + Err(err) => raise_dumps_exception(Cow::Borrowed(&err)), + } +} + +#[cfg(not(python37))] +#[no_mangle] +pub unsafe extern "C" fn dumps( + _self: *mut PyObject, + args: *mut PyObject, + kwds: *mut PyObject, +) -> *mut PyObject { + let mut default: Option<NonNull<PyObject>> = None; + let mut optsptr: Option<NonNull<PyObject>> = None; + + let num_args = PyTuple_GET_SIZE(args); + if unlikely!(num_args == 0) { + return raise_dumps_exception(Cow::Borrowed( + "dumps() missing 1 required positional argument: 'obj'", + )); + } + if num_args & 2 == 2 { + default = Some(NonNull::new_unchecked(PyTuple_GET_ITEM(args, 1))); + } + if num_args & 3 == 3 { + optsptr = Some(NonNull::new_unchecked(PyTuple_GET_ITEM(args, 2))); + } + + if !kwds.is_null() { + let len = unsafe { crate::ffi::PyDict_GET_SIZE(kwds) as usize }; + let mut pos = 0isize; + let mut arg: *mut PyObject = std::ptr::null_mut(); + let mut val: *mut PyObject = std::ptr::null_mut(); + for _ in 0..=len - 1 { + unsafe { _PyDict_Next(kwds, &mut pos, &mut arg, &mut val, std::ptr::null_mut()) }; + if arg == typeref::DEFAULT { + if unlikely!(num_args & 2 == 2) { + return raise_dumps_exception(Cow::Borrowed( + "dumps() got multiple values for argument: 'default'", + )); + } + default = Some(NonNull::new_unchecked(val)); + } else if arg == typeref::OPTION { + if unlikely!(num_args & 3 == 3) { + return raise_dumps_exception(Cow::Borrowed( + "dumps() got multiple values for argument: 'option'", + )); + } + optsptr = Some(NonNull::new_unchecked(val)); + } else if arg.is_null() { + break; + } else { + return raise_dumps_exception(Cow::Borrowed( + "dumps() got an unexpected keyword argument", + )); + } + } + } + + let mut optsbits: i32 = 0; + if optsptr.is_some() { + if (*optsptr.unwrap().as_ptr()).ob_type != typeref::INT_TYPE { + return raise_dumps_exception(Cow::Borrowed("Invalid opts")); + } + optsbits = PyLong_AsLong(optsptr.unwrap().as_ptr()) as i32; + if optsbits < 0 || optsbits > opt::MAX_OPT { + return raise_dumps_exception(Cow::Borrowed("Invalid opts")); + } + } + + match crate::serialize::serialize(PyTuple_GET_ITEM(args, 0), default, optsbits as opt::Opt) { + Ok(val) => val.as_ptr(), + Err(err) => raise_dumps_exception(Cow::Owned(err)), + } +} diff --git a/src/module.rs b/src/module.rs deleted file mode 100644 index 25d0d125f5d28c737c3310adb54dcf4ea3b6639c_c3JjL21vZHVsZS5ycw==..0000000000000000000000000000000000000000 --- a/src/module.rs +++ /dev/null @@ -1,302 +0,0 @@ -// SPDX-License-Identifier: (Apache-2.0 OR MIT) - -use crate::decode; -use crate::encode; -use crate::opt; -use crate::typeref; - -use pyo3::ffi::*; -use std::borrow::Cow; -use std::os::raw::c_char; -use std::ptr::NonNull; - -const DUMPS_DOC: &str = "dumps(obj, /, default, option)\n--\n\nSerialize Python objects to JSON.\0"; -const LOADS_DOC: &str = "loads(obj, /)\n--\n\nDeserialize JSON to Python objects.\0"; - -macro_rules! opt { - ($mptr:expr, $name:expr, $opt:expr) => { - unsafe { - #[cfg(not(target_os = "windows"))] - PyModule_AddIntConstant($mptr, $name.as_ptr() as *const c_char, $opt as i64); - #[cfg(target_os = "windows")] - PyModule_AddIntConstant($mptr, $name.as_ptr() as *const c_char, $opt as i32); - } - }; -} - -#[allow(non_snake_case)] -#[no_mangle] -pub unsafe extern "C" fn PyInit_orjson() -> *mut PyObject { - let mut init = PyModuleDef_INIT; - init.m_name = "orjson\0".as_ptr() as *const c_char; - let mptr = PyModule_Create(Box::into_raw(Box::new(init))); - - let version = env!("CARGO_PKG_VERSION"); - unsafe { - PyModule_AddObject( - mptr, - "__version__\0".as_ptr() as *const c_char, - PyUnicode_FromStringAndSize(version.as_ptr() as *const c_char, version.len() as isize), - ) - }; - - let wrapped_dumps: PyMethodDef; - - #[cfg(python37)] - { - wrapped_dumps = PyMethodDef { - ml_name: "dumps\0".as_ptr() as *const c_char, - ml_meth: Some(unsafe { - std::mem::transmute::<crate::ffi::_PyCFunctionFastWithKeywords, PyCFunction>(dumps) - }), - ml_flags: pyo3::ffi::METH_FASTCALL | METH_KEYWORDS, - ml_doc: DUMPS_DOC.as_ptr() as *const c_char, - }; - } - - #[cfg(not(python37))] - { - wrapped_dumps = PyMethodDef { - ml_name: "dumps\0".as_ptr() as *const c_char, - ml_meth: Some(unsafe { - std::mem::transmute::<PyCFunctionWithKeywords, PyCFunction>(dumps) - }), - ml_flags: METH_VARARGS | METH_KEYWORDS, - ml_doc: DUMPS_DOC.as_ptr() as *const c_char, - }; - } - - unsafe { - PyModule_AddObject( - mptr, - "dumps\0".as_ptr() as *const c_char, - PyCFunction_New(Box::into_raw(Box::new(wrapped_dumps)), std::ptr::null_mut()), - ) - }; - - let wrapped_loads = PyMethodDef { - ml_name: "loads\0".as_ptr() as *const c_char, - ml_meth: Some(loads), - ml_flags: METH_O, - ml_doc: LOADS_DOC.as_ptr() as *const c_char, - }; - - unsafe { - PyModule_AddObject( - mptr, - "loads\0".as_ptr() as *const c_char, - PyCFunction_New(Box::into_raw(Box::new(wrapped_loads)), std::ptr::null_mut()), - ) - }; - - opt!(mptr, "OPT_APPEND_NEWLINE\0", opt::APPEND_NEWLINE); - opt!(mptr, "OPT_INDENT_2\0", opt::INDENT_2); - opt!(mptr, "OPT_NAIVE_UTC\0", opt::NAIVE_UTC); - opt!(mptr, "OPT_NON_STR_KEYS\0", opt::NON_STR_KEYS); - opt!(mptr, "OPT_OMIT_MICROSECONDS\0", opt::OMIT_MICROSECONDS); - opt!( - mptr, - "OPT_PASSTHROUGH_DATETIME\0", - opt::PASSTHROUGH_DATETIME - ); - opt!( - mptr, - "OPT_PASSTHROUGH_SUBCLASS\0", - opt::PASSTHROUGH_SUBCLASS - ); - opt!(mptr, "OPT_SERIALIZE_DATACLASS\0", opt::SERIALIZE_DATACLASS); - opt!(mptr, "OPT_SERIALIZE_NUMPY\0", opt::SERIALIZE_NUMPY); - opt!(mptr, "OPT_SERIALIZE_UUID\0", opt::SERIALIZE_UUID); - opt!(mptr, "OPT_SORT_KEYS\0", opt::SORT_KEYS); - opt!(mptr, "OPT_STRICT_INTEGER\0", opt::STRICT_INTEGER); - opt!(mptr, "OPT_UTC_Z\0", opt::UTC_Z); - - typeref::init_typerefs(); - - unsafe { - PyModule_AddObject( - mptr, - "JSONDecodeError\0".as_ptr() as *const c_char, - typeref::JsonDecodeError, - ); - PyModule_AddObject( - mptr, - "JSONEncodeError\0".as_ptr() as *const c_char, - typeref::JsonEncodeError, - ) - }; - - mptr -} - -#[cold] -fn raise_loads_exception(msg: Cow<str>) -> *mut PyObject { - unsafe { - let err_msg = - PyUnicode_FromStringAndSize(msg.as_ptr() as *const c_char, msg.len() as isize); - let args = PyTuple_New(3); - let doc = PyUnicode_New(0, 255); - let pos = PyLong_FromLongLong(0); - PyTuple_SET_ITEM(args, 0, err_msg); - PyTuple_SET_ITEM(args, 1, doc); - PyTuple_SET_ITEM(args, 2, pos); - PyErr_SetObject(typeref::JsonDecodeError, args); - Py_DECREF(args); - }; - std::ptr::null_mut() -} - -#[cold] -fn raise_dumps_exception(msg: Cow<str>) -> *mut PyObject { - unsafe { - let err_msg = - PyUnicode_FromStringAndSize(msg.as_ptr() as *const c_char, msg.len() as isize); - PyErr_SetObject(typeref::JsonEncodeError, err_msg); - Py_DECREF(err_msg); - }; - std::ptr::null_mut() -} - -#[no_mangle] -pub unsafe extern "C" fn loads(_self: *mut PyObject, obj: *mut PyObject) -> *mut PyObject { - match decode::deserialize(obj) { - Ok(val) => val.as_ptr(), - Err(err) => raise_loads_exception(Cow::Owned(err)), - } -} - -#[cfg(python37)] -#[no_mangle] -pub unsafe extern "C" fn dumps( - _self: *mut PyObject, - args: *const *mut PyObject, - nargs: Py_ssize_t, - kwnames: *mut PyObject, -) -> *mut PyObject { - let mut default: Option<NonNull<PyObject>> = None; - let mut optsptr: Option<NonNull<PyObject>> = None; - - let num_args = pyo3::ffi::PyVectorcall_NARGS(nargs as isize); - if unlikely!(num_args == 0) { - return raise_dumps_exception(Cow::Borrowed( - "dumps() missing 1 required positional argument: 'obj'", - )); - } - if num_args & 2 == 2 { - default = Some(NonNull::new_unchecked(*args.offset(1))); - } - if num_args & 3 == 3 { - optsptr = Some(NonNull::new_unchecked(*args.offset(2))); - } - if !kwnames.is_null() { - for i in 0..=PyTuple_GET_SIZE(kwnames) - 1 { - let arg = PyTuple_GET_ITEM(kwnames, i as Py_ssize_t); - if arg == typeref::DEFAULT { - if unlikely!(num_args & 2 == 2) { - return raise_dumps_exception(Cow::Borrowed( - "dumps() got multiple values for argument: 'default'", - )); - } - default = Some(NonNull::new_unchecked(*args.offset(num_args + i))); - } else if arg == typeref::OPTION { - if unlikely!(num_args & 3 == 3) { - return raise_dumps_exception(Cow::Borrowed( - "dumps() got multiple values for argument: 'option'", - )); - } - optsptr = Some(NonNull::new_unchecked(*args.offset(num_args + i))); - } else { - return raise_dumps_exception(Cow::Borrowed( - "dumps() got an unexpected keyword argument", - )); - } - } - } - - let mut optsbits: i32 = 0; - if optsptr.is_some() { - if (*optsptr.unwrap().as_ptr()).ob_type != typeref::INT_TYPE { - return raise_dumps_exception(Cow::Borrowed("Invalid opts")); - } - optsbits = PyLong_AsLong(optsptr.unwrap().as_ptr()) as i32; - if optsbits < 0 || optsbits > opt::MAX_OPT { - return raise_dumps_exception(Cow::Borrowed("Invalid opts")); - } - } - - match encode::serialize(*args, default, optsbits as opt::Opt) { - Ok(val) => val.as_ptr(), - Err(err) => raise_dumps_exception(Cow::Borrowed(&err)), - } -} - -#[cfg(not(python37))] -#[no_mangle] -pub unsafe extern "C" fn dumps( - _self: *mut PyObject, - args: *mut PyObject, - kwds: *mut PyObject, -) -> *mut PyObject { - let mut default: Option<NonNull<PyObject>> = None; - let mut optsptr: Option<NonNull<PyObject>> = None; - - let num_args = PyTuple_GET_SIZE(args); - if unlikely!(num_args == 0) { - return raise_dumps_exception(Cow::Borrowed( - "dumps() missing 1 required positional argument: 'obj'", - )); - } - if num_args & 2 == 2 { - default = Some(NonNull::new_unchecked(PyTuple_GET_ITEM(args, 1))); - } - if num_args & 3 == 3 { - optsptr = Some(NonNull::new_unchecked(PyTuple_GET_ITEM(args, 2))); - } - - if !kwds.is_null() { - let len = unsafe { crate::dict::PyDict_GET_SIZE(kwds) as usize }; - let mut pos = 0isize; - let mut arg: *mut PyObject = std::ptr::null_mut(); - let mut val: *mut PyObject = std::ptr::null_mut(); - for _ in 0..=len - 1 { - unsafe { _PyDict_Next(kwds, &mut pos, &mut arg, &mut val, std::ptr::null_mut()) }; - if arg == typeref::DEFAULT { - if unlikely!(num_args & 2 == 2) { - return raise_dumps_exception(Cow::Borrowed( - "dumps() got multiple values for argument: 'default'", - )); - } - default = Some(NonNull::new_unchecked(val)); - } else if arg == typeref::OPTION { - if unlikely!(num_args & 3 == 3) { - return raise_dumps_exception(Cow::Borrowed( - "dumps() got multiple values for argument: 'option'", - )); - } - optsptr = Some(NonNull::new_unchecked(val)); - } else if arg.is_null() { - break; - } else { - return raise_dumps_exception(Cow::Borrowed( - "dumps() got an unexpected keyword argument", - )); - } - } - } - - let mut optsbits: i32 = 0; - if optsptr.is_some() { - if (*optsptr.unwrap().as_ptr()).ob_type != typeref::INT_TYPE { - return raise_dumps_exception(Cow::Borrowed("Invalid opts")); - } - optsbits = PyLong_AsLong(optsptr.unwrap().as_ptr()) as i32; - if optsbits < 0 || optsbits > opt::MAX_OPT { - return raise_dumps_exception(Cow::Borrowed("Invalid opts")); - } - } - - match encode::serialize(PyTuple_GET_ITEM(args, 0), default, optsbits as opt::Opt) { - Ok(val) => val.as_ptr(), - Err(err) => raise_dumps_exception(Cow::Owned(err)), - } -} diff --git a/src/serialize/dataclass.rs b/src/serialize/dataclass.rs new file mode 100644 index 0000000000000000000000000000000000000000..3376f44bf758ba01710f18c40227c9e23f9756cd_c3JjL3NlcmlhbGl6ZS9kYXRhY2xhc3MucnM= --- /dev/null +++ b/src/serialize/dataclass.rs @@ -0,0 +1,95 @@ +// SPDX-License-Identifier: (Apache-2.0 OR MIT) + +use crate::exc::*; +use crate::ffi::PyDict_GET_SIZE; +use crate::opt::*; +use crate::serialize::encode::*; +use crate::typeref::*; +use crate::unicode::*; + +use serde::ser::{Serialize, SerializeMap, Serializer}; + +use std::ptr::NonNull; + +pub struct DataclassSerializer { + ptr: *mut pyo3::ffi::PyObject, + opts: Opt, + default_calls: u8, + recursion: u8, + default: Option<NonNull<pyo3::ffi::PyObject>>, +} + +impl DataclassSerializer { + pub fn new( + ptr: *mut pyo3::ffi::PyObject, + opts: Opt, + default_calls: u8, + recursion: u8, + default: Option<NonNull<pyo3::ffi::PyObject>>, + ) -> Self { + DataclassSerializer { + ptr: ptr, + opts: opts, + default_calls: default_calls, + recursion: recursion, + default: default, + } + } +} + +impl<'p> Serialize for DataclassSerializer { + #[inline(never)] + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> + where + S: Serializer, + { + let fields = ffi!(PyObject_GetAttr(self.ptr, DATACLASS_FIELDS_STR)); + ffi!(Py_DECREF(fields)); + let len = unsafe { PyDict_GET_SIZE(fields) as usize }; + if unlikely!(len == 0) { + return serializer.serialize_map(Some(0)).unwrap().end(); + } + let mut map = serializer.serialize_map(None).unwrap(); + let mut pos = 0isize; + let mut str_size: pyo3::ffi::Py_ssize_t = 0; + let mut attr: *mut pyo3::ffi::PyObject = std::ptr::null_mut(); + let mut field: *mut pyo3::ffi::PyObject = std::ptr::null_mut(); + for _ in 0..=len - 1 { + unsafe { + pyo3::ffi::_PyDict_Next( + fields, + &mut pos, + &mut attr, + &mut field, + std::ptr::null_mut(), + ) + }; + if unsafe { ffi!(PyObject_GetAttr(field, FIELD_TYPE_STR)) != FIELD_TYPE.as_ptr() } { + continue; + } + { + let data = read_utf8_from_str(attr, &mut str_size); + if unlikely!(data.is_null()) { + err!(INVALID_STR); + } + let key_as_str = str_from_slice!(data, str_size); + if key_as_str.as_bytes()[0] == b'_' { + continue; + } + map.serialize_key(key_as_str).unwrap(); + } + + let value = ffi!(PyObject_GetAttr(self.ptr, attr)); + ffi!(Py_DECREF(value)); + + map.serialize_value(&SerializePyObject::new( + value, + self.opts, + self.default_calls, + self.recursion + 1, + self.default, + ))? + } + map.end() + } +} diff --git a/src/serialize/datetime.rs b/src/serialize/datetime.rs new file mode 100644 index 0000000000000000000000000000000000000000..3376f44bf758ba01710f18c40227c9e23f9756cd_c3JjL3NlcmlhbGl6ZS9kYXRldGltZS5ycw== --- /dev/null +++ b/src/serialize/datetime.rs @@ -0,0 +1,279 @@ +// SPDX-License-Identifier: (Apache-2.0 OR MIT) + +use crate::exc::*; +use crate::opt::*; +use crate::typeref::*; +use serde::ser::{Serialize, Serializer}; + +pub type DateTimeBuffer = smallvec::SmallVec<[u8; 32]>; + +macro_rules! write_double_digit { + ($buf:ident, $value:ident) => { + if $value < 10 { + $buf.push(b'0'); + } + $buf.extend_from_slice(itoa::Buffer::new().format($value).as_bytes()); + }; +} + +macro_rules! write_microsecond { + ($buf:ident, $microsecond:ident) => { + if $microsecond != 0 { + let mut buf = itoa::Buffer::new(); + let formatted = buf.format($microsecond); + $buf.extend_from_slice( + &[b'.', b'0', b'0', b'0', b'0', b'0', b'0'][..(7 - formatted.len())], + ); + $buf.extend_from_slice(formatted.as_bytes()); + } + }; +} + +#[repr(transparent)] +pub struct Date { + ptr: *mut pyo3::ffi::PyObject, +} + +impl Date { + pub fn new(ptr: *mut pyo3::ffi::PyObject) -> Self { + Date { ptr: ptr } + } + pub fn write_buf(&self, buf: &mut DateTimeBuffer) { + { + let year = ffi!(PyDateTime_GET_YEAR(self.ptr)) as i32; + buf.extend_from_slice(itoa::Buffer::new().format(year).as_bytes()); + } + buf.push(b'-'); + { + let month = ffi!(PyDateTime_GET_MONTH(self.ptr)) as u32; + write_double_digit!(buf, month); + } + buf.push(b'-'); + { + let day = ffi!(PyDateTime_GET_DAY(self.ptr)) as u32; + write_double_digit!(buf, day); + } + } +} +impl<'p> Serialize for Date { + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> + where + S: Serializer, + { + let mut buf: DateTimeBuffer = smallvec::SmallVec::with_capacity(32); + self.write_buf(&mut buf); + serializer.serialize_str(str_from_slice!(buf.as_ptr(), buf.len())) + } +} + +pub enum TimeError { + HasTimezone, +} + +pub struct Time { + ptr: *mut pyo3::ffi::PyObject, + opts: Opt, +} + +impl Time { + pub fn new(ptr: *mut pyo3::ffi::PyObject, opts: Opt) -> Result<Self, TimeError> { + if unsafe { (*(ptr as *mut pyo3::ffi::PyDateTime_Time)).hastzinfo == 1 } { + return Err(TimeError::HasTimezone); + } + Ok(Time { + ptr: ptr, + opts: opts, + }) + } + pub fn write_buf(&self, buf: &mut DateTimeBuffer) { + { + let hour = ffi!(PyDateTime_TIME_GET_HOUR(self.ptr)) as u8; + write_double_digit!(buf, hour); + } + buf.push(b':'); + { + let minute = ffi!(PyDateTime_TIME_GET_MINUTE(self.ptr)) as u8; + write_double_digit!(buf, minute); + } + buf.push(b':'); + { + let second = ffi!(PyDateTime_TIME_GET_SECOND(self.ptr)) as u8; + write_double_digit!(buf, second); + } + if self.opts & OMIT_MICROSECONDS == 0 { + let microsecond = ffi!(PyDateTime_TIME_GET_MICROSECOND(self.ptr)) as u32; + write_microsecond!(buf, microsecond); + } + } +} + +impl<'p> Serialize for Time { + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> + where + S: Serializer, + { + let mut buf: DateTimeBuffer = smallvec::SmallVec::with_capacity(32); + self.write_buf(&mut buf); + serializer.serialize_str(str_from_slice!(buf.as_ptr(), buf.len())) + } +} + +pub enum DateTimeError { + LibraryUnsupported, +} + +pub struct DateTime { + ptr: *mut pyo3::ffi::PyObject, + opts: Opt, +} + +impl DateTime { + pub fn new(ptr: *mut pyo3::ffi::PyObject, opts: Opt) -> Self { + DateTime { + ptr: ptr, + opts: opts, + } + } + pub fn write_buf(&self, buf: &mut DateTimeBuffer) -> Result<(), DateTimeError> { + let has_tz = unsafe { (*(self.ptr as *mut pyo3::ffi::PyDateTime_DateTime)).hastzinfo == 1 }; + let offset_day: i32; + let mut offset_second: i32; + if !has_tz { + offset_second = 0; + offset_day = 0; + } else { + let tzinfo = ffi!(PyDateTime_DATE_GET_TZINFO(self.ptr)); + if ffi!(PyObject_HasAttr(tzinfo, CONVERT_METHOD_STR)) == 1 { + // pendulum + let offset = ffi!(PyObject_CallMethodObjArgs( + self.ptr, + UTCOFFSET_METHOD_STR, + std::ptr::null_mut() as *mut pyo3::ffi::PyObject + )); + offset_second = ffi!(PyDateTime_DELTA_GET_SECONDS(offset)) as i32; + offset_day = ffi!(PyDateTime_DELTA_GET_DAYS(offset)); + } else if ffi!(PyObject_HasAttr(tzinfo, NORMALIZE_METHOD_STR)) == 1 { + // pytz + let method_ptr = ffi!(PyObject_CallMethodObjArgs( + tzinfo, + NORMALIZE_METHOD_STR, + self.ptr, + std::ptr::null_mut() as *mut pyo3::ffi::PyObject + )); + let offset = ffi!(PyObject_CallMethodObjArgs( + method_ptr, + UTCOFFSET_METHOD_STR, + std::ptr::null_mut() as *mut pyo3::ffi::PyObject + )); + offset_second = ffi!(PyDateTime_DELTA_GET_SECONDS(offset)) as i32; + offset_day = ffi!(PyDateTime_DELTA_GET_DAYS(offset)); + } else if ffi!(PyObject_HasAttr(tzinfo, DST_STR)) == 1 { + // dateutil/arrow, datetime.timezone.utc + let offset = ffi!(PyObject_CallMethodObjArgs( + tzinfo, + UTCOFFSET_METHOD_STR, + self.ptr, + std::ptr::null_mut() as *mut pyo3::ffi::PyObject + )); + offset_second = ffi!(PyDateTime_DELTA_GET_SECONDS(offset)) as i32; + offset_day = ffi!(PyDateTime_DELTA_GET_DAYS(offset)); + } else { + return Err(DateTimeError::LibraryUnsupported); + } + }; + + buf.extend_from_slice( + itoa::Buffer::new() + .format(ffi!(PyDateTime_GET_YEAR(self.ptr)) as i32) + .as_bytes(), + ); + buf.push(b'-'); + { + let month = ffi!(PyDateTime_GET_MONTH(self.ptr)) as u8; + write_double_digit!(buf, month); + } + buf.push(b'-'); + { + let day = ffi!(PyDateTime_GET_DAY(self.ptr)) as u8; + write_double_digit!(buf, day); + } + buf.push(b'T'); + { + let hour = ffi!(PyDateTime_DATE_GET_HOUR(self.ptr)) as u8; + write_double_digit!(buf, hour); + } + buf.push(b':'); + { + let minute = ffi!(PyDateTime_DATE_GET_MINUTE(self.ptr)) as u8; + write_double_digit!(buf, minute); + } + buf.push(b':'); + { + let second = ffi!(PyDateTime_DATE_GET_SECOND(self.ptr)) as u8; + write_double_digit!(buf, second); + } + if self.opts & OMIT_MICROSECONDS == 0 { + let microsecond = ffi!(PyDateTime_DATE_GET_MICROSECOND(self.ptr)) as u32; + write_microsecond!(buf, microsecond); + } + if has_tz || self.opts & NAIVE_UTC != 0 { + if offset_second == 0 { + if self.opts & UTC_Z != 0 { + buf.push(b'Z'); + } else { + buf.extend_from_slice(&[b'+', b'0', b'0', b':', b'0', b'0']); + } + } else { + if offset_day == -1 { + // datetime.timedelta(days=-1, seconds=68400) -> -05:00 + buf.push(b'-'); + offset_second = 86400 - offset_second; + } else { + // datetime.timedelta(seconds=37800) -> +10:30 + buf.push(b'+'); + } + { + let offset_minute = offset_second / 60; + let offset_hour = offset_minute / 60; + write_double_digit!(buf, offset_hour); + buf.push(b':'); + + let mut offset_minute_print = offset_minute % 60; + + { + // https://tools.ietf.org/html/rfc3339#section-5.8 + // "exactly 19 minutes and 32.13 seconds ahead of UTC" + // "closest representable UTC offset" + // "+20:00" + let offset_excess_second = + offset_second - (offset_minute_print * 60 + offset_hour * 3600); + if offset_excess_second >= 30 { + offset_minute_print += 1; + } + } + + if offset_minute_print < 10 { + buf.push(b'0'); + } + buf.extend_from_slice( + itoa::Buffer::new().format(offset_minute_print).as_bytes(), + ); + } + } + } + Ok(()) + } +} + +impl<'p> Serialize for DateTime { + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> + where + S: Serializer, + { + let mut buf: DateTimeBuffer = smallvec::SmallVec::with_capacity(32); + if self.write_buf(&mut buf).is_err() { + err!(DATETIME_LIBRARY_UNSUPPORTED) + } + serializer.serialize_str(str_from_slice!(buf.as_ptr(), buf.len())) + } +} diff --git a/src/serialize/default.rs b/src/serialize/default.rs new file mode 100644 index 0000000000000000000000000000000000000000..3376f44bf758ba01710f18c40227c9e23f9756cd_c3JjL3NlcmlhbGl6ZS9kZWZhdWx0LnJz --- /dev/null +++ b/src/serialize/default.rs @@ -0,0 +1,88 @@ +// SPDX-License-Identifier: (Apache-2.0 OR MIT) + +use crate::opt::*; +use crate::serialize::encode::*; + +use serde::ser::{Serialize, Serializer}; +use std::ffi::CStr; + +use std::ptr::NonNull; + +macro_rules! obj_name { + ($obj:expr) => { + unsafe { CStr::from_ptr((*$obj).tp_name).to_string_lossy() } + }; +} + +pub struct DefaultSerializer { + ptr: *mut pyo3::ffi::PyObject, + opts: Opt, + default_calls: u8, + recursion: u8, + default: Option<NonNull<pyo3::ffi::PyObject>>, +} + +impl DefaultSerializer { + pub fn new( + ptr: *mut pyo3::ffi::PyObject, + opts: Opt, + default_calls: u8, + recursion: u8, + default: Option<NonNull<pyo3::ffi::PyObject>>, + ) -> Self { + DefaultSerializer { + ptr: ptr, + opts: opts, + default_calls: default_calls, + recursion: recursion, + default: default, + } + } +} + +impl<'p> Serialize for DefaultSerializer { + #[inline(never)] + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> + where + S: Serializer, + { + match self.default { + Some(callable) => { + if unlikely!(self.default_calls == RECURSION_LIMIT) { + err!("default serializer exceeds recursion limit") + } + let default_obj = ffi!(PyObject_CallFunctionObjArgs( + callable.as_ptr(), + self.ptr, + std::ptr::null_mut() as *mut pyo3::ffi::PyObject + )); + if default_obj.is_null() { + err!(format_args!( + "Type is not JSON serializable: {}", + obj_name!(ob_type!(self.ptr)) + )) + } else if !ffi!(PyErr_Occurred()).is_null() { + err!(format_args!( + "Type raised exception in default function: {}", + obj_name!(ob_type!(self.ptr)) + )) + } else { + let res = SerializePyObject::new( + default_obj, + self.opts, + self.default_calls + 1, + self.recursion, + self.default, + ) + .serialize(serializer); + ffi!(Py_DECREF(default_obj)); + res + } + } + None => err!(format_args!( + "Type is not JSON serializable: {}", + obj_name!(ob_type!(self.ptr)) + )), + } + } +} diff --git a/src/serialize/dict.rs b/src/serialize/dict.rs new file mode 100644 index 0000000000000000000000000000000000000000..3376f44bf758ba01710f18c40227c9e23f9756cd_c3JjL3NlcmlhbGl6ZS9kaWN0LnJz --- /dev/null +++ b/src/serialize/dict.rs @@ -0,0 +1,296 @@ +// SPDX-License-Identifier: (Apache-2.0 OR MIT) + +use crate::exc::*; +use crate::opt::*; +use crate::serialize::datetime::*; +use crate::serialize::encode::pyobject_to_obtype; +use crate::serialize::encode::*; +use crate::serialize::uuid::*; +use crate::typeref::*; +use crate::unicode::*; +use inlinable_string::InlinableString; +use serde::ser::{Serialize, SerializeMap, Serializer}; +use smallvec::SmallVec; +use std::ptr::NonNull; + +pub struct DictSortedKey { + ptr: *mut pyo3::ffi::PyObject, + opts: Opt, + default_calls: u8, + recursion: u8, + default: Option<NonNull<pyo3::ffi::PyObject>>, + len: usize, +} + +impl DictSortedKey { + pub fn new( + ptr: *mut pyo3::ffi::PyObject, + opts: Opt, + default_calls: u8, + recursion: u8, + default: Option<NonNull<pyo3::ffi::PyObject>>, + len: usize, + ) -> Self { + DictSortedKey { + ptr: ptr, + opts: opts, + default_calls: default_calls, + recursion: recursion, + default: default, + len: len, + } + } +} + +impl<'p> Serialize for DictSortedKey { + #[inline(never)] + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> + where + S: Serializer, + { + let mut items: SmallVec<[(&str, *mut pyo3::ffi::PyObject); 8]> = + SmallVec::with_capacity(self.len); + let mut pos = 0isize; + let mut str_size: pyo3::ffi::Py_ssize_t = 0; + let mut key: *mut pyo3::ffi::PyObject = std::ptr::null_mut(); + let mut value: *mut pyo3::ffi::PyObject = std::ptr::null_mut(); + for _ in 0..=self.len - 1 { + unsafe { + pyo3::ffi::_PyDict_Next( + self.ptr, + &mut pos, + &mut key, + &mut value, + std::ptr::null_mut(), + ) + }; + if unlikely!(ob_type!(key) != STR_TYPE) { + err!("Dict key must be str") + } + let data = read_utf8_from_str(key, &mut str_size); + if unlikely!(data.is_null()) { + err!(INVALID_STR) + } + items.push((str_from_slice!(data, str_size), value)); + } + + items.sort_unstable_by(|a, b| a.0.cmp(b.0)); + + let mut map = serializer.serialize_map(None).unwrap(); + for (key, val) in items.iter() { + map.serialize_entry( + key, + &SerializePyObject::new( + *val, + self.opts, + self.default_calls, + self.recursion + 1, + self.default, + ), + )?; + } + map.end() + } +} + +enum NonStrError { + DatetimeLibraryUnsupported, + IntegerRange, + InvalidStr, + TimeTzinfo, + UnsupportedType, +} + +pub struct NonStrKey { + ptr: *mut pyo3::ffi::PyObject, + opts: Opt, + default_calls: u8, + recursion: u8, + default: Option<NonNull<pyo3::ffi::PyObject>>, + len: usize, +} + +impl NonStrKey { + pub fn new( + ptr: *mut pyo3::ffi::PyObject, + opts: Opt, + default_calls: u8, + recursion: u8, + default: Option<NonNull<pyo3::ffi::PyObject>>, + len: usize, + ) -> Self { + NonStrKey { + ptr: ptr, + opts: opts, + default_calls: default_calls, + recursion: recursion, + default: default, + len: len, + } + } + + fn pyobject_to_string( + &self, + key: *mut pyo3::ffi::PyObject, + opts: crate::opt::Opt, + ) -> Result<InlinableString, NonStrError> { + match pyobject_to_obtype(key, opts) { + ObType::None => Ok(InlinableString::from("null")), + ObType::Bool => { + let key_as_str: &str; + if unsafe { key == TRUE } { + key_as_str = "true"; + } else { + key_as_str = "false"; + } + Ok(InlinableString::from(key_as_str)) + } + ObType::Int => { + let val = ffi!(PyLong_AsLongLong(key)); + if unlikely!(val == -1 && !pyo3::ffi::PyErr_Occurred().is_null()) { + return Err(NonStrError::IntegerRange); + } + Ok(InlinableString::from(itoa::Buffer::new().format(val))) + } + ObType::Float => { + let val = ffi!(PyFloat_AS_DOUBLE(key)); + if !val.is_finite() { + Ok(InlinableString::from("null")) + } else { + Ok(InlinableString::from(ryu::Buffer::new().format_finite(val))) + } + } + ObType::Datetime => { + let mut buf: DateTimeBuffer = smallvec::SmallVec::with_capacity(32); + let dt = DateTime::new(key, opts); + if dt.write_buf(&mut buf).is_err() { + return Err(NonStrError::DatetimeLibraryUnsupported); + } + let key_as_str = str_from_slice!(buf.as_ptr(), buf.len()); + Ok(InlinableString::from(key_as_str)) + } + ObType::Date => { + let mut buf: DateTimeBuffer = smallvec::SmallVec::with_capacity(32); + Date::new(key).write_buf(&mut buf); + let key_as_str = str_from_slice!(buf.as_ptr(), buf.len()); + Ok(InlinableString::from(key_as_str)) + } + ObType::Time => match Time::new(key, opts) { + Ok(val) => { + let mut buf: DateTimeBuffer = smallvec::SmallVec::with_capacity(32); + val.write_buf(&mut buf); + let key_as_str = str_from_slice!(buf.as_ptr(), buf.len()); + Ok(InlinableString::from(key_as_str)) + } + Err(TimeError::HasTimezone) => Err(NonStrError::TimeTzinfo), + }, + ObType::Uuid => { + let mut buf: UUIDBuffer = smallvec::SmallVec::with_capacity(64); + UUID::new(key).write_buf(&mut buf); + let key_as_str = str_from_slice!(buf.as_ptr(), buf.len()); + Ok(InlinableString::from(key_as_str)) + } + ObType::Enum => { + let value = ffi!(PyObject_GetAttr(key, VALUE_STR)); + ffi!(Py_DECREF(value)); + self.pyobject_to_string(value, opts) + } + ObType::Str => { + // because of ObType::Enum + let mut str_size: pyo3::ffi::Py_ssize_t = 0; + let uni = read_utf8_from_str(key, &mut str_size); + if unlikely!(uni.is_null()) { + Err(NonStrError::InvalidStr) + } else { + Ok(InlinableString::from(str_from_slice!(uni, str_size))) + } + } + ObType::StrSubclass => { + let mut str_size: pyo3::ffi::Py_ssize_t = 0; + let uni = ffi!(PyUnicode_AsUTF8AndSize(key, &mut str_size)) as *const u8; + if unlikely!(uni.is_null()) { + Err(NonStrError::InvalidStr) + } else { + Ok(InlinableString::from(str_from_slice!(uni, str_size))) + } + } + ObType::Tuple + | ObType::Array + | ObType::Dict + | ObType::List + | ObType::Dataclass + | ObType::Unknown => Err(NonStrError::UnsupportedType), + } + } +} + +impl<'p> Serialize for NonStrKey { + #[inline(never)] + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> + where + S: Serializer, + { + let mut items: SmallVec<[(InlinableString, *mut pyo3::ffi::PyObject); 8]> = + SmallVec::with_capacity(self.len); + let mut pos = 0isize; + let mut str_size: pyo3::ffi::Py_ssize_t = 0; + let mut key: *mut pyo3::ffi::PyObject = std::ptr::null_mut(); + let mut value: *mut pyo3::ffi::PyObject = std::ptr::null_mut(); + let opts = self.opts & NOT_PASSTHROUGH; + for _ in 0..=self.len - 1 { + unsafe { + pyo3::ffi::_PyDict_Next( + self.ptr, + &mut pos, + &mut key, + &mut value, + std::ptr::null_mut(), + ) + }; + if is_type!(ob_type!(key), STR_TYPE) { + let data = read_utf8_from_str(key, &mut str_size); + if unlikely!(data.is_null()) { + err!(INVALID_STR) + } + items.push(( + InlinableString::from(str_from_slice!(data, str_size)), + value, + )); + } else { + match self.pyobject_to_string(key, opts) { + Ok(key_as_str) => items.push((key_as_str, value)), + Err(NonStrError::TimeTzinfo) => err!(TIME_HAS_TZINFO), + Err(NonStrError::IntegerRange) => { + err!("Dict integer key must be within 64-bit range") + } + Err(NonStrError::DatetimeLibraryUnsupported) => { + err!(DATETIME_LIBRARY_UNSUPPORTED) + } + Err(NonStrError::InvalidStr) => err!(INVALID_STR), + Err(NonStrError::UnsupportedType) => { + err!("Dict key must a type serializable with OPT_NON_STR_KEYS") + } + } + } + } + + if opts & SORT_KEYS != 0 { + items.sort_unstable_by(|a, b| a.0.cmp(&b.0)); + } + + let mut map = serializer.serialize_map(None).unwrap(); + for (key, val) in items.iter() { + map.serialize_entry( + str_from_slice!(key.as_ptr(), key.len()), + &SerializePyObject::new( + *val, + self.opts, + self.default_calls, + self.recursion + 1, + self.default, + ), + )?; + } + map.end() + } +} diff --git a/src/serialize/encode.rs b/src/serialize/encode.rs new file mode 100644 index 0000000000000000000000000000000000000000..3376f44bf758ba01710f18c40227c9e23f9756cd_c3JjL3NlcmlhbGl6ZS9lbmNvZGUucnM= --- /dev/null +++ b/src/serialize/encode.rs @@ -0,0 +1,460 @@ +// SPDX-License-Identifier: (Apache-2.0 OR MIT) + +use crate::exc::*; +use crate::ffi::PyDict_GET_SIZE; +use crate::ffi::*; +use crate::opt::*; +use crate::serialize::dataclass::*; +use crate::serialize::datetime::*; +use crate::serialize::default::*; +use crate::serialize::dict::*; +use crate::serialize::numpy::*; +use crate::serialize::tuple::*; +use crate::serialize::uuid::*; +use crate::serialize::writer::*; +use crate::typeref::*; +use crate::unicode::*; +use serde::ser::{Serialize, SerializeMap, SerializeSeq, Serializer}; +use std::io::Write; +use std::ptr::NonNull; + +// https://tools.ietf.org/html/rfc7159#section-6 +// "[-(2**53)+1, (2**53)-1]" +const STRICT_INT_MIN: i64 = -9007199254740991; +const STRICT_INT_MAX: i64 = 9007199254740991; + +pub const RECURSION_LIMIT: u8 = 255; + +pub fn serialize( + ptr: *mut pyo3::ffi::PyObject, + default: Option<NonNull<pyo3::ffi::PyObject>>, + opts: Opt, +) -> Result<NonNull<pyo3::ffi::PyObject>, String> { + let mut buf = BytesWriter::new(); + let obtype = pyobject_to_obtype(ptr, opts); + match obtype { + ObType::List | ObType::Dict | ObType::Dataclass | ObType::Array => { + buf.resize(1024); + } + _ => {} + } + buf.prefetch(); + let obj = SerializePyObject::with_obtype(ptr, obtype, opts, 0, 0, default); + let res; + if likely!(opts & INDENT_2 != INDENT_2) { + res = serde_json::to_writer(&mut buf, &obj); + } else { + res = serde_json::to_writer_pretty(&mut buf, &obj); + } + match res { + Ok(_) => { + if opts & APPEND_NEWLINE != 0 { + buf.write(b"\n").unwrap(); + } + Ok(buf.finish()) + } + Err(err) => { + ffi!(_Py_Dealloc(buf.finish().as_ptr())); + Err(err.to_string()) + } + } +} + +#[derive(Copy, Clone)] +pub enum ObType { + Str, + Int, + Bool, + None, + Float, + List, + Dict, + Datetime, + Date, + Time, + Tuple, + Uuid, + Dataclass, + Array, + Enum, + StrSubclass, + Unknown, +} + +#[inline] +pub fn pyobject_to_obtype(obj: *mut pyo3::ffi::PyObject, opts: Opt) -> ObType { + unsafe { + let ob_type = ob_type!(obj); + if ob_type == STR_TYPE { + ObType::Str + } else if ob_type == INT_TYPE { + ObType::Int + } else if ob_type == BOOL_TYPE { + ObType::Bool + } else if ob_type == NONE_TYPE { + ObType::None + } else if ob_type == FLOAT_TYPE { + ObType::Float + } else if ob_type == LIST_TYPE { + ObType::List + } else if ob_type == DICT_TYPE { + ObType::Dict + } else if ob_type == DATETIME_TYPE && opts & PASSTHROUGH_DATETIME == 0 { + ObType::Datetime + } else { + pyobject_to_obtype_unlikely(obj, opts) + } + } +} + +macro_rules! is_subclass { + ($ob_type:expr, $flag:ident) => { + unsafe { (((*$ob_type).tp_flags & pyo3::ffi::$flag) != 0) } + }; +} + +#[inline(never)] +pub fn pyobject_to_obtype_unlikely(obj: *mut pyo3::ffi::PyObject, opts: Opt) -> ObType { + unsafe { + let ob_type = ob_type!(obj); + if ob_type == DATE_TYPE && opts & PASSTHROUGH_DATETIME == 0 { + ObType::Date + } else if ob_type == TIME_TYPE && opts & PASSTHROUGH_DATETIME == 0 { + ObType::Time + } else if ob_type == TUPLE_TYPE { + ObType::Tuple + } else if ob_type == UUID_TYPE { + ObType::Uuid + } else if (*(ob_type as *mut LocalPyTypeObject)).ob_type == ENUM_TYPE { + ObType::Enum + } else if is_subclass!(ob_type, Py_TPFLAGS_UNICODE_SUBCLASS) + && opts & PASSTHROUGH_SUBCLASS == 0 + { + ObType::StrSubclass + } else if is_subclass!(ob_type, Py_TPFLAGS_LONG_SUBCLASS) + && opts & PASSTHROUGH_SUBCLASS == 0 + { + ObType::Int + } else if is_subclass!(ob_type, Py_TPFLAGS_LIST_SUBCLASS) + && opts & PASSTHROUGH_SUBCLASS == 0 + { + ObType::List + } else if is_subclass!(ob_type, Py_TPFLAGS_DICT_SUBCLASS) + && opts & PASSTHROUGH_SUBCLASS == 0 + { + ObType::Dict + } else if ffi!(PyDict_Contains((*ob_type).tp_dict, DATACLASS_FIELDS_STR)) == 1 { + ObType::Dataclass + } else if opts & SERIALIZE_NUMPY != 0 + && ARRAY_TYPE.is_some() + && ob_type == ARRAY_TYPE.unwrap().as_ptr() + { + ObType::Array + } else { + ObType::Unknown + } + } +} + +pub struct SerializePyObject { + ptr: *mut pyo3::ffi::PyObject, + obtype: ObType, + opts: Opt, + default_calls: u8, + recursion: u8, + default: Option<NonNull<pyo3::ffi::PyObject>>, +} + +impl SerializePyObject { + #[inline] + pub fn new( + ptr: *mut pyo3::ffi::PyObject, + opts: Opt, + default_calls: u8, + recursion: u8, + default: Option<NonNull<pyo3::ffi::PyObject>>, + ) -> Self { + SerializePyObject { + ptr: ptr, + obtype: pyobject_to_obtype(ptr, opts), + opts: opts, + default_calls: default_calls, + recursion: recursion, + default: default, + } + } + + #[inline] + pub fn with_obtype( + ptr: *mut pyo3::ffi::PyObject, + obtype: ObType, + opts: Opt, + default_calls: u8, + recursion: u8, + default: Option<NonNull<pyo3::ffi::PyObject>>, + ) -> Self { + SerializePyObject { + ptr: ptr, + obtype: obtype, + opts: opts, + default_calls: default_calls, + recursion: recursion, + default: default, + } + } +} + +impl<'p> Serialize for SerializePyObject { + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> + where + S: Serializer, + { + match self.obtype { + ObType::Str => { + let mut str_size: pyo3::ffi::Py_ssize_t = 0; + let uni = read_utf8_from_str(self.ptr, &mut str_size); + if unlikely!(uni.is_null()) { + err!(INVALID_STR) + } + serializer.serialize_str(str_from_slice!(uni, str_size)) + } + ObType::StrSubclass => { + let mut str_size: pyo3::ffi::Py_ssize_t = 0; + let uni = ffi!(PyUnicode_AsUTF8AndSize(self.ptr, &mut str_size)) as *const u8; + if unlikely!(uni.is_null()) { + err!(INVALID_STR) + } + serializer.serialize_str(str_from_slice!(uni, str_size)) + } + ObType::Int => { + let val = ffi!(PyLong_AsLongLong(self.ptr)); + if unlikely!(val == -1) && !ffi!(PyErr_Occurred()).is_null() { + err!("Integer exceeds 64-bit range") + } else if unlikely!(self.opts & STRICT_INTEGER != 0) + && (val > STRICT_INT_MAX || val < STRICT_INT_MIN) + { + err!("Integer exceeds 53-bit range") + } + serializer.serialize_i64(val) + } + ObType::None => serializer.serialize_unit(), + ObType::Float => serializer.serialize_f64(ffi!(PyFloat_AS_DOUBLE(self.ptr))), + ObType::Bool => serializer.serialize_bool(unsafe { self.ptr == TRUE }), + ObType::Datetime => DateTime::new(self.ptr, self.opts).serialize(serializer), + ObType::Date => Date::new(self.ptr).serialize(serializer), + ObType::Time => match Time::new(self.ptr, self.opts) { + Ok(val) => val.serialize(serializer), + Err(TimeError::HasTimezone) => err!(TIME_HAS_TZINFO), + }, + ObType::Uuid => UUID::new(self.ptr).serialize(serializer), + ObType::Dict => { + if unlikely!(self.recursion == RECURSION_LIMIT) { + err!(RECURSION_LIMIT_REACHED) + } + let len = unsafe { PyDict_GET_SIZE(self.ptr) as usize }; + if unlikely!(len == 0) { + serializer.serialize_map(Some(0)).unwrap().end() + } else if likely!(self.opts & SORT_OR_NON_STR_KEYS == 0) { + let mut map = serializer.serialize_map(None).unwrap(); + let mut pos = 0isize; + let mut str_size: pyo3::ffi::Py_ssize_t = 0; + let mut key: *mut pyo3::ffi::PyObject = std::ptr::null_mut(); + let mut value: *mut pyo3::ffi::PyObject = std::ptr::null_mut(); + for _ in 0..=len - 1 { + unsafe { + pyo3::ffi::_PyDict_Next( + self.ptr, + &mut pos, + &mut key, + &mut value, + std::ptr::null_mut(), + ) + }; + if unlikely!(ob_type!(key) != STR_TYPE) { + err!(KEY_MUST_BE_STR) + } + { + let data = read_utf8_from_str(key, &mut str_size); + if unlikely!(data.is_null()) { + err!(INVALID_STR) + } + map.serialize_key(str_from_slice!(data, str_size)).unwrap(); + } + + map.serialize_value(&SerializePyObject::new( + value, + self.opts, + self.default_calls, + self.recursion + 1, + self.default, + ))?; + } + map.end() + } else if self.opts & NON_STR_KEYS != 0 { + NonStrKey::new( + self.ptr, + self.opts, + self.default_calls, + self.recursion, + self.default, + len, + ) + .serialize(serializer) + } else { + DictSortedKey::new( + self.ptr, + self.opts, + self.default_calls, + self.recursion, + self.default, + len, + ) + .serialize(serializer) + } + } + ObType::List => { + if unlikely!(self.recursion == RECURSION_LIMIT) { + err!(RECURSION_LIMIT_REACHED) + } + let len = ffi!(PyList_GET_SIZE(self.ptr)) as usize; + if len == 0 { + serializer.serialize_seq(Some(0)).unwrap().end() + } else { + let mut type_ptr = std::ptr::null_mut(); + let mut ob_type = ObType::Str; + + let mut seq = serializer.serialize_seq(None).unwrap(); + for i in 0..=len - 1 { + let elem = unsafe { + *(*(self.ptr as *mut pyo3::ffi::PyListObject)) + .ob_item + .offset(i as isize) + }; + if ob_type!(elem) != type_ptr { + type_ptr = ob_type!(elem); + ob_type = pyobject_to_obtype(elem, self.opts); + } + seq.serialize_element(&SerializePyObject::with_obtype( + elem, + ob_type, + self.opts, + self.default_calls, + self.recursion + 1, + self.default, + ))?; + } + seq.end() + } + } + ObType::Tuple => { + let mut seq = serializer.serialize_seq(None).unwrap(); + for elem in PyTupleIterator::new(self.ptr) { + seq.serialize_element(&SerializePyObject::new( + elem.as_ptr(), + self.opts, + self.default_calls, + self.recursion + 1, + self.default, + ))? + } + seq.end() + } + ObType::Dataclass => { + if unlikely!(self.recursion == RECURSION_LIMIT) { + err!(RECURSION_LIMIT_REACHED) + } + let dict = ffi!(PyObject_GetAttr(self.ptr, DICT_STR)); + if !dict.is_null() { + ffi!(Py_DECREF(dict)); + let len = unsafe { PyDict_GET_SIZE(dict) as usize }; + if unlikely!(len == 0) { + return serializer.serialize_map(Some(0)).unwrap().end(); + } + let mut map = serializer.serialize_map(None).unwrap(); + let mut pos = 0isize; + let mut str_size: pyo3::ffi::Py_ssize_t = 0; + let mut key: *mut pyo3::ffi::PyObject = std::ptr::null_mut(); + let mut value: *mut pyo3::ffi::PyObject = std::ptr::null_mut(); + for _ in 0..=len - 1 { + unsafe { + pyo3::ffi::_PyDict_Next( + dict, + &mut pos, + &mut key, + &mut value, + std::ptr::null_mut(), + ) + }; + if unlikely!(ob_type!(key) != STR_TYPE) { + err!(KEY_MUST_BE_STR) + } + { + let data = read_utf8_from_str(key, &mut str_size); + if unlikely!(data.is_null()) { + err!(INVALID_STR) + } + let key_as_str = str_from_slice!(data, str_size); + if unlikely!(key_as_str.as_bytes()[0] == b'_') { + continue; + } + map.serialize_key(key_as_str).unwrap(); + } + + map.serialize_value(&SerializePyObject::new( + value, + self.opts, + self.default_calls, + self.recursion + 1, + self.default, + ))?; + } + map.end() + } else { + unsafe { pyo3::ffi::PyErr_Clear() }; + DataclassSerializer::new( + self.ptr, + self.opts, + self.default_calls, + self.recursion, + self.default, + ) + .serialize(serializer) + } + } + ObType::Enum => { + let value = ffi!(PyObject_GetAttr(self.ptr, VALUE_STR)); + ffi!(Py_DECREF(value)); + SerializePyObject::new( + value, + self.opts, + self.default_calls, + self.recursion, + self.default, + ) + .serialize(serializer) + } + ObType::Array => match PyArray::new(self.ptr) { + Ok(val) => val.serialize(serializer), + Err(PyArrayError::Malformed) => err!("numpy array is malformed"), + Err(PyArrayError::NotContiguous) | Err(PyArrayError::UnsupportedDataType) => { + DefaultSerializer::new( + self.ptr, + self.opts, + self.default_calls, + self.recursion, + self.default, + ) + .serialize(serializer) + } + }, + ObType::Unknown => DefaultSerializer::new( + self.ptr, + self.opts, + self.default_calls, + self.recursion, + self.default, + ) + .serialize(serializer), + } + } +} diff --git a/src/serialize/mod.rs b/src/serialize/mod.rs new file mode 100644 index 0000000000000000000000000000000000000000..3376f44bf758ba01710f18c40227c9e23f9756cd_c3JjL3NlcmlhbGl6ZS9tb2QucnM= --- /dev/null +++ b/src/serialize/mod.rs @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: (Apache-2.0 OR MIT) + +mod dataclass; +mod datetime; +mod default; +mod dict; +mod encode; +mod numpy; +mod tuple; +mod uuid; +mod writer; + +pub use encode::serialize; diff --git a/src/serialize/numpy.rs b/src/serialize/numpy.rs new file mode 100644 index 0000000000000000000000000000000000000000..3376f44bf758ba01710f18c40227c9e23f9756cd_c3JjL3NlcmlhbGl6ZS9udW1weS5ycw== --- /dev/null +++ b/src/serialize/numpy.rs @@ -0,0 +1,340 @@ +// SPDX-License-Identifier: (Apache-2.0 OR MIT) + +use crate::typeref::ARRAY_STRUCT_STR; +use pyo3::ffi::*; +use serde::ser::{Serialize, SerializeSeq, Serializer}; +use std::os::raw::{c_char, c_int, c_void}; + +macro_rules! slice { + ($ptr:expr, $size:expr) => { + unsafe { std::slice::from_raw_parts($ptr, $size) } + }; +} + +#[repr(C)] +pub struct PyCapsule { + pub ob_refcnt: Py_ssize_t, + pub ob_type: *mut PyTypeObject, + pub pointer: *mut c_void, + pub name: *const c_char, + pub context: *mut c_void, + pub destructor: *mut c_void, // should be typedef void (*PyCapsule_Destructor)(PyObject *); +} + +// https://docs.scipy.org/doc/numpy/reference/arrays.interface.html#c.__array_struct__ + +#[repr(C)] +pub struct PyArrayInterface { + pub two: c_int, + pub nd: c_int, + pub typekind: c_char, + pub itemsize: c_int, + pub flags: c_int, + pub shape: *mut Py_intptr_t, + pub strides: *mut Py_intptr_t, + pub data: *mut c_void, + pub descr: *mut PyObject, +} + +#[derive(Copy, Clone, PartialEq)] +pub enum ItemType { + BOOL, + F32, + F64, + I32, + I64, + U32, + U64, +} + +pub enum PyArrayError { + Malformed, + NotContiguous, + UnsupportedDataType, +} + +// >>> arr = numpy.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]], numpy.int32) +// >>> arr.ndim +// 3 +// >>> arr.shape +// (2, 2, 2) +// >>> arr.strides +// (16, 8, 4) +pub struct PyArray { + array: *mut PyArrayInterface, + position: Vec<isize>, + children: Vec<PyArray>, + depth: usize, + capsule: *mut PyCapsule, +} + +impl<'a> PyArray { + #[cold] + pub fn new(ptr: *mut PyObject) -> Result<Self, PyArrayError> { + let capsule = ffi!(PyObject_GetAttr(ptr, ARRAY_STRUCT_STR)); + let array = unsafe { (*(capsule as *mut PyCapsule)).pointer as *mut PyArrayInterface }; + if unsafe { (*array).two != 2 } { + ffi!(Py_DECREF(capsule)); + Err(PyArrayError::Malformed) + } else if unsafe { (*array).flags } & 0x1 != 0x1 { + ffi!(Py_DECREF(capsule)); + Err(PyArrayError::NotContiguous) + } else { + let num_dimensions = unsafe { (*array).nd as usize }; + if num_dimensions == 0 { + return Err(PyArrayError::UnsupportedDataType); + } + let mut pyarray = PyArray { + array: array, + position: vec![0; num_dimensions], + children: Vec::with_capacity(num_dimensions), + depth: 0, + capsule: capsule as *mut PyCapsule, + }; + if pyarray.kind().is_none() { + Err(PyArrayError::UnsupportedDataType) + } else { + if pyarray.dimensions() > 1 { + pyarray.build(); + } + Ok(pyarray) + } + } + } + + fn from_parent(&self, position: Vec<isize>, num_children: usize) -> Self { + let mut arr = PyArray { + array: self.array, + position: position, + children: Vec::with_capacity(num_children), + depth: self.depth + 1, + capsule: self.capsule, + }; + arr.build(); + arr + } + + fn kind(&self) -> Option<ItemType> { + match unsafe { ((*self.array).typekind, (*self.array).itemsize) } { + (098, 1) => Some(ItemType::BOOL), + (102, 4) => Some(ItemType::F32), + (102, 8) => Some(ItemType::F64), + (105, 4) => Some(ItemType::I32), + (105, 8) => Some(ItemType::I64), + (117, 4) => Some(ItemType::U32), + (117, 8) => Some(ItemType::U64), + _ => None, + } + } + + fn build(&mut self) { + if self.depth < self.dimensions() - 1 { + for i in 0..=self.shape()[self.depth] - 1 { + let mut position: Vec<isize> = self.position.iter().copied().collect(); + position[self.depth] = i; + let num_children: usize; + if self.depth < self.dimensions() - 2 { + num_children = self.shape()[self.depth + 1] as usize; + } else { + num_children = 0; + } + self.children.push(self.from_parent(position, num_children)) + } + } + } + + fn data(&self) -> *mut c_void { + let offset = self + .strides() + .iter() + .zip(self.position.iter().copied()) + .take(self.depth) + .map(|(a, b)| a * b) + .sum::<isize>(); + unsafe { (*self.array).data.offset(offset) } + } + + fn num_items(&self) -> usize { + self.shape()[self.shape().len() - 1] as usize + } + + fn dimensions(&self) -> usize { + unsafe { (*self.array).nd as usize } + } + + fn shape(&self) -> &[isize] { + slice!((*self.array).shape as *const isize, self.dimensions()) + } + + fn strides(&self) -> &[isize] { + slice!((*self.array).strides as *const isize, self.dimensions()) + } +} + +impl Drop for PyArray { + fn drop(&mut self) { + if self.depth == 0 { + ffi!(Py_XDECREF(self.capsule as *mut pyo3::ffi::PyObject)) + } + } +} + +impl<'p> Serialize for PyArray { + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> + where + S: Serializer, + { + let mut seq = serializer.serialize_seq(None).unwrap(); + if !self.children.is_empty() { + for child in &self.children { + seq.serialize_element(child).unwrap(); + } + } else { + let data_ptr = self.data(); + let num_items = self.num_items(); + match self.kind().unwrap() { + ItemType::F64 => { + let slice: &[f64] = slice!(data_ptr as *const f64, num_items); + for &each in slice.iter() { + seq.serialize_element(&DataTypeF64 { obj: each }).unwrap(); + } + } + ItemType::F32 => { + let slice: &[f32] = slice!(data_ptr as *const f32, num_items); + for &each in slice.iter() { + seq.serialize_element(&DataTypeF32 { obj: each }).unwrap(); + } + } + ItemType::I64 => { + let slice: &[i64] = slice!(data_ptr as *const i64, num_items); + for &each in slice.iter() { + seq.serialize_element(&DataTypeI64 { obj: each }).unwrap(); + } + } + ItemType::I32 => { + let slice: &[i32] = slice!(data_ptr as *const i32, num_items); + for &each in slice.iter() { + seq.serialize_element(&DataTypeI32 { obj: each }).unwrap(); + } + } + ItemType::U64 => { + let slice: &[u64] = slice!(data_ptr as *const u64, num_items); + for &each in slice.iter() { + seq.serialize_element(&DataTypeU64 { obj: each }).unwrap(); + } + } + ItemType::U32 => { + let slice: &[u32] = slice!(data_ptr as *const u32, num_items); + for &each in slice.iter() { + seq.serialize_element(&DataTypeU32 { obj: each }).unwrap(); + } + } + ItemType::BOOL => { + let slice: &[u8] = slice!(data_ptr as *const u8, num_items); + for &each in slice.iter() { + seq.serialize_element(&DataTypeBOOL { obj: each }).unwrap(); + } + } + } + } + seq.end() + } +} + +#[repr(transparent)] +struct DataTypeF32 { + pub obj: f32, +} + +impl<'p> Serialize for DataTypeF32 { + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> + where + S: Serializer, + { + serializer.serialize_f32(self.obj) + } +} + +#[repr(transparent)] +struct DataTypeF64 { + pub obj: f64, +} + +impl<'p> Serialize for DataTypeF64 { + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> + where + S: Serializer, + { + serializer.serialize_f64(self.obj) + } +} + +#[repr(transparent)] +struct DataTypeI32 { + pub obj: i32, +} + +impl<'p> Serialize for DataTypeI32 { + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> + where + S: Serializer, + { + serializer.serialize_i32(self.obj) + } +} + +#[repr(transparent)] +struct DataTypeI64 { + pub obj: i64, +} + +impl<'p> Serialize for DataTypeI64 { + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> + where + S: Serializer, + { + serializer.serialize_i64(self.obj) + } +} + +#[repr(transparent)] +struct DataTypeU32 { + pub obj: u32, +} + +impl<'p> Serialize for DataTypeU32 { + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> + where + S: Serializer, + { + serializer.serialize_u32(self.obj) + } +} + +#[repr(transparent)] +struct DataTypeU64 { + pub obj: u64, +} + +impl<'p> Serialize for DataTypeU64 { + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> + where + S: Serializer, + { + serializer.serialize_u64(self.obj) + } +} + +#[repr(transparent)] +struct DataTypeBOOL { + pub obj: u8, +} + +impl<'p> Serialize for DataTypeBOOL { + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> + where + S: Serializer, + { + serializer.serialize_bool(self.obj == 1) + } +} diff --git a/src/serialize/tuple.rs b/src/serialize/tuple.rs new file mode 100644 index 0000000000000000000000000000000000000000..3376f44bf758ba01710f18c40227c9e23f9756cd_c3JjL3NlcmlhbGl6ZS90dXBsZS5ycw== --- /dev/null +++ b/src/serialize/tuple.rs @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: (Apache-2.0 OR MIT) + +use std::ptr::NonNull; + +pub struct PyTupleIterator { + list: *mut pyo3::ffi::PyObject, + len: isize, + idx: isize, +} + +impl PyTupleIterator { + pub fn new(list: *mut pyo3::ffi::PyObject) -> Self { + PyTupleIterator { + list: list, + len: ffi!(PyTuple_GET_SIZE(list)), + idx: 0, + } + } +} + +impl Iterator for PyTupleIterator { + type Item = NonNull<pyo3::ffi::PyObject>; + + #[inline] + fn next(&mut self) -> Option<NonNull<pyo3::ffi::PyObject>> { + if self.len == self.idx { + None + } else { + let item = nonnull!(ffi!(PyTuple_GET_ITEM(self.list, self.idx as isize))); + self.idx += 1; + Some(item) + } + } +} diff --git a/src/serialize/uuid.rs b/src/serialize/uuid.rs new file mode 100644 index 0000000000000000000000000000000000000000..3376f44bf758ba01710f18c40227c9e23f9756cd_c3JjL3NlcmlhbGl6ZS91dWlkLnJz --- /dev/null +++ b/src/serialize/uuid.rs @@ -0,0 +1,62 @@ +// SPDX-License-Identifier: (Apache-2.0 OR MIT) + +use crate::typeref::*; +use serde::ser::{Serialize, Serializer}; +use smallvec::SmallVec; +use std::io::Write; +use std::os::raw::c_uchar; + +pub type UUIDBuffer = smallvec::SmallVec<[u8; 64]>; + +pub struct UUID { + ptr: *mut pyo3::ffi::PyObject, +} + +impl UUID { + pub fn new(ptr: *mut pyo3::ffi::PyObject) -> Self { + UUID { ptr: ptr } + } + pub fn write_buf(&self, buf: &mut UUIDBuffer) { + let value: u128; + { + // test_uuid_immutable, test_uuid_int + let py_int = ffi!(PyObject_GetAttr(self.ptr, INT_ATTR_STR)); + ffi!(Py_DECREF(py_int)); + let buffer: [c_uchar; 16] = [0; 16]; + unsafe { + // test_uuid_overflow + pyo3::ffi::_PyLong_AsByteArray( + py_int as *mut pyo3::ffi::PyLongObject, + buffer.as_ptr() as *const c_uchar, + 16, + 1, // little_endian + 0, // is_signed + ) + }; + value = u128::from_le_bytes(buffer); + } + + let mut hexadecimal: SmallVec<[u8; 32]> = SmallVec::with_capacity(32); + write!(hexadecimal, "{:032x}", value).unwrap(); + + buf.extend_from_slice(&hexadecimal[..8]); + buf.push(b'-'); + buf.extend_from_slice(&hexadecimal[8..12]); + buf.push(b'-'); + buf.extend_from_slice(&hexadecimal[12..16]); + buf.push(b'-'); + buf.extend_from_slice(&hexadecimal[16..20]); + buf.push(b'-'); + buf.extend_from_slice(&hexadecimal[20..]); + } +} +impl<'p> Serialize for UUID { + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> + where + S: Serializer, + { + let mut buf: UUIDBuffer = smallvec::SmallVec::with_capacity(64); + self.write_buf(&mut buf); + serializer.serialize_str(str_from_slice!(buf.as_ptr(), buf.len())) + } +} diff --git a/src/serialize/writer.rs b/src/serialize/writer.rs new file mode 100644 index 0000000000000000000000000000000000000000..3376f44bf758ba01710f18c40227c9e23f9756cd_c3JjL3NlcmlhbGl6ZS93cml0ZXIucnM= --- /dev/null +++ b/src/serialize/writer.rs @@ -0,0 +1,90 @@ +// SPDX-License-Identifier: (Apache-2.0 OR MIT) + +use crate::ffi::PyBytesObject; +use core::ptr::NonNull; +use pyo3::ffi::*; +use std::os::raw::c_char; + +pub struct BytesWriter { + cap: usize, + len: usize, + bytes: *mut PyBytesObject, +} + +impl BytesWriter { + #[inline] + pub fn new() -> Self { + let buf = [0; 64]; + BytesWriter { + cap: 64, + len: 0, + bytes: unsafe { PyBytes_FromStringAndSize(buf.as_ptr(), 64) as *mut PyBytesObject }, + } + } + + #[inline] + pub fn finish(&mut self) -> NonNull<PyObject> { + unsafe { + (*self.bytes).ob_size = self.len as isize; + self.resize(self.len as isize); + NonNull::new_unchecked(self.bytes as *mut PyObject) + } + } + + #[inline] + fn buffer_ptr(&self) -> *mut u8 { + unsafe { + std::mem::transmute::<&[c_char; 1], *mut u8>( + &(*self.bytes.cast::<PyBytesObject>()).ob_sval, + ) + .offset(self.len as isize) + } + } + + #[inline] + pub fn resize(&mut self, len: isize) { + unsafe { + _PyBytes_Resize( + &mut self.bytes as *mut *mut PyBytesObject as *mut *mut PyObject, + len as isize, + ); + } + } + + #[inline] + pub fn prefetch(&self) { + unsafe { core::intrinsics::prefetch_write_data(self.buffer_ptr(), 3) }; + } + + #[inline] + fn grow(&mut self, len: usize) { + while self.cap - self.len < len { + self.cap *= 2; + } + self.resize(self.cap as isize); + } +} + +impl std::io::Write for BytesWriter { + #[inline] + fn write(&mut self, buf: &[u8]) -> std::result::Result<usize, std::io::Error> { + let to_write = buf.len(); + if unlikely!(self.len + to_write > self.cap) { + self.grow(to_write); + } + unsafe { + std::ptr::copy_nonoverlapping(buf.as_ptr() as *const u8, self.buffer_ptr(), to_write); + }; + self.len += to_write; + Ok(to_write) + } + #[inline] + fn write_all(&mut self, buf: &[u8]) -> std::result::Result<(), std::io::Error> { + self.write(buf).unwrap(); + Ok(()) + } + #[inline] + fn flush(&mut self) -> std::result::Result<(), std::io::Error> { + Ok(()) + } +} diff --git a/src/typeref.rs b/src/typeref.rs index 25d0d125f5d28c737c3310adb54dcf4ea3b6639c_c3JjL3R5cGVyZWYucnM=..3376f44bf758ba01710f18c40227c9e23f9756cd_c3JjL3R5cGVyZWYucnM= 100644 --- a/src/typeref.rs +++ b/src/typeref.rs @@ -56,8 +56,8 @@ pub fn init_typerefs() { INIT.call_once(|| unsafe { - assert!(crate::decode::KEY_MAP - .set(crate::decode::KeyMap::default()) + assert!(crate::deserialize::KEY_MAP + .set(crate::deserialize::KeyMap::default()) .is_ok()); PyDateTime_IMPORT(); NONE = Py_None(); diff --git a/src/uuid.rs b/src/uuid.rs deleted file mode 100644 index 25d0d125f5d28c737c3310adb54dcf4ea3b6639c_c3JjL3V1aWQucnM=..0000000000000000000000000000000000000000 --- a/src/uuid.rs +++ /dev/null @@ -1,62 +0,0 @@ -// SPDX-License-Identifier: (Apache-2.0 OR MIT) - -use crate::typeref::*; -use serde::ser::{Serialize, Serializer}; -use smallvec::SmallVec; -use std::io::Write; -use std::os::raw::c_uchar; - -pub type UUIDBuffer = smallvec::SmallVec<[u8; 64]>; - -pub struct UUID { - ptr: *mut pyo3::ffi::PyObject, -} - -impl UUID { - pub fn new(ptr: *mut pyo3::ffi::PyObject) -> Self { - UUID { ptr: ptr } - } - pub fn write_buf(&self, buf: &mut UUIDBuffer) { - let value: u128; - { - // test_uuid_immutable, test_uuid_int - let py_int = ffi!(PyObject_GetAttr(self.ptr, INT_ATTR_STR)); - ffi!(Py_DECREF(py_int)); - let buffer: [c_uchar; 16] = [0; 16]; - unsafe { - // test_uuid_overflow - pyo3::ffi::_PyLong_AsByteArray( - py_int as *mut pyo3::ffi::PyLongObject, - buffer.as_ptr() as *const c_uchar, - 16, - 1, // little_endian - 0, // is_signed - ) - }; - value = u128::from_le_bytes(buffer); - } - - let mut hexadecimal: SmallVec<[u8; 32]> = SmallVec::with_capacity(32); - write!(hexadecimal, "{:032x}", value).unwrap(); - - buf.extend_from_slice(&hexadecimal[..8]); - buf.push(b'-'); - buf.extend_from_slice(&hexadecimal[8..12]); - buf.push(b'-'); - buf.extend_from_slice(&hexadecimal[12..16]); - buf.push(b'-'); - buf.extend_from_slice(&hexadecimal[16..20]); - buf.push(b'-'); - buf.extend_from_slice(&hexadecimal[20..]); - } -} -impl<'p> Serialize for UUID { - fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> - where - S: Serializer, - { - let mut buf: UUIDBuffer = smallvec::SmallVec::with_capacity(64); - self.write_buf(&mut buf); - serializer.serialize_str(str_from_slice!(buf.as_ptr(), buf.len())) - } -} diff --git a/src/writer.rs b/src/writer.rs deleted file mode 100644 index 25d0d125f5d28c737c3310adb54dcf4ea3b6639c_c3JjL3dyaXRlci5ycw==..0000000000000000000000000000000000000000 --- a/src/writer.rs +++ /dev/null @@ -1,90 +0,0 @@ -// SPDX-License-Identifier: (Apache-2.0 OR MIT) - -use crate::bytes::PyBytesObject; -use core::ptr::NonNull; -use pyo3::ffi::*; -use std::os::raw::c_char; - -pub struct BytesWriter { - cap: usize, - len: usize, - bytes: *mut PyBytesObject, -} - -impl BytesWriter { - #[inline] - pub fn new() -> Self { - let buf = [0; 64]; - BytesWriter { - cap: 64, - len: 0, - bytes: unsafe { PyBytes_FromStringAndSize(buf.as_ptr(), 64) as *mut PyBytesObject }, - } - } - - #[inline] - pub fn finish(&mut self) -> NonNull<PyObject> { - unsafe { - (*self.bytes).ob_size = self.len as isize; - self.resize(self.len as isize); - NonNull::new_unchecked(self.bytes as *mut PyObject) - } - } - - #[inline] - fn buffer_ptr(&self) -> *mut u8 { - unsafe { - std::mem::transmute::<&[c_char; 1], *mut u8>( - &(*self.bytes.cast::<PyBytesObject>()).ob_sval, - ) - .offset(self.len as isize) - } - } - - #[inline] - pub fn resize(&mut self, len: isize) { - unsafe { - _PyBytes_Resize( - &mut self.bytes as *mut *mut PyBytesObject as *mut *mut PyObject, - len as isize, - ); - } - } - - #[inline] - pub fn prefetch(&self) { - unsafe { core::intrinsics::prefetch_write_data(self.buffer_ptr(), 3) }; - } - - #[inline] - fn grow(&mut self, len: usize) { - while self.cap - self.len < len { - self.cap *= 2; - } - self.resize(self.cap as isize); - } -} - -impl std::io::Write for BytesWriter { - #[inline] - fn write(&mut self, buf: &[u8]) -> std::result::Result<usize, std::io::Error> { - let to_write = buf.len(); - if unlikely!(self.len + to_write > self.cap) { - self.grow(to_write); - } - unsafe { - std::ptr::copy_nonoverlapping(buf.as_ptr() as *const u8, self.buffer_ptr(), to_write); - }; - self.len += to_write; - Ok(to_write) - } - #[inline] - fn write_all(&mut self, buf: &[u8]) -> std::result::Result<(), std::io::Error> { - self.write(buf).unwrap(); - Ok(()) - } - #[inline] - fn flush(&mut self) -> std::result::Result<(), std::io::Error> { - Ok(()) - } -}