pyo3_bytes/
bytes.rs

1//! Support for Python buffer protocol
2
3use std::os::raw::c_int;
4use std::ptr::NonNull;
5
6use bytes::Bytes;
7
8use pyo3::buffer::PyBuffer;
9use pyo3::exceptions::PyValueError;
10use pyo3::ffi;
11use pyo3::prelude::*;
12
13/// A wrapper around a [`bytes::Bytes`][].
14///
15/// This implements both import and export via the Python buffer protocol.
16///
17/// ### Buffer protocol import
18///
19/// This can be very useful as a general way to support ingest of a Python buffer protocol object.
20///
21/// The underlying [Bytes] manages the external memory, automatically calling the Python
22/// buffer's release callback when the internal reference count reaches 0.
23///
24/// Note that converting this [`Bytes`] into a [BytesMut][::bytes::BytesMut] will always create a
25/// deep copy of the buffer into newly allocated memory, since this `Bytes` is constructed from an
26/// owner.
27///
28/// ### Buffer protocol export
29///
30/// PyBytes implements the Python buffer protocol to enable Python to access the underlying `Bytes`
31/// data view without copies. In Python, this `PyBytes` object can be passed to Python `bytes` or
32/// `memoryview` constructors, `numpy.frombuffer`, or any other function that supports buffer
33/// protocol input.
34#[pyclass(name = "Bytes", subclass, frozen)]
35#[derive(Debug, Hash, PartialEq, PartialOrd, Eq, Ord)]
36pub struct PyBytes(Bytes);
37
38impl AsRef<Bytes> for PyBytes {
39    fn as_ref(&self) -> &Bytes {
40        &self.0
41    }
42}
43
44impl AsRef<[u8]> for PyBytes {
45    fn as_ref(&self) -> &[u8] {
46        self.0.as_ref()
47    }
48}
49
50impl PyBytes {
51    /// Construct a new [PyBytes]
52    pub fn new(buffer: Bytes) -> Self {
53        Self(buffer)
54    }
55
56    /// Consume and return the [Bytes]
57    pub fn into_inner(self) -> Bytes {
58        self.0
59    }
60}
61
62impl From<PyBytes> for Bytes {
63    fn from(value: PyBytes) -> Self {
64        value.0
65    }
66}
67
68impl From<Bytes> for PyBytes {
69    fn from(value: Bytes) -> Self {
70        PyBytes(value)
71    }
72}
73
74#[pymethods]
75impl PyBytes {
76    // By setting the argument to PyBytes, this means that any buffer-protocol object is supported
77    // here, since it will use the FromPyObject impl.
78    #[new]
79    fn py_new(buf: PyBytes) -> Self {
80        buf
81    }
82
83    /// Copy this buffer's contents to a Python `bytes` object
84    fn to_bytes<'py>(&'py self, py: Python<'py>) -> Bound<'py, pyo3::types::PyBytes> {
85        pyo3::types::PyBytes::new(py, &self.0)
86    }
87
88    /// The number of bytes in this Bytes
89    fn __len__(&self) -> usize {
90        self.0.len()
91    }
92
93    fn __repr__(&self) -> String {
94        format!("Bytes(len={})", self.0.len())
95    }
96
97    /// This is taken from opendal:
98    /// https://github.com/apache/opendal/blob/d001321b0f9834bc1e2e7d463bcfdc3683e968c9/bindings/python/src/utils.rs#L51-L72
99    unsafe fn __getbuffer__(
100        slf: PyRef<Self>,
101        view: *mut ffi::Py_buffer,
102        flags: c_int,
103    ) -> PyResult<()> {
104        let bytes = slf.0.as_ref();
105        let ret = ffi::PyBuffer_FillInfo(
106            view,
107            slf.as_ptr() as *mut _,
108            bytes.as_ptr() as *mut _,
109            bytes.len().try_into().unwrap(),
110            1, // read only
111            flags,
112        );
113        if ret == -1 {
114            return Err(PyErr::fetch(slf.py()));
115        }
116        Ok(())
117    }
118
119    // Comment from david hewitt on discord:
120    // > I think normally `__getbuffer__` takes a pointer to the owning Python object, so you
121    // > don't need to treat the allocation as owned separately. It should be good enough to keep
122    // > the allocation owned by the object.
123    // https://discord.com/channels/1209263839632424990/1324816949464666194/1328299411427557397
124    unsafe fn __releasebuffer__(&self, _view: *mut ffi::Py_buffer) {}
125}
126
127impl<'py> FromPyObject<'py> for PyBytes {
128    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
129        let buffer = ob.extract::<PyBytesWrapper>()?;
130        let bytes = Bytes::from_owner(buffer);
131        Ok(Self(bytes))
132    }
133}
134
135/// A wrapper around a PyBuffer that applies a custom destructor that checks if the Python
136/// interpreter is still initialized before freeing the buffer memory.
137///
138/// This also implements AsRef<[u8]> because that is required for Bytes::from_owner
139#[derive(Debug)]
140struct PyBytesWrapper(Option<PyBuffer<u8>>);
141
142impl Drop for PyBytesWrapper {
143    fn drop(&mut self) {
144        // Only call the underlying Drop of PyBuffer if the Python interpreter is still
145        // initialized. Sometimes the Drop can attempt to happen after the Python interpreter was
146        // already finalized.
147        // https://github.com/kylebarron/arro3/issues/230
148        let is_initialized = unsafe { ffi::Py_IsInitialized() };
149        if let Some(val) = self.0.take() {
150            if is_initialized == 0 {
151                std::mem::forget(val);
152            } else {
153                std::mem::drop(val);
154            }
155        }
156    }
157}
158
159impl AsRef<[u8]> for PyBytesWrapper {
160    fn as_ref(&self) -> &[u8] {
161        let buffer = self.0.as_ref().expect("Buffer already disposed");
162        let len = buffer.item_count();
163
164        let ptr = NonNull::new(buffer.buf_ptr() as _).expect("Expected buffer ptr to be non null");
165
166        // Safety:
167        //
168        // This requires that the data will not be mutated from Python. Sadly, the buffer protocol
169        // does not uphold this invariant always for us, and the Python user must take care not to
170        // mutate the provided buffer.
171        unsafe { std::slice::from_raw_parts(ptr.as_ptr() as *const u8, len) }
172    }
173}
174
175fn validate_buffer(buf: &PyBuffer<u8>) -> PyResult<()> {
176    if !buf.is_c_contiguous() {
177        return Err(PyValueError::new_err("Buffer is not C contiguous"));
178    }
179
180    if buf.shape().iter().any(|s| *s == 0) {
181        return Err(PyValueError::new_err("0-length dimension not supported."));
182    }
183
184    if buf.strides().iter().any(|s| *s == 0) {
185        return Err(PyValueError::new_err("Non-zero strides not supported."));
186    }
187
188    Ok(())
189}
190
191impl<'py> FromPyObject<'py> for PyBytesWrapper {
192    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
193        let buffer = ob.extract::<PyBuffer<u8>>()?;
194        validate_buffer(&buffer)?;
195        Ok(Self(Some(buffer)))
196    }
197}