pyo3_bytes/bytes.rs
1//! Support for Python buffer protocol
2
3use std::os::raw::c_int;
4use std::ptr::NonNull;
5
6use bytes::Bytes;
7
8use pyo3::buffer::PyBuffer;
9use pyo3::exceptions::PyValueError;
10use pyo3::ffi;
11use pyo3::prelude::*;
12
13/// A wrapper around a [`bytes::Bytes`][].
14///
15/// This implements both import and export via the Python buffer protocol.
16///
17/// ### Buffer protocol import
18///
19/// This can be very useful as a general way to support ingest of a Python buffer protocol object.
20///
21/// The underlying [Bytes] manages the external memory, automatically calling the Python
22/// buffer's release callback when the internal reference count reaches 0.
23///
24/// Note that converting this [`Bytes`] into a [BytesMut][::bytes::BytesMut] will always create a
25/// deep copy of the buffer into newly allocated memory, since this `Bytes` is constructed from an
26/// owner.
27///
28/// ### Buffer protocol export
29///
30/// PyBytes implements the Python buffer protocol to enable Python to access the underlying `Bytes`
31/// data view without copies. In Python, this `PyBytes` object can be passed to Python `bytes` or
32/// `memoryview` constructors, `numpy.frombuffer`, or any other function that supports buffer
33/// protocol input.
34#[pyclass(name = "Bytes", subclass, frozen)]
35#[derive(Debug, Hash, PartialEq, PartialOrd, Eq, Ord)]
36pub struct PyBytes(Bytes);
37
38impl AsRef<Bytes> for PyBytes {
39 fn as_ref(&self) -> &Bytes {
40 &self.0
41 }
42}
43
44impl AsRef<[u8]> for PyBytes {
45 fn as_ref(&self) -> &[u8] {
46 self.0.as_ref()
47 }
48}
49
50impl PyBytes {
51 /// Construct a new [PyBytes]
52 pub fn new(buffer: Bytes) -> Self {
53 Self(buffer)
54 }
55
56 /// Consume and return the [Bytes]
57 pub fn into_inner(self) -> Bytes {
58 self.0
59 }
60}
61
62impl From<PyBytes> for Bytes {
63 fn from(value: PyBytes) -> Self {
64 value.0
65 }
66}
67
68impl From<Bytes> for PyBytes {
69 fn from(value: Bytes) -> Self {
70 PyBytes(value)
71 }
72}
73
74#[pymethods]
75impl PyBytes {
76 // By setting the argument to PyBytes, this means that any buffer-protocol object is supported
77 // here, since it will use the FromPyObject impl.
78 #[new]
79 fn py_new(buf: PyBytes) -> Self {
80 buf
81 }
82
83 /// Copy this buffer's contents to a Python `bytes` object
84 fn to_bytes<'py>(&'py self, py: Python<'py>) -> Bound<'py, pyo3::types::PyBytes> {
85 pyo3::types::PyBytes::new(py, &self.0)
86 }
87
88 /// The number of bytes in this Bytes
89 fn __len__(&self) -> usize {
90 self.0.len()
91 }
92
93 fn __repr__(&self) -> String {
94 format!("Bytes(len={})", self.0.len())
95 }
96
97 /// This is taken from opendal:
98 /// https://github.com/apache/opendal/blob/d001321b0f9834bc1e2e7d463bcfdc3683e968c9/bindings/python/src/utils.rs#L51-L72
99 unsafe fn __getbuffer__(
100 slf: PyRef<Self>,
101 view: *mut ffi::Py_buffer,
102 flags: c_int,
103 ) -> PyResult<()> {
104 let bytes = slf.0.as_ref();
105 let ret = ffi::PyBuffer_FillInfo(
106 view,
107 slf.as_ptr() as *mut _,
108 bytes.as_ptr() as *mut _,
109 bytes.len().try_into().unwrap(),
110 1, // read only
111 flags,
112 );
113 if ret == -1 {
114 return Err(PyErr::fetch(slf.py()));
115 }
116 Ok(())
117 }
118
119 // Comment from david hewitt on discord:
120 // > I think normally `__getbuffer__` takes a pointer to the owning Python object, so you
121 // > don't need to treat the allocation as owned separately. It should be good enough to keep
122 // > the allocation owned by the object.
123 // https://discord.com/channels/1209263839632424990/1324816949464666194/1328299411427557397
124 unsafe fn __releasebuffer__(&self, _view: *mut ffi::Py_buffer) {}
125}
126
127impl<'py> FromPyObject<'py> for PyBytes {
128 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
129 let buffer = ob.extract::<PyBytesWrapper>()?;
130 let bytes = Bytes::from_owner(buffer);
131 Ok(Self(bytes))
132 }
133}
134
135/// A wrapper around a PyBuffer that applies a custom destructor that checks if the Python
136/// interpreter is still initialized before freeing the buffer memory.
137///
138/// This also implements AsRef<[u8]> because that is required for Bytes::from_owner
139#[derive(Debug)]
140struct PyBytesWrapper(Option<PyBuffer<u8>>);
141
142impl Drop for PyBytesWrapper {
143 fn drop(&mut self) {
144 // Only call the underlying Drop of PyBuffer if the Python interpreter is still
145 // initialized. Sometimes the Drop can attempt to happen after the Python interpreter was
146 // already finalized.
147 // https://github.com/kylebarron/arro3/issues/230
148 let is_initialized = unsafe { ffi::Py_IsInitialized() };
149 if let Some(val) = self.0.take() {
150 if is_initialized == 0 {
151 std::mem::forget(val);
152 } else {
153 std::mem::drop(val);
154 }
155 }
156 }
157}
158
159impl AsRef<[u8]> for PyBytesWrapper {
160 fn as_ref(&self) -> &[u8] {
161 let buffer = self.0.as_ref().expect("Buffer already disposed");
162 let len = buffer.item_count();
163
164 let ptr = NonNull::new(buffer.buf_ptr() as _).expect("Expected buffer ptr to be non null");
165
166 // Safety:
167 //
168 // This requires that the data will not be mutated from Python. Sadly, the buffer protocol
169 // does not uphold this invariant always for us, and the Python user must take care not to
170 // mutate the provided buffer.
171 unsafe { std::slice::from_raw_parts(ptr.as_ptr() as *const u8, len) }
172 }
173}
174
175fn validate_buffer(buf: &PyBuffer<u8>) -> PyResult<()> {
176 if !buf.is_c_contiguous() {
177 return Err(PyValueError::new_err("Buffer is not C contiguous"));
178 }
179
180 if buf.shape().iter().any(|s| *s == 0) {
181 return Err(PyValueError::new_err("0-length dimension not supported."));
182 }
183
184 if buf.strides().iter().any(|s| *s == 0) {
185 return Err(PyValueError::new_err("Non-zero strides not supported."));
186 }
187
188 Ok(())
189}
190
191impl<'py> FromPyObject<'py> for PyBytesWrapper {
192 fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
193 let buffer = ob.extract::<PyBuffer<u8>>()?;
194 validate_buffer(&buffer)?;
195 Ok(Self(Some(buffer)))
196 }
197}