vortex_io/
io_buf.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! Provides types that can be used by I/O frameworks to work with byte buffer-shaped data.
5
6use std::ops::Range;
7
8use bytes::Bytes;
9use vortex_buffer::Buffer;
10use vortex_buffer::ConstByteBuffer;
11use vortex_error::VortexExpect;
12
13/// Trait for types that can provide a readonly byte buffer interface to I/O frameworks.
14///
15/// # Safety
16/// The type must support contiguous raw memory access via pointer, such as `Vec` or `[u8]`.
17pub unsafe trait IoBuf: Unpin + 'static {
18    /// Returns a raw pointer to the vector’s buffer.
19    fn read_ptr(&self) -> *const u8;
20
21    /// Number of initialized bytes.
22    fn bytes_init(&self) -> usize;
23
24    /// Access the buffer as a byte slice
25    fn as_slice(&self) -> &[u8];
26
27    /// Access the buffer as a byte slice with begin and end indices
28    #[inline]
29    fn slice_owned(self, range: Range<usize>) -> OwnedSlice<Self>
30    where
31        Self: Sized,
32    {
33        // Validate range bounds
34        assert!(
35            range.start <= range.end,
36            "Invalid range: start ({}) must be <= end ({})",
37            range.start,
38            range.end
39        );
40        assert!(
41            range.end <= self.bytes_init(),
42            "Range end ({}) exceeds buffer length ({})",
43            range.end,
44            self.bytes_init()
45        );
46
47        OwnedSlice {
48            buf: self,
49            begin: range.start,
50            end: range.end,
51        }
52    }
53}
54
/// An owned buffer together with the byte range of it that should be exposed.
///
/// The wrapped buffer is kept whole; `begin` and `end` only record which part is visible.
pub struct OwnedSlice<T> {
    /// The complete underlying buffer.
    buf: T,
    /// Offset of the first visible byte within `buf`.
    begin: usize,
    /// Offset one past the last visible byte within `buf`.
    end: usize,
}

impl<T> OwnedSlice<T> {
    /// Drops the range information and hands back the complete underlying buffer.
    pub fn into_inner(self) -> T {
        let Self { buf, .. } = self;
        buf
    }
}
68
69unsafe impl IoBuf for &'static [u8] {
70    #[inline]
71    fn read_ptr(&self) -> *const u8 {
72        self.as_ptr()
73    }
74
75    #[inline]
76    fn bytes_init(&self) -> usize {
77        self.len()
78    }
79
80    #[inline]
81    fn as_slice(&self) -> &[u8] {
82        self
83    }
84}
85
86unsafe impl<const N: usize> IoBuf for [u8; N] {
87    #[inline]
88    fn read_ptr(&self) -> *const u8 {
89        self.as_ptr()
90    }
91
92    #[inline]
93    fn bytes_init(&self) -> usize {
94        N
95    }
96
97    #[inline]
98    fn as_slice(&self) -> &[u8] {
99        self.as_ref()
100    }
101}
102
103unsafe impl IoBuf for Vec<u8> {
104    #[inline]
105    fn read_ptr(&self) -> *const u8 {
106        self.as_ptr()
107    }
108
109    #[inline]
110    fn bytes_init(&self) -> usize {
111        self.len()
112    }
113
114    #[inline]
115    fn as_slice(&self) -> &[u8] {
116        self.as_ref()
117    }
118}
119
120unsafe impl<T: IoBuf> IoBuf for OwnedSlice<T> {
121    #[inline]
122    fn read_ptr(&self) -> *const u8 {
123        debug_assert!(self.begin <= self.end, "Invalid slice bounds");
124        debug_assert!(
125            self.end <= self.buf.bytes_init(),
126            "Slice end exceeds buffer bounds"
127        );
128
129        let base_ptr = self.buf.read_ptr();
130        debug_assert!(!base_ptr.is_null(), "Base pointer is null");
131
132        // Check for potential pointer overflow in debug builds
133        #[cfg(debug_assertions)]
134        {
135            let max_offset = isize::MAX as usize;
136            assert!(
137                self.begin <= max_offset,
138                "Offset too large for pointer arithmetic"
139            );
140        }
141
142        unsafe { base_ptr.add(self.begin) }
143    }
144
145    #[inline]
146    fn bytes_init(&self) -> usize {
147        debug_assert!(self.begin <= self.end, "Invalid slice bounds");
148        self.end - self.begin
149    }
150
151    #[inline]
152    fn as_slice(&self) -> &[u8] {
153        let ptr = self.read_ptr();
154        let len = self.bytes_init();
155
156        debug_assert!(
157            !ptr.is_null() || len == 0,
158            "Null pointer with non-zero length"
159        );
160
161        unsafe { std::slice::from_raw_parts(ptr, len) }
162    }
163}
164
165unsafe impl IoBuf for Bytes {
166    fn read_ptr(&self) -> *const u8 {
167        self.as_ptr()
168    }
169
170    fn bytes_init(&self) -> usize {
171        self.len()
172    }
173
174    fn as_slice(&self) -> &[u8] {
175        self.as_ref()
176    }
177}
178
// `IoBuf` for `ConstByteBuffer<A>`: each method forwards to the buffer's own accessor.
// The tests in this file treat `A` as the byte alignment of the pointer returned by `read_ptr`.
unsafe impl<const A: usize> IoBuf for ConstByteBuffer<A> {
    // Start address of the buffer's bytes.
    fn read_ptr(&self) -> *const u8 {
        self.as_ptr()
    }

    // Length reported by the buffer; the tests in this file expect this to be a byte count.
    fn bytes_init(&self) -> usize {
        self.len()
    }

    // The buffer's contents borrowed as `&[u8]`.
    fn as_slice(&self) -> &[u8] {
        self.as_ref()
    }
}
192
193unsafe impl<T: Unpin + 'static> IoBuf for Buffer<T> {
194    fn read_ptr(&self) -> *const u8 {
195        self.as_ptr().cast()
196    }
197
198    fn bytes_init(&self) -> usize {
199        self.len()
200            .checked_mul(size_of::<T>())
201            .vortex_expect("Buffer size calculation overflow")
202    }
203
204    fn as_slice(&self) -> &[u8] {
205        unsafe { std::slice::from_raw_parts(self.read_ptr(), self.bytes_init()) }
206    }
207}
208
// Unit tests for the `IoBuf` implementations and for `OwnedSlice`.
#[cfg(test)]
mod tests {
    use rstest::rstest;

    use super::*;

    // A `&'static [u8]` reports its own pointer, length and contents.
    #[test]
    fn test_static_slice_io_buf() {
        let data: &'static [u8] = b"hello world";

        assert_eq!(data.read_ptr(), data.as_ptr());
        assert_eq!(data.bytes_init(), 11);
        assert_eq!(data.as_slice(), b"hello world");
    }

    // An empty static slice has zero initialized bytes and an empty view.
    #[test]
    fn test_static_empty_slice() {
        let data: &'static [u8] = b"";

        assert_eq!(data.bytes_init(), 0);
        assert_eq!(data.as_slice(), b"");
    }

    // Fixed-size arrays report `N` bytes regardless of their contents.
    #[rstest]
    #[case([1u8, 2, 3, 4, 5], 5)]
    #[case([0u8; 256], 256)]
    #[case([255u8; 1], 1)]
    fn test_array_io_buf<const N: usize>(#[case] array: [u8; N], #[case] expected_len: usize) {
        assert_eq!(array.bytes_init(), expected_len);
        assert_eq!(array.as_slice().len(), expected_len);
        assert_eq!(array.read_ptr(), array.as_ptr());
    }

    // A `Vec<u8>` reports its own pointer, length and contents.
    #[test]
    fn test_vec_io_buf() {
        let vec = vec![1u8, 2, 3, 4, 5];

        assert_eq!(vec.read_ptr(), vec.as_ptr());
        assert_eq!(vec.bytes_init(), 5);
        assert_eq!(vec.as_slice(), &[1, 2, 3, 4, 5]);
    }

    // Length reporting for vectors of several sizes, including the empty vector.
    #[rstest]
    #[case(vec![], 0)]
    #[case(vec![42u8], 1)]
    #[case(vec![1u8, 2, 3], 3)]
    #[case(vec![0u8; 1024], 1024)]
    fn test_vec_various_sizes(#[case] vec: Vec<u8>, #[case] expected_len: usize) {
        assert_eq!(vec.bytes_init(), expected_len);
        assert_eq!(vec.as_slice().len(), expected_len);
    }

    // `slice_owned` exposes exactly the requested window.
    #[test]
    fn test_owned_slice_basic() {
        let data = vec![1u8, 2, 3, 4, 5];
        let slice = data.slice_owned(1..4);

        assert_eq!(slice.bytes_init(), 3);
        assert_eq!(slice.as_slice(), &[2, 3, 4]);
    }

    // Full, interior, single-element and empty (at the start and at the end) windows.
    #[rstest]
    #[case(vec![1u8, 2, 3, 4, 5], 0..5, vec![1, 2, 3, 4, 5])]
    #[case(vec![1u8, 2, 3, 4, 5], 1..4, vec![2, 3, 4])]
    #[case(vec![1u8, 2, 3, 4, 5], 2..3, vec![3])]
    #[case(vec![1u8, 2, 3, 4, 5], 0..0, vec![])]
    #[case(vec![1u8, 2, 3, 4, 5], 5..5, vec![])]
    fn test_owned_slice_ranges(
        #[case] data: Vec<u8>,
        #[case] range: Range<usize>,
        #[case] expected: Vec<u8>,
    ) {
        let slice = data.slice_owned(range.clone());
        assert_eq!(slice.bytes_init(), range.end - range.start);
        assert_eq!(slice.as_slice(), &expected[..]);
    }

    // `into_inner` returns the whole original buffer, not just the window.
    #[test]
    fn test_owned_slice_into_inner() {
        let data = vec![1u8, 2, 3, 4, 5];
        let slice = data.clone().slice_owned(1..4);
        let recovered = slice.into_inner();

        assert_eq!(recovered, data);
    }

    // Slicing an `OwnedSlice` again: the second range is relative to the first window.
    #[test]
    fn test_nested_owned_slice() {
        let data = vec![1u8, 2, 3, 4, 5, 6, 7, 8];
        let slice1 = data.slice_owned(1..7); // [2, 3, 4, 5, 6, 7]
        let slice2 = slice1.slice_owned(1..4); // [3, 4, 5]

        assert_eq!(slice2.bytes_init(), 3);
        assert_eq!(slice2.as_slice(), &[3, 4, 5]);
    }

    // `Bytes` reports its own pointer, length and contents.
    #[test]
    fn test_bytes_io_buf() {
        let bytes = Bytes::from_static(b"test data");

        assert_eq!(bytes.read_ptr(), bytes.as_ptr());
        assert_eq!(bytes.bytes_init(), 9);
        assert_eq!(bytes.as_slice(), b"test data");
    }

    // An empty `Bytes` has zero initialized bytes and an empty view.
    #[test]
    fn test_bytes_empty() {
        let bytes = Bytes::new();

        assert_eq!(bytes.bytes_init(), 0);
        assert_eq!(bytes.as_slice(), b"");
    }

    // A `ConstByteBuffer` keeps its contents and returns a pointer aligned to its parameter.
    #[test]
    fn test_const_byte_buffer() {
        const ALIGNMENT: usize = 64;
        let data = b"aligned data".to_vec();
        let buffer = ConstByteBuffer::<ALIGNMENT>::copy_from(&data);

        assert_eq!(buffer.bytes_init(), 12);
        assert_eq!(buffer.as_slice(), b"aligned data");

        // Verify alignment
        let ptr_addr = buffer.read_ptr() as usize;
        assert_eq!(ptr_addr % ALIGNMENT, 0);
    }

    // Generates one test per alignment: copies four bytes into a `ConstByteBuffer` with the
    // given alignment, then checks the pointer alignment and the reported length.
    macro_rules! test_const_buffer_alignment {
        ($name:ident, $alignment:literal) => {
            #[test]
            fn $name() {
                let data = b"test".to_vec();
                let buffer = ConstByteBuffer::<$alignment>::copy_from(&data);
                let ptr_addr = buffer.read_ptr() as usize;
                assert_eq!(ptr_addr % $alignment, 0);
                assert_eq!(buffer.bytes_init(), 4);
            }
        };
    }

    test_const_buffer_alignment!(test_const_byte_buffer_alignment_8, 8);
    test_const_buffer_alignment!(test_const_byte_buffer_alignment_16, 16);
    test_const_buffer_alignment!(test_const_byte_buffer_alignment_32, 32);
    test_const_buffer_alignment!(test_const_byte_buffer_alignment_64, 64);
    test_const_buffer_alignment!(test_const_byte_buffer_alignment_128, 128);
    test_const_buffer_alignment!(test_const_byte_buffer_alignment_256, 256);

    // `bytes_init` for `Buffer<u32>` counts bytes, not elements.
    #[test]
    fn test_buffer_u32() {
        let data = vec![1u32, 2, 3, 4];
        let mut buf_mut = vortex_buffer::BufferMut::<u32>::with_capacity(4);
        buf_mut.extend_from_slice(&data);
        let buffer: Buffer<u32> = buf_mut.freeze();

        // The buffer has 4 u32 elements, bytes_init should be 4 * 4 = 16 bytes
        assert_eq!(buffer.len(), 4); // 4 elements
        assert_eq!(buffer.bytes_init(), 16); // 4 * size_of::<u32>()
    }

    // `bytes_init` for `Buffer<u64>` counts bytes, not elements.
    #[test]
    fn test_buffer_u64() {
        let data = vec![100u64, 200, 300];
        let mut buf_mut = vortex_buffer::BufferMut::<u64>::with_capacity(3);
        buf_mut.extend_from_slice(&data);
        let buffer: Buffer<u64> = buf_mut.freeze();

        // The buffer has 3 u64 elements, bytes_init should be 3 * 8 = 24 bytes
        assert_eq!(buffer.len(), 3); // 3 elements
        assert_eq!(buffer.bytes_init(), 24); // 3 * size_of::<u64>()
    }

    // An empty `Buffer<u8>` has zero initialized bytes and an empty view.
    #[test]
    fn test_buffer_empty() {
        let buffer: Buffer<u8> = Buffer::from(vec![]);

        assert_eq!(buffer.bytes_init(), 0);
        assert_eq!(buffer.as_slice(), &[] as &[u8]);
    }

    // Three-element buffers of each unsigned width: byte length is 3 * element size.
    #[test]
    fn test_buffer_various_types() {
        // u8 buffer
        let buffer = Buffer::from(vec![1u8, 2, 3]);
        assert_eq!(buffer.bytes_init(), 3);

        // u16 buffer
        let mut buf_mut = vortex_buffer::BufferMut::<u16>::with_capacity(3);
        buf_mut.extend_from_slice(&[1u16, 2, 3]);
        let buffer: Buffer<u16> = buf_mut.freeze();
        assert_eq!(buffer.bytes_init(), 6);

        // u32 buffer
        let mut buf_mut = vortex_buffer::BufferMut::<u32>::with_capacity(3);
        buf_mut.extend_from_slice(&[1u32, 2, 3]);
        let buffer: Buffer<u32> = buf_mut.freeze();
        assert_eq!(buffer.bytes_init(), 12);

        // u64 buffer
        let mut buf_mut = vortex_buffer::BufferMut::<u64>::with_capacity(3);
        buf_mut.extend_from_slice(&[1u64, 2, 3]);
        let buffer: Buffer<u64> = buf_mut.freeze();
        assert_eq!(buffer.bytes_init(), 24);
    }

    // `read_ptr` agrees with `as_ptr` for the vector, static slice and array implementations.
    #[test]
    fn test_pointer_validity() {
        // Test that read_ptr returns valid pointers for different types
        let vec = vec![1u8, 2, 3];
        let slice: &'static [u8] = &[1, 2, 3];
        let array = [1u8, 2, 3];

        // These should not crash or cause UB
        let _ = vec.read_ptr();
        let _ = slice.read_ptr();
        let _ = array.read_ptr();

        // Verify pointer consistency
        assert_eq!(vec.read_ptr(), vec.as_ptr());
        assert_eq!(slice.read_ptr(), slice.as_ptr());
        assert_eq!(array.read_ptr(), array.as_ptr());
    }

    // The window shows the right bytes and `into_inner` still yields the full original.
    #[test]
    fn test_slice_owned_preserves_data() {
        let original = vec![10u8, 20, 30, 40, 50];
        let slice = original.clone().slice_owned(1..4);

        // Verify the slice sees the correct data
        assert_eq!(slice.as_slice(), &[20, 30, 40]);

        // Verify we can recover the original
        let recovered = slice.into_inner();
        assert_eq!(recovered, original);
    }

    // Panic tests for bounds checking

    // A reversed range hits the first assertion in `slice_owned`.
    #[test]
    #[should_panic(expected = "Invalid range")]
    fn test_owned_slice_invalid_range() {
        let data = vec![1, 2, 3];
        #[expect(
            clippy::reversed_empty_ranges,
            reason = "intentionally testing invalid range"
        )]
        data.slice_owned(5..3); // start > end
    }

    // A range ending past the buffer hits the second assertion in `slice_owned`.
    #[test]
    #[should_panic(expected = "exceeds buffer length")]
    fn test_owned_slice_out_of_bounds() {
        let data = vec![1, 2, 3];
        data.slice_owned(1..10); // end > len
    }

    // An empty range positioned exactly at the end of the buffer is accepted.
    #[test]
    fn test_owned_slice_zero_sized_at_boundary() {
        let data = vec![1, 2, 3];
        let slice = data.slice_owned(3..3); // Zero-sized at end
        assert_eq!(slice.bytes_init(), 0);
    }

    // A range starting past the buffer is rejected by the end-of-range check, because
    // `start <= end` holds and only `end <= len` fails.
    #[test]
    #[should_panic(expected = "exceeds buffer length")]
    fn test_owned_slice_start_out_of_bounds() {
        let data = vec![1, 2, 3];
        data.slice_owned(10..11); // start > len
    }

    // Buffer overflow protection tests

    // For one-byte elements the byte length equals the element count.
    #[test]
    fn test_buffer_size_calculation_u8() {
        let buffer: Buffer<u8> = Buffer::from(vec![1, 2, 3]);
        assert_eq!(buffer.bytes_init(), 3);
    }

    // Byte length for a 1024-byte element type: 10 elements give 10 * 1024 bytes.
    #[test]
    fn test_buffer_size_calculation_large_type() {
        use vortex_buffer::BufferMut;

        // Test with a struct containing a large array
        #[repr(C)]
        struct LargeType {
            _data: [u8; 1024],
        }

        let mut buf = BufferMut::<LargeType>::with_capacity(10);
        // Extend with 10 elements
        for _ in 0..10 {
            buf.push(LargeType { _data: [0u8; 1024] });
        }
        let buffer = buf.freeze();

        // This should use checked arithmetic and not overflow
        let size = buffer.bytes_init();
        assert_eq!(size, 10 * 1024);
    }

    // NOTE(review): despite the name, 1_000_000 bytes is nowhere near `usize::MAX`; this only
    // checks that a moderately large `Buffer<u8>` reports its length correctly.
    #[test]
    fn test_buffer_size_near_max() {
        // Test with a moderately large buffer that won't cause OOM
        let large_size = 1_000_000;
        let buffer: Buffer<u8> = Buffer::from(vec![0u8; large_size]);
        assert_eq!(buffer.bytes_init(), large_size);
    }
}
511        assert_eq!(buffer.bytes_init(), large_size);
512    }
513}