vortex_io/
io_buf.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! Provides types that can be used by I/O frameworks to work with byte buffer-shaped data.
5
6use std::ops::Range;
7
8use bytes::Bytes;
9use vortex_buffer::{Buffer, ConstByteBuffer};
10use vortex_error::VortexExpect;
11
12/// Trait for types that can provide a readonly byte buffer interface to I/O frameworks.
13///
14/// # Safety
15/// The type must support contiguous raw memory access via pointer, such as `Vec` or `[u8]`.
16pub unsafe trait IoBuf: Unpin + 'static {
17    /// Returns a raw pointer to the vector’s buffer.
18    fn read_ptr(&self) -> *const u8;
19
20    /// Number of initialized bytes.
21    fn bytes_init(&self) -> usize;
22
23    /// Access the buffer as a byte slice
24    fn as_slice(&self) -> &[u8];
25
26    /// Access the buffer as a byte slice with begin and end indices
27    #[inline]
28    fn slice_owned(self, range: Range<usize>) -> OwnedSlice<Self>
29    where
30        Self: Sized,
31    {
32        // Validate range bounds
33        assert!(
34            range.start <= range.end,
35            "Invalid range: start ({}) must be <= end ({})",
36            range.start,
37            range.end
38        );
39        assert!(
40            range.end <= self.bytes_init(),
41            "Range end ({}) exceeds buffer length ({})",
42            range.end,
43            self.bytes_init()
44        );
45
46        OwnedSlice {
47            buf: self,
48            begin: range.start,
49            end: range.end,
50        }
51    }
52}
53
/// An owning wrapper that exposes only a sub-range of the bytes held by the wrapped buffer.
pub struct OwnedSlice<T> {
    /// The wrapped buffer; it is owned for as long as the slice exists.
    buf: T,
    /// Offset, in bytes, of the first byte of the view within `buf`.
    begin: usize,
    /// Offset, in bytes, one past the last byte of the view within `buf`.
    end: usize,
}

impl<T> OwnedSlice<T> {
    /// Discards the range information and hands back the wrapped buffer unchanged.
    pub fn into_inner(self) -> T {
        let Self { buf, .. } = self;
        buf
    }
}
67
68unsafe impl IoBuf for &'static [u8] {
69    #[inline]
70    fn read_ptr(&self) -> *const u8 {
71        self.as_ptr()
72    }
73
74    #[inline]
75    fn bytes_init(&self) -> usize {
76        self.len()
77    }
78
79    #[inline]
80    fn as_slice(&self) -> &[u8] {
81        self
82    }
83}
84
85unsafe impl<const N: usize> IoBuf for [u8; N] {
86    #[inline]
87    fn read_ptr(&self) -> *const u8 {
88        self.as_ptr()
89    }
90
91    #[inline]
92    fn bytes_init(&self) -> usize {
93        N
94    }
95
96    #[inline]
97    fn as_slice(&self) -> &[u8] {
98        self.as_ref()
99    }
100}
101
102unsafe impl IoBuf for Vec<u8> {
103    #[inline]
104    fn read_ptr(&self) -> *const u8 {
105        self.as_ptr()
106    }
107
108    #[inline]
109    fn bytes_init(&self) -> usize {
110        self.len()
111    }
112
113    #[inline]
114    fn as_slice(&self) -> &[u8] {
115        self.as_ref()
116    }
117}
118
119unsafe impl<T: IoBuf> IoBuf for OwnedSlice<T> {
120    #[inline]
121    fn read_ptr(&self) -> *const u8 {
122        debug_assert!(self.begin <= self.end, "Invalid slice bounds");
123        debug_assert!(
124            self.end <= self.buf.bytes_init(),
125            "Slice end exceeds buffer bounds"
126        );
127
128        let base_ptr = self.buf.read_ptr();
129        debug_assert!(!base_ptr.is_null(), "Base pointer is null");
130
131        // Check for potential pointer overflow in debug builds
132        #[cfg(debug_assertions)]
133        {
134            let max_offset = isize::MAX as usize;
135            assert!(
136                self.begin <= max_offset,
137                "Offset too large for pointer arithmetic"
138            );
139        }
140
141        unsafe { base_ptr.add(self.begin) }
142    }
143
144    #[inline]
145    fn bytes_init(&self) -> usize {
146        debug_assert!(self.begin <= self.end, "Invalid slice bounds");
147        self.end - self.begin
148    }
149
150    #[inline]
151    fn as_slice(&self) -> &[u8] {
152        let ptr = self.read_ptr();
153        let len = self.bytes_init();
154
155        debug_assert!(
156            !ptr.is_null() || len == 0,
157            "Null pointer with non-zero length"
158        );
159
160        unsafe { std::slice::from_raw_parts(ptr, len) }
161    }
162}
163
164unsafe impl IoBuf for Bytes {
165    fn read_ptr(&self) -> *const u8 {
166        self.as_ptr()
167    }
168
169    fn bytes_init(&self) -> usize {
170        self.len()
171    }
172
173    fn as_slice(&self) -> &[u8] {
174        self.as_ref()
175    }
176}
177
178unsafe impl<const A: usize> IoBuf for ConstByteBuffer<A> {
179    fn read_ptr(&self) -> *const u8 {
180        self.as_ptr()
181    }
182
183    fn bytes_init(&self) -> usize {
184        self.len()
185    }
186
187    fn as_slice(&self) -> &[u8] {
188        self.as_ref()
189    }
190}
191
192unsafe impl<T: Unpin + 'static> IoBuf for Buffer<T> {
193    fn read_ptr(&self) -> *const u8 {
194        self.as_ptr().cast()
195    }
196
197    fn bytes_init(&self) -> usize {
198        self.len()
199            .checked_mul(size_of::<T>())
200            .vortex_expect("Buffer size calculation overflow")
201    }
202
203    fn as_slice(&self) -> &[u8] {
204        unsafe { std::slice::from_raw_parts(self.read_ptr(), self.bytes_init()) }
205    }
206}
207
#[cfg(test)]
mod tests {
    use rstest::rstest;

    use super::*;

    // ---- `&'static [u8]` ----

    #[test]
    fn test_static_slice_io_buf() {
        let text: &'static [u8] = b"hello world";

        assert_eq!(text.bytes_init(), 11);
        assert_eq!(text.as_slice(), b"hello world");
        assert_eq!(text.read_ptr(), text.as_ptr());
    }

    #[test]
    fn test_static_empty_slice() {
        let nothing: &'static [u8] = b"";

        assert_eq!(nothing.as_slice(), b"");
        assert_eq!(nothing.bytes_init(), 0);
    }

    // ---- `[u8; N]` ----

    #[rstest]
    #[case([1u8, 2, 3, 4, 5], 5)]
    #[case([0u8; 256], 256)]
    #[case([255u8; 1], 1)]
    fn test_array_io_buf<const N: usize>(#[case] array: [u8; N], #[case] expected_len: usize) {
        let view = array.as_slice();

        assert_eq!(view.len(), expected_len);
        assert_eq!(array.bytes_init(), expected_len);
        assert_eq!(array.read_ptr(), array.as_ptr());
    }

    // ---- `Vec<u8>` ----

    #[test]
    fn test_vec_io_buf() {
        let bytes = vec![1u8, 2, 3, 4, 5];

        assert_eq!(bytes.bytes_init(), 5);
        assert_eq!(bytes.as_slice(), &[1, 2, 3, 4, 5]);
        assert_eq!(bytes.read_ptr(), bytes.as_ptr());
    }

    #[rstest]
    #[case(vec![], 0)]
    #[case(vec![42u8], 1)]
    #[case(vec![1u8, 2, 3], 3)]
    #[case(vec![0u8; 1024], 1024)]
    fn test_vec_various_sizes(#[case] vec: Vec<u8>, #[case] expected_len: usize) {
        let view = vec.as_slice();

        assert_eq!(view.len(), expected_len);
        assert_eq!(vec.bytes_init(), expected_len);
    }

    // ---- `OwnedSlice` ----

    #[test]
    fn test_owned_slice_basic() {
        let window = vec![1u8, 2, 3, 4, 5].slice_owned(1..4);

        assert_eq!(window.as_slice(), &[2, 3, 4]);
        assert_eq!(window.bytes_init(), 3);
    }

    #[rstest]
    #[case(vec![1u8, 2, 3, 4, 5], 0..5, vec![1, 2, 3, 4, 5])]
    #[case(vec![1u8, 2, 3, 4, 5], 1..4, vec![2, 3, 4])]
    #[case(vec![1u8, 2, 3, 4, 5], 2..3, vec![3])]
    #[case(vec![1u8, 2, 3, 4, 5], 0..0, vec![])]
    #[case(vec![1u8, 2, 3, 4, 5], 5..5, vec![])]
    fn test_owned_slice_ranges(
        #[case] data: Vec<u8>,
        #[case] range: Range<usize>,
        #[case] expected: Vec<u8>,
    ) {
        // Work out the expected length first so the range can be moved into the call.
        let expected_len = range.end - range.start;
        let window = data.slice_owned(range);

        assert_eq!(window.bytes_init(), expected_len);
        assert_eq!(window.as_slice(), expected.as_slice());
    }

    #[test]
    fn test_owned_slice_into_inner() {
        let original = vec![1u8, 2, 3, 4, 5];
        let recovered = original.clone().slice_owned(1..4).into_inner();

        assert_eq!(recovered, original);
    }

    #[test]
    fn test_nested_owned_slice() {
        // Outer window sees [2, 3, 4, 5, 6, 7]; the inner one narrows that to [3, 4, 5].
        let outer = vec![1u8, 2, 3, 4, 5, 6, 7, 8].slice_owned(1..7);
        let inner = outer.slice_owned(1..4);

        assert_eq!(inner.as_slice(), &[3, 4, 5]);
        assert_eq!(inner.bytes_init(), 3);
    }

    // ---- `Bytes` ----

    #[test]
    fn test_bytes_io_buf() {
        let payload = Bytes::from_static(b"test data");

        assert_eq!(payload.bytes_init(), 9);
        assert_eq!(payload.as_slice(), b"test data");
        assert_eq!(payload.read_ptr(), payload.as_ptr());
    }

    #[test]
    fn test_bytes_empty() {
        let empty = Bytes::new();

        assert_eq!(empty.as_slice(), b"");
        assert_eq!(empty.bytes_init(), 0);
    }

    // ---- `ConstByteBuffer` ----

    #[test]
    fn test_const_byte_buffer() {
        const ALIGNMENT: usize = 64;
        let source = b"aligned data".to_vec();
        let aligned = ConstByteBuffer::<ALIGNMENT>::copy_from(&source);

        assert_eq!(aligned.as_slice(), b"aligned data");
        assert_eq!(aligned.bytes_init(), 12);

        // The data pointer must sit on an `ALIGNMENT`-byte boundary.
        let address = aligned.read_ptr() as usize;
        assert_eq!(address % ALIGNMENT, 0);
    }

    // Generates one test per alignment: a 4-byte payload is copied into a
    // `ConstByteBuffer` and both its pointer alignment and byte count are checked.
    macro_rules! test_const_buffer_alignment {
        ($name:ident, $alignment:literal) => {
            #[test]
            fn $name() {
                let source = b"test".to_vec();
                let aligned = ConstByteBuffer::<$alignment>::copy_from(&source);
                let address = aligned.read_ptr() as usize;
                assert_eq!(address % $alignment, 0);
                assert_eq!(aligned.bytes_init(), 4);
            }
        };
    }

    test_const_buffer_alignment!(test_const_byte_buffer_alignment_8, 8);
    test_const_buffer_alignment!(test_const_byte_buffer_alignment_16, 16);
    test_const_buffer_alignment!(test_const_byte_buffer_alignment_32, 32);
    test_const_buffer_alignment!(test_const_byte_buffer_alignment_64, 64);
    test_const_buffer_alignment!(test_const_byte_buffer_alignment_128, 128);
    test_const_buffer_alignment!(test_const_byte_buffer_alignment_256, 256);

    // ---- `Buffer<T>` ----

    #[test]
    fn test_buffer_u32() {
        let mut builder = vortex_buffer::BufferMut::<u32>::with_capacity(4);
        builder.extend_from_slice(&[1u32, 2, 3, 4]);
        let buffer: Buffer<u32> = builder.freeze();

        // Four `u32` elements occupy 4 * 4 = 16 bytes.
        assert_eq!(buffer.len(), 4);
        assert_eq!(buffer.bytes_init(), 16);
    }

    #[test]
    fn test_buffer_u64() {
        let mut builder = vortex_buffer::BufferMut::<u64>::with_capacity(3);
        builder.extend_from_slice(&[100u64, 200, 300]);
        let buffer: Buffer<u64> = builder.freeze();

        // Three `u64` elements occupy 3 * 8 = 24 bytes.
        assert_eq!(buffer.len(), 3);
        assert_eq!(buffer.bytes_init(), 24);
    }

    #[test]
    fn test_buffer_empty() {
        let empty: Buffer<u8> = Buffer::from(vec![]);

        assert_eq!(empty.as_slice(), &[] as &[u8]);
        assert_eq!(empty.bytes_init(), 0);
    }

    #[test]
    fn test_buffer_various_types() {
        // One byte per element.
        let bytes = Buffer::from(vec![1u8, 2, 3]);
        assert_eq!(bytes.bytes_init(), 3);

        // Two bytes per element.
        let mut halfwords = vortex_buffer::BufferMut::<u16>::with_capacity(3);
        halfwords.extend_from_slice(&[1u16, 2, 3]);
        let halfwords: Buffer<u16> = halfwords.freeze();
        assert_eq!(halfwords.bytes_init(), 6);

        // Four bytes per element.
        let mut words = vortex_buffer::BufferMut::<u32>::with_capacity(3);
        words.extend_from_slice(&[1u32, 2, 3]);
        let words: Buffer<u32> = words.freeze();
        assert_eq!(words.bytes_init(), 12);

        // Eight bytes per element.
        let mut doublewords = vortex_buffer::BufferMut::<u64>::with_capacity(3);
        doublewords.extend_from_slice(&[1u64, 2, 3]);
        let doublewords: Buffer<u64> = doublewords.freeze();
        assert_eq!(doublewords.bytes_init(), 24);
    }

    // ---- Cross-type pointer checks ----

    #[test]
    fn test_pointer_validity() {
        let heap = vec![1u8, 2, 3];
        let fixed: &'static [u8] = &[1, 2, 3];
        let inline = [1u8, 2, 3];

        // Merely asking for the pointers must not panic.
        let _ = heap.read_ptr();
        let _ = fixed.read_ptr();
        let _ = inline.read_ptr();

        // Each `read_ptr` must agree with the type's own `as_ptr`.
        assert_eq!(heap.read_ptr(), heap.as_ptr());
        assert_eq!(fixed.read_ptr(), fixed.as_ptr());
        assert_eq!(inline.read_ptr(), inline.as_ptr());
    }

    #[test]
    fn test_slice_owned_preserves_data() {
        let original = vec![10u8, 20, 30, 40, 50];
        let window = original.clone().slice_owned(1..4);

        // The window exposes only the selected bytes...
        assert_eq!(window.as_slice(), &[20, 30, 40]);

        // ...while unwrapping it yields the complete buffer again.
        assert_eq!(window.into_inner(), original);
    }

    // ---- Bounds checking in `slice_owned` ----

    #[test]
    #[should_panic(expected = "Invalid range")]
    fn test_owned_slice_invalid_range() {
        let data: Vec<u8> = vec![1, 2, 3];
        // Deliberately reversed range (start > end) to trigger the assertion.
        #[allow(clippy::reversed_empty_ranges)]
        let _ = data.slice_owned(5..3);
    }

    #[test]
    #[should_panic(expected = "exceeds buffer length")]
    fn test_owned_slice_out_of_bounds() {
        let data: Vec<u8> = vec![1, 2, 3];
        // The end of the range lies past the three available bytes.
        let _ = data.slice_owned(1..10);
    }

    #[test]
    fn test_owned_slice_zero_sized_at_boundary() {
        let data: Vec<u8> = vec![1, 2, 3];
        // An empty window positioned exactly at the end of the buffer is allowed.
        let window = data.slice_owned(3..3);
        assert_eq!(window.bytes_init(), 0);
    }

    #[test]
    #[should_panic(expected = "exceeds buffer length")]
    fn test_owned_slice_start_out_of_bounds() {
        let data: Vec<u8> = vec![1, 2, 3];
        // Both ends lie past the buffer; the end-of-range assertion is the one that fires.
        let _ = data.slice_owned(10..11);
    }

    // ---- Byte-size calculation for `Buffer<T>` ----

    #[test]
    fn test_buffer_size_calculation_u8() {
        let bytes: Buffer<u8> = Buffer::from(vec![1, 2, 3]);
        assert_eq!(bytes.bytes_init(), 3);
    }

    #[test]
    fn test_buffer_size_calculation_large_type() {
        use vortex_buffer::BufferMut;

        // An element type that is 1024 bytes wide.
        #[repr(C)]
        struct LargeType {
            _data: [u8; 1024],
        }

        let mut builder = BufferMut::<LargeType>::with_capacity(10);
        (0..10).for_each(|_| builder.push(LargeType { _data: [0u8; 1024] }));
        let buffer = builder.freeze();

        // Ten elements of 1024 bytes each, computed through the checked multiplication.
        assert_eq!(buffer.bytes_init(), 10 * 1024);
    }

    #[test]
    fn test_buffer_size_near_max() {
        // Large enough to be meaningful, small enough not to risk running out of memory.
        let byte_count = 1_000_000;
        let buffer: Buffer<u8> = Buffer::from(vec![0u8; byte_count]);
        assert_eq!(buffer.bytes_init(), byte_count);
    }
}