// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

use std::mem::MaybeUninit;
use std::num::NonZero;
use std::ptr::NonNull;
use std::sync::atomic::{self, AtomicUsize};

use smallvec::SmallVec;

use crate::mem::{Block, BlockRef, BlockRefDynamic, BlockRefVTable, BlockSize};
use crate::{BytesView, MAX_INLINE_SPANS, Span};
impl From<Vec<u8>> for BytesView {
    /// Converts a [`Vec<u8>`] instance into a `BytesView`.
    ///
    /// This operation is always zero-copy, though does cost a small dynamic allocation.
    fn from(value: Vec<u8>) -> Self {
        // Fast path: an empty Vec maps to the empty view, no block allocation needed.
        if value.is_empty() {
            return Self::new();
        }

        // A Vec<u8> instance may contain any number of bytes, same as a BytesView. However, each
        // block of memory inside BytesView is limited to BlockSize::MAX, which is a smaller size.
        // Therefore, we may need to chop up the Vec into smaller slices, so each slice fits in
        // a BlockSize. This iterator does the job.
        let vec_blocks = VecBlockIterator::new(value);

        let blocks = vec_blocks.map(|vec| {
            // SAFETY: We must treat the provided memory capacity as immutable. We do, only using
            // it to create a `BytesView` over the immutable data that already exists within.
            // Note that this requirement also extends down the stack - no code that runs in this
            // function is allowed to create an exclusive reference over the data of the `Vec`,
            // even if that exclusive reference is not used for writes (Miri will tell you if you
            // did it wrong).
            unsafe { non_empty_vec_to_immutable_block(vec) }
        });

        let spans = blocks.map(|block| {
            let mut span_builder = block.into_span_builder();

            // The block was created with len equal to the chunk's data length, so the builder's
            // remaining capacity is presumably exactly the initialized byte count — the `expect`
            // cannot fire because VecBlockIterator never yields an empty chunk.
            #[expect(clippy::cast_possible_truncation, reason = "a span can never be larger than BlockSize")]
            let len = NonZero::new(span_builder.remaining_capacity() as BlockSize).expect("splitting Vec cannot yield zero-sized chunks");

            // SAFETY: We know that the data is already initialized; we simply declare this to the
            // SpanBuilder and get it to emit a completed Span from all its contents.
            unsafe {
                span_builder.advance(len.get() as usize);
            }

            span_builder.consume(len)
        });

        // NB! We cannot use `BytesBuf::from_blocks` because it is not guaranteed to use the
        // blocks in the same order as they are provided. Instead, we directly construct the inner
        // span array in the BytesView, which lets us avoid any temporary allocations and resizing.
        // At this point the spans are collected in forward order; the name becomes accurate
        // only after the `reverse()` below.
        let mut spans_reversed: SmallVec<[Span; MAX_INLINE_SPANS]> = spans.collect();

        // `from_spans_reversed` expects the spans in reverse order.
        // Not ideal but 99.999% of the case this is a 1-element array, so it does not matter.
        spans_reversed.reverse();

        Self::from_spans_reversed(spans_reversed)
    }
}

/// An implementation of `BlockRef` that reuses immutable memory of an owned `Vec<u8>` instance.
struct VecBlock {
    // This field exists to keep the Vec alive. The data within is accessed directly via pointers.
    // It is never read through this field again - creating a `&mut` (or even a fresh `&`) over
    // the buffer here would invalidate the pointers handed out to `Block`.
    _inner: Vec<u8>,

    // Number of live `BlockRef` clones pointing at this state; starts at 1 (see `VecBlock::new`)
    // and the state is deallocated when it drops to zero (see `BlockRefDynamic::drop`).
    ref_count: AtomicUsize,
}

74impl VecBlock {
75    pub const fn new(inner: Vec<u8>) -> Self {
76        Self {
77            _inner: inner,
78            ref_count: AtomicUsize::new(1),
79        }
80    }
81}
82
// SAFETY: We must guarantee thread-safety. We do.
// The only shared mutable state is `ref_count`, which is only ever accessed atomically.
unsafe impl BlockRefDynamic for VecBlock {
    type State = Self;

    /// Increments the shared reference count and hands back the same state pointer -
    /// every clone aliases the one heap-allocated `VecBlock`.
    fn clone(state_ptr: NonNull<Self::State>) -> NonNull<Self::State> {
        // SAFETY: The state pointer is always valid for reads.
        // We only ever created shared references to the block state - it exists just to track the
        // reference count.
        let state = unsafe { state_ptr.as_ref() };

        // Relaxed because incrementing reference count is independent of any other state.
        state.ref_count.fetch_add(1, atomic::Ordering::Relaxed);

        // We reuse the same state between all clones.
        state_ptr
    }

    /// Decrements the reference count; the caller of the final decrement deallocates the state
    /// (and with it the owned `Vec`). This mirrors the standard `Arc` drop protocol:
    /// Release on the decrement, Acquire fence before the actual destruction.
    #[cfg_attr(test, mutants::skip)] // Impractical to test. Miri will inform about memory leaks.
    fn drop(state_ptr: NonNull<Self::State>) {
        // SAFETY: The state pointer is always valid for reads.
        // We only ever created shared references to the block state - it exists just to track the
        // reference count.
        let state = unsafe { state_ptr.as_ref() };

        // Release because we are releasing the synchronization block for the memory block state.
        // `fetch_sub` returns the PREVIOUS value, so != 1 means other references remain.
        if state.ref_count.fetch_sub(1, atomic::Ordering::Release) != 1 {
            return;
        }

        // This was the last reference, so we can deallocate the block.
        // All we need to do is deallocate the block object - dropping the Vec field
        // will cleanup the memory capacity provided by the Vec instance.

        // Ensure that we have observed all writes into the block from other threads.
        // On x86 this does nothing but on weaker memory models writes could be delayed.
        atomic::fence(atomic::Ordering::Acquire);

        // SAFETY: No more references exist, we can resurrect the object inside a Box and drop.
        drop(unsafe { Box::from_raw(state_ptr.as_ptr()) });
    }
}

/// Wraps a non-empty `Vec<u8>` in a refcounted `Block` without copying its contents.
///
/// # Panics
///
/// Panics if the `Vec` is larger than `BlockSize::MAX`.
///
/// # Safety
///
/// The block contents must be treated as immutable because once converted to a `BytesView`,
/// the contents of the `Vec` are accessed via shared references only.
unsafe fn non_empty_vec_to_immutable_block(vec: Vec<u8>) -> Block {
    assert!(!vec.is_empty());

    let len: BlockSize = vec
        .len()
        .try_into()
        .expect("length of Vec<u8> instance was greater than BlockSize::MAX");

    // Capture the data pointer BEFORE `vec` moves into the Box below. This is sound because the
    // pointer targets the Vec's heap buffer, which stays put when the Vec value itself moves.
    let capacity_ptr = NonNull::new(vec.as_ptr().cast_mut())
        .expect("guarded by 'is zero sized Vec' check upstream - non-empty Vec must have non-null capacity pointer")
        .cast::<MaybeUninit<u8>>();

    let len = NonZero::new(len).expect("guarded by 'is zero sized Vec' check upstream");

    // Heap-allocate the refcount state; ownership of this allocation is released to the
    // BlockRef machinery and reclaimed in `BlockRefDynamic::drop` via `Box::from_raw`.
    let block_ptr = NonNull::new(Box::into_raw(Box::new(VecBlock::new(vec)))).expect("we just allocated it - it cannot possibly be null");

    // SAFETY: block_ptr must remain valid until the dynamic fns drop() is called. Yep, it does.
    // We only ever created shared references to the block state - it exists just to track the
    // reference count.
    let block_ref = unsafe { BlockRef::new(block_ptr, &BLOCK_REF_FNS) };

    // SAFETY: Block requires us to guarantee exclusive access. We actually cannot do that - this
    // memory block is shared and immutable, unlike many others! However, the good news is that this
    // requirement on Block exists to support mutation. As long as we never treat the block as
    // having mutable contents, we are fine with shared immutable access.
    unsafe { Block::new(capacity_ptr, len, block_ref) }
}

/// Shared vtable used by every `BlockRef` created over a `VecBlock` state.
const BLOCK_REF_FNS: BlockRefVTable<VecBlock> = BlockRefVTable::from_trait();

/// Returns pieces of a `Vec<u8>` no greater than `BlockSize::MAX` in length.
struct VecBlockIterator {
    // The not-yet-yielded suffix of the original Vec; shrinks toward empty as `next` is called.
    remaining: Vec<u8>,
}

168impl VecBlockIterator {
169    const fn new(vec: Vec<u8>) -> Self {
170        Self { remaining: vec }
171    }
172}
173
174impl Iterator for VecBlockIterator {
175    type Item = Vec<u8>;
176
177    fn next(&mut self) -> Option<Self::Item> {
178        if self.remaining.is_empty() {
179            return None;
180        }
181
182        let bytes_to_take = self.remaining.len().min(BlockSize::MAX as usize);
183
184        // split_off splits at the given index, returning everything after that index.
185        // We want to take the first `bytes_to_take` bytes, so we split_off at that index
186        // and swap - what we split off becomes `remaining`, and what's left is what we return.
187        let keep = self.remaining.split_off(bytes_to_take);
188        let take = std::mem::replace(&mut self.remaining, keep);
189
190        Some(take)
191    }
192
193    fn size_hint(&self) -> (usize, Option<usize>) {
194        let blocks_remaining = self.remaining.len().div_ceil(BlockSize::MAX as usize);
195        (blocks_remaining, Some(blocks_remaining))
196    }
197}
198
#[cfg_attr(coverage_nightly, coverage(off))]
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn vec_into_view() {
        let vec = vec![1, 2, 3, 4, 5];
        let mut view: BytesView = vec.into();
        assert_eq!(view.len(), 5);

        // Consuming byte by byte preserves the original Vec's order.
        assert_eq!(view.get_byte(), 1);
        assert_eq!(view.get_byte(), 2);
        assert_eq!(view.get_byte(), 3);
        assert_eq!(view.get_byte(), 4);
        assert_eq!(view.get_byte(), 5);

        assert!(view.is_empty());
    }

    #[test]
    fn zero_sized_vec() {
        // An empty Vec takes the fast path and produces the empty view with no block allocation.
        let vec = Vec::<u8>::new();
        let view: BytesView = vec.into();

        assert_eq!(view.len(), 0);
        assert!(view.is_empty());
    }

    #[test]
    fn test_vec_to_view() {
        let vec = vec![b'H', b'e', b'l', b'l', b'o', b',', b' ', b'w', b'o', b'r', b'l', b'd', b'!'];

        let vec_data_ptr = vec.as_ptr();

        let view: BytesView = vec.into();

        assert_eq!(view.len(), 13);
        assert_eq!(view, b"Hello, world!");

        // We expect this to be zero-copy - Vec to BytesView always is.
        // The first chunk keeps the source Vec's allocation, so the pointers must match.
        assert_eq!(view.first_slice().as_ptr(), vec_data_ptr);
    }

    #[test]
    fn test_giant_vec_to_view() {
        // The allocation below is 5 GB; we require 10 GB of system memory to leave headroom.
        // The publishing pipeline runs on a system where this may not be available, so we skip
        // this test in that environment.
        // NOTE(review): on targets outside this cfg (e.g. under Miri or other OSes) the guard is
        // compiled out and the 5 GB allocation is attempted unconditionally - confirm those
        // configurations exclude this test some other way.
        #[cfg(all(not(miri), any(target_os = "linux", target_os = "windows")))]
        if crate::testing::system_memory() < 10_000_000_000 {
            eprintln!("Skipping giant allocation test due to insufficient memory.");
            return;
        }

        let vec = vec![0u8; 5_000_000_000];

        let view: BytesView = vec.into();
        assert_eq!(view.len(), 5_000_000_000);
        // 5 GB exceeds a single block, so the Vec must have been split into two spans,
        // the first one maximally sized.
        assert_eq!(view.first_slice().len(), u32::MAX as usize);
        assert_eq!(view.into_spans_reversed().len(), 2);
    }

    #[test]
    fn test_vec_block_iterator_size_hint_single_block() {
        let vec = vec![b'H', b'e', b'l', b'l', b'o', b',', b' ', b'w', b'o', b'r', b'l', b'd', b'!'];
        let iterator = VecBlockIterator::new(vec);

        let (min, max) = iterator.size_hint();
        assert_eq!(min, 1);
        assert_eq!(max, Some(1));
    }

    #[test]
    fn test_vec_block_iterator_size_hint_multiple_blocks() {
        // Create a vec that requires exactly 2 blocks
        let size = (BlockSize::MAX as usize) + 1000;
        let vec = vec![0u8; size];

        let iterator = VecBlockIterator::new(vec);

        let (min, max) = iterator.size_hint();
        assert_eq!(min, 2);
        assert_eq!(max, Some(2));
    }

    #[test]
    fn test_vec_block_iterator_size_hint_empty() {
        let vec = Vec::new();
        let iterator = VecBlockIterator::new(vec);

        let (min, max) = iterator.size_hint();
        assert_eq!(min, 0);
        assert_eq!(max, Some(0));
    }

    #[test]
    fn test_vec_block_iterator_size_hint_exact_block_size() {
        // Create a vec that is exactly one block size - must round to 1 block, not 2.
        let vec = vec![0u8; BlockSize::MAX as usize];

        let iterator = VecBlockIterator::new(vec);

        let (min, max) = iterator.size_hint();
        assert_eq!(min, 1);
        assert_eq!(max, Some(1));
    }
}