simd_lookup/
bulk_vec_extender.rs

//! [BulkVecExtender] is a simple utility trait that allows you to bulk extend a `Vec<T>`
//! and get back a `&mut [T]` slice that you can write to - much faster than individual `push()`
//! calls, each of which has to check the remaining capacity (and may reallocate).
4//!
5use std::ops::{Deref, DerefMut};
6use wide::u8x16;
7
/// [BulkVecExtender] is a simple utility trait that allows you to bulk extend a `Vec<T>`
/// and get back a `&mut [T]` view of the new tail that you can write to - much faster than
/// individual `push()` calls, each of which has to check the remaining capacity and may
/// reallocate.
///
/// # Example
///
/// Instead of using `push()` in a hot loop (which pays a capacity check per element):
///
/// ```rust,ignore
/// // Slow: each push() checks capacity and may grow the Vec
/// let mut vec = Vec::new();
/// for value in some_data {
///     if condition(value) {
///         vec.push(value);  // Capacity check on every call!
///     }
/// }
/// ```
///
/// Use `bulk_extend_guard()` to get a RAII guard and write directly:
///
/// ```rust
/// # use simd_lookup::bulk_vec_extender::BulkVecExtender;
/// # let some_data = [1u8, 2, 3, 4, 5, 6, 7, 8];
/// # let condition = |v: u8| v % 2 == 0;
/// let mut vec = Vec::new();
/// let max_elements = 100;
///
/// {
///     // Get a guard to write to (capacity is reserved once, up front)
///     let mut guard = vec.bulk_extend_guard(max_elements);
///
///     // Write directly into the guard's slice - no per-element capacity check
///     let mut written = 0;
///     for value in some_data.iter().take(max_elements) {
///         if condition(*value) {
///             guard[written] = *value;
///             written += 1;
///         }
///     }
///
///     // Set actual number of elements written (only needed for partial writes)
///     guard.set_written(written);
///     // guard drops here, vec is automatically shortened to the correct length
/// }
/// ```
///
/// # Performance
///
/// This trait is designed to eliminate the overhead of `Vec::push()` in hot loops:
/// - **No per-element capacity check or reallocation** - you write into an already-sized
///   slice. Note that the guard dereferences to a plain `[T]`, so `guard[i]` is still an
///   ordinary bounds-checked slice index.
/// - **Bulk allocation** happens once, not per-element
/// - **Better for SIMD** - you can write entire SIMD vectors at once
/// - **Cache-friendly** - sequential writes to a pre-allocated buffer
///
/// Benchmarks show `Vec::push()` can cost 35% of total performance in hot loops,
/// even with pre-allocation. This trait eliminates that overhead.
///
/// # Caveats
///
/// The `Vec<T>` implementation in this file only reserves capacity and raises the length;
/// it does not initialize the new slots. Write every slot you intend to keep before
/// reading it, and call `set_written()` when you filled fewer slots than requested.
/// NOTE(review): this looks intended for plain-data element types such as integers -
/// for a `T` with a destructor, assigning into an uninitialized slot would run that
/// destructor on garbage.
pub trait BulkVecExtender<T> {
    /// Returns a RAII guard that extends the Vec and automatically finalizes on drop.
    ///
    /// This is the preferred method for most use cases as it avoids borrow checker issues
    /// and automatically handles finalization.
    ///
    /// `elements_to_write` is the number of slots appended to the Vec, and therefore the
    /// length of the slice the guard dereferences to.
    ///
    /// By default, assumes all elements will be written. If you write fewer elements,
    /// call `guard.set_written(count)` before the guard drops.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use simd_lookup::bulk_vec_extender::BulkVecExtender;
    /// let mut vec: Vec<u8> = Vec::new();
    /// {
    ///     let mut guard = vec.bulk_extend_guard(10);
    ///     for i in 0..10 {
    ///         guard[i] = i as u8;
    ///     }
    ///     // guard drops here, vec length is automatically set to 10
    /// }
    /// assert_eq!(vec.len(), 10);
    /// ```
    ///
    /// # Example with partial writes
    ///
    /// ```rust
    /// # use simd_lookup::bulk_vec_extender::BulkVecExtender;
    /// let mut vec: Vec<u8> = Vec::new();
    /// {
    ///     let mut guard = vec.bulk_extend_guard(100);
    ///     guard[0] = 42;
    ///     guard[1] = 43;
    ///     guard.set_written(2);  // only wrote 2 elements
    /// }
    /// assert_eq!(vec.len(), 2);
    /// ```
    fn bulk_extend_guard(&mut self, elements_to_write: usize) -> BulkExtendGuard<'_, T>;
}
103
/// RAII guard for bulk Vec extension. Automatically finalizes on drop.
///
/// Creating the guard grows the Vec's length by the requested number of slots
/// **without initializing them**. The guard dereferences to exactly that new tail
/// (`vec[original_len..]`) so it can be filled in place.
///
/// When dropped, the Vec's length is set to `original_len + written`.
/// By default, `written` equals the requested extension size, so if you
/// write all elements, you don't need to do anything special.
///
/// Use `set_written()` if you wrote fewer elements than the slice length.
///
/// # Caveats
///
/// Slots hold uninitialized memory until you write them: write before you read, and
/// make sure `written` never covers a slot you did not fill. Slots beyond `written`
/// are discarded on drop *without* running `T`'s destructor, because they may never
/// have been initialized.
pub struct BulkExtendGuard<'a, T> {
    /// The Vec being extended; exclusively borrowed for as long as the guard lives.
    vec: &'a mut Vec<T>,
    /// Length of `vec` before the extension, i.e. where the guarded region starts.
    original_len: usize,
    /// Number of slots appended when the guard was created.
    extended_by: usize,
    /// Number of leading slots kept on drop; always `<= extended_by`.
    written: usize,
}

impl<'a, T> BulkExtendGuard<'a, T> {
    /// Creates a new guard, extending the vec by `elements_to_write` elements.
    ///
    /// # Panics
    ///
    /// Panics if the required capacity overflows (see `Vec::reserve`).
    #[inline(always)]
    fn new(vec: &'a mut Vec<T>, elements_to_write: usize) -> Self {
        let original_len = vec.len();
        // Reserve first: `reserve` panics on capacity overflow, so once it returns,
        // `original_len + elements_to_write <= capacity` and the sum below cannot wrap.
        vec.reserve(elements_to_write);
        let new_len = original_len + elements_to_write;
        // SAFETY: `new_len <= vec.capacity()` is guaranteed by the `reserve` call above.
        // NOTE(review): the slots in `original_len..new_len` are NOT initialized here, so
        // `set_len`'s "elements must be initialized" precondition is deferred to the caller:
        // every slot must be written before it is read, and the final length is fixed up
        // in `Drop` without touching slots that were reported as unwritten.
        unsafe {
            vec.set_len(new_len);
        }
        Self {
            vec,
            original_len,
            extended_by: elements_to_write,
            written: elements_to_write, // default: assume all elements will be written
        }
    }

    /// Set the actual number of elements written.
    /// Call this if you wrote fewer elements than the slice length.
    /// The count is capped to the extended size.
    #[inline(always)]
    pub fn set_written(&mut self, count: usize) {
        self.written = count.min(self.extended_by);
    }

    /// Returns the extended region as a mutable slice.
    /// This is a convenience method equivalent to `&mut *guard`.
    #[inline(always)]
    pub fn as_mut_slice(&mut self) -> &mut [T] {
        &mut self.vec[self.original_len..]
    }
}

impl<T> Deref for BulkExtendGuard<'_, T> {
    type Target = [T];

    /// Views the extended region (`vec[original_len..]`) as a shared slice.
    #[inline(always)]
    fn deref(&self) -> &Self::Target {
        &self.vec[self.original_len..]
    }
}

impl<T> DerefMut for BulkExtendGuard<'_, T> {
    /// Views the extended region (`vec[original_len..]`) as a mutable slice.
    #[inline(always)]
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.vec[self.original_len..]
    }
}

impl<T> Drop for BulkExtendGuard<'_, T> {
    /// Finalizes the Vec so that it keeps only the first `written` new elements.
    #[inline(always)]
    fn drop(&mut self) {
        let final_len = self.original_len + self.written;
        // SAFETY: `written <= extended_by` (established by `new`, enforced by `set_written`)
        // and the guard holds the only borrow of the Vec, so
        // `final_len <= vec.len() <= vec.capacity()`; shrinking the length never exposes
        // new elements.
        //
        // `set_len` is used rather than `truncate` on purpose: `truncate` would run `T`'s
        // destructor on the discarded tail, and those are exactly the slots the caller
        // reported as unwritten, i.e. potentially uninitialized memory.
        unsafe {
            self.vec.set_len(final_len);
        }
    }
}
175
/// Implements [`BulkVecExtender`] for every `Vec<T>` by handing the Vec to
/// `BulkExtendGuard::new`, which reserves capacity and raises the length up front.
impl<T> BulkVecExtender<T> for Vec<T> {
    #[inline(always)]
    fn bulk_extend_guard(&mut self, elements_to_write: usize) -> BulkExtendGuard<'_, T> {
        // All of the work (reserve, length bump, finalization on drop) lives in the guard.
        BulkExtendGuard::new(self, elements_to_write)
    }
}
182
/// Utility trait to help write u8 SIMD vectors into a mutable slice.
pub trait SliceU8SIMDExtender {
    /// Writes the first `slice_len` bytes of the `u8x16` into a u8 mut slice, starting at
    /// `index`.
    ///
    /// # Panics
    ///
    /// Panics if the slice does not have enough room (must have at least `index + slice_len`
    /// bytes). The `&mut [u8]` implementation in this file also slices the vector's lanes
    /// with `[..slice_len]`, so presumably `slice_len` must not exceed 16 - TODO confirm
    /// against `wide::u8x16::to_array`.
    fn write_u8x16(&mut self, index: usize, value: u8x16, slice_len: usize);
}
189
190impl SliceU8SIMDExtender for &mut [u8] {
191    // TODO: consider using this optimized code if slice_len is 16.  OTOH, it might not be worth it, because
192    //  then we will need a branch, and needing a branch kind of kills the optimization.
193    //
194    // unsafe {
195    //     let ptr = write_slices[slice_num].as_mut_ptr() as *mut u8x16;
196    //     ptr.write_unaligned(combined);
197    // }
198    #[inline(always)]
199    fn write_u8x16(&mut self, index: usize, value: u8x16, slice_len: usize) {
200        self[index..index + slice_len].copy_from_slice(&value.to_array()[..slice_len]);
201    }
202}