// simd_lookup/bulk_vec_extender.rs
//! [BulkVecExtender] is a simple utility trait that allows you to bulk extend a `Vec<T>`
//! and return a `&mut [T]` slice that you can write to - much faster than individual `push()` calls,
//! each of which has to check both bounds and capacity.
//!
5use std::ops::{Deref, DerefMut};
6use wide::u8x16;
7
/// [BulkVecExtender] is a simple utility trait that allows you to bulk extend a `Vec<T>`
/// and return a `&mut [T]` slice that you can write to - much faster than individual `push()` calls,
/// each of which has to check both bounds and capacity.
///
/// NOTE(review): the slice handed out by the guard is *uninitialized* memory. That is
/// only defensible for element types without drop glue or validity invariants (e.g.
/// `u8`); reading an element before writing it is undefined behavior. Consider a
/// `T: Copy` bound or a `MaybeUninit`-based API - TODO confirm intended element types.
///
/// # Example
///
/// Instead of using `push()` in a hot loop (which is expensive due to bounds checks):
///
/// ```rust,ignore
/// // Slow: each push() does bounds checking
/// let mut vec = Vec::new();
/// for value in some_data {
///     if condition(value) {
///         vec.push(value); // Bounds check on every call!
///     }
/// }
/// ```
///
/// Use `bulk_extend_guard()` to get a RAII guard and write directly:
///
/// ```rust
/// # use simd_lookup::bulk_vec_extender::BulkVecExtender;
/// # let some_data = [1u8, 2, 3, 4, 5, 6, 7, 8];
/// # let condition = |v: u8| v % 2 == 0;
/// let mut vec = Vec::new();
/// let max_elements = 100;
///
/// {
///     // Get a guard to write to (no bounds checks during writes!)
///     let mut guard = vec.bulk_extend_guard(max_elements);
///
///     // Write directly to the guard - much faster than push()
///     let mut written = 0;
///     for value in some_data.iter().take(max_elements) {
///         if condition(*value) {
///             guard[written] = *value;
///             written += 1;
///         }
///     }
///
///     // Set actual number of elements written (only needed for partial writes)
///     guard.set_written(written);
///     // guard drops here, vec is automatically truncated to correct length
/// }
/// ```
///
/// # Performance
///
/// This trait is designed to eliminate the overhead of `Vec::push()` in hot loops:
/// - **No bounds checking** during writes (you get a pre-allocated slice)
/// - **Bulk allocation** happens once, not per-element
/// - **Better for SIMD** - you can write entire SIMD vectors at once
/// - **Cache-friendly** - sequential writes to a pre-allocated buffer
///
/// Benchmarks show `Vec::push()` can cost 35% of total performance in hot loops,
/// even with pre-allocation. This trait eliminates that overhead.
pub trait BulkVecExtender<T> {
    /// Returns a RAII guard that extends the Vec and automatically finalizes on drop.
    ///
    /// This is the preferred method for most use cases as it avoids borrow checker issues
    /// and automatically handles finalization.
    ///
    /// By default, assumes all elements will be written. If you write fewer elements,
    /// call `guard.set_written(count)` before the guard drops.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use simd_lookup::bulk_vec_extender::BulkVecExtender;
    /// let mut vec: Vec<u8> = Vec::new();
    /// {
    ///     let mut guard = vec.bulk_extend_guard(10);
    ///     for i in 0..10 {
    ///         guard[i] = i as u8;
    ///     }
    ///     // guard drops here, vec length is automatically set to 10
    /// }
    /// assert_eq!(vec.len(), 10);
    /// ```
    ///
    /// # Example with partial writes
    ///
    /// ```rust
    /// # use simd_lookup::bulk_vec_extender::BulkVecExtender;
    /// let mut vec: Vec<u8> = Vec::new();
    /// {
    ///     let mut guard = vec.bulk_extend_guard(100);
    ///     guard[0] = 42;
    ///     guard[1] = 43;
    ///     guard.set_written(2); // only wrote 2 elements
    /// }
    /// assert_eq!(vec.len(), 2);
    /// ```
    fn bulk_extend_guard(&mut self, elements_to_write: usize) -> BulkExtendGuard<'_, T>;
}
103
/// RAII guard for bulk Vec extension. Automatically finalizes on drop.
///
/// When dropped, truncates the Vec to `original_len + written` elements.
/// By default, `written` equals the requested extension size, so if you
/// write all elements, you don't need to do anything special.
///
/// Use `set_written()` if you wrote fewer elements than the slice length.
pub struct BulkExtendGuard<'a, T> {
    // The Vec being extended. Its `len` already covers the extended region
    // (set in `new`), so the guard can deref to `vec[original_len..]`.
    vec: &'a mut Vec<T>,
    // Length of the Vec before the extension; start of the writable region.
    original_len: usize,
    // Number of elements the Vec was extended by (the writable region's length).
    extended_by: usize,
    // Number of elements actually written; defaults to `extended_by` and is
    // capped to it by `set_written`. Determines the final length on drop.
    written: usize,
}
117
impl<'a, T> BulkExtendGuard<'a, T> {
    /// Creates a new guard, extending the vec by `elements_to_write` elements.
    ///
    /// NOTE(review): the extended region is *uninitialized* memory that becomes
    /// reachable as `&mut [T]` through `Deref`. That is only sound in practice for
    /// element types without drop glue or validity invariants (e.g. `u8`); for a
    /// general `T`, reading an element before writing it is undefined behavior.
    /// Consider a `MaybeUninit`-based design or a `T: Copy` bound - TODO confirm.
    #[inline(always)]
    fn new(vec: &'a mut Vec<T>, elements_to_write: usize) -> Self {
        let original_len = vec.len();
        // In theory `original_len + elements_to_write` can overflow `usize`; in
        // that case `reserve` below would panic on capacity overflow anyway.
        let new_len = original_len + elements_to_write;
        vec.reserve(elements_to_write);
        // Safety: `reserve` guarantees capacity >= new_len, and we finalize the
        // length to the number of actually-written elements on drop. The elements
        // in `original_len..new_len` are NOT initialized here (see note above).
        unsafe {
            vec.set_len(new_len);
        }
        Self {
            vec,
            original_len,
            extended_by: elements_to_write,
            written: elements_to_write, // default: assume all elements will be written
        }
    }

    /// Set the actual number of elements written.
    /// Call this if you wrote fewer elements than the slice length.
    /// The count is capped to the extended size.
    #[inline(always)]
    pub fn set_written(&mut self, count: usize) {
        // Cap so the finalized length on drop can never exceed the extended region.
        self.written = count.min(self.extended_by);
    }

    /// Returns the extended region as a mutable slice.
    /// This is a convenience method equivalent to `&mut *guard`.
    #[inline(always)]
    pub fn as_mut_slice(&mut self) -> &mut [T] {
        &mut self.vec[self.original_len..]
    }
}
152
153impl<T> Deref for BulkExtendGuard<'_, T> {
154 type Target = [T];
155
156 #[inline(always)]
157 fn deref(&self) -> &Self::Target {
158 &self.vec[self.original_len..]
159 }
160}
161
162impl<T> DerefMut for BulkExtendGuard<'_, T> {
163 #[inline(always)]
164 fn deref_mut(&mut self) -> &mut Self::Target {
165 &mut self.vec[self.original_len..]
166 }
167}
168
169impl<T> Drop for BulkExtendGuard<'_, T> {
170 #[inline(always)]
171 fn drop(&mut self) {
172 self.vec.truncate(self.original_len + self.written);
173 }
174}
175
impl<T> BulkVecExtender<T> for Vec<T> {
    /// Extends this `Vec` by `elements_to_write` elements and returns the RAII
    /// guard over the newly added region. See [BulkExtendGuard].
    #[inline(always)]
    fn bulk_extend_guard(&mut self, elements_to_write: usize) -> BulkExtendGuard<'_, T> {
        BulkExtendGuard::new(self, elements_to_write)
    }
}
182
/// Utility trait to help write u8 SIMD vectors into a mutable slice.
pub trait SliceU8SIMDExtender {
    /// Writes `slice_len` bytes of the u8x16 into a u8 mut slice at `index`.
    /// Panics if the slice does not have enough room (must have at least
    /// `index + slice_len` bytes), or if `slice_len > 16` (a `u8x16` only has
    /// 16 lanes to copy from).
    fn write_u8x16(&mut self, index: usize, value: u8x16, slice_len: usize);
}
189
impl SliceU8SIMDExtender for &mut [u8] {
    // TODO: consider using this optimized code if slice_len is 16. OTOH, it might not be worth it, because
    // then we will need a branch, and needing a branch kind of kills the optimization.
    //
    // unsafe {
    //     let ptr = write_slices[slice_num].as_mut_ptr() as *mut u8x16;
    //     ptr.write_unaligned(combined);
    // }
    //
    // Copies the first `slice_len` lanes of `value` into `self[index..index + slice_len]`.
    // Panics (via slice indexing) if the destination range is out of bounds or if
    // `slice_len > 16`; `copy_from_slice` itself cannot fail since both sides are
    // sliced to exactly `slice_len` bytes.
    #[inline(always)]
    fn write_u8x16(&mut self, index: usize, value: u8x16, slice_len: usize) {
        self[index..index + slice_len].copy_from_slice(&value.to_array()[..slice_len]);
    }
}
202}