compressed_intvec/fixed/atomic/builder.rs
1//! # [`AtomicFixedVec`] Builder
2//!
3//! This module provides a builder for constructing an owned [`AtomicFixedVec`]
4//! from a slice of data (`&[T]`). It mirrors the functionality of the standard
5//! [`FixedVecBuilder`](crate::fixed::builder::FixedVecBuilder), allowing for
6//! automatic bit-width detection and a consistent API.
7//!
8//! # Examples
9//!
10//! ## Building from a slice
11//!
12//! ```rust
13//! use compressed_intvec::prelude::*;
14//! use compressed_intvec::fixed::{AtomicFixedVec, UAtomicFixedVec, BitWidth};
15//!
16//! let data: &[u32] = &[10, 20, 30, 40, 50];
17//!
18//! // The builder can infer the minimal bit width (6 bits for 50).
19//! let vec: UAtomicFixedVec<u32> = AtomicFixedVec::builder()
20//! .build(data)
21//! .unwrap();
22//!
23//! assert_eq!(vec.bit_width(), 6);
24//!
25//! // Or a specific strategy can be chosen.
26//! let vec_pow2: UAtomicFixedVec<u32> = AtomicFixedVec::builder()
27//! .bit_width(BitWidth::PowerOfTwo)
28//! .build(data)
29//! .unwrap();
30//!
31//! assert_eq!(vec_pow2.bit_width(), 8);
32//! ```
33
34use crate::fixed::atomic::AtomicFixedVec;
35use crate::fixed::traits::Storable;
36use crate::fixed::{BitWidth, Error};
37use num_traits::ToPrimitive;
38use std::marker::PhantomData;
39use std::sync::atomic::AtomicU64;
40
41/// A builder for creating an [`AtomicFixedVec`] from a slice of integers.
42///
43/// This builder analyzes a slice to determine the `bit_width` based on
44/// the selected [`BitWidth`] strategy and then constructs the thread-safe vector.
45///
46/// # Example
47///
48/// ```
49/// use compressed_intvec::prelude::*;
50/// use compressed_intvec::fixed::{AtomicFixedVec, UAtomicFixedVec, BitWidth};
51///
52/// let data: &[u32] = &[10, 20, 30, 40, 50];
53///
54/// // The builder can infer the minimal bit width (6 bits for 50).
55/// let vec: UAtomicFixedVec<u32> = AtomicFixedVec::builder()
56/// .build(data)
57/// .unwrap();
58///
59/// assert_eq!(vec.bit_width(), 6);
60///
61/// // Or a specific strategy can be chosen.
62/// let vec_pow2: UAtomicFixedVec<u32> = AtomicFixedVec::builder()
63/// .bit_width(BitWidth::PowerOfTwo)
64/// .build(data)
65/// .unwrap();
66///
67/// assert_eq!(vec_pow2.bit_width(), 8);
68///
69/// // You can also specify an explicit bit width.
70/// let vec_explicit: UAtomicFixedVec<u32> = AtomicFixedVec::builder()
71/// .bit_width(BitWidth::Explicit(10))
72/// .build(data)
73/// .unwrap();
74///
75/// assert_eq!(vec_explicit.bit_width(), 10);
76/// ```
77#[derive(Debug, Clone)]
78pub struct AtomicFixedVecBuilder<T: Storable<u64>> {
79 bit_width_strategy: BitWidth,
80 _phantom: PhantomData<T>,
81}
82
83impl<T> Default for AtomicFixedVecBuilder<T>
84where
85 T: Storable<u64>,
86{
87 fn default() -> Self {
88 Self {
89 bit_width_strategy: BitWidth::default(),
90 _phantom: PhantomData,
91 }
92 }
93}
94
95impl<T> AtomicFixedVecBuilder<T>
96where
97 T: Storable<u64> + Copy + ToPrimitive,
98{
99 /// Creates a new builder with the default [`BitWidth::Minimal`] strategy.
100 pub fn new() -> Self {
101 Self::default()
102 }
103
104 /// Sets the strategy for determining the number of bits for each element.
105 ///
106 /// This can be one of:
107 /// - [`BitWidth::Minimal`]: Automatically determines the minimal bit width needed.
108 /// - [`BitWidth::PowerOfTwo`]: Rounds up to the next power of two.
109 /// - [`BitWidth::Explicit`]: Uses the specified number of bits explicitly.
110 ///
111 /// # Note
112 ///
113 /// Choosing [`BitWidth::Minimal`] and [`BitWidth::PowerOfTwo`] introduces a one-time overhead
114 /// at construction time to find the maximum value in the input slice.
115 ///
116 /// # Performance
117 ///
118 /// Choosing [`BitWidth::PowerOfTwo`] allows for true atomic operations on the vector, resulting in
119 /// better performance for concurrent access patterns. However, it may use more space than necessary.
120 pub fn bit_width(mut self, strategy: BitWidth) -> Self {
121 self.bit_width_strategy = strategy;
122 self
123 }
124
125 /// Builds the [`AtomicFixedVec`] from a slice of data.
126 ///
127 /// This method consumes the builder and returns a new [`AtomicFixedVec`].
128 /// The initial data is populated using non-atomic writes, as the vector
129 /// is not yet shared during construction.
130 ///
131 /// # Errors
132 ///
133 /// Returns an [`Error`] if a value in the input is too large for the
134 /// specified `bit_width`, or if the parameters are invalid.
135 pub fn build(self, input: &[T]) -> Result<AtomicFixedVec<T>, Error> {
136 let bits_per_word = u64::BITS as usize;
137
138 // Determine the final bit width based on the chosen strategy.
139 let final_bit_width = match self.bit_width_strategy {
140 BitWidth::Explicit(n) => n,
141 _ => {
142 let max_val: u64 = input
143 .iter()
144 .map(|&val| T::into_word(val))
145 .max()
146 .unwrap_or(0);
147
148 let min_bits = if max_val == 0 {
149 1
150 } else {
151 bits_per_word - max_val.leading_zeros() as usize
152 };
153
154 match self.bit_width_strategy {
155 BitWidth::Minimal => min_bits,
156 BitWidth::PowerOfTwo => min_bits.next_power_of_two().min(bits_per_word),
157 BitWidth::Explicit(_) => unreachable!(),
158 }
159 }
160 };
161
162 if !input.is_empty() && final_bit_width == 0 {
163 return Err(Error::InvalidParameters(
164 "bit_width cannot be zero for a non-empty vector".to_string(),
165 ));
166 }
167
168 // Create a new, zero-initialized AtomicFixedVec.
169 let mut atomic_vec = AtomicFixedVec::new(final_bit_width, input.len())?;
170
171 if input.is_empty() {
172 return Ok(atomic_vec);
173 }
174
175 // --- Initial data population using non-atomic writes ---
176 let limit = if final_bit_width < bits_per_word {
177 1u64 << final_bit_width
178 } else {
179 u64::MAX
180 };
181
182 for (i, &value_t) in input.iter().enumerate() {
183 let value_w = T::into_word(value_t);
184 if final_bit_width < bits_per_word && value_w >= limit {
185 return Err(Error::ValueTooLarge {
186 value: value_w as u128,
187 index: i,
188 bit_width: final_bit_width,
189 });
190 }
191 // SAFETY: We are writing within the allocated bounds. The vector is
192 // not shared at this point, so non-atomic writes are safe and correct.
193 unsafe {
194 set_unchecked_non_atomic(
195 &mut atomic_vec.storage,
196 i,
197 value_w,
198 final_bit_width,
199 atomic_vec.mask,
200 );
201 }
202 }
203
204 Ok(atomic_vec)
205 }
206}
207
208/// A non-atomic, unsafe helper to set a value in a slice of [`AtomicU64`].
209/// This is only used during the initial construction of the vector.
210///
211/// # Safety
212/// The caller must ensure that `index` is within bounds and that `value_w`
213/// fits within the configured `bit_width`.
214unsafe fn set_unchecked_non_atomic(
215 limbs: &mut [AtomicU64],
216 index: usize,
217 value: u64,
218 bit_width: usize,
219 mask: u64,
220) {
221 let bits_per_word = u64::BITS as usize;
222 let bit_pos = index * bit_width;
223 let word_index = bit_pos / bits_per_word;
224 let bit_offset = bit_pos % bits_per_word;
225
226 // Use `get_mut()` to get a mutable reference to the underlying `u64`.
227 // This is safe because we have exclusive access to the vector during building.
228 if bit_offset + bit_width <= bits_per_word {
229 let word = limbs.get_unchecked_mut(word_index).get_mut();
230 *word &= !(mask << bit_offset);
231 *word |= value << bit_offset;
232 } else {
233 // The value spans two words.
234 let low_word_ptr = limbs.as_mut_ptr().add(word_index);
235 let high_word_ptr = limbs.as_mut_ptr().add(word_index + 1);
236
237 let low_word = (*low_word_ptr).get_mut();
238 let high_word = (*high_word_ptr).get_mut();
239
240 *low_word &= !(u64::MAX << bit_offset);
241 *low_word |= value << bit_offset;
242
243 let bits_in_high = (bit_offset + bit_width) - bits_per_word;
244 let high_mask = (1u64 << bits_in_high).wrapping_sub(1);
245 *high_word &= !high_mask;
246 *high_word |= value >> (bits_per_word - bit_offset);
247 }
248}