compressed_intvec/fixed/atomic/
builder.rs

1//! # [`AtomicFixedVec`] Builder
2//!
3//! This module provides a builder for constructing an owned [`AtomicFixedVec`]
4//! from a slice of data (`&[T]`). It mirrors the functionality of the standard
5//! [`FixedVecBuilder`](crate::fixed::builder::FixedVecBuilder), allowing for
6//! automatic bit-width detection and a consistent API.
7//!
8//! # Examples
9//!
10//! ## Building from a slice
11//!
12//! ```rust
13//! use compressed_intvec::prelude::*;
14//! use compressed_intvec::fixed::{AtomicFixedVec, UAtomicFixedVec, BitWidth};
15//!
16//! let data: &[u32] = &[10, 20, 30, 40, 50];
17//!
18//! // The builder can infer the minimal bit width (6 bits for 50).
19//! let vec: UAtomicFixedVec<u32> = AtomicFixedVec::builder()
20//!     .build(data)
21//!     .unwrap();
22//!
23//! assert_eq!(vec.bit_width(), 6);
24//!
25//! // Or a specific strategy can be chosen.
26//! let vec_pow2: UAtomicFixedVec<u32> = AtomicFixedVec::builder()
27//!     .bit_width(BitWidth::PowerOfTwo)
28//!     .build(data)
29//!     .unwrap();
30//!
31//! assert_eq!(vec_pow2.bit_width(), 8);
32//! ```
33
34use crate::fixed::atomic::AtomicFixedVec;
35use crate::fixed::traits::Storable;
36use crate::fixed::{BitWidth, Error};
37use num_traits::ToPrimitive;
38use std::marker::PhantomData;
39use std::sync::atomic::AtomicU64;
40
41/// A builder for creating an [`AtomicFixedVec`] from a slice of integers.
42///
43/// This builder analyzes a slice to determine the `bit_width` based on
44/// the selected [`BitWidth`] strategy and then constructs the thread-safe vector.
45///
46/// # Example
47///
48/// ```
49/// use compressed_intvec::prelude::*;
50/// use compressed_intvec::fixed::{AtomicFixedVec, UAtomicFixedVec, BitWidth};
51///
52/// let data: &[u32] = &[10, 20, 30, 40, 50];
53///
54/// // The builder can infer the minimal bit width (6 bits for 50).
55/// let vec: UAtomicFixedVec<u32> = AtomicFixedVec::builder()
56///     .build(data)
57///     .unwrap();
58///
59/// assert_eq!(vec.bit_width(), 6);
60///
61/// // Or a specific strategy can be chosen.
62/// let vec_pow2: UAtomicFixedVec<u32> = AtomicFixedVec::builder()
63///     .bit_width(BitWidth::PowerOfTwo)
64///     .build(data)
65///     .unwrap();
66///
67/// assert_eq!(vec_pow2.bit_width(), 8);
68///
69/// // You can also specify an explicit bit width.
70/// let vec_explicit: UAtomicFixedVec<u32> = AtomicFixedVec::builder()
71///    .bit_width(BitWidth::Explicit(10))
72///    .build(data)
73///    .unwrap();
74///
75/// assert_eq!(vec_explicit.bit_width(), 10);
76/// ```
77#[derive(Debug, Clone)]
78pub struct AtomicFixedVecBuilder<T: Storable<u64>> {
79    bit_width_strategy: BitWidth,
80    _phantom: PhantomData<T>,
81}
82
83impl<T> Default for AtomicFixedVecBuilder<T>
84where
85    T: Storable<u64>,
86{
87    fn default() -> Self {
88        Self {
89            bit_width_strategy: BitWidth::default(),
90            _phantom: PhantomData,
91        }
92    }
93}
94
95impl<T> AtomicFixedVecBuilder<T>
96where
97    T: Storable<u64> + Copy + ToPrimitive,
98{
99    /// Creates a new builder with the default [`BitWidth::Minimal`] strategy.
100    pub fn new() -> Self {
101        Self::default()
102    }
103
104    /// Sets the strategy for determining the number of bits for each element.
105    ///
106    /// This can be one of:
107    /// - [`BitWidth::Minimal`]: Automatically determines the minimal bit width needed.
108    /// - [`BitWidth::PowerOfTwo`]: Rounds up to the next power of two.
109    /// - [`BitWidth::Explicit`]: Uses the specified number of bits explicitly.
110    ///
111    /// # Note
112    ///
113    /// Choosing [`BitWidth::Minimal`] and [`BitWidth::PowerOfTwo`] introduces a one-time overhead
114    /// at construction time to find the maximum value in the input slice.
115    ///
116    /// # Performance
117    ///
118    /// Choosing [`BitWidth::PowerOfTwo`] allows for true atomic operations on the vector, resulting in
119    /// better performance for concurrent access patterns. However, it may use more space than necessary.
120    pub fn bit_width(mut self, strategy: BitWidth) -> Self {
121        self.bit_width_strategy = strategy;
122        self
123    }
124
125    /// Builds the [`AtomicFixedVec`] from a slice of data.
126    ///
127    /// This method consumes the builder and returns a new [`AtomicFixedVec`].
128    /// The initial data is populated using non-atomic writes, as the vector
129    /// is not yet shared during construction.
130    ///
131    /// # Errors
132    ///
133    /// Returns an [`Error`] if a value in the input is too large for the
134    /// specified `bit_width`, or if the parameters are invalid.
135    pub fn build(self, input: &[T]) -> Result<AtomicFixedVec<T>, Error> {
136        let bits_per_word = u64::BITS as usize;
137
138        // Determine the final bit width based on the chosen strategy.
139        let final_bit_width = match self.bit_width_strategy {
140            BitWidth::Explicit(n) => n,
141            _ => {
142                let max_val: u64 = input
143                    .iter()
144                    .map(|&val| T::into_word(val))
145                    .max()
146                    .unwrap_or(0);
147
148                let min_bits = if max_val == 0 {
149                    1
150                } else {
151                    bits_per_word - max_val.leading_zeros() as usize
152                };
153
154                match self.bit_width_strategy {
155                    BitWidth::Minimal => min_bits,
156                    BitWidth::PowerOfTwo => min_bits.next_power_of_two().min(bits_per_word),
157                    BitWidth::Explicit(_) => unreachable!(),
158                }
159            }
160        };
161
162        if !input.is_empty() && final_bit_width == 0 {
163            return Err(Error::InvalidParameters(
164                "bit_width cannot be zero for a non-empty vector".to_string(),
165            ));
166        }
167
168        // Create a new, zero-initialized AtomicFixedVec.
169        let mut atomic_vec = AtomicFixedVec::new(final_bit_width, input.len())?;
170
171        if input.is_empty() {
172            return Ok(atomic_vec);
173        }
174
175        // --- Initial data population using non-atomic writes ---
176        let limit = if final_bit_width < bits_per_word {
177            1u64 << final_bit_width
178        } else {
179            u64::MAX
180        };
181
182        for (i, &value_t) in input.iter().enumerate() {
183            let value_w = T::into_word(value_t);
184            if final_bit_width < bits_per_word && value_w >= limit {
185                return Err(Error::ValueTooLarge {
186                    value: value_w as u128,
187                    index: i,
188                    bit_width: final_bit_width,
189                });
190            }
191            // SAFETY: We are writing within the allocated bounds. The vector is
192            // not shared at this point, so non-atomic writes are safe and correct.
193            unsafe {
194                set_unchecked_non_atomic(
195                    &mut atomic_vec.storage,
196                    i,
197                    value_w,
198                    final_bit_width,
199                    atomic_vec.mask,
200                );
201            }
202        }
203
204        Ok(atomic_vec)
205    }
206}
207
208/// A non-atomic, unsafe helper to set a value in a slice of [`AtomicU64`].
209/// This is only used during the initial construction of the vector.
210///
211/// # Safety
212/// The caller must ensure that `index` is within bounds and that `value_w`
213/// fits within the configured `bit_width`.
214unsafe fn set_unchecked_non_atomic(
215    limbs: &mut [AtomicU64],
216    index: usize,
217    value: u64,
218    bit_width: usize,
219    mask: u64,
220) {
221    let bits_per_word = u64::BITS as usize;
222    let bit_pos = index * bit_width;
223    let word_index = bit_pos / bits_per_word;
224    let bit_offset = bit_pos % bits_per_word;
225
226    // Use `get_mut()` to get a mutable reference to the underlying `u64`.
227    // This is safe because we have exclusive access to the vector during building.
228    if bit_offset + bit_width <= bits_per_word {
229        let word = limbs.get_unchecked_mut(word_index).get_mut();
230        *word &= !(mask << bit_offset);
231        *word |= value << bit_offset;
232    } else {
233        // The value spans two words.
234        let low_word_ptr = limbs.as_mut_ptr().add(word_index);
235        let high_word_ptr = limbs.as_mut_ptr().add(word_index + 1);
236
237        let low_word = (*low_word_ptr).get_mut();
238        let high_word = (*high_word_ptr).get_mut();
239
240        *low_word &= !(u64::MAX << bit_offset);
241        *low_word |= value << bit_offset;
242
243        let bits_in_high = (bit_offset + bit_width) - bits_per_word;
244        let high_mask = (1u64 << bits_in_high).wrapping_sub(1);
245        *high_word &= !high_mask;
246        *high_word |= value >> (bits_per_word - bit_offset);
247    }
248}