int-interval-set 0.3.4

Integer half-open interval set structures built on top of int-interval.
Documentation
use super::*;

use rayon::iter::{FromParallelIterator, IntoParallelIterator, ParallelIterator};

use crate::int_co_set::funcs_for_canonicalization::{is_canonical, merge, normalize};

const BATCH_SIZE: usize = 128;

impl<I: IntCO> IntCOSet<I> {
    /// Builds a set from an already canonical interval vector.
    ///
    /// # Safety
    ///
    /// The caller must guarantee that `intervals` is canonical:
    ///
    /// - intervals are sorted by ascending `start`;
    /// - intervals are non-overlapping;
    /// - contiguous intervals have already been merged;
    /// - therefore, for every adjacent pair `a, b`,
    ///   `a.end_excl() < b.start()` holds.
    ///
    /// Violating this invariant can make binary-search based queries
    /// return incorrect results.
    #[inline]
    pub unsafe fn new_unchecked(intervals: Vec<I>) -> Self {
        debug_assert!(is_canonical(&intervals));

        Self {
            intervals: Arc::from(intervals.into_boxed_slice()),
        }
    }
}

impl<I: IntCO> Default for IntCOSet<I> {
    fn default() -> Self {
        Self {
            intervals: Arc::new([]),
        }
    }
}

impl<I: IntCO> FromIterator<I> for IntCOSet<I> {
    fn from_iter<T>(iter: T) -> Self
    where
        T: IntoIterator<Item = I>,
    {
        let mut batch = Vec::with_capacity(BATCH_SIZE);
        let mut reduced = Vec::new();

        for iv in iter {
            batch.push(iv);

            if batch.len() == BATCH_SIZE {
                let run = normalize(std::mem::replace(
                    &mut batch,
                    Vec::with_capacity(BATCH_SIZE),
                ));

                reduced = merge(reduced, run);
            }
        }

        if !batch.is_empty() {
            reduced = merge(reduced, normalize(batch));
        }

        // SAFETY:
        // Each normalized batch is canonical. `merge` preserves sorted,
        // non-overlapping, and non-adjacent interval invariants.
        unsafe { IntCOSet::new_unchecked(reduced) }
    }
}

impl<I> FromParallelIterator<I> for IntCOSet<I>
where
    I: IntCO + Send,
{
    fn from_par_iter<T>(iter: T) -> Self
    where
        T: IntoParallelIterator<Item = I>,
    {
        let reduced = iter
            .into_par_iter()
            .fold(
                || (Vec::with_capacity(BATCH_SIZE), Vec::<I>::new()),
                |(mut batch, mut reduced), iv| {
                    batch.push(iv);

                    if batch.len() == BATCH_SIZE {
                        let run = normalize(std::mem::replace(
                            &mut batch,
                            Vec::with_capacity(BATCH_SIZE),
                        ));

                        reduced = merge(reduced, run);
                    }

                    (batch, reduced)
                },
            )
            .map(|(batch, reduced)| {
                if batch.is_empty() {
                    reduced
                } else {
                    merge(reduced, normalize(batch))
                }
            })
            .reduce(Vec::new, merge);

        // SAFETY:
        // Every parallel fold result is canonical, and `merge` preserves the
        // canonical interval invariant during reduction.
        unsafe { IntCOSet::new_unchecked(reduced) }
    }
}

#[cfg(test)]
mod tests_for_from_iter;
#[cfg(test)]
mod tests_for_from_par_iter;