// virtual-frame 0.1.1
//
// Deterministic data pipeline toolkit for LLM training — bitmask-filtered
// virtual views, NFA regex, Kahan summation, full audit trail. Python
// bindings included. (Crate-level documentation header.)
//! Kahan compensated summation — bit-identical results regardless of platform.
//!
//! Every floating-point reduction in this library uses Kahan summation to
//! guarantee deterministic results. The compensation term captures rounding
//! error that naive left-to-right summation would lose.

/// Kahan compensated accumulator for f64 values.
///
/// Keeps a running total plus a small correction term, both plain stack
/// values. Feeding in the same sequence of inputs always yields a
/// bit-identical result.
#[derive(Debug, Clone)]
pub struct KahanAccumulator {
    // Running (rounded) total.
    sum: f64,
    // Low-order bits lost by the most recent addition; folded back into
    // the next input.
    comp: f64,
    // Number of values added so far.
    count: usize,
}

impl KahanAccumulator {
    /// Create a zero-initialized accumulator.
    #[inline]
    pub fn new() -> Self {
        Self { sum: 0.0, comp: 0.0, count: 0 }
    }

    /// Add a single value with Kahan compensation.
    #[inline]
    pub fn add(&mut self, value: f64) {
        self.count += 1;
        // Re-inject the previously lost low-order bits, then capture
        // whatever this addition rounds away for the next round.
        let corrected = value - self.comp;
        let updated = self.sum + corrected;
        self.comp = (updated - self.sum) - corrected;
        self.sum = updated;
    }

    /// Add a slice of values.
    pub fn add_slice(&mut self, values: &[f64]) {
        values.iter().copied().for_each(|v| self.add(v));
    }

    /// Return the accumulated sum, including the final compensation residual.
    #[inline]
    pub fn finalize(&self) -> f64 {
        self.sum - self.comp
    }

    /// Return the number of values added.
    #[inline]
    pub fn count(&self) -> usize {
        self.count
    }
}

impl Default for KahanAccumulator {
    fn default() -> Self {
        Self::new()
    }
}

/// One-shot Kahan summation for a slice of f64 values.
pub fn kahan_sum(values: &[f64]) -> f64 {
    let mut accumulator = KahanAccumulator::new();
    for &value in values {
        accumulator.add(value);
    }
    accumulator.finalize()
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_kahan_compensates() {
        // Sum 10 million copies of 0.1 — a value with no exact f64
        // representation, so naive left-to-right summation accumulates
        // visible rounding error while Kahan compensation absorbs it.
        let mut acc = KahanAccumulator::new();
        let n = 10_000_000;
        for _ in 0..n {
            acc.add(0.1);
        }
        let result = acc.finalize();
        let expected = 1_000_000.0;
        // Absolute tolerance, not ULPs: `expected` itself is not the exact
        // sum of 1e7 copies of the f64 nearest 0.1, so allow slack; 1e-6
        // is loose for Kahan yet tight enough to catch naive-level error.
        assert!(
            (result - expected).abs() < 1e-6,
            "Kahan result {} should be close to {}",
            result,
            expected
        );
        // Sanity check: Kahan must never be less accurate than the naive fold.
        let naive: f64 = (0..n).map(|_| 0.1_f64).fold(0.0, |a, b| a + b);
        assert!(
            (result - expected).abs() <= (naive - expected).abs(),
            "Kahan ({}) should be at least as accurate as naive ({})",
            result,
            naive
        );
    }

    #[test]
    fn test_kahan_determinism() {
        // Identical input sequences must yield bit-identical sums;
        // compare raw bit patterns rather than approximate equality.
        let values: Vec<f64> = (0..1000).map(|i| (i as f64) * 0.001).collect();
        let r1 = kahan_sum(&values);
        let r2 = kahan_sum(&values);
        assert_eq!(r1.to_bits(), r2.to_bits());
    }

    #[test]
    fn test_kahan_count() {
        // count() tracks the number of add() calls; small integer-valued
        // inputs sum exactly in f64, so finalize() is compared with ==.
        let mut acc = KahanAccumulator::new();
        acc.add(1.0);
        acc.add(2.0);
        acc.add(3.0);
        assert_eq!(acc.count(), 3);
        assert_eq!(acc.finalize(), 6.0);
    }
}