Skip to main content

cell_sheet_core/formula/
functions.rs

1use crate::model::{CellError, CellValue};
2
3fn collect_numbers(values: &[CellValue]) -> Result<Vec<f64>, CellError> {
4    let mut nums = Vec::new();
5    for v in values {
6        match v {
7            CellValue::Number(n) => nums.push(*n),
8            CellValue::Error(e) => return Err(e.clone()),
9            CellValue::Empty => {}
10            CellValue::Text(_) => {}
11            CellValue::Bool(_) => {}
12        }
13    }
14    Ok(nums)
15}
16
/// Neumaier's improved Kahan compensated summation.
///
/// Unlike naive accumulation, it captures the rounding error lost when a
/// large and a small value are added, so it handles catastrophic cancellation
/// (e.g. `[1e16, 1.0, -1e16]`) and long sequences of small floats correctly.
///
/// Returns `0.0` for an empty slice.
///
/// If the running total overflows, or the input contains an infinity or NaN,
/// the compensation term itself becomes non-finite (it evaluates expressions
/// such as `inf - inf`). In that case the plain running total is returned, so
/// a result of `+inf`/`-inf` is not turned into NaN by the correction step.
fn neumaier_sum(nums: &[f64]) -> f64 {
    let mut sum = 0.0f64;
    // Accumulated low-order bits that were rounded away from `sum`.
    let mut c = 0.0f64;
    for &x in nums {
        let t = sum + x;
        // Recover the bits lost in `sum + x`; the larger-magnitude operand
        // must be the one `t` is subtracted from for the recovery to be exact.
        c += if sum.abs() >= x.abs() {
            (sum - t) + x
        } else {
            (x - t) + sum
        };
        sum = t;
    }
    // A non-finite `c` carries no usable correction (and `inf + -inf` would
    // yield NaN), so fall back to the uncompensated total.
    if c.is_finite() {
        sum + c
    } else {
        sum
    }
}
36
37pub fn fn_sum(values: &[CellValue]) -> CellValue {
38    match collect_numbers(values) {
39        Ok(nums) => CellValue::Number(neumaier_sum(&nums)),
40        Err(e) => CellValue::Error(e),
41    }
42}
43
44pub fn fn_average(values: &[CellValue]) -> CellValue {
45    match collect_numbers(values) {
46        Ok(nums) if nums.is_empty() => CellValue::Error(CellError::DivZero),
47        // Reuse the compensated sum so the mean inherits its accuracy;
48        // a naive sum/n approach has the same catastrophic-cancellation
49        // problem when individual values are very large.
50        Ok(nums) => CellValue::Number(neumaier_sum(&nums) / nums.len() as f64),
51        Err(e) => CellValue::Error(e),
52    }
53}
54
55pub fn fn_count(values: &[CellValue]) -> CellValue {
56    match collect_numbers(values) {
57        Ok(nums) => CellValue::Number(nums.len() as f64),
58        Err(e) => CellValue::Error(e),
59    }
60}
61
62pub fn fn_min(values: &[CellValue]) -> CellValue {
63    match collect_numbers(values) {
64        Ok(nums) if nums.is_empty() => CellValue::Number(0.0),
65        Ok(nums) => CellValue::Number(nums.iter().cloned().fold(f64::INFINITY, f64::min)),
66        Err(e) => CellValue::Error(e),
67    }
68}
69
70pub fn fn_max(values: &[CellValue]) -> CellValue {
71    match collect_numbers(values) {
72        Ok(nums) if nums.is_empty() => CellValue::Number(0.0),
73        Ok(nums) => CellValue::Number(nums.iter().cloned().fold(f64::NEG_INFINITY, f64::max)),
74        Err(e) => CellValue::Error(e),
75    }
76}
77
78pub fn fn_if(args: &[CellValue]) -> CellValue {
79    if args.len() != 3 {
80        return CellValue::Error(CellError::Value);
81    }
82    match &args[0] {
83        CellValue::Bool(true) => args[1].clone(),
84        CellValue::Bool(false) => args[2].clone(),
85        CellValue::Number(n) => {
86            if *n != 0.0 {
87                args[1].clone()
88            } else {
89                args[2].clone()
90            }
91        }
92        CellValue::Error(e) => CellValue::Error(e.clone()),
93        _ => CellValue::Error(CellError::Value),
94    }
95}
96
#[cfg(test)]
mod tests {
    use super::*;

    /// Wraps a float in a numeric cell.
    fn num(n: f64) -> CellValue {
        CellValue::Number(n)
    }

    /// Unwraps a numeric cell, panicking on any other variant.
    fn extract_number(v: CellValue) -> f64 {
        if let CellValue::Number(n) = &v {
            return *n;
        }
        panic!("expected Number, got {:?}", v)
    }

    // SUM, catastrophic cancellation: in f64, 1e16 + 1.0 rounds back to 1e16,
    // so a naive running total drops the 1.0 completely.
    #[test]
    fn sum_catastrophic_cancellation_is_accurate() {
        let values = [1e16, 1.0, -1e16].map(num);
        let total = fn_sum(&values);
        assert_eq!(total, CellValue::Number(1.0));
    }

    // SUM, long sequence: 10_000 copies of 0.1. The 1e-10 tolerance is tight
    // enough to catch the per-step drift of naive accumulation.
    #[test]
    fn sum_long_sequence_of_small_floats_is_accurate() {
        let values = vec![num(0.1); 10_000];
        let result = extract_number(fn_sum(&values));
        let error = (result - 1000.0).abs();
        assert!(
            error < 1e-10,
            "SUM error too large: |{result} - 1000| = {}",
            error
        );
    }

    // AVERAGE, catastrophic cancellation: a naive sum of these inputs is 0.0,
    // giving a mean of 0.0 rather than the correct 1/3.
    #[test]
    fn average_catastrophic_cancellation_is_accurate() {
        let values = [1e16, 1.0, -1e16].map(num);
        let result = extract_number(fn_average(&values));
        let expected = 1.0 / 3.0;
        let error = (result - expected).abs();
        assert!(
            error < 1e-10,
            "AVERAGE catastrophic error: |{result} - {expected}| = {}",
            error
        );
    }

    // AVERAGE, long sequence: dividing the compensated sum by n stays
    // accurate because no intermediate drift accumulates.
    #[test]
    fn average_long_sequence_of_small_floats_is_accurate() {
        let values = vec![num(0.1); 10_000];
        let result = extract_number(fn_average(&values));
        let error = (result - 0.1).abs();
        assert!(
            error < 1e-14,
            "AVERAGE error too large: |{result} - 0.1| = {}",
            error
        );
    }
}