Skip to main content

sheetkit_core/formula/functions/
statistical.rs

1//! Statistical formula functions: AVERAGEIF, AVERAGEIFS, COUNTBLANK, COUNTIF,
2//! COUNTIFS, MEDIAN, MODE, LARGE, SMALL, RANK.
3
4use crate::cell::CellValue;
5use crate::error::Result;
6use crate::formula::ast::Expr;
7use crate::formula::eval::{coerce_to_number, coerce_to_string, Evaluator};
8use crate::formula::functions::{check_arg_count, collect_criteria_range_values, matches_criteria};
9
10/// AVERAGEIF(range, criteria, [average_range])
11pub fn fn_averageif(args: &[Expr], ctx: &mut Evaluator) -> Result<CellValue> {
12    check_arg_count("AVERAGEIF", args, 2, 3)?;
13    let range_vals = collect_criteria_range_values(&args[0], ctx)?;
14    let criteria_val = ctx.eval_expr(&args[1])?;
15    let criteria = coerce_to_string(&criteria_val);
16    let avg_vals = if args.len() == 3 {
17        collect_criteria_range_values(&args[2], ctx)?
18    } else {
19        range_vals.clone()
20    };
21    let mut sum = 0.0;
22    let mut count = 0u64;
23    for (i, rv) in range_vals.iter().enumerate() {
24        if matches_criteria(rv, &criteria) {
25            if let Some(sv) = avg_vals.get(i) {
26                if let Ok(n) = coerce_to_number(sv) {
27                    sum += n;
28                    count += 1;
29                }
30            }
31        }
32    }
33    if count == 0 {
34        return Ok(CellValue::Error("#DIV/0!".to_string()));
35    }
36    Ok(CellValue::Number(sum / count as f64))
37}
38
39/// AVERAGEIFS(average_range, criteria_range1, criteria1, ...)
40pub fn fn_averageifs(args: &[Expr], ctx: &mut Evaluator) -> Result<CellValue> {
41    check_arg_count("AVERAGEIFS", args, 3, 255)?;
42    if !(args.len() - 1).is_multiple_of(2) {
43        return Ok(CellValue::Error("#VALUE!".to_string()));
44    }
45    let avg_vals = collect_criteria_range_values(&args[0], ctx)?;
46    let pair_count = (args.len() - 1) / 2;
47    let mut criteria_ranges: Vec<Vec<CellValue>> = Vec::with_capacity(pair_count);
48    let mut criteria_strings: Vec<String> = Vec::with_capacity(pair_count);
49    for i in 0..pair_count {
50        let range_vals = collect_criteria_range_values(&args[1 + i * 2], ctx)?;
51        let crit_val = ctx.eval_expr(&args[2 + i * 2])?;
52        criteria_ranges.push(range_vals);
53        criteria_strings.push(coerce_to_string(&crit_val));
54    }
55    let mut sum = 0.0;
56    let mut count = 0u64;
57    for (idx, sv) in avg_vals.iter().enumerate() {
58        let all_match =
59            criteria_ranges
60                .iter()
61                .zip(criteria_strings.iter())
62                .all(|(range_vals, crit)| {
63                    range_vals
64                        .get(idx)
65                        .is_some_and(|rv| matches_criteria(rv, crit))
66                });
67        if all_match {
68            if let Ok(n) = coerce_to_number(sv) {
69                sum += n;
70                count += 1;
71            }
72        }
73    }
74    if count == 0 {
75        return Ok(CellValue::Error("#DIV/0!".to_string()));
76    }
77    Ok(CellValue::Number(sum / count as f64))
78}
79
80/// COUNTBLANK(range) - count empty cells in a range
81pub fn fn_countblank(args: &[Expr], ctx: &mut Evaluator) -> Result<CellValue> {
82    check_arg_count("COUNTBLANK", args, 1, 1)?;
83    let values = collect_criteria_range_values(&args[0], ctx)?;
84    let count = values
85        .iter()
86        .filter(|v| matches!(v, CellValue::Empty))
87        .count();
88    Ok(CellValue::Number(count as f64))
89}
90
91/// COUNTIF(range, criteria)
92pub fn fn_countif(args: &[Expr], ctx: &mut Evaluator) -> Result<CellValue> {
93    check_arg_count("COUNTIF", args, 2, 2)?;
94    let range_vals = collect_criteria_range_values(&args[0], ctx)?;
95    let criteria_val = ctx.eval_expr(&args[1])?;
96    let criteria = coerce_to_string(&criteria_val);
97    let count = range_vals
98        .iter()
99        .filter(|rv| matches_criteria(rv, &criteria))
100        .count();
101    Ok(CellValue::Number(count as f64))
102}
103
104/// COUNTIFS(range1, criteria1, range2, criteria2, ...)
105pub fn fn_countifs(args: &[Expr], ctx: &mut Evaluator) -> Result<CellValue> {
106    check_arg_count("COUNTIFS", args, 2, 255)?;
107    if !args.len().is_multiple_of(2) {
108        return Ok(CellValue::Error("#VALUE!".to_string()));
109    }
110    let pair_count = args.len() / 2;
111    let mut criteria_ranges: Vec<Vec<CellValue>> = Vec::with_capacity(pair_count);
112    let mut criteria_strings: Vec<String> = Vec::with_capacity(pair_count);
113    for i in 0..pair_count {
114        let range_vals = collect_criteria_range_values(&args[i * 2], ctx)?;
115        let crit_val = ctx.eval_expr(&args[i * 2 + 1])?;
116        criteria_ranges.push(range_vals);
117        criteria_strings.push(coerce_to_string(&crit_val));
118    }
119    let len = criteria_ranges.first().map_or(0, |r| r.len());
120    let mut count = 0u64;
121    for idx in 0..len {
122        let all_match =
123            criteria_ranges
124                .iter()
125                .zip(criteria_strings.iter())
126                .all(|(range_vals, crit)| {
127                    range_vals
128                        .get(idx)
129                        .is_some_and(|rv| matches_criteria(rv, crit))
130                });
131        if all_match {
132            count += 1;
133        }
134    }
135    Ok(CellValue::Number(count as f64))
136}
137
138/// MEDIAN(args...) - median of numeric values
139pub fn fn_median(args: &[Expr], ctx: &mut Evaluator) -> Result<CellValue> {
140    check_arg_count("MEDIAN", args, 1, 255)?;
141    let mut nums = ctx.collect_numbers(args)?;
142    if nums.is_empty() {
143        return Ok(CellValue::Error("#NUM!".to_string()));
144    }
145    nums.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
146    let n = nums.len();
147    let median = if n % 2 == 1 {
148        nums[n / 2]
149    } else {
150        (nums[n / 2 - 1] + nums[n / 2]) / 2.0
151    };
152    Ok(CellValue::Number(median))
153}
154
155/// MODE(args...) - most frequently occurring value
156pub fn fn_mode(args: &[Expr], ctx: &mut Evaluator) -> Result<CellValue> {
157    check_arg_count("MODE", args, 1, 255)?;
158    let nums = ctx.collect_numbers(args)?;
159    if nums.is_empty() {
160        return Ok(CellValue::Error("#N/A".to_string()));
161    }
162    let mut counts: std::collections::HashMap<u64, (f64, usize)> = std::collections::HashMap::new();
163    for &n in &nums {
164        let key = n.to_bits();
165        let entry = counts.entry(key).or_insert((n, 0));
166        entry.1 += 1;
167    }
168    let max_count = counts.values().map(|(_, c)| *c).max().unwrap_or(0);
169    if max_count <= 1 {
170        return Ok(CellValue::Error("#N/A".to_string()));
171    }
172    // Return the first value that has max_count occurrences (in order of appearance)
173    for &n in &nums {
174        let key = n.to_bits();
175        if let Some((_, c)) = counts.get(&key) {
176            if *c == max_count {
177                return Ok(CellValue::Number(n));
178            }
179        }
180    }
181    Ok(CellValue::Error("#N/A".to_string()))
182}
183
184/// LARGE(array, k) - k-th largest value
185pub fn fn_large(args: &[Expr], ctx: &mut Evaluator) -> Result<CellValue> {
186    check_arg_count("LARGE", args, 2, 2)?;
187    let mut nums = collect_criteria_range_values(&args[0], ctx)?
188        .iter()
189        .filter_map(|v| coerce_to_number(v).ok())
190        .collect::<Vec<f64>>();
191    let k = coerce_to_number(&ctx.eval_expr(&args[1])?)? as usize;
192    if k == 0 || k > nums.len() {
193        return Ok(CellValue::Error("#NUM!".to_string()));
194    }
195    nums.sort_by(|a, b| b.partial_cmp(a).unwrap_or(std::cmp::Ordering::Equal));
196    Ok(CellValue::Number(nums[k - 1]))
197}
198
199/// SMALL(array, k) - k-th smallest value
200pub fn fn_small(args: &[Expr], ctx: &mut Evaluator) -> Result<CellValue> {
201    check_arg_count("SMALL", args, 2, 2)?;
202    let mut nums = collect_criteria_range_values(&args[0], ctx)?
203        .iter()
204        .filter_map(|v| coerce_to_number(v).ok())
205        .collect::<Vec<f64>>();
206    let k = coerce_to_number(&ctx.eval_expr(&args[1])?)? as usize;
207    if k == 0 || k > nums.len() {
208        return Ok(CellValue::Error("#NUM!".to_string()));
209    }
210    nums.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
211    Ok(CellValue::Number(nums[k - 1]))
212}
213
214/// RANK(number, ref, [order]) - rank of a number in a list.
215/// order=0 or omitted: descending (largest=1). order=nonzero: ascending (smallest=1).
216pub fn fn_rank(args: &[Expr], ctx: &mut Evaluator) -> Result<CellValue> {
217    check_arg_count("RANK", args, 2, 3)?;
218    let number = coerce_to_number(&ctx.eval_expr(&args[0])?)?;
219    let ref_vals = collect_criteria_range_values(&args[1], ctx)?;
220    let order = if args.len() > 2 {
221        coerce_to_number(&ctx.eval_expr(&args[2])?)? as i64
222    } else {
223        0
224    };
225    let nums: Vec<f64> = ref_vals
226        .iter()
227        .filter_map(|v| coerce_to_number(v).ok())
228        .collect();
229    let rank = if order == 0 {
230        // Descending: count how many are greater than number, +1
231        nums.iter().filter(|&&n| n > number).count() + 1
232    } else {
233        // Ascending: count how many are less than number, +1
234        nums.iter().filter(|&&n| n < number).count() + 1
235    };
236    // If the number is not in the list, return #N/A
237    if !nums.iter().any(|&n| (n - number).abs() < f64::EPSILON) {
238        return Ok(CellValue::Error("#N/A".to_string()));
239    }
240    Ok(CellValue::Number(rank as f64))
241}
242
243#[cfg(test)]
244mod tests {
245    use crate::cell::CellValue;
246    use crate::formula::eval::{evaluate, CellSnapshot};
247    use crate::formula::parser::parse_formula;
248
249    fn eval_with_data(formula: &str, data: &[(&str, u32, u32, CellValue)]) -> CellValue {
250        let mut snap = CellSnapshot::new("Sheet1".to_string());
251        for (sheet, col, row, val) in data {
252            snap.set_cell(sheet, *col, *row, val.clone());
253        }
254        let expr = parse_formula(formula).unwrap();
255        evaluate(&expr, &snap).unwrap()
256    }
257
258    fn eval(formula: &str) -> CellValue {
259        eval_with_data(formula, &[])
260    }
261
262    // AVERAGEIF tests
263
264    #[test]
265    fn averageif_greater_than() {
266        let data = vec![
267            ("Sheet1", 1, 1, CellValue::Number(2.0)),
268            ("Sheet1", 1, 2, CellValue::Number(4.0)),
269            ("Sheet1", 1, 3, CellValue::Number(6.0)),
270        ];
271        let result = eval_with_data("AVERAGEIF(A1:A3,\">3\")", &data);
272        assert_eq!(result, CellValue::Number(5.0));
273    }
274
275    #[test]
276    fn averageif_with_avg_range() {
277        let data = vec![
278            ("Sheet1", 1, 1, CellValue::String("A".to_string())),
279            ("Sheet1", 2, 1, CellValue::Number(10.0)),
280            ("Sheet1", 1, 2, CellValue::String("B".to_string())),
281            ("Sheet1", 2, 2, CellValue::Number(20.0)),
282            ("Sheet1", 1, 3, CellValue::String("A".to_string())),
283            ("Sheet1", 2, 3, CellValue::Number(30.0)),
284        ];
285        let result = eval_with_data("AVERAGEIF(A1:A3,\"A\",B1:B3)", &data);
286        assert_eq!(result, CellValue::Number(20.0));
287    }
288
289    #[test]
290    fn averageif_no_match() {
291        let data = vec![
292            ("Sheet1", 1, 1, CellValue::Number(1.0)),
293            ("Sheet1", 1, 2, CellValue::Number(2.0)),
294        ];
295        let result = eval_with_data("AVERAGEIF(A1:A2,\">100\")", &data);
296        assert_eq!(result, CellValue::Error("#DIV/0!".to_string()));
297    }
298
299    // AVERAGEIFS test
300
301    #[test]
302    fn averageifs_multi_criteria() {
303        let data = vec![
304            ("Sheet1", 1, 1, CellValue::String("A".to_string())),
305            ("Sheet1", 2, 1, CellValue::Number(1.0)),
306            ("Sheet1", 3, 1, CellValue::Number(100.0)),
307            ("Sheet1", 1, 2, CellValue::String("A".to_string())),
308            ("Sheet1", 2, 2, CellValue::Number(5.0)),
309            ("Sheet1", 3, 2, CellValue::Number(200.0)),
310            ("Sheet1", 1, 3, CellValue::String("B".to_string())),
311            ("Sheet1", 2, 3, CellValue::Number(3.0)),
312            ("Sheet1", 3, 3, CellValue::Number(300.0)),
313        ];
314        let result = eval_with_data("AVERAGEIFS(C1:C3,A1:A3,\"A\",B1:B3,\">2\")", &data);
315        assert_eq!(result, CellValue::Number(200.0));
316    }
317
318    // COUNTBLANK tests
319
320    #[test]
321    fn countblank_basic() {
322        let data = vec![
323            ("Sheet1", 1, 1, CellValue::Number(1.0)),
324            ("Sheet1", 1, 2, CellValue::Empty),
325            ("Sheet1", 1, 3, CellValue::String("x".to_string())),
326            ("Sheet1", 1, 4, CellValue::Empty),
327        ];
328        let result = eval_with_data("COUNTBLANK(A1:A4)", &data);
329        assert_eq!(result, CellValue::Number(2.0));
330    }
331
332    // COUNTIF tests
333
334    #[test]
335    fn countif_greater_than() {
336        let data = vec![
337            ("Sheet1", 1, 1, CellValue::Number(1.0)),
338            ("Sheet1", 1, 2, CellValue::Number(5.0)),
339            ("Sheet1", 1, 3, CellValue::Number(10.0)),
340        ];
341        let result = eval_with_data("COUNTIF(A1:A3,\">3\")", &data);
342        assert_eq!(result, CellValue::Number(2.0));
343    }
344
345    #[test]
346    fn countif_text_match() {
347        let data = vec![
348            ("Sheet1", 1, 1, CellValue::String("Apple".to_string())),
349            ("Sheet1", 1, 2, CellValue::String("Banana".to_string())),
350            ("Sheet1", 1, 3, CellValue::String("apple".to_string())),
351        ];
352        let result = eval_with_data("COUNTIF(A1:A3,\"Apple\")", &data);
353        assert_eq!(result, CellValue::Number(2.0));
354    }
355
356    // COUNTIFS test
357
358    #[test]
359    fn countifs_multi_criteria() {
360        let data = vec![
361            ("Sheet1", 1, 1, CellValue::String("A".to_string())),
362            ("Sheet1", 2, 1, CellValue::Number(10.0)),
363            ("Sheet1", 1, 2, CellValue::String("A".to_string())),
364            ("Sheet1", 2, 2, CellValue::Number(20.0)),
365            ("Sheet1", 1, 3, CellValue::String("B".to_string())),
366            ("Sheet1", 2, 3, CellValue::Number(30.0)),
367        ];
368        let result = eval_with_data("COUNTIFS(A1:A3,\"A\",B1:B3,\">15\")", &data);
369        assert_eq!(result, CellValue::Number(1.0));
370    }
371
372    // MEDIAN tests
373
374    #[test]
375    fn median_odd_count() {
376        assert_eq!(eval("MEDIAN(1,3,2)"), CellValue::Number(2.0));
377    }
378
379    #[test]
380    fn median_even_count() {
381        assert_eq!(eval("MEDIAN(1,2,3,4)"), CellValue::Number(2.5));
382    }
383
384    #[test]
385    fn median_single() {
386        assert_eq!(eval("MEDIAN(42)"), CellValue::Number(42.0));
387    }
388
389    // MODE tests
390
391    #[test]
392    fn mode_basic() {
393        assert_eq!(eval("MODE(1,2,2,3,3,3)"), CellValue::Number(3.0));
394    }
395
396    #[test]
397    fn mode_no_repeat() {
398        assert_eq!(eval("MODE(1,2,3)"), CellValue::Error("#N/A".to_string()));
399    }
400
401    // LARGE tests
402
403    #[test]
404    fn large_basic() {
405        let data = vec![
406            ("Sheet1", 1, 1, CellValue::Number(5.0)),
407            ("Sheet1", 1, 2, CellValue::Number(3.0)),
408            ("Sheet1", 1, 3, CellValue::Number(8.0)),
409            ("Sheet1", 1, 4, CellValue::Number(1.0)),
410        ];
411        let result = eval_with_data("LARGE(A1:A4,2)", &data);
412        assert_eq!(result, CellValue::Number(5.0));
413    }
414
415    #[test]
416    fn large_k_out_of_range() {
417        let data = vec![
418            ("Sheet1", 1, 1, CellValue::Number(1.0)),
419            ("Sheet1", 1, 2, CellValue::Number(2.0)),
420        ];
421        let result = eval_with_data("LARGE(A1:A2,5)", &data);
422        assert_eq!(result, CellValue::Error("#NUM!".to_string()));
423    }
424
425    // SMALL tests
426
427    #[test]
428    fn small_basic() {
429        let data = vec![
430            ("Sheet1", 1, 1, CellValue::Number(5.0)),
431            ("Sheet1", 1, 2, CellValue::Number(3.0)),
432            ("Sheet1", 1, 3, CellValue::Number(8.0)),
433            ("Sheet1", 1, 4, CellValue::Number(1.0)),
434        ];
435        let result = eval_with_data("SMALL(A1:A4,2)", &data);
436        assert_eq!(result, CellValue::Number(3.0));
437    }
438
439    // RANK tests
440
441    #[test]
442    fn rank_descending() {
443        let data = vec![
444            ("Sheet1", 1, 1, CellValue::Number(5.0)),
445            ("Sheet1", 1, 2, CellValue::Number(3.0)),
446            ("Sheet1", 1, 3, CellValue::Number(8.0)),
447            ("Sheet1", 1, 4, CellValue::Number(1.0)),
448        ];
449        // 5 is 2nd largest in [5,3,8,1]
450        let result = eval_with_data("RANK(5,A1:A4)", &data);
451        assert_eq!(result, CellValue::Number(2.0));
452    }
453
454    #[test]
455    fn rank_ascending() {
456        let data = vec![
457            ("Sheet1", 1, 1, CellValue::Number(5.0)),
458            ("Sheet1", 1, 2, CellValue::Number(3.0)),
459            ("Sheet1", 1, 3, CellValue::Number(8.0)),
460            ("Sheet1", 1, 4, CellValue::Number(1.0)),
461        ];
462        // 5 is 3rd smallest in [5,3,8,1]
463        let result = eval_with_data("RANK(5,A1:A4,1)", &data);
464        assert_eq!(result, CellValue::Number(3.0));
465    }
466
467    #[test]
468    fn rank_not_found() {
469        let data = vec![
470            ("Sheet1", 1, 1, CellValue::Number(1.0)),
471            ("Sheet1", 1, 2, CellValue::Number(2.0)),
472        ];
473        let result = eval_with_data("RANK(99,A1:A2)", &data);
474        assert_eq!(result, CellValue::Error("#N/A".to_string()));
475    }
476}