Skip to main content

sheetkit_core/formula/functions/
lookup.rs

1//! Lookup and reference formula functions: VLOOKUP, HLOOKUP, INDEX, MATCH,
2//! LOOKUP, ROW, COLUMN, ROWS, COLUMNS, CHOOSE, ADDRESS.
3
4use crate::cell::CellValue;
5use crate::error::{Error, Result};
6use crate::formula::ast::{CellReference, Expr};
7use crate::formula::eval::{coerce_to_number, coerce_to_string, compare_values, Evaluator};
8use crate::formula::functions::check_arg_count;
9use crate::utils::cell_ref::{column_name_to_number, column_number_to_name};
10
11/// Extract (start, end) CellReference pair from a Range expression.
12fn extract_range(expr: &Expr) -> Result<(&CellReference, &CellReference)> {
13    match expr {
14        Expr::Range { start, end } => Ok((start, end)),
15        _ => Err(Error::FormulaError(
16            "expected a range reference".to_string(),
17        )),
18    }
19}
20
21/// Read a range into a flat row-major Vec and return (values, num_cols, num_rows).
22fn read_range(expr: &Expr, ctx: &mut Evaluator) -> Result<(Vec<CellValue>, usize, usize)> {
23    let (start, end) = extract_range(expr)?;
24    let start_col = column_name_to_number(&start.col)?;
25    let end_col = column_name_to_number(&end.col)?;
26    let min_col = start_col.min(end_col);
27    let max_col = start_col.max(end_col);
28    let min_row = start.row.min(end.row);
29    let max_row = start.row.max(end.row);
30    let num_cols = (max_col - min_col + 1) as usize;
31    let num_rows = (max_row - min_row + 1) as usize;
32    let values = ctx.expand_range(start, end)?;
33    Ok((values, num_cols, num_rows))
34}
35
36/// VLOOKUP(lookup_value, table_array, col_index_num, [range_lookup])
37pub fn fn_vlookup(args: &[Expr], ctx: &mut Evaluator) -> Result<CellValue> {
38    check_arg_count("VLOOKUP", args, 3, 4)?;
39    let lookup_val = ctx.eval_expr(&args[0])?;
40    let (values, num_cols, num_rows) = read_range(&args[1], ctx)?;
41    let col_index = coerce_to_number(&ctx.eval_expr(&args[2])?)? as usize;
42    let range_lookup = if args.len() > 3 {
43        match ctx.eval_expr(&args[3])? {
44            CellValue::Bool(b) => b,
45            v => coerce_to_number(&v)? != 0.0,
46        }
47    } else {
48        true
49    };
50
51    if col_index < 1 || col_index > num_cols {
52        return Ok(CellValue::Error("#REF!".to_string()));
53    }
54
55    if range_lookup {
56        // Approximate match: find largest value <= lookup_val in first column
57        let mut best_row: Option<usize> = None;
58        for r in 0..num_rows {
59            let cell = &values[r * num_cols];
60            if compare_values(cell, &lookup_val) != std::cmp::Ordering::Greater {
61                best_row = Some(r);
62            } else {
63                break;
64            }
65        }
66        match best_row {
67            Some(r) => Ok(values[r * num_cols + col_index - 1].clone()),
68            None => Ok(CellValue::Error("#N/A".to_string())),
69        }
70    } else {
71        // Exact match
72        for r in 0..num_rows {
73            let cell = &values[r * num_cols];
74            if compare_values(cell, &lookup_val) == std::cmp::Ordering::Equal {
75                return Ok(values[r * num_cols + col_index - 1].clone());
76            }
77        }
78        Ok(CellValue::Error("#N/A".to_string()))
79    }
80}
81
82/// HLOOKUP(lookup_value, table_array, row_index_num, [range_lookup])
83pub fn fn_hlookup(args: &[Expr], ctx: &mut Evaluator) -> Result<CellValue> {
84    check_arg_count("HLOOKUP", args, 3, 4)?;
85    let lookup_val = ctx.eval_expr(&args[0])?;
86    let (values, num_cols, num_rows) = read_range(&args[1], ctx)?;
87    let row_index = coerce_to_number(&ctx.eval_expr(&args[2])?)? as usize;
88    let range_lookup = if args.len() > 3 {
89        match ctx.eval_expr(&args[3])? {
90            CellValue::Bool(b) => b,
91            v => coerce_to_number(&v)? != 0.0,
92        }
93    } else {
94        true
95    };
96
97    if row_index < 1 || row_index > num_rows {
98        return Ok(CellValue::Error("#REF!".to_string()));
99    }
100
101    if range_lookup {
102        let mut best_col: Option<usize> = None;
103        for (c, cell) in values.iter().enumerate().take(num_cols) {
104            if compare_values(cell, &lookup_val) != std::cmp::Ordering::Greater {
105                best_col = Some(c);
106            } else {
107                break;
108            }
109        }
110        match best_col {
111            Some(c) => Ok(values[(row_index - 1) * num_cols + c].clone()),
112            None => Ok(CellValue::Error("#N/A".to_string())),
113        }
114    } else {
115        for (c, cell) in values.iter().enumerate().take(num_cols) {
116            if compare_values(cell, &lookup_val) == std::cmp::Ordering::Equal {
117                return Ok(values[(row_index - 1) * num_cols + c].clone());
118            }
119        }
120        Ok(CellValue::Error("#N/A".to_string()))
121    }
122}
123
124/// INDEX(array, row_num, [col_num])
125pub fn fn_index(args: &[Expr], ctx: &mut Evaluator) -> Result<CellValue> {
126    check_arg_count("INDEX", args, 2, 3)?;
127    let (values, num_cols, num_rows) = read_range(&args[0], ctx)?;
128    let row_num = coerce_to_number(&ctx.eval_expr(&args[1])?)? as usize;
129    let col_num = if args.len() > 2 {
130        coerce_to_number(&ctx.eval_expr(&args[2])?)? as usize
131    } else {
132        1
133    };
134
135    if row_num < 1 || row_num > num_rows || col_num < 1 || col_num > num_cols {
136        return Ok(CellValue::Error("#REF!".to_string()));
137    }
138    Ok(values[(row_num - 1) * num_cols + (col_num - 1)].clone())
139}
140
141/// MATCH(lookup_value, lookup_array, [match_type])
142pub fn fn_match(args: &[Expr], ctx: &mut Evaluator) -> Result<CellValue> {
143    check_arg_count("MATCH", args, 2, 3)?;
144    let lookup_val = ctx.eval_expr(&args[0])?;
145    let (values, num_cols, num_rows) = read_range(&args[1], ctx)?;
146    let match_type = if args.len() > 2 {
147        coerce_to_number(&ctx.eval_expr(&args[2])?)? as i32
148    } else {
149        1
150    };
151
152    // MATCH works on a 1-D array (single row or column)
153    let items: &[CellValue] = if num_rows == 1 || num_cols == 1 {
154        &values
155    } else {
156        return Ok(CellValue::Error("#N/A".to_string()));
157    };
158
159    match match_type {
160        0 => {
161            // Exact match
162            for (i, v) in items.iter().enumerate() {
163                if compare_values(v, &lookup_val) == std::cmp::Ordering::Equal {
164                    return Ok(CellValue::Number((i + 1) as f64));
165                }
166            }
167            Ok(CellValue::Error("#N/A".to_string()))
168        }
169        1 => {
170            // Largest value <= lookup_val (data sorted ascending)
171            let mut best: Option<usize> = None;
172            for (i, v) in items.iter().enumerate() {
173                if compare_values(v, &lookup_val) != std::cmp::Ordering::Greater {
174                    best = Some(i);
175                }
176            }
177            match best {
178                Some(i) => Ok(CellValue::Number((i + 1) as f64)),
179                None => Ok(CellValue::Error("#N/A".to_string())),
180            }
181        }
182        -1 => {
183            // Smallest value >= lookup_val (data sorted descending)
184            let mut best: Option<usize> = None;
185            for (i, v) in items.iter().enumerate() {
186                if compare_values(v, &lookup_val) != std::cmp::Ordering::Less {
187                    best = Some(i);
188                }
189            }
190            match best {
191                Some(i) => Ok(CellValue::Number((i + 1) as f64)),
192                None => Ok(CellValue::Error("#N/A".to_string())),
193            }
194        }
195        _ => Ok(CellValue::Error("#N/A".to_string())),
196    }
197}
198
199/// LOOKUP(lookup_value, lookup_vector, [result_vector]) - vector form.
200pub fn fn_lookup(args: &[Expr], ctx: &mut Evaluator) -> Result<CellValue> {
201    check_arg_count("LOOKUP", args, 2, 3)?;
202    let lookup_val = ctx.eval_expr(&args[0])?;
203    let (lookup_values, _, _) = read_range(&args[1], ctx)?;
204
205    // Find largest value <= lookup_val (assumes sorted ascending)
206    let mut best: Option<usize> = None;
207    for (i, v) in lookup_values.iter().enumerate() {
208        if compare_values(v, &lookup_val) != std::cmp::Ordering::Greater {
209            best = Some(i);
210        }
211    }
212    let idx = match best {
213        Some(i) => i,
214        None => return Ok(CellValue::Error("#N/A".to_string())),
215    };
216
217    if args.len() > 2 {
218        let (result_values, _, _) = read_range(&args[2], ctx)?;
219        if idx < result_values.len() {
220            Ok(result_values[idx].clone())
221        } else {
222            Ok(CellValue::Error("#N/A".to_string()))
223        }
224    } else {
225        Ok(lookup_values[idx].clone())
226    }
227}
228
229/// ROW([reference]) - returns the row number of a reference.
230pub fn fn_row(args: &[Expr], _ctx: &mut Evaluator) -> Result<CellValue> {
231    check_arg_count("ROW", args, 0, 1)?;
232    if args.is_empty() {
233        return Ok(CellValue::Number(1.0));
234    }
235    match &args[0] {
236        Expr::CellRef(cell_ref) => Ok(CellValue::Number(cell_ref.row as f64)),
237        Expr::Range { start, .. } => Ok(CellValue::Number(start.row as f64)),
238        _ => Ok(CellValue::Error("#VALUE!".to_string())),
239    }
240}
241
242/// COLUMN([reference]) - returns the column number of a reference.
243pub fn fn_column(args: &[Expr], _ctx: &mut Evaluator) -> Result<CellValue> {
244    check_arg_count("COLUMN", args, 0, 1)?;
245    if args.is_empty() {
246        return Ok(CellValue::Number(1.0));
247    }
248    match &args[0] {
249        Expr::CellRef(cell_ref) => {
250            let col = column_name_to_number(&cell_ref.col)?;
251            Ok(CellValue::Number(col as f64))
252        }
253        Expr::Range { start, .. } => {
254            let col = column_name_to_number(&start.col)?;
255            Ok(CellValue::Number(col as f64))
256        }
257        _ => Ok(CellValue::Error("#VALUE!".to_string())),
258    }
259}
260
261/// ROWS(array) - returns the number of rows in a reference.
262pub fn fn_rows(args: &[Expr], _ctx: &mut Evaluator) -> Result<CellValue> {
263    check_arg_count("ROWS", args, 1, 1)?;
264    match &args[0] {
265        Expr::Range { start, end } => {
266            let rows = (end.row.max(start.row) - end.row.min(start.row) + 1) as f64;
267            Ok(CellValue::Number(rows))
268        }
269        Expr::CellRef(_) => Ok(CellValue::Number(1.0)),
270        _ => Ok(CellValue::Error("#VALUE!".to_string())),
271    }
272}
273
274/// COLUMNS(array) - returns the number of columns in a reference.
275pub fn fn_columns(args: &[Expr], _ctx: &mut Evaluator) -> Result<CellValue> {
276    check_arg_count("COLUMNS", args, 1, 1)?;
277    match &args[0] {
278        Expr::Range { start, end } => {
279            let start_col = column_name_to_number(&start.col)?;
280            let end_col = column_name_to_number(&end.col)?;
281            let cols = (start_col.max(end_col) - start_col.min(end_col) + 1) as f64;
282            Ok(CellValue::Number(cols))
283        }
284        Expr::CellRef(_) => Ok(CellValue::Number(1.0)),
285        _ => Ok(CellValue::Error("#VALUE!".to_string())),
286    }
287}
288
289/// CHOOSE(index_num, value1, [value2], ...) - returns value at given index.
290pub fn fn_choose(args: &[Expr], ctx: &mut Evaluator) -> Result<CellValue> {
291    check_arg_count("CHOOSE", args, 2, 255)?;
292    let index = coerce_to_number(&ctx.eval_expr(&args[0])?)? as usize;
293    if index < 1 || index >= args.len() {
294        return Ok(CellValue::Error("#VALUE!".to_string()));
295    }
296    ctx.eval_expr(&args[index])
297}
298
299/// ADDRESS(row_num, column_num, [abs_num], [a1], [sheet_text])
300pub fn fn_address(args: &[Expr], ctx: &mut Evaluator) -> Result<CellValue> {
301    check_arg_count("ADDRESS", args, 2, 5)?;
302    let row = coerce_to_number(&ctx.eval_expr(&args[0])?)? as u32;
303    let col = coerce_to_number(&ctx.eval_expr(&args[1])?)? as u32;
304    let abs_num = if args.len() > 2 {
305        coerce_to_number(&ctx.eval_expr(&args[2])?)? as i32
306    } else {
307        1
308    };
309    // a1 parameter (default true = A1 style, false = R1C1 style)
310    let a1 = if args.len() > 3 {
311        match ctx.eval_expr(&args[3])? {
312            CellValue::Bool(b) => b,
313            v => coerce_to_number(&v)? != 0.0,
314        }
315    } else {
316        true
317    };
318    let sheet_text = if args.len() > 4 {
319        Some(coerce_to_string(&ctx.eval_expr(&args[4])?))
320    } else {
321        None
322    };
323
324    if col < 1 || row < 1 {
325        return Ok(CellValue::Error("#VALUE!".to_string()));
326    }
327
328    let col_name = column_number_to_name(col)?;
329
330    let address = if a1 {
331        match abs_num {
332            1 => format!("${col_name}${row}"), // absolute row and column
333            2 => format!("{col_name}${row}"),  // absolute row
334            3 => format!("${col_name}{row}"),  // absolute column
335            4 => format!("{col_name}{row}"),   // relative
336            _ => return Ok(CellValue::Error("#VALUE!".to_string())),
337        }
338    } else {
339        // R1C1 style
340        match abs_num {
341            1 => format!("R{row}C{col}"),
342            2 => format!("R{row}C[{col}]"),
343            3 => format!("R[{row}]C{col}"),
344            4 => format!("R[{row}]C[{col}]"),
345            _ => return Ok(CellValue::Error("#VALUE!".to_string())),
346        }
347    };
348
349    if let Some(sheet) = sheet_text {
350        Ok(CellValue::String(format!("{sheet}!{address}")))
351    } else {
352        Ok(CellValue::String(address))
353    }
354}
355
356#[cfg(test)]
357mod tests {
358    use super::*;
359    use crate::formula::eval::{evaluate, CellSnapshot};
360    use crate::formula::parser::parse_formula;
361
362    fn eval(formula: &str) -> CellValue {
363        let snap = CellSnapshot::new("Sheet1".to_string());
364        let expr = parse_formula(formula).unwrap();
365        evaluate(&expr, &snap).unwrap()
366    }
367
368    fn eval_with_data(formula: &str, snap: &CellSnapshot) -> CellValue {
369        let expr = parse_formula(formula).unwrap();
370        evaluate(&expr, snap).unwrap()
371    }
372
373    #[test]
374    fn test_vlookup_exact() {
375        let mut snap = CellSnapshot::new("Sheet1".to_string());
376        // Column A: keys, Column B: values
377        snap.set_cell("Sheet1", 1, 1, CellValue::Number(1.0));
378        snap.set_cell("Sheet1", 2, 1, CellValue::String("one".to_string()));
379        snap.set_cell("Sheet1", 1, 2, CellValue::Number(2.0));
380        snap.set_cell("Sheet1", 2, 2, CellValue::String("two".to_string()));
381        snap.set_cell("Sheet1", 1, 3, CellValue::Number(3.0));
382        snap.set_cell("Sheet1", 2, 3, CellValue::String("three".to_string()));
383
384        assert_eq!(
385            eval_with_data("VLOOKUP(2,A1:B3,2,FALSE)", &snap),
386            CellValue::String("two".to_string())
387        );
388    }
389
390    #[test]
391    fn test_vlookup_not_found() {
392        let mut snap = CellSnapshot::new("Sheet1".to_string());
393        snap.set_cell("Sheet1", 1, 1, CellValue::Number(1.0));
394        snap.set_cell("Sheet1", 2, 1, CellValue::String("one".to_string()));
395
396        assert_eq!(
397            eval_with_data("VLOOKUP(99,A1:B1,2,FALSE)", &snap),
398            CellValue::Error("#N/A".to_string())
399        );
400    }
401
402    #[test]
403    fn test_vlookup_approximate() {
404        let mut snap = CellSnapshot::new("Sheet1".to_string());
405        snap.set_cell("Sheet1", 1, 1, CellValue::Number(10.0));
406        snap.set_cell("Sheet1", 2, 1, CellValue::String("ten".to_string()));
407        snap.set_cell("Sheet1", 1, 2, CellValue::Number(20.0));
408        snap.set_cell("Sheet1", 2, 2, CellValue::String("twenty".to_string()));
409        snap.set_cell("Sheet1", 1, 3, CellValue::Number(30.0));
410        snap.set_cell("Sheet1", 2, 3, CellValue::String("thirty".to_string()));
411
412        assert_eq!(
413            eval_with_data("VLOOKUP(25,A1:B3,2,TRUE)", &snap),
414            CellValue::String("twenty".to_string())
415        );
416    }
417
418    #[test]
419    fn test_hlookup_exact() {
420        let mut snap = CellSnapshot::new("Sheet1".to_string());
421        // Row 1: keys, Row 2: values
422        snap.set_cell("Sheet1", 1, 1, CellValue::Number(1.0));
423        snap.set_cell("Sheet1", 2, 1, CellValue::Number(2.0));
424        snap.set_cell("Sheet1", 3, 1, CellValue::Number(3.0));
425        snap.set_cell("Sheet1", 1, 2, CellValue::String("one".to_string()));
426        snap.set_cell("Sheet1", 2, 2, CellValue::String("two".to_string()));
427        snap.set_cell("Sheet1", 3, 2, CellValue::String("three".to_string()));
428
429        assert_eq!(
430            eval_with_data("HLOOKUP(2,A1:C2,2,FALSE)", &snap),
431            CellValue::String("two".to_string())
432        );
433    }
434
435    #[test]
436    fn test_index_basic() {
437        let mut snap = CellSnapshot::new("Sheet1".to_string());
438        snap.set_cell("Sheet1", 1, 1, CellValue::Number(1.0));
439        snap.set_cell("Sheet1", 2, 1, CellValue::Number(2.0));
440        snap.set_cell("Sheet1", 1, 2, CellValue::Number(3.0));
441        snap.set_cell("Sheet1", 2, 2, CellValue::Number(4.0));
442
443        assert_eq!(
444            eval_with_data("INDEX(A1:B2,2,2)", &snap),
445            CellValue::Number(4.0)
446        );
447    }
448
449    #[test]
450    fn test_match_exact() {
451        let mut snap = CellSnapshot::new("Sheet1".to_string());
452        snap.set_cell("Sheet1", 1, 1, CellValue::String("apple".to_string()));
453        snap.set_cell("Sheet1", 1, 2, CellValue::String("banana".to_string()));
454        snap.set_cell("Sheet1", 1, 3, CellValue::String("cherry".to_string()));
455
456        assert_eq!(
457            eval_with_data(r#"MATCH("banana",A1:A3,0)"#, &snap),
458            CellValue::Number(2.0)
459        );
460    }
461
462    #[test]
463    fn test_match_not_found() {
464        let mut snap = CellSnapshot::new("Sheet1".to_string());
465        snap.set_cell("Sheet1", 1, 1, CellValue::Number(1.0));
466        snap.set_cell("Sheet1", 1, 2, CellValue::Number(2.0));
467
468        assert_eq!(
469            eval_with_data("MATCH(99,A1:A2,0)", &snap),
470            CellValue::Error("#N/A".to_string())
471        );
472    }
473
474    #[test]
475    fn test_lookup_vector() {
476        let mut snap = CellSnapshot::new("Sheet1".to_string());
477        snap.set_cell("Sheet1", 1, 1, CellValue::Number(1.0));
478        snap.set_cell("Sheet1", 1, 2, CellValue::Number(2.0));
479        snap.set_cell("Sheet1", 1, 3, CellValue::Number(3.0));
480        snap.set_cell("Sheet1", 2, 1, CellValue::String("one".to_string()));
481        snap.set_cell("Sheet1", 2, 2, CellValue::String("two".to_string()));
482        snap.set_cell("Sheet1", 2, 3, CellValue::String("three".to_string()));
483
484        assert_eq!(
485            eval_with_data("LOOKUP(2,A1:A3,B1:B3)", &snap),
486            CellValue::String("two".to_string())
487        );
488    }
489
490    #[test]
491    fn test_row() {
492        assert_eq!(eval("ROW(B5)"), CellValue::Number(5.0));
493    }
494
495    #[test]
496    fn test_column() {
497        assert_eq!(eval("COLUMN(C1)"), CellValue::Number(3.0));
498    }
499
500    #[test]
501    fn test_rows() {
502        assert_eq!(eval("ROWS(A1:A10)"), CellValue::Number(10.0));
503    }
504
505    #[test]
506    fn test_columns() {
507        assert_eq!(eval("COLUMNS(A1:D1)"), CellValue::Number(4.0));
508    }
509
510    #[test]
511    fn test_choose() {
512        assert_eq!(
513            eval(r#"CHOOSE(2,"a","b","c")"#),
514            CellValue::String("b".to_string())
515        );
516    }
517
518    #[test]
519    fn test_choose_out_of_range() {
520        assert_eq!(
521            eval(r#"CHOOSE(5,"a","b","c")"#),
522            CellValue::Error("#VALUE!".to_string())
523        );
524    }
525
526    #[test]
527    fn test_address_absolute() {
528        assert_eq!(eval("ADDRESS(1,1)"), CellValue::String("$A$1".to_string()));
529    }
530
531    #[test]
532    fn test_address_relative() {
533        assert_eq!(eval("ADDRESS(1,1,4)"), CellValue::String("A1".to_string()));
534    }
535
536    #[test]
537    fn test_address_with_sheet() {
538        assert_eq!(
539            eval(r#"ADDRESS(1,1,1,TRUE,"Sheet2")"#),
540            CellValue::String("Sheet2!$A$1".to_string())
541        );
542    }
543
544    #[test]
545    fn test_row_no_args() {
546        assert_eq!(eval("ROW()"), CellValue::Number(1.0));
547    }
548
549    #[test]
550    fn test_column_no_args() {
551        assert_eq!(eval("COLUMN()"), CellValue::Number(1.0));
552    }
553}