Skip to main content

ganit_core/parser/
mod.rs

1pub mod ast;
2pub mod tokens;
3
4pub use ast::Expr;
5use ast::{BinaryOp, Span, UnaryOp};
6use crate::types::ParseError;
7use nom::{IResult, character::complete::multispace0};
8use tokens::{bool_literal, identifier, number_literal, offset, string_literal};
9
/// Recursive-descent parser state.
///
/// Keeps the complete input string so that `span` can turn the sub-slices
/// seen while parsing into offsets measured against that string.
struct Parser<'a> {
    /// The string handed to `Parser::new`; every span offset is computed against it.
    full: &'a str,
}
13
14impl<'a> Parser<'a> {
15    fn new(full: &'a str) -> Self {
16        Self { full }
17    }
18
19    fn span(&self, before: &str, after: &str) -> Span {
20        let start = offset(self.full, before);
21        let end = offset(self.full, after);
22        Span::new(start, end - start)
23    }
24
25    // ── primary ────────────────────────────────────────────────────────────
26
27    fn parse_primary(&self, i: &'a str) -> IResult<&'a str, Expr> {
28        let i = multispace0(i)?.0;
29
30        // Number literal (must come before identifier to catch e.g. "1e3")
31        if let Ok((rest, n)) = number_literal(i) {
32            return Ok((rest, Expr::Number(n, self.span(i, rest))));
33        }
34
35        // String literal
36        if let Ok((rest, text)) = string_literal(i) {
37            return Ok((rest, Expr::Text(text, self.span(i, rest))));
38        }
39
40        // Array literal: {expr, expr, ...}
41        if let Some(inner) = i.strip_prefix('{') {
42            let (rest, elems) = self.parse_array_elements(inner)?;
43            let rest = multispace0(rest)?.0;
44            if let Some(after) = rest.strip_prefix('}') {
45                return Ok((after, Expr::Array(elems, self.span(i, after))));
46            }
47            return Err(nom::Err::Error(nom::error::Error::new(
48                rest,
49                nom::error::ErrorKind::Char,
50            )));
51        }
52
53        // Parenthesised expression
54        if let Some(inner) = i.strip_prefix('(') {
55            let (rest, expr) = self.parse_comparison(inner)?;
56            let rest = multispace0(rest)?.0;
57            if let Some(after) = rest.strip_prefix(')') {
58                return Ok((after, expr));
59            }
60            return Err(nom::Err::Error(nom::error::Error::new(
61                rest,
62                nom::error::ErrorKind::Char,
63            )));
64        }
65
66        // Boolean (before identifier — uses word-boundary check in bool_literal)
67        if let Ok((rest, b)) = bool_literal(i) {
68            return Ok((rest, Expr::Bool(b, self.span(i, rest))));
69        }
70
71        // Identifier: variable or function call
72        if let Ok((rest, name)) = identifier(i) {
73            let rest_ws = multispace0(rest)?.0;
74            if let Some(args_input) = rest_ws.strip_prefix('(') {
75                // Function call
76                let (rest2, args) = self.parse_arg_list(args_input)?;
77                let rest2 = multispace0(rest2)?.0;
78                if let Some(after_close) = rest2.strip_prefix(')') {
79                    let func_expr = Expr::FunctionCall {
80                        name: name.to_uppercase(),
81                        args,
82                        span: self.span(i, after_close),
83                    };
84                    // Detect immediately-invoked call: FUNC(lambda_args)(call_args)
85                    let after_ws = multispace0(after_close)?.0;
86                    if let Some(call_input) = after_ws.strip_prefix('(') {
87                        let (rest3, call_args) = self.parse_arg_list(call_input)?;
88                        let rest3 = multispace0(rest3)?.0;
89                        if let Some(after) = rest3.strip_prefix(')') {
90                            return Ok((after, Expr::Apply {
91                                func: Box::new(func_expr),
92                                call_args,
93                                span: self.span(i, after),
94                            }));
95                        }
96                        return Err(nom::Err::Error(nom::error::Error::new(
97                            rest3,
98                            nom::error::ErrorKind::Char,
99                        )));
100                    }
101                    return Ok((after_close, func_expr));
102                }
103                return Err(nom::Err::Error(nom::error::Error::new(
104                    rest2,
105                    nom::error::ErrorKind::Char,
106                )));
107            }
108            // Range reference: A1:D4
109            if is_cell_ref(name) {
110                if let Some(after_colon) = rest_ws.strip_prefix(':') {
111                    if let Ok((rest2, name2)) = identifier(after_colon) {
112                        if is_cell_ref(name2) {
113                            let range_name = format!("{}:{}", name, name2);
114                            return Ok((rest2, Expr::Variable(range_name, self.span(i, rest2))));
115                        }
116                    }
117                }
118            }
119            return Ok((rest, Expr::Variable(name.to_string(), self.span(i, rest))));
120        }
121
122        Err(nom::Err::Error(nom::error::Error::new(i, nom::error::ErrorKind::Alt)))
123    }
124
125    fn parse_arg_list(&self, i: &'a str) -> IResult<&'a str, Vec<Expr>> {
126        let mut args = Vec::new();
127        let mut rest = multispace0(i)?.0;
128
129        if rest.starts_with(')') {
130            return Ok((rest, args));
131        }
132
133        // Parse first argument (may be empty if it starts with comma or close paren)
134        let ws = multispace0(rest)?.0;
135        if ws.starts_with(',') || ws.starts_with(')') {
136            // Empty first argument
137            args.push(Expr::Variable(String::new(), Span::new(0, 0)));
138        } else {
139            let (r, first) = self.parse_comparison(rest)?;
140            args.push(first);
141            rest = r;
142        }
143
144        loop {
145            rest = multispace0(rest)?.0;
146            if let Some(after_comma) = rest.strip_prefix(',') {
147                let after_ws = multispace0(after_comma)?.0;
148                if after_ws.starts_with(',') || after_ws.starts_with(')') {
149                    // Empty argument
150                    args.push(Expr::Variable(String::new(), Span::new(0, 0)));
151                    rest = after_comma;
152                } else {
153                    let (r, arg) = self.parse_comparison(after_comma)?;
154                    args.push(arg);
155                    rest = r;
156                }
157            } else {
158                break;
159            }
160        }
161
162        Ok((rest, args))
163    }
164
165    fn parse_array_elements(&self, i: &'a str) -> IResult<&'a str, Vec<Expr>> {
166        let mut rows: Vec<Vec<Expr>> = Vec::new();
167        let mut current_row: Vec<Expr> = Vec::new();
168        let mut rest = multispace0(i)?.0;
169        if rest.starts_with('}') {
170            return Ok((rest, Vec::new())); // empty array {}
171        }
172        let (r, first) = self.parse_comparison(rest)?;
173        current_row.push(first);
174        rest = r;
175        loop {
176            rest = multispace0(rest)?.0;
177            if let Some(after_comma) = rest.strip_prefix(',') {
178                let (r, elem) = self.parse_comparison(after_comma)?;
179                current_row.push(elem);
180                rest = r;
181            } else if let Some(after_semi) = rest.strip_prefix(';') {
182                rows.push(std::mem::take(&mut current_row));
183                let (r, elem) = self.parse_comparison(after_semi)?;
184                current_row.push(elem);
185                rest = r;
186            } else {
187                break;
188            }
189        }
190        rows.push(current_row);
191        // If only one row (no semicolons), return flat vec
192        if rows.len() == 1 {
193            return Ok((rest, rows.into_iter().next().unwrap()));
194        }
195        // Multiple rows → wrap each row in an Array node
196        let span_start = i;
197        let row_exprs: Vec<Expr> = rows
198            .into_iter()
199            .map(|row_elems| {
200                let s = self.span(span_start, rest);
201                Expr::Array(row_elems, s)
202            })
203            .collect();
204        Ok((rest, row_exprs))
205    }
206
207    // ── postfix % ─────────────────────────────────────────────────────────
208
209    fn parse_postfix(&self, i: &'a str) -> IResult<&'a str, Expr> {
210        let (rest, expr) = self.parse_primary(i)?;
211        let rest_ws = multispace0(rest)?.0;
212        if let Some(after) = rest_ws.strip_prefix('%') {
213            return Ok((after, Expr::UnaryOp {
214                op: UnaryOp::Percent,
215                operand: Box::new(expr),
216                span: self.span(i, after),
217            }));
218        }
219        Ok((rest, expr))
220    }
221
222    // ── unary minus ───────────────────────────────────────────────────────
223
224    fn parse_unary(&self, i: &'a str) -> IResult<&'a str, Expr> {
225        let i_ws = multispace0(i)?.0;
226        if let Some(after_minus) = i_ws.strip_prefix('-') {
227            let (rest, operand) = self.parse_unary(after_minus)?;
228            return Ok((rest, Expr::UnaryOp {
229                op: UnaryOp::Neg,
230                operand: Box::new(operand),
231                span: self.span(i_ws, rest),
232            }));
233        }
234        self.parse_postfix(i)
235    }
236
237    // ── power ^ (right-associative) ───────────────────────────────────────
238
239    fn parse_power(&self, i: &'a str) -> IResult<&'a str, Expr> {
240        let (rest, left) = self.parse_unary(i)?;
241        let rest_ws = multispace0(rest)?.0;
242        if let Some(after_op) = rest_ws.strip_prefix('^') {
243            let (rest2, right) = self.parse_power(after_op)?;
244            return Ok((rest2, Expr::BinaryOp {
245                op: BinaryOp::Pow,
246                left: Box::new(left),
247                right: Box::new(right),
248                span: self.span(i, rest2),
249            }));
250        }
251        Ok((rest, left))
252    }
253
254    // ── multiplicative * / ────────────────────────────────────────────────
255
256    fn parse_multiplicative(&self, i: &'a str) -> IResult<&'a str, Expr> {
257        let (mut rest, mut left) = self.parse_power(i)?;
258        loop {
259            let ws = multispace0(rest)?.0;
260            let op = ws.strip_prefix('*').map(|after| (BinaryOp::Mul, after))
261                .or_else(|| ws.strip_prefix('/').map(|after| (BinaryOp::Div, after)));
262            match op {
263                None => break,
264                Some((op, after)) => {
265                    let (r, right) = self.parse_power(after)?;
266                    left = Expr::BinaryOp {
267                        op,
268                        span: self.span(i, r),
269                        left: Box::new(left),
270                        right: Box::new(right),
271                    };
272                    rest = r;
273                }
274            }
275        }
276        Ok((rest, left))
277    }
278
279    // ── additive + - ──────────────────────────────────────────────────────
280
281    fn parse_additive(&self, i: &'a str) -> IResult<&'a str, Expr> {
282        let (mut rest, mut left) = self.parse_multiplicative(i)?;
283        loop {
284            let ws = multispace0(rest)?.0;
285            let op = ws.strip_prefix('+').map(|after| (BinaryOp::Add, after))
286                .or_else(|| ws.strip_prefix('-').map(|after| (BinaryOp::Sub, after)));
287            match op {
288                None => break,
289                Some((op, after)) => {
290                    let (r, right) = self.parse_multiplicative(after)?;
291                    left = Expr::BinaryOp {
292                        op,
293                        span: self.span(i, r),
294                        left: Box::new(left),
295                        right: Box::new(right),
296                    };
297                    rest = r;
298                }
299            }
300        }
301        Ok((rest, left))
302    }
303
304    // ── concat & ─────────────────────────────────────────────────────────
305
306    fn parse_concat(&self, i: &'a str) -> IResult<&'a str, Expr> {
307        let (mut rest, mut left) = self.parse_additive(i)?;
308        loop {
309            let ws = multispace0(rest)?.0;
310            if let Some(after) = ws.strip_prefix('&') {
311                let (r, right) = self.parse_additive(after)?;
312                left = Expr::BinaryOp {
313                    op: BinaryOp::Concat,
314                    span: self.span(i, r),
315                    left: Box::new(left),
316                    right: Box::new(right),
317                };
318                rest = r;
319            } else {
320                break;
321            }
322        }
323        Ok((rest, left))
324    }
325
326    // ── comparison = <> < > <= >= ─────────────────────────────────────────
327
328    fn parse_comparison(&self, i: &'a str) -> IResult<&'a str, Expr> {
329        let (rest, left) = self.parse_concat(i)?;
330        let ws = multispace0(rest)?.0;
331
332        // Longest match first
333        let op_result: Option<(BinaryOp, &'a str)> = if let Some(after) = ws.strip_prefix("<>") {
334            Some((BinaryOp::Ne, after))
335        } else if let Some(after) = ws.strip_prefix("<=") {
336            Some((BinaryOp::Le, after))
337        } else if let Some(after) = ws.strip_prefix(">=") {
338            Some((BinaryOp::Ge, after))
339        } else if let Some(after) = ws.strip_prefix('<') {
340            Some((BinaryOp::Lt, after))
341        } else if let Some(after) = ws.strip_prefix('>') {
342            Some((BinaryOp::Gt, after))
343        } else if let Some(after) = ws.strip_prefix('=') {
344            Some((BinaryOp::Eq, after))
345        } else {
346            None
347        };
348
349        if let Some((op, after)) = op_result {
350            let (r, right) = self.parse_concat(after)?;
351            return Ok((r, Expr::BinaryOp {
352                op,
353                span: self.span(i, r),
354                left: Box::new(left),
355                right: Box::new(right),
356            }));
357        }
358
359        Ok((rest, left))
360    }
361}
362
/// Reports whether `name` has the shape of a cell reference such as "A1" or
/// "BC42": one or more ASCII letters followed by one or more ASCII digits and
/// nothing else.
fn is_cell_ref(name: &str) -> bool {
    // Index of the first character that is not an ASCII letter (or the end).
    let split = name
        .find(|c: char| !c.is_ascii_alphabetic())
        .unwrap_or(name.len());
    let (letters, digits) = name.split_at(split);
    !letters.is_empty() && !digits.is_empty() && digits.bytes().all(|b| b.is_ascii_digit())
}
369
370// ── public API ──────────────────────────────────────────────────────────────
371
372/// Parse a formula string into an expression tree.
373///
374/// The formula must start with `=`. Returns a [`ParseError`] if the input
375/// is not a valid formula.
376pub fn parse(formula: &str) -> Result<Expr, ParseError> {
377    let input = formula.strip_prefix('=').unwrap_or(formula).trim();
378    let p = Parser::new(formula);
379    match p.parse_comparison(input) {
380        Ok((rest, expr)) => {
381            let rest = rest.trim();
382            if rest.is_empty() {
383                Ok(expr)
384            } else {
385                Err(ParseError {
386                    message: format!("Unexpected input '{}'", rest),
387                    position: offset(formula, rest),
388                })
389            }
390        }
391        Err(nom::Err::Error(e)) | Err(nom::Err::Failure(e)) => Err(ParseError {
392            message: "Parse error".into(),
393            position: offset(formula, e.input),
394        }),
395        Err(nom::Err::Incomplete(_)) => Err(ParseError {
396            message: "Incomplete input".into(),
397            position: formula.len(),
398        }),
399    }
400}
401
402/// Validate that a formula string is syntactically correct without returning the AST.
403pub fn validate(formula: &str) -> Result<(), ParseError> {
404    parse(formula).map(|_| ())
405}
406
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser::ast::{BinaryOp, Expr, UnaryOp};

    /// Parses `src` and panics if the parser rejects it.
    fn ast(src: &str) -> Expr {
        parse(src).unwrap()
    }

    // ── literals and variables ──────────────────────────────────────────

    #[test]
    fn parse_number_literal() {
        assert!(matches!(ast("=42"), Expr::Number(n, _) if n == 42.0));
    }

    #[test]
    fn parse_string_literal() {
        assert!(matches!(ast("=\"hello\""), Expr::Text(ref s, _) if s == "hello"));
    }

    #[test]
    fn parse_boolean() {
        assert!(matches!(ast("=TRUE"), Expr::Bool(true, _)));
    }

    #[test]
    fn parse_variable() {
        assert!(matches!(ast("=myVar"), Expr::Variable(ref n, _) if n == "myVar"));
    }

    // ── operators ───────────────────────────────────────────────────────

    #[test]
    fn parse_binary_add() {
        assert!(matches!(ast("=1+2"), Expr::BinaryOp { op: BinaryOp::Add, .. }));
    }

    #[test]
    fn parse_precedence() {
        // 2+3*4 should parse as 2+(3*4)
        if let Expr::BinaryOp { op: BinaryOp::Add, right, .. } = ast("=2+3*4") {
            assert!(matches!(*right, Expr::BinaryOp { op: BinaryOp::Mul, .. }));
        } else {
            panic!("Expected Add at top");
        }
    }

    #[test]
    fn parse_percent() {
        assert!(matches!(ast("=50%"), Expr::UnaryOp { op: UnaryOp::Percent, .. }));
    }

    #[test]
    fn parse_concat_op() {
        assert!(matches!(ast("=\"a\"&\"b\""), Expr::BinaryOp { op: BinaryOp::Concat, .. }));
    }

    #[test]
    fn parse_power_right_assoc() {
        // 2^3^2 = 2^(3^2) = 2^9 = 512 (right-associative)
        if let Expr::BinaryOp { op: BinaryOp::Pow, right, .. } = ast("=2^3^2") {
            assert!(matches!(*right, Expr::BinaryOp { op: BinaryOp::Pow, .. }));
        } else {
            panic!("Expected Pow at top");
        }
    }

    // ── function calls ──────────────────────────────────────────────────

    #[test]
    fn parse_function_call() {
        if let Expr::FunctionCall { name, args, .. } = ast("=SUM(1,2,3)") {
            assert_eq!(name, "SUM");
            assert_eq!(args.len(), 3);
        } else {
            panic!("Expected FunctionCall");
        }
    }

    #[test]
    fn parse_nested() {
        assert!(parse("=ROUND(SUM(1,2)*1.1, 1)").is_ok());
    }

    #[test]
    fn validate_incomplete_fails() {
        let err = validate("=SUM(1,").unwrap_err();
        assert!(!err.message.is_empty());
    }

    // ── array literals ──────────────────────────────────────────────────

    #[test]
    fn parse_array_literal_numbers() {
        if let Expr::Array(elems, _) = ast("={1,2,3}") {
            assert_eq!(elems.len(), 3);
        } else {
            panic!("Expected Array");
        }
    }

    #[test]
    fn parse_array_literal_mixed() {
        assert!(matches!(ast("={1,\"hello\",TRUE}"), Expr::Array(_, _)));
    }

    #[test]
    fn parse_array_literal_empty() {
        assert!(matches!(ast("={}"), Expr::Array(ref e, _) if e.is_empty()));
    }

    #[test]
    fn parse_array_in_function_call() {
        if let Expr::FunctionCall { name, args, .. } = ast("=SUM({1,2,3})") {
            assert_eq!(name, "SUM");
            assert_eq!(args.len(), 1);
            assert!(matches!(args[0], Expr::Array(_, _)));
        } else {
            panic!("Expected FunctionCall");
        }
    }
}