// pdforg_sheets/parser.rs

1//! Formula parser — converts a token stream into an Expression AST.
2
3use crate::lexer::Token;
4use thiserror::Error;
5use serde::{Deserialize, Serialize};
6use pdf_core::CellAddress;
7
8#[derive(Debug, Clone, Serialize, Deserialize)]
9pub enum Expr {
10    Number(f64),
11    Text(String),
12    Bool(bool),
13    Error(String),
14    CellRef(CellAddress),
15    RangeRef(CellAddress, CellAddress),
16    NamedRange(String),
17    Call { name: String, args: Vec<Expr> },
18    BinOp { left: Box<Expr>, op: BinOpKind, right: Box<Expr> },
19    UnaryOp { op: UnaryOpKind, expr: Box<Expr> },
20    Array(Vec<Vec<Expr>>),
21    Percent(Box<Expr>),
22}
23
24#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
25pub enum BinOpKind {
26    Add, Sub, Mul, Div, Pow,
27    Concat, // &
28    Eq, Ne, Lt, Le, Gt, Ge,
29}
30
31#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
32pub enum UnaryOpKind {
33    Neg,
34    Plus,
35}
36
37#[derive(Debug, Error)]
38pub enum ParseError {
39    #[error("Unexpected token: {0:?}")]
40    Unexpected(Token),
41    #[error("Unexpected end of input")]
42    UnexpectedEOF,
43    #[error("Invalid cell reference: {0}")]
44    InvalidCellRef(String),
45}
46
47pub struct Parser {
48    tokens: Vec<Token>,
49    pos: usize,
50}
51
52impl Parser {
53    pub fn new(tokens: Vec<Token>) -> Self {
54        Parser { tokens, pos: 0 }
55    }
56
57    pub fn parse(&mut self) -> Result<Expr, ParseError> {
58        self.parse_expr()
59    }
60
61    fn peek(&self) -> &Token {
62        self.tokens.get(self.pos).unwrap_or(&Token::EOF)
63    }
64
65    fn advance(&mut self) -> &Token {
66        let tok = self.tokens.get(self.pos).unwrap_or(&Token::EOF);
67        self.pos += 1;
68        tok
69    }
70
71    fn expect(&mut self, expected: &Token) -> Result<(), ParseError> {
72        let tok = self.advance().clone();
73        if std::mem::discriminant(&tok) == std::mem::discriminant(expected) {
74            Ok(())
75        } else {
76            Err(ParseError::Unexpected(tok))
77        }
78    }
79
80    // Pratt parser with precedence levels
81    fn parse_expr(&mut self) -> Result<Expr, ParseError> {
82        self.parse_comparison()
83    }
84
85    fn parse_comparison(&mut self) -> Result<Expr, ParseError> {
86        let mut left = self.parse_additive()?;
87        loop {
88            let op = match self.peek() {
89                Token::Equal => BinOpKind::Eq,
90                Token::NotEqual => BinOpKind::Ne,
91                Token::LessThan => BinOpKind::Lt,
92                Token::LessEqual => BinOpKind::Le,
93                Token::GreaterThan => BinOpKind::Gt,
94                Token::GreaterEqual => BinOpKind::Ge,
95                _ => break,
96            };
97            self.advance();
98            let right = self.parse_additive()?;
99            left = Expr::BinOp { left: Box::new(left), op, right: Box::new(right) };
100        }
101        Ok(left)
102    }
103
104    fn parse_additive(&mut self) -> Result<Expr, ParseError> {
105        let mut left = self.parse_concat()?;
106        loop {
107            let op = match self.peek() {
108                Token::Plus => BinOpKind::Add,
109                Token::Minus => BinOpKind::Sub,
110                _ => break,
111            };
112            self.advance();
113            let right = self.parse_concat()?;
114            left = Expr::BinOp { left: Box::new(left), op, right: Box::new(right) };
115        }
116        Ok(left)
117    }
118
119    fn parse_concat(&mut self) -> Result<Expr, ParseError> {
120        let mut left = self.parse_multiplicative()?;
121        while matches!(self.peek(), Token::Ampersand) {
122            self.advance();
123            let right = self.parse_multiplicative()?;
124            left = Expr::BinOp { left: Box::new(left), op: BinOpKind::Concat, right: Box::new(right) };
125        }
126        Ok(left)
127    }
128
129    fn parse_multiplicative(&mut self) -> Result<Expr, ParseError> {
130        let mut left = self.parse_power()?;
131        loop {
132            let op = match self.peek() {
133                Token::Star => BinOpKind::Mul,
134                Token::Slash => BinOpKind::Div,
135                _ => break,
136            };
137            self.advance();
138            let right = self.parse_power()?;
139            left = Expr::BinOp { left: Box::new(left), op, right: Box::new(right) };
140        }
141        Ok(left)
142    }
143
144    fn parse_power(&mut self) -> Result<Expr, ParseError> {
145        let base = self.parse_unary()?;
146        if matches!(self.peek(), Token::Caret) {
147            self.advance();
148            let exp = self.parse_power()?; // right-associative
149            Ok(Expr::BinOp { left: Box::new(base), op: BinOpKind::Pow, right: Box::new(exp) })
150        } else {
151            Ok(base)
152        }
153    }
154
155    fn parse_unary(&mut self) -> Result<Expr, ParseError> {
156        match self.peek() {
157            Token::Minus => {
158                self.advance();
159                let expr = self.parse_postfix()?;
160                Ok(Expr::UnaryOp { op: UnaryOpKind::Neg, expr: Box::new(expr) })
161            }
162            Token::Plus => {
163                self.advance();
164                let expr = self.parse_postfix()?;
165                Ok(Expr::UnaryOp { op: UnaryOpKind::Plus, expr: Box::new(expr) })
166            }
167            _ => self.parse_postfix(),
168        }
169    }
170
171    fn parse_postfix(&mut self) -> Result<Expr, ParseError> {
172        let mut expr = self.parse_primary()?;
173        // Postfix percent
174        if matches!(self.peek(), Token::Percent) {
175            self.advance();
176            expr = Expr::Percent(Box::new(expr));
177        }
178        Ok(expr)
179    }
180
181    fn parse_primary(&mut self) -> Result<Expr, ParseError> {
182        match self.peek().clone() {
183            Token::Number(n) => { self.advance(); Ok(Expr::Number(n)) }
184            Token::StringLit(s) => { self.advance(); Ok(Expr::Text(s)) }
185            Token::Bool(b) => { self.advance(); Ok(Expr::Bool(b)) }
186            Token::Error(e) => { self.advance(); Ok(Expr::Error(e)) }
187
188            Token::CellRef(r) => {
189                self.advance();
190                let addr = parse_cell_ref(&r)
191                    .ok_or_else(|| ParseError::InvalidCellRef(r.clone()))?;
192                Ok(Expr::CellRef(addr))
193            }
194
195            Token::RangeRef(a, b) => {
196                self.advance();
197                let start = parse_cell_ref(&a)
198                    .ok_or_else(|| ParseError::InvalidCellRef(a.clone()))?;
199                let end = parse_cell_ref(&b)
200                    .ok_or_else(|| ParseError::InvalidCellRef(b.clone()))?;
201                Ok(Expr::RangeRef(start, end))
202            }
203
204            Token::Ident(name) => {
205                self.advance();
206                if matches!(self.peek(), Token::LParen) {
207                    // Function call
208                    self.advance(); // consume '('
209                    let args = self.parse_arg_list()?;
210                    self.expect(&Token::RParen)?;
211                    Ok(Expr::Call { name, args })
212                } else {
213                    Ok(Expr::NamedRange(name))
214                }
215            }
216
217            Token::LParen => {
218                self.advance();
219                let expr = self.parse_expr()?;
220                self.expect(&Token::RParen)?;
221                Ok(expr)
222            }
223
224            Token::LBrace => {
225                self.advance();
226                let array = self.parse_array()?;
227                self.expect(&Token::RBrace)?;
228                Ok(Expr::Array(array))
229            }
230
231            Token::EOF => Err(ParseError::UnexpectedEOF),
232            tok => Err(ParseError::Unexpected(tok.clone())),
233        }
234    }
235
236    fn parse_arg_list(&mut self) -> Result<Vec<Expr>, ParseError> {
237        let mut args = vec![];
238        if matches!(self.peek(), Token::RParen) { return Ok(args); }
239        args.push(self.parse_expr()?);
240        while matches!(self.peek(), Token::Comma | Token::Semicolon) {
241            self.advance();
242            if matches!(self.peek(), Token::RParen) { break; } // trailing comma
243            args.push(self.parse_expr()?);
244        }
245        Ok(args)
246    }
247
248    fn parse_array(&mut self) -> Result<Vec<Vec<Expr>>, ParseError> {
249        let mut rows = vec![];
250        let mut row = vec![];
251        loop {
252            match self.peek() {
253                Token::RBrace => break,
254                Token::Semicolon => { self.advance(); rows.push(row); row = vec![]; }
255                Token::Comma => { self.advance(); }
256                _ => row.push(self.parse_expr()?),
257            }
258        }
259        if !row.is_empty() { rows.push(row); }
260        Ok(rows)
261    }
262}
263
264/// Parse "A1", "$B$2", "Sheet1!C3" into a CellAddress
265pub fn parse_cell_ref(s: &str) -> Option<CellAddress> {
266    // Handle sheet refs: "Sheet1!A1"
267    let (sheet, local) = if let Some(pos) = s.find('!') {
268        (Some(s[..pos].to_string()), &s[pos+1..])
269    } else {
270        (None, s)
271    };
272
273    let local = local.trim_matches('$');
274    let col_end = local.chars().take_while(|c| c.is_ascii_alphabetic()).count();
275    if col_end == 0 { return None; }
276
277    let col_str = &local[..col_end];
278    let row_str = local[col_end..].trim_matches('$');
279
280    let col = col_str.chars().fold(0u32, |acc, c| {
281        acc * 26 + (c.to_ascii_uppercase() as u32 - 'A' as u32 + 1)
282    }).checked_sub(1)?;
283    let row: u32 = row_str.parse::<u32>().ok()?.checked_sub(1)?;
284
285    Some(CellAddress { row, col, sheet })
286}
287
288/// Parse a formula string into an Expr AST
289pub fn parse_formula(formula: &str) -> Result<Expr, String> {
290    use crate::lexer::Lexer;
291    let mut lex = Lexer::new(formula);
292    let tokens = lex.tokenize().map_err(|e| e.to_string())?;
293    let mut parser = Parser::new(tokens);
294    parser.parse().map_err(|e| e.to_string())
295}
296
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `src`, panicking with the reported message if it is rejected.
    fn parsed(src: &str) -> Expr {
        parse_formula(src).unwrap()
    }

    /// A bare number parses to a numeric literal.
    #[test]
    fn test_parse_number() {
        assert!(matches!(parsed("=42"), Expr::Number(n) if n == 42.0));
    }

    /// A function applied to a range parses to a call node.
    #[test]
    fn test_parse_sum() {
        assert!(matches!(parsed("=SUM(A1:B10)"), Expr::Call { .. }));
    }

    /// A call with a comparison and string arguments parses to a call node.
    #[test]
    fn test_parse_if() {
        assert!(matches!(parsed("=IF(A1>0, \"pos\", \"neg\")"), Expr::Call { .. }));
    }

    /// `*` binds tighter than `+`, so the addition ends up at the root.
    #[test]
    fn test_parse_binop() {
        assert!(matches!(parsed("=A1+B2*3"), Expr::BinOp { op: BinOpKind::Add, .. }));
    }

    /// A leading minus produces a negation node.
    #[test]
    fn test_parse_unary_neg() {
        assert!(matches!(parsed("=-A1"), Expr::UnaryOp { op: UnaryOpKind::Neg, .. }));
    }
}