//! cell_sheet_core/formula/token.rs
//!
//! Lexer for spreadsheet formulas: turns raw formula text into a flat
//! stream of [`Token`]s consumed by the formula parser.

1use crate::model::CellError;
2
/// A single lexical unit of a spreadsheet formula, produced by [`tokenize`].
#[derive(Debug, Clone, PartialEq)]
pub enum Token {
    /// Numeric literal, e.g. `42` or `3.15` (always lexed as `f64`).
    Number(f64),
    /// Double-quoted string literal, with the quotes stripped.
    StringLit(String),
    /// Boolean literal: the bare words `TRUE` / `FALSE`.
    Bool(bool),
    /// A cell reference such as `A1`, `$B$2`, or `$AA10`.
    CellRef {
        // Column letters as written (uppercase ASCII), e.g. "AA".
        col: String,
        // Row digits as written, e.g. "10". Kept as text; parsing to a
        // number is the parser's concern.
        row: String,
        // True when the column was `$`-prefixed (absolute column).
        abs_col: bool,
        // True when the row was `$`-prefixed (absolute row).
        abs_row: bool,
    },
    /// Bare uppercase identifier, e.g. a function name like `SUM`.
    Ident(String),
    /// `+`
    Plus,
    /// `-`
    Minus,
    /// `*`
    Star,
    /// `/`
    Slash,
    /// `>`
    Gt,
    /// `>=`
    Gte,
    /// `<`
    Lt,
    /// `<=`
    Lte,
    /// `=`
    Eq,
    /// `<>` (not-equal, spreadsheet style)
    Neq,
    /// `(`
    LParen,
    /// `)`
    RParen,
    /// `,`
    Comma,
    /// `:` (range separator, e.g. `A1:A3`)
    Colon,
}
30
31pub fn tokenize(input: &str) -> Result<Vec<Token>, CellError> {
32    let mut tokens = Vec::new();
33    let chars: Vec<char> = input.chars().collect();
34    let mut i = 0;
35
36    while i < chars.len() {
37        match chars[i] {
38            ' ' | '\t' => {
39                i += 1;
40            }
41            '+' => {
42                tokens.push(Token::Plus);
43                i += 1;
44            }
45            '-' => {
46                tokens.push(Token::Minus);
47                i += 1;
48            }
49            '*' => {
50                tokens.push(Token::Star);
51                i += 1;
52            }
53            '/' => {
54                tokens.push(Token::Slash);
55                i += 1;
56            }
57            '(' => {
58                tokens.push(Token::LParen);
59                i += 1;
60            }
61            ')' => {
62                tokens.push(Token::RParen);
63                i += 1;
64            }
65            ',' => {
66                tokens.push(Token::Comma);
67                i += 1;
68            }
69            ':' => {
70                tokens.push(Token::Colon);
71                i += 1;
72            }
73            '>' => {
74                if i + 1 < chars.len() && chars[i + 1] == '=' {
75                    tokens.push(Token::Gte);
76                    i += 2;
77                } else {
78                    tokens.push(Token::Gt);
79                    i += 1;
80                }
81            }
82            '<' => {
83                if i + 1 < chars.len() && chars[i + 1] == '=' {
84                    tokens.push(Token::Lte);
85                    i += 2;
86                } else if i + 1 < chars.len() && chars[i + 1] == '>' {
87                    tokens.push(Token::Neq);
88                    i += 2;
89                } else {
90                    tokens.push(Token::Lt);
91                    i += 1;
92                }
93            }
94            '=' => {
95                tokens.push(Token::Eq);
96                i += 1;
97            }
98            '"' => {
99                i += 1;
100                let start = i;
101                while i < chars.len() && chars[i] != '"' {
102                    i += 1;
103                }
104                if i >= chars.len() {
105                    return Err(CellError::Parse);
106                }
107                let s: String = chars[start..i].iter().collect();
108                tokens.push(Token::StringLit(s));
109                i += 1; // skip closing quote
110            }
111            c if c == '$' || c.is_ascii_uppercase() => {
112                let mut abs_col = false;
113                let mut j = i;
114
115                if chars[j] == '$' {
116                    abs_col = true;
117                    j += 1;
118                }
119
120                let col_start = j;
121                while j < chars.len() && chars[j].is_ascii_uppercase() {
122                    j += 1;
123                }
124                let col: String = chars[col_start..j].iter().collect();
125
126                if col.is_empty() {
127                    return Err(CellError::Parse);
128                }
129
130                let mut abs_row = false;
131                if j < chars.len() && chars[j] == '$' {
132                    abs_row = true;
133                    j += 1;
134                }
135
136                let row_start = j;
137                while j < chars.len() && chars[j].is_ascii_digit() {
138                    j += 1;
139                }
140                let row: String = chars[row_start..j].iter().collect();
141
142                if !row.is_empty() {
143                    tokens.push(Token::CellRef {
144                        col,
145                        row,
146                        abs_col,
147                        abs_row,
148                    });
149                } else if !abs_col && !abs_row {
150                    if col == "TRUE" {
151                        tokens.push(Token::Bool(true));
152                    } else if col == "FALSE" {
153                        tokens.push(Token::Bool(false));
154                    } else {
155                        tokens.push(Token::Ident(col));
156                    }
157                } else {
158                    return Err(CellError::Parse);
159                }
160                i = j;
161            }
162            c if c.is_ascii_digit() => {
163                let start = i;
164                while i < chars.len() && (chars[i].is_ascii_digit() || chars[i] == '.') {
165                    i += 1;
166                }
167                let s: String = chars[start..i].iter().collect();
168                let n: f64 = s.parse().map_err(|_| CellError::Parse)?;
169                tokens.push(Token::Number(n));
170            }
171            _ => return Err(CellError::Parse),
172        }
173    }
174
175    Ok(tokens)
176}
177
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a relative (non-absolute) cell-reference token.
    fn rel(col: &str, row: &str) -> Token {
        Token::CellRef {
            col: col.into(),
            row: row.into(),
            abs_col: false,
            abs_row: false,
        }
    }

    /// Builds a cell-reference token with explicit absolute flags.
    fn cell(col: &str, row: &str, abs_col: bool, abs_row: bool) -> Token {
        Token::CellRef {
            col: col.into(),
            row: row.into(),
            abs_col,
            abs_row,
        }
    }

    #[test]
    fn tokenize_number() {
        assert_eq!(tokenize("42").unwrap(), [Token::Number(42.0)]);
    }

    #[test]
    fn tokenize_float() {
        assert_eq!(tokenize("3.15").unwrap(), [Token::Number(3.15)]);
    }

    #[test]
    fn tokenize_string() {
        assert_eq!(
            tokenize("\"hello\"").unwrap(),
            [Token::StringLit("hello".into())]
        );
    }

    #[test]
    fn tokenize_cell_ref() {
        assert_eq!(tokenize("A1").unwrap(), [rel("A", "1")]);
    }

    #[test]
    fn tokenize_absolute_cell_ref() {
        assert_eq!(tokenize("$A$1").unwrap(), [cell("A", "1", true, true)]);
    }

    #[test]
    fn tokenize_mixed_ref() {
        assert_eq!(tokenize("$A1").unwrap(), [cell("A", "1", true, false)]);
    }

    #[test]
    fn tokenize_operators() {
        let expected = [Token::Plus, Token::Minus, Token::Star, Token::Slash];
        assert_eq!(tokenize("+-*/").unwrap(), expected);
    }

    #[test]
    fn tokenize_comparison_operators() {
        let expected = [Token::Gt, Token::Gte, Token::Lt, Token::Lte, Token::Neq];
        assert_eq!(tokenize(">>=<<=<>").unwrap(), expected);
    }

    #[test]
    fn tokenize_parens_and_comma() {
        assert_eq!(
            tokenize("(,)").unwrap(),
            [Token::LParen, Token::Comma, Token::RParen]
        );
    }

    #[test]
    fn tokenize_colon() {
        assert_eq!(tokenize(":").unwrap(), [Token::Colon]);
    }

    #[test]
    fn tokenize_function_name() {
        assert_eq!(
            tokenize("SUM(").unwrap(),
            [Token::Ident("SUM".into()), Token::LParen]
        );
    }

    #[test]
    fn tokenize_full_formula() {
        let expected = [
            Token::Ident("SUM".into()),
            Token::LParen,
            rel("A", "1"),
            Token::Colon,
            rel("A", "3"),
            Token::RParen,
            Token::Plus,
            Token::Number(1.0),
        ];
        assert_eq!(tokenize("SUM(A1:A3)+1").unwrap(), expected);
    }

    #[test]
    fn tokenize_boolean_true() {
        assert_eq!(tokenize("TRUE").unwrap(), [Token::Bool(true)]);
    }

    #[test]
    fn tokenize_boolean_false() {
        assert_eq!(tokenize("FALSE").unwrap(), [Token::Bool(false)]);
    }

    #[test]
    fn tokenize_equals() {
        assert_eq!(tokenize("=").unwrap(), [Token::Eq]);
    }

    #[test]
    fn tokenize_whitespace_ignored() {
        let expected = [rel("A", "1"), Token::Plus, rel("B", "1")];
        assert_eq!(tokenize(" A1 + B1 ").unwrap(), expected);
    }

    #[test]
    fn tokenize_multi_letter_col() {
        assert_eq!(tokenize("AA10").unwrap(), [rel("AA", "10")]);
    }
}