1use crate::model::CellError;
2
/// A lexical token produced by [`tokenize`] from a formula string.
#[derive(Debug, Clone, PartialEq)]
pub enum Token {
    /// Numeric literal, e.g. `42` or `3.15`.
    Number(f64),
    /// Double-quoted string literal, with the surrounding quotes stripped.
    StringLit(String),
    /// Boolean literal written as the bare word `TRUE` or `FALSE`.
    Bool(bool),
    /// Cell reference such as `A1`, `AA10`, `$A1`, or `$A$1`.
    CellRef {
        /// Column letters exactly as written, e.g. `"A"` or `"AA"`.
        col: String,
        /// Row digits exactly as written, e.g. `"1"` or `"10"`.
        row: String,
        /// `true` when the column is `$`-anchored (e.g. `$A1`).
        abs_col: bool,
        /// `true` when the row is `$`-anchored (e.g. `A$1`).
        abs_row: bool,
    },
    /// Bare uppercase identifier, e.g. a function name like `SUM`.
    Ident(String),
    /// `+`
    Plus,
    /// `-`
    Minus,
    /// `*`
    Star,
    /// `/`
    Slash,
    /// `>`
    Gt,
    /// `>=`
    Gte,
    /// `<`
    Lt,
    /// `<=`
    Lte,
    /// `=`
    Eq,
    /// `<>` (not-equal)
    Neq,
    /// `(`
    LParen,
    /// `)`
    RParen,
    /// `,`
    Comma,
    /// `:` (range separator)
    Colon,
}
30
31pub fn tokenize(input: &str) -> Result<Vec<Token>, CellError> {
32 let mut tokens = Vec::new();
33 let chars: Vec<char> = input.chars().collect();
34 let mut i = 0;
35
36 while i < chars.len() {
37 match chars[i] {
38 ' ' | '\t' => {
39 i += 1;
40 }
41 '+' => {
42 tokens.push(Token::Plus);
43 i += 1;
44 }
45 '-' => {
46 tokens.push(Token::Minus);
47 i += 1;
48 }
49 '*' => {
50 tokens.push(Token::Star);
51 i += 1;
52 }
53 '/' => {
54 tokens.push(Token::Slash);
55 i += 1;
56 }
57 '(' => {
58 tokens.push(Token::LParen);
59 i += 1;
60 }
61 ')' => {
62 tokens.push(Token::RParen);
63 i += 1;
64 }
65 ',' => {
66 tokens.push(Token::Comma);
67 i += 1;
68 }
69 ':' => {
70 tokens.push(Token::Colon);
71 i += 1;
72 }
73 '>' => {
74 if i + 1 < chars.len() && chars[i + 1] == '=' {
75 tokens.push(Token::Gte);
76 i += 2;
77 } else {
78 tokens.push(Token::Gt);
79 i += 1;
80 }
81 }
82 '<' => {
83 if i + 1 < chars.len() && chars[i + 1] == '=' {
84 tokens.push(Token::Lte);
85 i += 2;
86 } else if i + 1 < chars.len() && chars[i + 1] == '>' {
87 tokens.push(Token::Neq);
88 i += 2;
89 } else {
90 tokens.push(Token::Lt);
91 i += 1;
92 }
93 }
94 '=' => {
95 tokens.push(Token::Eq);
96 i += 1;
97 }
98 '"' => {
99 i += 1;
100 let start = i;
101 while i < chars.len() && chars[i] != '"' {
102 i += 1;
103 }
104 if i >= chars.len() {
105 return Err(CellError::Parse);
106 }
107 let s: String = chars[start..i].iter().collect();
108 tokens.push(Token::StringLit(s));
109 i += 1; }
111 c if c == '$' || c.is_ascii_uppercase() => {
112 let mut abs_col = false;
113 let mut j = i;
114
115 if chars[j] == '$' {
116 abs_col = true;
117 j += 1;
118 }
119
120 let col_start = j;
121 while j < chars.len() && chars[j].is_ascii_uppercase() {
122 j += 1;
123 }
124 let col: String = chars[col_start..j].iter().collect();
125
126 if col.is_empty() {
127 return Err(CellError::Parse);
128 }
129
130 let mut abs_row = false;
131 if j < chars.len() && chars[j] == '$' {
132 abs_row = true;
133 j += 1;
134 }
135
136 let row_start = j;
137 while j < chars.len() && chars[j].is_ascii_digit() {
138 j += 1;
139 }
140 let row: String = chars[row_start..j].iter().collect();
141
142 if !row.is_empty() {
143 tokens.push(Token::CellRef {
144 col,
145 row,
146 abs_col,
147 abs_row,
148 });
149 } else if !abs_col && !abs_row {
150 if col == "TRUE" {
151 tokens.push(Token::Bool(true));
152 } else if col == "FALSE" {
153 tokens.push(Token::Bool(false));
154 } else {
155 tokens.push(Token::Ident(col));
156 }
157 } else {
158 return Err(CellError::Parse);
159 }
160 i = j;
161 }
162 c if c.is_ascii_digit() => {
163 let start = i;
164 while i < chars.len() && (chars[i].is_ascii_digit() || chars[i] == '.') {
165 i += 1;
166 }
167 let s: String = chars[start..i].iter().collect();
168 let n: f64 = s.parse().map_err(|_| CellError::Parse)?;
169 tokens.push(Token::Number(n));
170 }
171 _ => return Err(CellError::Parse),
172 }
173 }
174
175 Ok(tokens)
176}
177
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a relative (non-anchored) cell-reference token.
    fn rel(col: &str, row: &str) -> Token {
        Token::CellRef {
            col: col.to_string(),
            row: row.to_string(),
            abs_col: false,
            abs_row: false,
        }
    }

    #[test]
    fn tokenize_number() {
        assert_eq!(tokenize("42").unwrap(), vec![Token::Number(42.0)]);
    }

    #[test]
    fn tokenize_float() {
        assert_eq!(tokenize("3.15").unwrap(), vec![Token::Number(3.15)]);
    }

    #[test]
    fn tokenize_string() {
        let got = tokenize("\"hello\"").unwrap();
        assert_eq!(got, vec![Token::StringLit(String::from("hello"))]);
    }

    #[test]
    fn tokenize_cell_ref() {
        assert_eq!(tokenize("A1").unwrap(), vec![rel("A", "1")]);
    }

    #[test]
    fn tokenize_absolute_cell_ref() {
        let expected = Token::CellRef {
            col: "A".to_string(),
            row: "1".to_string(),
            abs_col: true,
            abs_row: true,
        };
        assert_eq!(tokenize("$A$1").unwrap(), vec![expected]);
    }

    #[test]
    fn tokenize_mixed_ref() {
        let expected = Token::CellRef {
            col: "A".to_string(),
            row: "1".to_string(),
            abs_col: true,
            abs_row: false,
        };
        assert_eq!(tokenize("$A1").unwrap(), vec![expected]);
    }

    #[test]
    fn tokenize_operators() {
        let got = tokenize("+-*/").unwrap();
        assert_eq!(
            got,
            vec![Token::Plus, Token::Minus, Token::Star, Token::Slash]
        );
    }

    #[test]
    fn tokenize_comparison_operators() {
        let got = tokenize(">>=<<=<>").unwrap();
        assert_eq!(
            got,
            vec![Token::Gt, Token::Gte, Token::Lt, Token::Lte, Token::Neq]
        );
    }

    #[test]
    fn tokenize_parens_and_comma() {
        let got = tokenize("(,)").unwrap();
        assert_eq!(got, vec![Token::LParen, Token::Comma, Token::RParen]);
    }

    #[test]
    fn tokenize_colon() {
        assert_eq!(tokenize(":").unwrap(), vec![Token::Colon]);
    }

    #[test]
    fn tokenize_function_name() {
        let got = tokenize("SUM(").unwrap();
        assert_eq!(got, vec![Token::Ident("SUM".to_string()), Token::LParen]);
    }

    #[test]
    fn tokenize_full_formula() {
        let got = tokenize("SUM(A1:A3)+1").unwrap();
        let expected = vec![
            Token::Ident("SUM".to_string()),
            Token::LParen,
            rel("A", "1"),
            Token::Colon,
            rel("A", "3"),
            Token::RParen,
            Token::Plus,
            Token::Number(1.0),
        ];
        assert_eq!(got, expected);
    }

    #[test]
    fn tokenize_boolean_true() {
        assert_eq!(tokenize("TRUE").unwrap(), vec![Token::Bool(true)]);
    }

    #[test]
    fn tokenize_boolean_false() {
        assert_eq!(tokenize("FALSE").unwrap(), vec![Token::Bool(false)]);
    }

    #[test]
    fn tokenize_equals() {
        assert_eq!(tokenize("=").unwrap(), vec![Token::Eq]);
    }

    #[test]
    fn tokenize_whitespace_ignored() {
        let got = tokenize(" A1 + B1 ").unwrap();
        assert_eq!(got, vec![rel("A", "1"), Token::Plus, rel("B", "1")]);
    }

    #[test]
    fn tokenize_multi_letter_col() {
        assert_eq!(tokenize("AA10").unwrap(), vec![rel("AA", "10")]);
    }
}