1use thiserror::Error;
4
/// A single lexical token of a spreadsheet formula.
#[derive(Debug, Clone, PartialEq)]
pub enum Token {
    // ----- literals and names -----
    /// Numeric literal.
    Number(f64),
    /// String literal with the surrounding quotes removed and `""` unescaped to `"`.
    StringLit(String),
    /// Boolean literal (`TRUE` / `FALSE`, matched case-insensitively).
    Bool(bool),
    /// Error literal; holds the raw text starting at `#`.
    Error(String),
    /// Identifier such as a function name.
    Ident(String),
    /// Cell reference text, optionally sheet-qualified as `Sheet!A1`.
    CellRef(String),
    /// Range written as two references joined by `:`; holds both corners.
    RangeRef(String, String),

    // ----- arithmetic and text operators -----
    /// `+`
    Plus,
    /// `-`
    Minus,
    /// `*`
    Star,
    /// `/`
    Slash,
    /// `^`
    Caret,
    /// `&`
    Ampersand,
    /// `%`
    Percent,

    // ----- comparison operators -----
    /// `=`
    Equal,
    /// `<>`
    NotEqual,
    /// `<`
    LessThan,
    /// `<=`
    LessEqual,
    /// `>`
    GreaterThan,
    /// `>=`
    GreaterEqual,

    // ----- punctuation -----
    /// `(`
    LParen,
    /// `)`
    RParen,
    /// `{`
    LBrace,
    /// `}`
    RBrace,
    /// `,`
    Comma,
    /// `;`
    Semicolon,
    /// `:`
    Colon,
    /// `$`
    Dollar,
    /// `!`
    Exclamation,

    /// End of input; always the last token produced by the lexer.
    EOF,
}
47
48#[derive(Debug, Error)]
49pub enum LexError {
50 #[error("Unexpected character '{0}' at position {1}")]
51 UnexpectedChar(char, usize),
52 #[error("Unterminated string literal")]
53 UnterminatedString,
54}
55
/// Character-level scanner that turns formula text into tokens.
pub struct Lexer {
    /// The formula text, decoded into characters so it can be indexed by position.
    input: Vec<char>,
    /// Index into `input` of the next character to read.
    pos: usize,
}
60
61impl Lexer {
62 pub fn new(input: &str) -> Self {
63 let s = input.trim();
65 let chars: Vec<char> = if s.starts_with('=') {
66 s[1..].chars().collect()
67 } else {
68 s.chars().collect()
69 };
70 Lexer { input: chars, pos: 0 }
71 }
72
73 pub fn tokenize(&mut self) -> Result<Vec<Token>, LexError> {
74 let mut tokens = vec![];
75 loop {
76 let tok = self.next_token()?;
77 let done = tok == Token::EOF;
78 tokens.push(tok);
79 if done { break; }
80 }
81 Ok(tokens)
82 }
83
84 fn peek(&self) -> Option<char> { self.input.get(self.pos).copied() }
85 fn peek2(&self) -> Option<char> { self.input.get(self.pos + 1).copied() }
86 fn advance(&mut self) -> Option<char> {
87 let c = self.input.get(self.pos).copied();
88 self.pos += 1;
89 c
90 }
91
92 fn skip_whitespace(&mut self) {
93 while matches!(self.peek(), Some(' ' | '\t' | '\n' | '\r')) {
94 self.pos += 1;
95 }
96 }
97
98 fn next_token(&mut self) -> Result<Token, LexError> {
99 self.skip_whitespace();
100
101 match self.peek() {
102 None => Ok(Token::EOF),
103 Some(c) => match c {
104 '+' => { self.advance(); Ok(Token::Plus) }
105 '-' => { self.advance(); Ok(Token::Minus) }
106 '*' => { self.advance(); Ok(Token::Star) }
107 '/' => { self.advance(); Ok(Token::Slash) }
108 '^' => { self.advance(); Ok(Token::Caret) }
109 '&' => { self.advance(); Ok(Token::Ampersand) }
110 '%' => { self.advance(); Ok(Token::Percent) }
111 '(' => { self.advance(); Ok(Token::LParen) }
112 ')' => { self.advance(); Ok(Token::RParen) }
113 '{' => { self.advance(); Ok(Token::LBrace) }
114 '}' => { self.advance(); Ok(Token::RBrace) }
115 ',' => { self.advance(); Ok(Token::Comma) }
116 ';' => { self.advance(); Ok(Token::Semicolon) }
117 '!' => { self.advance(); Ok(Token::Exclamation) }
118 '$' => { self.advance(); Ok(Token::Dollar) }
119
120 '=' => { self.advance(); Ok(Token::Equal) }
121 '<' => {
122 self.advance();
123 if self.peek() == Some('>') { self.advance(); Ok(Token::NotEqual) }
124 else if self.peek() == Some('=') { self.advance(); Ok(Token::LessEqual) }
125 else { Ok(Token::LessThan) }
126 }
127 '>' => {
128 self.advance();
129 if self.peek() == Some('=') { self.advance(); Ok(Token::GreaterEqual) }
130 else { Ok(Token::GreaterThan) }
131 }
132
133 '"' => self.lex_string(),
134 '#' => self.lex_error_val(),
135
136 '0'..='9' | '.' => self.lex_number(),
137 'A'..='Z' | 'a'..='z' | '_' => self.lex_ident_or_cellref(),
138 ':' => { self.advance(); Ok(Token::Colon) }
139
140 other => {
141 let pos = self.pos;
142 self.advance();
143 Err(LexError::UnexpectedChar(other, pos))
144 }
145 }
146 }
147 }
148
149 fn lex_string(&mut self) -> Result<Token, LexError> {
150 self.advance(); let mut s = String::new();
152 loop {
153 match self.advance() {
154 None => return Err(LexError::UnterminatedString),
155 Some('"') => {
156 if self.peek() == Some('"') {
158 self.advance();
159 s.push('"');
160 } else {
161 break;
162 }
163 }
164 Some(c) => s.push(c),
165 }
166 }
167 Ok(Token::StringLit(s))
168 }
169
170 fn lex_error_val(&mut self) -> Result<Token, LexError> {
171 let start = self.pos;
172 while matches!(self.peek(), Some(c) if !c.is_whitespace() && c != ')' && c != ',') {
173 self.advance();
174 }
175 let s: String = self.input[start..self.pos].iter().collect();
176 Ok(Token::Error(s))
177 }
178
179 fn lex_number(&mut self) -> Result<Token, LexError> {
180 let start = self.pos;
181 while matches!(self.peek(), Some('0'..='9' | '.')) {
182 self.advance();
183 }
184 if matches!(self.peek(), Some('e' | 'E')) {
186 self.advance();
187 if matches!(self.peek(), Some('+' | '-')) { self.advance(); }
188 while matches!(self.peek(), Some('0'..='9')) { self.advance(); }
189 }
190 let s: String = self.input[start..self.pos].iter().collect();
191 let n: f64 = s.parse().unwrap_or(0.0);
192 Ok(Token::Number(n))
193 }
194
195 fn lex_ident_or_cellref(&mut self) -> Result<Token, LexError> {
196 let start = self.pos;
197
198 let has_dollar_col = self.peek() == Some('$');
200 if has_dollar_col { self.advance(); }
201
202 let col_start = self.pos;
204 while matches!(self.peek(), Some('A'..='Z' | 'a'..='z')) {
205 self.advance();
206 }
207 let col_part: String = self.input[col_start..self.pos].iter().collect();
208
209 let has_dollar_row = self.peek() == Some('$');
211 if has_dollar_row { self.advance(); }
212
213 let row_start = self.pos;
215 while matches!(self.peek(), Some('0'..='9')) {
216 self.advance();
217 }
218 let row_part: String = self.input[row_start..self.pos].iter().collect();
219
220 if !col_part.is_empty() && !row_part.is_empty() {
221 let cell_ref: String = self.input[start..self.pos].iter().collect();
223
224 if self.peek() == Some(':') {
226 self.advance(); let start2 = self.pos;
230 if self.peek() == Some('$') { self.advance(); }
231 while matches!(self.peek(), Some('A'..='Z' | 'a'..='z')) { self.advance(); }
232 if self.peek() == Some('$') { self.advance(); }
233 while matches!(self.peek(), Some('0'..='9')) { self.advance(); }
234 let cell_ref2: String = self.input[start2..self.pos].iter().collect();
235 return Ok(Token::RangeRef(cell_ref, cell_ref2));
236 }
237 return Ok(Token::CellRef(cell_ref));
238 }
239
240 while matches!(self.peek(), Some('A'..='Z' | 'a'..='z' | '0'..='9' | '_' | '.')) {
242 self.advance();
243 }
244 let ident: String = self.input[start..self.pos].iter().collect();
245
246 if self.peek() == Some('!') {
248 self.advance();
249 let ref_start = self.pos;
250 if self.peek() == Some('$') { self.advance(); }
252 while matches!(self.peek(), Some('A'..='Z' | 'a'..='z')) { self.advance(); }
253 if self.peek() == Some('$') { self.advance(); }
254 while matches!(self.peek(), Some('0'..='9')) { self.advance(); }
255 let cell_part: String = self.input[ref_start..self.pos].iter().collect();
256 return Ok(Token::CellRef(format!("{}!{}", ident, cell_part)));
257 }
258
259 match ident.to_uppercase().as_str() {
261 "TRUE" => return Ok(Token::Bool(true)),
262 "FALSE" => return Ok(Token::Bool(false)),
263 _ => {}
264 }
265
266 Ok(Token::Ident(ident))
267 }
268}
269
#[cfg(test)]
mod tests {
    use super::*;

    /// Lexes `src` and returns its tokens, panicking if the lexer reports an error.
    fn lex(src: &str) -> Vec<Token> {
        Lexer::new(src).tokenize().unwrap()
    }

    /// A function name is reported as an identifier.
    #[test]
    fn test_lex_sum() {
        let wanted = Token::Ident("SUM".into());
        assert!(lex("=SUM(A1:B10)").iter().any(|tok| *tok == wanted));
    }

    /// A decimal literal is reported as a number with the same value.
    #[test]
    fn test_lex_number() {
        let wanted = Token::Number(3.14);
        assert!(lex("=3.14").iter().any(|tok| *tok == wanted));
    }

    /// A quoted string is reported without its quotes.
    #[test]
    fn test_lex_string() {
        let wanted = Token::StringLit("Hello World".into());
        assert!(lex(r#"="Hello World""#).iter().any(|tok| *tok == wanted));
    }

    /// `A1:B10` is fused into a single range token at the front of the stream.
    #[test]
    fn test_lex_range() {
        let tokens = lex("=A1:B10");
        assert!(matches!(tokens[0], Token::RangeRef(..)));
    }
}