1use crate::token::Token;
2
3#[derive(Debug, PartialEq)]
4pub enum Error {
5 InvalidCharacter,
7 UnknownIdentifier,
9 NonTerminatedStringLiteral,
11}
12
13pub fn lex(code: &[u8]) -> Result<Vec<Token>, Error> {
14 let mut tokens = Vec::new();
15 let mut chars = code.iter().peekable();
16
17 while let Some(ch) = chars.next() {
18 let token = match ch {
19 b',' => Token::Comma,
20 b'(' => Token::OpeningParenthesis,
21 b')' => Token::ClosingParenthesis,
22 b'=' => Token::Equal,
23 b'<' => match chars.peek() {
24 Some(b'=') => {
25 chars.next();
26 Token::LessThanOrEqual
27 }
28 Some(b'>') => {
29 chars.next();
30 Token::NotEqual
31 }
32 _ => Token::LessThan,
33 },
34 b'>' => match chars.peek() {
35 Some(b'=') => {
36 chars.next();
37 Token::GreaterThanOrEqual
38 }
39 Some(b'<') => {
40 chars.next();
41 Token::NotEqual
42 }
43 _ => Token::GreaterThan,
44 },
45 b'+' => Token::Plus,
46 b'-' => Token::Minus,
47 b'*' => Token::Multiply,
48 b'/' => Token::Divide,
49 b'0'..=b'9' => {
50 let mut value: i16 = (ch - b'0') as i16;
51 while let Some(&ch @ b'0'..=b'9') = chars.peek() {
52 value *= 10;
53 value += (ch - b'0') as i16;
54 chars.next();
55 }
56
57 Token::NumberLiteral(value)
58 }
59 b'"' => {
60 let mut value = Vec::new();
61 let mut is_string_terminated = false;
62 for &ch in &mut chars {
63 if ch == b'"' {
64 is_string_terminated = true;
65 break;
66 }
67
68 value.push(ch);
69 }
70
71 if !is_string_terminated {
72 return Err(Error::NonTerminatedStringLiteral);
73 }
74
75 Token::StringLiteral { value }
76 }
77 ch if ch.is_ascii_alphabetic() => {
78 const MAX_IDENTIFIER_LENGTH: usize = 6;
80
81 let mut identifier = Vec::with_capacity(MAX_IDENTIFIER_LENGTH);
82 identifier.push(ch.to_ascii_uppercase());
83
84 while let Some(&ch) = chars.peek() {
85 if !ch.is_ascii_alphanumeric() {
86 break;
87 }
88
89 identifier.push(ch.to_ascii_uppercase());
90 chars.next();
91 }
92
93 debug_assert_eq!(identifier, identifier.to_ascii_uppercase());
94
95 if identifier.len() == 1 {
97 Token::Variable {
98 identifier: identifier[0],
99 }
100 } else {
101 match identifier.as_slice() {
102 b"PRINT" => Token::Print,
103 b"IF" => Token::If,
104 b"THEN" => Token::Then,
105 b"GOTO" => Token::Goto,
106 b"INPUT" => Token::Input,
107 b"LET" => Token::Let,
108 b"GOSUB" => Token::GoSub,
109 b"RETURN" => Token::Return,
110 b"CLEAR" => Token::Clear,
111 b"LIST" => Token::List,
112 b"RUN" => Token::Run,
113 b"END" => Token::End,
114 _ => return Err(Error::UnknownIdentifier),
115 }
116 }
117 }
118 ch if ch.is_ascii_whitespace() => continue,
119 _ => return Err(Error::InvalidCharacter),
120 };
121
122 tokens.push(token);
123 }
124
125 Ok(tokens)
126}
127
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that lexing `code` succeeds and yields exactly `expected`.
    fn assert_tokens(code: &[u8], expected: Vec<Token>) {
        assert_eq!(Ok(expected), lex(code));
    }

    /// Asserts that lexing `code` fails with `expected`.
    fn assert_error(code: &[u8], expected: Error) {
        assert_eq!(Err(expected), lex(code));
    }

    /// Tokens for `IF A < B THEN PRINT Z`, shared by the variable tests.
    fn if_a_less_than_b_then_print_z() -> Vec<Token> {
        vec![
            Token::If,
            Token::Variable { identifier: b'A' },
            Token::LessThan,
            Token::Variable { identifier: b'B' },
            Token::Then,
            Token::Print,
            Token::Variable { identifier: b'Z' },
        ]
    }

    #[test]
    fn lex_hello_world_returns_tokens() {
        assert_tokens(
            b"PRINT \"Hello, World!\"",
            vec![
                Token::Print,
                Token::StringLiteral {
                    value: b"Hello, World!".to_vec(),
                },
            ],
        );
    }

    #[test]
    fn lex_expression_returns_tokens() {
        assert_tokens(
            b"1 + 2 * 3 / 4 - 5",
            vec![
                Token::NumberLiteral(1),
                Token::Plus,
                Token::NumberLiteral(2),
                Token::Multiply,
                Token::NumberLiteral(3),
                Token::Divide,
                Token::NumberLiteral(4),
                Token::Minus,
                Token::NumberLiteral(5),
            ],
        );
    }

    #[test]
    fn lex_keywords_returns_tokens() {
        assert_tokens(
            b"PRINT IF THEN GOTO INPUT LET GOSUB RETURN CLEAR LIST RUN END",
            vec![
                Token::Print,
                Token::If,
                Token::Then,
                Token::Goto,
                Token::Input,
                Token::Let,
                Token::GoSub,
                Token::Return,
                Token::Clear,
                Token::List,
                Token::Run,
                Token::End,
            ],
        );
    }

    #[test]
    fn lex_variable_returns_token() {
        assert_tokens(b"IF A < B THEN PRINT Z", if_a_less_than_b_then_print_z());
    }

    #[test]
    fn lex_lowercase_variable_returns_uppercase_token() {
        // Lower-case variable names must come out upper-cased.
        assert_tokens(b"IF a < b THEN PRINT z", if_a_less_than_b_then_print_z());
    }

    #[test]
    fn lex_unknown_identifier_returns_error() {
        assert_error(b"PRINT HELLO", Error::UnknownIdentifier);
    }

    #[test]
    fn lex_non_terminated_string_returns_error() {
        assert_error(b"PRINT \"Hello, World!", Error::NonTerminatedStringLiteral);
    }

    #[test]
    fn lex_empty_string_literal_returns_tokens() {
        assert_tokens(
            br#"PRINT "", """#,
            vec![
                Token::Print,
                Token::StringLiteral { value: Vec::new() },
                Token::Comma,
                Token::StringLiteral { value: Vec::new() },
            ],
        );
    }

    #[test]
    fn lex_number_with_boundary_digit_returns_tokens() {
        assert_tokens(b"9999", vec![Token::NumberLiteral(9999)]);
    }
}