/// A position in the lexed source text, attached to every token and error.
#[derive(Debug, Clone, PartialEq)]
pub struct Span {
    /// Byte offset from the start of the input (0-based).
    pub offset: usize,
    /// Line number (1-based; incremented on each '\n').
    pub line: usize,
    /// Column number within the line (1-based).
    pub column: usize,
}
7
/// A token paired with the source location where it starts.
#[derive(Debug, Clone, PartialEq)]
pub struct SpannedToken {
    /// The lexed token.
    pub token: Token,
    /// Location of the token's first byte in the input.
    pub span: Span,
}
13
/// A lexical token recognized by `lex`.
#[derive(Debug, Clone, PartialEq)]
pub enum Token {
    /// The `Feature` keyword.
    Feature,
    /// The `Struct` keyword.
    Struct,

    /// The `Boolean` type keyword.
    BooleanType,
    /// The `Integer` type keyword.
    IntegerType,
    /// The `Float` type keyword.
    FloatType,
    /// The `String` type keyword.
    StringType,

    /// `true` or `false`.
    BoolLit(bool),
    /// A numeric literal; integers and decimals are both stored as f64.
    NumberLit(f64),
    /// A double-quoted string literal, with escapes already resolved.
    StringLit(String),

    /// `{`
    LBrace,
    /// `}`
    RBrace,
    /// `=`
    Equals,
    /// `:`
    Colon,

    /// Any other identifier (ASCII alphanumeric/underscore word).
    Ident(String),
}
40
/// An error produced while lexing, with the location of the offending input.
#[derive(Debug, Clone, PartialEq)]
pub struct LexError {
    /// Human-readable description of the problem.
    pub message: String,
    /// Location where the error was detected.
    pub span: Span,
}
46
47fn compute_span(full_input: &str, offset: usize) -> Span {
48 let consumed = &full_input[..offset];
49 let line = consumed.chars().filter(|&c| c == '\n').count() + 1;
50 let column = match consumed.rfind('\n') {
51 Some(pos) => offset - pos,
52 None => offset + 1,
53 };
54 Span {
55 offset,
56 line,
57 column,
58 }
59}
60
/// Advance `pos` past any run of ASCII whitespace (space, tab, LF, CR) and
/// return the index of the first non-whitespace byte (or the end of input).
fn skip_whitespace(input: &str, pos: usize) -> usize {
    let bytes = input.as_bytes();
    let mut idx = pos;
    while matches!(bytes.get(idx), Some(b' ' | b'\t' | b'\n' | b'\r')) {
        idx += 1;
    }
    idx
}
71
/// Read an identifier/keyword starting at `pos`: the longest run of ASCII
/// alphanumeric or underscore bytes. Returns the end index and the word slice
/// (empty if the byte at `pos` does not start a word).
fn lex_word(input: &str, pos: usize) -> (usize, &str) {
    let bytes = input.as_bytes();
    let mut end = pos;
    while matches!(bytes.get(end), Some(b) if b.is_ascii_alphanumeric() || *b == b'_') {
        end += 1;
    }
    (end, &input[pos..end])
}
81
/// Read a numeric literal at `pos`: optional leading `-`, one or more digits,
/// and an optional `.` fraction that must be followed by at least one digit.
///
/// Returns the end index and the parsed value, or an error message when the
/// digit requirements are not met.
fn lex_number_token(input: &str, pos: usize) -> Result<(usize, f64), String> {
    let bytes = input.as_bytes();

    // Scans digits starting at `j` and returns the index just past them.
    let scan_digits = |mut j: usize| {
        while matches!(bytes.get(j), Some(b) if b.is_ascii_digit()) {
            j += 1;
        }
        j
    };

    let mut end = pos;
    if bytes.get(end) == Some(&b'-') {
        end += 1;
    }

    let after_int = scan_digits(end);
    if after_int == end {
        return Err("expected digit".to_string());
    }
    end = after_int;

    if bytes.get(end) == Some(&b'.') {
        let after_frac = scan_digits(end + 1);
        if after_frac == end + 1 {
            return Err("expected digit after decimal point".to_string());
        }
        end = after_frac;
    }

    input[pos..end]
        .parse::<f64>()
        .map(|n| (end, n))
        .map_err(|e| e.to_string())
}
113
/// Read a double-quoted string literal; `pos` must point at the opening `"`.
///
/// Recognized escapes are `\n`, `\t`, `\\`, and `\"`. Any other escape is
/// preserved verbatim (backslash plus the following char). Returns the index
/// just past the closing quote and the unescaped contents, or an error for an
/// unterminated string.
fn lex_string_token(input: &str, pos: usize) -> Result<(usize, String), String> {
    let mut i = pos + 1; // skip opening quote
    let mut result = String::new();
    let bytes = input.as_bytes();

    loop {
        if i >= bytes.len() {
            return Err("unterminated string".to_string());
        }
        match bytes[i] {
            b'"' => {
                return Ok((i + 1, result));
            }
            b'\\' => {
                i += 1;
                if i >= bytes.len() {
                    return Err("unterminated string".to_string());
                }
                match bytes[i] {
                    b'n' => {
                        result.push('\n');
                        i += 1;
                    }
                    b't' => {
                        result.push('\t');
                        i += 1;
                    }
                    b'\\' => {
                        result.push('\\');
                        i += 1;
                    }
                    b'"' => {
                        result.push('"');
                        i += 1;
                    }
                    _ => {
                        // Unknown escape: keep the backslash and the escaped
                        // char. Decode it as a full UTF-8 char — the previous
                        // `byte as char` + `i += 1` misdecoded non-ASCII bytes
                        // and could land mid-char, making the `input[i..]`
                        // slice below panic on a non-char-boundary.
                        let ch = input[i..].chars().next().unwrap();
                        result.push('\\');
                        result.push(ch);
                        i += ch.len_utf8();
                    }
                }
            }
            _ => {
                // Ordinary char: copy it through, advancing by its UTF-8 width.
                let ch = input[i..].chars().next().unwrap();
                result.push(ch);
                i += ch.len_utf8();
            }
        }
    }
}
154
155pub fn lex(input: &str) -> Result<Vec<SpannedToken>, LexError> {
156 let mut tokens = Vec::new();
157 let mut pos = 0;
158
159 loop {
160 pos = skip_whitespace(input, pos);
161 if pos >= input.len() {
162 break;
163 }
164
165 let span = compute_span(input, pos);
166 let byte = input.as_bytes()[pos];
167
168 match byte {
169 b'{' => {
170 tokens.push(SpannedToken {
171 token: Token::LBrace,
172 span,
173 });
174 pos += 1;
175 }
176 b'}' => {
177 tokens.push(SpannedToken {
178 token: Token::RBrace,
179 span,
180 });
181 pos += 1;
182 }
183 b'=' => {
184 tokens.push(SpannedToken {
185 token: Token::Equals,
186 span,
187 });
188 pos += 1;
189 }
190 b':' => {
191 tokens.push(SpannedToken {
192 token: Token::Colon,
193 span,
194 });
195 pos += 1;
196 }
197 b'"' => match lex_string_token(input, pos) {
198 Ok((new_pos, s)) => {
199 tokens.push(SpannedToken {
200 token: Token::StringLit(s),
201 span,
202 });
203 pos = new_pos;
204 }
205 Err(msg) => {
206 return Err(LexError { message: msg, span });
207 }
208 },
209 b'0'..=b'9' => match lex_number_token(input, pos) {
210 Ok((new_pos, n)) => {
211 tokens.push(SpannedToken {
212 token: Token::NumberLit(n),
213 span,
214 });
215 pos = new_pos;
216 }
217 Err(msg) => {
218 return Err(LexError { message: msg, span });
219 }
220 },
221 c if c.is_ascii_alphabetic() || c == b'_' => {
222 let (new_pos, word) = lex_word(input, pos);
223 let token = match word {
224 "Feature" => Token::Feature,
225 "Struct" => Token::Struct,
226 "Boolean" => Token::BooleanType,
227 "Integer" => Token::IntegerType,
228 "Float" => Token::FloatType,
229 "String" => Token::StringType,
230 "true" => Token::BoolLit(true),
231 "false" => Token::BoolLit(false),
232 _ => Token::Ident(word.to_string()),
233 };
234 tokens.push(SpannedToken { token, span });
235 pos = new_pos;
236 }
237 _ => {
238 return Err(LexError {
239 message: format!("unexpected character: {:?}", byte as char),
240 span,
241 });
242 }
243 }
244 }
245
246 Ok(tokens)
247}
248
// Unit tests for the lexer: keyword/literal recognition, span bookkeeping,
// full-block token streams, and error reporting.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn lex_feature_keyword() {
        let tokens = lex("Feature").unwrap();
        assert_eq!(tokens.len(), 1);
        assert_eq!(tokens[0].token, Token::Feature);
        // First token of the input starts at offset 0, line 1, column 1.
        assert_eq!(
            tokens[0].span,
            Span {
                offset: 0,
                line: 1,
                column: 1
            }
        );
    }

    #[test]
    fn lex_type_keywords() {
        let tokens = lex("Boolean Integer Float String").unwrap();
        assert_eq!(tokens.len(), 4);
        assert_eq!(tokens[0].token, Token::BooleanType);
        assert_eq!(tokens[1].token, Token::IntegerType);
        assert_eq!(tokens[2].token, Token::FloatType);
        assert_eq!(tokens[3].token, Token::StringType);
    }

    #[test]
    fn lex_bool_literals() {
        let tokens = lex("true false").unwrap();
        assert_eq!(tokens.len(), 2);
        assert_eq!(tokens[0].token, Token::BoolLit(true));
        assert_eq!(tokens[1].token, Token::BoolLit(false));
    }

    #[test]
    fn lex_number_literals() {
        // Integers and decimals both lex to NumberLit(f64).
        let tokens = lex("42 3.14").unwrap();
        assert_eq!(tokens.len(), 2);
        assert_eq!(tokens[0].token, Token::NumberLit(42.0));
        assert_eq!(tokens[1].token, Token::NumberLit(3.14));
    }

    #[test]
    fn lex_string_literal() {
        let tokens = lex(r#""hello""#).unwrap();
        assert_eq!(tokens.len(), 1);
        assert_eq!(tokens[0].token, Token::StringLit("hello".to_string()));
    }

    #[test]
    fn lex_string_with_escapes() {
        // The raw literal contains a literal backslash-n; the lexer resolves
        // it to an actual newline in the token value.
        let tokens = lex(r#""hello\nworld""#).unwrap();
        assert_eq!(tokens.len(), 1);
        assert_eq!(
            tokens[0].token,
            Token::StringLit("hello\nworld".to_string())
        );
    }

    #[test]
    fn lex_complete_feature_block() {
        let input = r#"1: Feature Checkout = {
    1: enabled Boolean = true
    2: max_items Integer = 50
    3: header_text String = "Complete your purchase"
}"#;
        let tokens = lex(input).unwrap();
        assert_eq!(tokens.len(), 25);
        // Spot-check the header, the first field, and the closing brace.
        assert_eq!(tokens[0].token, Token::NumberLit(1.0));
        assert_eq!(tokens[1].token, Token::Colon);
        assert_eq!(tokens[2].token, Token::Feature);
        assert_eq!(tokens[3].token, Token::Ident("Checkout".to_string()));
        assert_eq!(tokens[4].token, Token::Equals);
        assert_eq!(tokens[5].token, Token::LBrace);
        assert_eq!(tokens[6].token, Token::NumberLit(1.0));
        assert_eq!(tokens[7].token, Token::Colon);
        assert_eq!(tokens[8].token, Token::Ident("enabled".to_string()));
        assert_eq!(tokens[9].token, Token::BooleanType);
        assert_eq!(tokens[10].token, Token::Equals);
        assert_eq!(tokens[11].token, Token::BoolLit(true));
        assert_eq!(tokens[24].token, Token::RBrace);
    }

    #[test]
    fn lex_error_unterminated_string() {
        let result = lex(r#""hello"#);
        assert!(result.is_err());
        let err = result.unwrap_err();
        assert_eq!(err.message, "unterminated string");
    }

    #[test]
    fn lex_error_invalid_character() {
        let result = lex("@");
        assert!(result.is_err());
        let err = result.unwrap_err();
        assert!(err.message.contains("unexpected character"));
    }

    #[test]
    fn lex_struct_keyword() {
        let tokens = lex("Struct").unwrap();
        assert_eq!(tokens.len(), 1);
        assert_eq!(tokens[0].token, Token::Struct);
    }

    #[test]
    fn lex_struct_block() {
        let input = r#"1: Struct Theme = {
    1: dark_mode Boolean = false
}"#;
        let tokens = lex(input).unwrap();
        assert_eq!(tokens[0].token, Token::NumberLit(1.0));
        assert_eq!(tokens[1].token, Token::Colon);
        assert_eq!(tokens[2].token, Token::Struct);
        assert_eq!(tokens[3].token, Token::Ident("Theme".to_string()));
        assert_eq!(tokens[4].token, Token::Equals);
        assert_eq!(tokens[5].token, Token::LBrace);
        assert_eq!(tokens[6].token, Token::NumberLit(1.0));
        assert_eq!(tokens[7].token, Token::Colon);
    }

    #[test]
    fn lex_span_info_multiline() {
        // "Checkout" starts after "Feature\n  ": byte offset 10, line 2,
        // column 3 (two leading spaces on the second line).
        let input = "Feature\n  Checkout";
        let tokens = lex(input).unwrap();
        assert_eq!(
            tokens[0].span,
            Span {
                offset: 0,
                line: 1,
                column: 1
            }
        );
        assert_eq!(
            tokens[1].span,
            Span {
                offset: 10,
                line: 2,
                column: 3
            }
        );
    }
}