/// A source location: absolute byte `offset` into the input plus the
/// 1-based `line` and `column` it corresponds to.
///
/// All fields are plain integers, so the type is `Copy`; `Eq`/`Hash`
/// additionally let spans be compared exactly and used as map/set keys.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct Span {
    /// Byte offset from the start of the input.
    pub offset: usize,
    /// 1-based line number (lines delimited by '\n').
    pub line: usize,
    /// 1-based column on that line.
    pub column: usize,
}
7
/// A `Token` paired with the `Span` of its first character in the input.
#[derive(Debug, Clone, PartialEq)]
pub struct SpannedToken {
    /// The lexed token.
    pub token: Token,
    /// Location of the token's first byte in the original input.
    pub span: Span,
}
13
/// All tokens produced by [`lex`].
#[derive(Debug, Clone, PartialEq)]
pub enum Token {
    /// The `Feature` keyword.
    Feature,
    /// The `Variable` keyword.
    Variable,

    /// The `Boolean` type keyword.
    BooleanType,
    /// The `Number` type keyword.
    NumberType,
    /// The `String` type keyword.
    StringType,

    /// `true` or `false`.
    BoolLit(bool),
    /// A numeric literal; always stored as `f64`.
    NumberLit(f64),
    /// A double-quoted string literal, with escape sequences already resolved.
    StringLit(String),

    /// `{`
    LBrace,
    /// `}`
    RBrace,
    /// `=`
    Equals,
    /// `:`
    Colon,

    /// Any other identifier-shaped word (`[A-Za-z_][A-Za-z0-9_]*`).
    Ident(String),
}
39
40#[derive(Debug, Clone, PartialEq)]
41pub struct LexError {
42 pub message: String,
43 pub span: Span,
44}
45
46fn compute_span(full_input: &str, offset: usize) -> Span {
47 let consumed = &full_input[..offset];
48 let line = consumed.chars().filter(|&c| c == '\n').count() + 1;
49 let column = match consumed.rfind('\n') {
50 Some(pos) => offset - pos,
51 None => offset + 1,
52 };
53 Span {
54 offset,
55 line,
56 column,
57 }
58}
59
/// Returns the first index at or after `pos` whose byte is not ASCII
/// whitespace (space, tab, newline, carriage return).
fn skip_whitespace(input: &str, pos: usize) -> usize {
    let run = input
        .as_bytes()
        .iter()
        .skip(pos)
        .take_while(|&&b| matches!(b, b' ' | b'\t' | b'\n' | b'\r'))
        .count();
    pos + run
}
70
/// Scans an identifier/keyword run (`[A-Za-z0-9_]*`) beginning at `pos`.
/// Returns the end index and the matched slice (empty if no word chars).
fn lex_word(input: &str, pos: usize) -> (usize, &str) {
    let len = input.as_bytes()[pos..]
        .iter()
        .take_while(|&&b| b.is_ascii_alphanumeric() || b == b'_')
        .count();
    let end = pos + len;
    (end, &input[pos..end])
}
80
/// Lexes a number at `pos`: an optional leading `-`, one or more digits,
/// then optionally a `.` followed by one or more digits.
/// Returns the end index and the parsed `f64`, or an error message.
fn lex_number_token(input: &str, pos: usize) -> Result<(usize, f64), String> {
    let bytes = input.as_bytes();
    let mut cursor = pos;

    // Optional sign.
    if bytes.get(cursor) == Some(&b'-') {
        cursor += 1;
    }

    // Integer part: at least one digit required.
    let int_len = bytes[cursor..]
        .iter()
        .take_while(|b| b.is_ascii_digit())
        .count();
    if int_len == 0 {
        return Err("expected digit".to_string());
    }
    cursor += int_len;

    // Optional fractional part: '.' must be followed by at least one digit.
    if bytes.get(cursor) == Some(&b'.') {
        cursor += 1;
        let frac_len = bytes[cursor..]
            .iter()
            .take_while(|b| b.is_ascii_digit())
            .count();
        if frac_len == 0 {
            return Err("expected digit after decimal point".to_string());
        }
        cursor += frac_len;
    }

    input[pos..cursor]
        .parse::<f64>()
        .map(|n| (cursor, n))
        .map_err(|e| e.to_string())
}
112
/// Lexes a double-quoted string literal; `pos` must point at the opening `"`.
/// Returns the byte offset just past the closing quote together with the
/// unescaped contents.
///
/// Recognized escapes: `\n`, `\t`, `\\`, `\"`. Any other escape is kept
/// verbatim (backslash plus the following character). Returns
/// "unterminated string" if the input ends before a closing quote.
fn lex_string_token(input: &str, pos: usize) -> Result<(usize, String), String> {
    // Skip the opening quote.
    let mut i = pos + 1;
    let mut result = String::new();
    let bytes = input.as_bytes();

    loop {
        if i >= bytes.len() {
            return Err("unterminated string".to_string());
        }
        match bytes[i] {
            b'"' => {
                // Closing quote: consume it and finish.
                return Ok((i + 1, result));
            }
            b'\\' => {
                i += 1;
                if i >= bytes.len() {
                    return Err("unterminated string".to_string());
                }
                match bytes[i] {
                    b'n' => {
                        result.push('\n');
                        i += 1;
                    }
                    b't' => {
                        result.push('\t');
                        i += 1;
                    }
                    b'\\' => {
                        result.push('\\');
                        i += 1;
                    }
                    b'"' => {
                        result.push('"');
                        i += 1;
                    }
                    _ => {
                        // Unknown escape: keep it verbatim. Decode the full
                        // character — it may be multi-byte UTF-8 — so `i`
                        // stays on a char boundary. The previous byte-wise
                        // `c as char` + `i += 1` mojibaked the character and
                        // could land mid-sequence, panicking on the next
                        // `input[i..]` slice.
                        let ch = input[i..].chars().next().unwrap();
                        result.push('\\');
                        result.push(ch);
                        i += ch.len_utf8();
                    }
                }
            }
            _ => {
                // Ordinary character: copy it whole (UTF-8 aware).
                let ch = input[i..].chars().next().unwrap();
                result.push(ch);
                i += ch.len_utf8();
            }
        }
    }
}
153
154pub fn lex(input: &str) -> Result<Vec<SpannedToken>, LexError> {
155 let mut tokens = Vec::new();
156 let mut pos = 0;
157
158 loop {
159 pos = skip_whitespace(input, pos);
160 if pos >= input.len() {
161 break;
162 }
163
164 let span = compute_span(input, pos);
165 let byte = input.as_bytes()[pos];
166
167 match byte {
168 b'{' => {
169 tokens.push(SpannedToken {
170 token: Token::LBrace,
171 span,
172 });
173 pos += 1;
174 }
175 b'}' => {
176 tokens.push(SpannedToken {
177 token: Token::RBrace,
178 span,
179 });
180 pos += 1;
181 }
182 b'=' => {
183 tokens.push(SpannedToken {
184 token: Token::Equals,
185 span,
186 });
187 pos += 1;
188 }
189 b':' => {
190 tokens.push(SpannedToken {
191 token: Token::Colon,
192 span,
193 });
194 pos += 1;
195 }
196 b'"' => match lex_string_token(input, pos) {
197 Ok((new_pos, s)) => {
198 tokens.push(SpannedToken {
199 token: Token::StringLit(s),
200 span,
201 });
202 pos = new_pos;
203 }
204 Err(msg) => {
205 return Err(LexError { message: msg, span });
206 }
207 },
208 b'0'..=b'9' => match lex_number_token(input, pos) {
209 Ok((new_pos, n)) => {
210 tokens.push(SpannedToken {
211 token: Token::NumberLit(n),
212 span,
213 });
214 pos = new_pos;
215 }
216 Err(msg) => {
217 return Err(LexError { message: msg, span });
218 }
219 },
220 c if c.is_ascii_alphabetic() || c == b'_' => {
221 let (new_pos, word) = lex_word(input, pos);
222 let token = match word {
223 "Feature" => Token::Feature,
224 "Variable" => Token::Variable,
225 "Boolean" => Token::BooleanType,
226 "Number" => Token::NumberType,
227 "String" => Token::StringType,
228 "true" => Token::BoolLit(true),
229 "false" => Token::BoolLit(false),
230 _ => Token::Ident(word.to_string()),
231 };
232 tokens.push(SpannedToken { token, span });
233 pos = new_pos;
234 }
235 _ => {
236 return Err(LexError {
237 message: format!("unexpected character: {:?}", byte as char),
238 span,
239 });
240 }
241 }
242 }
243
244 Ok(tokens)
245}
246
#[cfg(test)]
mod tests {
    use super::*;

    // Single keyword lexes to one token anchored at line 1, column 1.
    #[test]
    fn lex_feature_keyword() {
        let tokens = lex("Feature").unwrap();
        assert_eq!(tokens.len(), 1);
        assert_eq!(tokens[0].token, Token::Feature);
        assert_eq!(
            tokens[0].span,
            Span {
                offset: 0,
                line: 1,
                column: 1
            }
        );
    }

    #[test]
    fn lex_variable_keyword() {
        let tokens = lex("Variable").unwrap();
        assert_eq!(tokens.len(), 1);
        assert_eq!(tokens[0].token, Token::Variable);
    }

    // The three type-name keywords are distinct tokens.
    #[test]
    fn lex_type_keywords() {
        let tokens = lex("Boolean Number String").unwrap();
        assert_eq!(tokens.len(), 3);
        assert_eq!(tokens[0].token, Token::BooleanType);
        assert_eq!(tokens[1].token, Token::NumberType);
        assert_eq!(tokens[2].token, Token::StringType);
    }

    #[test]
    fn lex_bool_literals() {
        let tokens = lex("true false").unwrap();
        assert_eq!(tokens.len(), 2);
        assert_eq!(tokens[0].token, Token::BoolLit(true));
        assert_eq!(tokens[1].token, Token::BoolLit(false));
    }

    // Integers and decimals both lex to NumberLit(f64).
    #[test]
    fn lex_number_literals() {
        let tokens = lex("42 3.14").unwrap();
        assert_eq!(tokens.len(), 2);
        assert_eq!(tokens[0].token, Token::NumberLit(42.0));
        assert_eq!(tokens[1].token, Token::NumberLit(3.14));
    }

    #[test]
    fn lex_string_literal() {
        let tokens = lex(r#""hello""#).unwrap();
        assert_eq!(tokens.len(), 1);
        assert_eq!(tokens[0].token, Token::StringLit("hello".to_string()));
    }

    // Escape sequences are resolved during lexing (source `\n` -> newline).
    #[test]
    fn lex_string_with_escapes() {
        let tokens = lex(r#""hello\nworld""#).unwrap();
        assert_eq!(tokens.len(), 1);
        assert_eq!(
            tokens[0].token,
            Token::StringLit("hello\nworld".to_string())
        );
    }

    // End-to-end: a realistic multi-line block produces the expected
    // token stream (6 + 7 + 7 + 7 + 1 = 28 tokens).
    #[test]
    fn lex_complete_feature_block() {
        let input = r#"1: Feature Checkout = {
    1: Variable enabled Boolean = true
    2: Variable max_items Number = 50
    3: Variable header_text String = "Complete your purchase"
}"#;
        let tokens = lex(input).unwrap();
        assert_eq!(tokens.len(), 28);
        assert_eq!(tokens[0].token, Token::NumberLit(1.0));
        assert_eq!(tokens[1].token, Token::Colon);
        assert_eq!(tokens[2].token, Token::Feature);
        assert_eq!(tokens[3].token, Token::Ident("Checkout".to_string()));
        assert_eq!(tokens[4].token, Token::Equals);
        assert_eq!(tokens[5].token, Token::LBrace);
        assert_eq!(tokens[6].token, Token::NumberLit(1.0));
        assert_eq!(tokens[7].token, Token::Colon);
        assert_eq!(tokens[8].token, Token::Variable);
        assert_eq!(tokens[9].token, Token::Ident("enabled".to_string()));
        assert_eq!(tokens[10].token, Token::BooleanType);
        assert_eq!(tokens[11].token, Token::Equals);
        assert_eq!(tokens[12].token, Token::BoolLit(true));
        assert_eq!(tokens[27].token, Token::RBrace);
    }

    // Missing closing quote surfaces as a LexError, not a panic.
    #[test]
    fn lex_error_unterminated_string() {
        let result = lex(r#""hello"#);
        assert!(result.is_err());
        let err = result.unwrap_err();
        assert_eq!(err.message, "unterminated string");
    }

    #[test]
    fn lex_error_invalid_character() {
        let result = lex("@");
        assert!(result.is_err());
        let err = result.unwrap_err();
        assert!(err.message.contains("unexpected character"));
    }

    // Spans track line/column across newlines: "Checkout" starts at
    // byte 10, which is line 2, column 3 (after two leading spaces).
    #[test]
    fn lex_span_info_multiline() {
        let input = "Feature\n  Checkout";
        let tokens = lex(input).unwrap();
        assert_eq!(
            tokens[0].span,
            Span {
                offset: 0,
                line: 1,
                column: 1
            }
        );
        assert_eq!(
            tokens[1].span,
            Span {
                offset: 10,
                line: 2,
                column: 3
            }
        );
    }
}