/// A source location: byte offset into the input plus 1-based line and column.
///
/// All fields are `usize`, so the type is trivially `Copy`/`Eq`/`Hash`;
/// deriving them lets spans be passed by value and used as map keys.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct Span {
    /// Byte offset from the start of the input.
    pub offset: usize,
    /// 1-based line number.
    pub line: usize,
    /// 1-based column, counted in bytes from the start of the line.
    pub column: usize,
}
7
/// A `Token` together with the `Span` of the position where it starts.
#[derive(Debug, Clone, PartialEq)]
pub struct SpannedToken {
    /// The lexed token.
    pub token: Token,
    /// Where the token begins in the original input.
    pub span: Span,
}
13
/// The tokens produced by [`lex`].
#[derive(Debug, Clone, PartialEq)]
pub enum Token {
    // Keywords.
    Feature,
    Variable,

    // Built-in type names.
    BooleanType,
    NumberType,
    StringType,

    // Literal values.
    BoolLit(bool),
    NumberLit(f64),
    StringLit(String),

    // Punctuation.
    LBrace,
    RBrace,
    Equals,

    // Any other identifier (e.g. feature or variable names).
    Ident(String),
}
38
/// An error produced during lexing, carrying the location where it occurred.
#[derive(Debug, Clone, PartialEq)]
pub struct LexError {
    /// Human-readable description of the problem.
    pub message: String,
    /// Position in the input at which lexing failed.
    pub span: Span,
}
44
45fn compute_span(full_input: &str, offset: usize) -> Span {
46 let consumed = &full_input[..offset];
47 let line = consumed.chars().filter(|&c| c == '\n').count() + 1;
48 let column = match consumed.rfind('\n') {
49 Some(pos) => offset - pos,
50 None => offset + 1,
51 };
52 Span {
53 offset,
54 line,
55 column,
56 }
57}
58
/// Returns the index of the first byte at or after `pos` that is not ASCII
/// whitespace (space, tab, newline, carriage return), or `input.len()` when
/// only whitespace remains.
fn skip_whitespace(input: &str, pos: usize) -> usize {
    input.as_bytes()[pos..]
        .iter()
        .position(|&b| !matches!(b, b' ' | b'\t' | b'\n' | b'\r'))
        .map_or(input.len(), |skipped| pos + skipped)
}
67
/// Scans a word (ASCII alphanumerics and underscores) starting at `pos`.
///
/// Returns the index just past the word and the matched slice; the slice is
/// empty when the byte at `pos` does not begin a word.
fn lex_word(input: &str, pos: usize) -> (usize, &str) {
    let end = input.as_bytes()[pos..]
        .iter()
        .position(|&b| !(b.is_ascii_alphanumeric() || b == b'_'))
        .map_or(input.len(), |len| pos + len);
    (end, &input[pos..end])
}
77
/// Lexes a number starting at `pos`: an optional `-`, one or more digits,
/// and an optional fractional part (`.` followed by one or more digits).
///
/// Returns the index just past the number and its parsed `f64` value, or an
/// error message when a required digit is missing or parsing fails.
fn lex_number_token(input: &str, pos: usize) -> Result<(usize, f64), String> {
    let bytes = input.as_bytes();
    // Counts consecutive ASCII digits beginning at `start`.
    let digits_from =
        |start: usize| bytes[start..].iter().take_while(|b| b.is_ascii_digit()).count();

    let mut end = pos;

    // Optional leading minus sign.
    if bytes.get(end) == Some(&b'-') {
        end += 1;
    }

    // Integer part: at least one digit is required.
    let int_len = digits_from(end);
    if int_len == 0 {
        return Err("expected digit".to_string());
    }
    end += int_len;

    // Optional fractional part: a '.' must be followed by at least one digit.
    if bytes.get(end) == Some(&b'.') {
        end += 1;
        let frac_len = digits_from(end);
        if frac_len == 0 {
            return Err("expected digit after decimal point".to_string());
        }
        end += frac_len;
    }

    input[pos..end]
        .parse::<f64>()
        .map(|value| (end, value))
        .map_err(|e| e.to_string())
}
109
/// Lexes a double-quoted string literal; `pos` must point at the opening `"`.
///
/// Supported escapes are `\n`, `\t`, `\\`, and `\"`; any other escape is kept
/// verbatim (backslash plus the following character). Returns the index just
/// past the closing quote and the decoded contents.
///
/// # Errors
/// Returns `"unterminated string"` when the input ends before a closing quote.
fn lex_string_token(input: &str, pos: usize) -> Result<(usize, String), String> {
    let mut i = pos + 1; // skip the opening quote
    let mut result = String::new();
    let bytes = input.as_bytes();

    loop {
        if i >= bytes.len() {
            return Err("unterminated string".to_string());
        }
        match bytes[i] {
            b'"' => {
                return Ok((i + 1, result));
            }
            b'\\' => {
                i += 1;
                if i >= bytes.len() {
                    return Err("unterminated string".to_string());
                }
                match bytes[i] {
                    b'n' => {
                        result.push('\n');
                        i += 1;
                    }
                    b't' => {
                        result.push('\t');
                        i += 1;
                    }
                    b'\\' => {
                        result.push('\\');
                        i += 1;
                    }
                    b'"' => {
                        result.push('"');
                        i += 1;
                    }
                    _ => {
                        // BUG FIX: an unknown escaped character may be multi-byte
                        // UTF-8. The old code cast the first byte to `char`
                        // (producing mojibake) and advanced by one byte, so the
                        // next iteration sliced mid-character and panicked.
                        // Decode the whole character instead.
                        let ch = input[i..].chars().next().unwrap();
                        result.push('\\');
                        result.push(ch);
                        i += ch.len_utf8();
                    }
                }
            }
            _ => {
                // Ordinary character: decode the full UTF-8 scalar and advance
                // by its encoded length.
                let ch = input[i..].chars().next().unwrap();
                result.push(ch);
                i += ch.len_utf8();
            }
        }
    }
}
150
151pub fn lex(input: &str) -> Result<Vec<SpannedToken>, LexError> {
152 let mut tokens = Vec::new();
153 let mut pos = 0;
154
155 loop {
156 pos = skip_whitespace(input, pos);
157 if pos >= input.len() {
158 break;
159 }
160
161 let span = compute_span(input, pos);
162 let byte = input.as_bytes()[pos];
163
164 match byte {
165 b'{' => {
166 tokens.push(SpannedToken { token: Token::LBrace, span });
167 pos += 1;
168 }
169 b'}' => {
170 tokens.push(SpannedToken { token: Token::RBrace, span });
171 pos += 1;
172 }
173 b'=' => {
174 tokens.push(SpannedToken { token: Token::Equals, span });
175 pos += 1;
176 }
177 b'"' => {
178 match lex_string_token(input, pos) {
179 Ok((new_pos, s)) => {
180 tokens.push(SpannedToken {
181 token: Token::StringLit(s),
182 span,
183 });
184 pos = new_pos;
185 }
186 Err(msg) => {
187 return Err(LexError { message: msg, span });
188 }
189 }
190 }
191 b'0'..=b'9' => {
192 match lex_number_token(input, pos) {
193 Ok((new_pos, n)) => {
194 tokens.push(SpannedToken {
195 token: Token::NumberLit(n),
196 span,
197 });
198 pos = new_pos;
199 }
200 Err(msg) => {
201 return Err(LexError { message: msg, span });
202 }
203 }
204 }
205 c if c.is_ascii_alphabetic() || c == b'_' => {
206 let (new_pos, word) = lex_word(input, pos);
207 let token = match word {
208 "Feature" => Token::Feature,
209 "Variable" => Token::Variable,
210 "Boolean" => Token::BooleanType,
211 "Number" => Token::NumberType,
212 "String" => Token::StringType,
213 "true" => Token::BoolLit(true),
214 "false" => Token::BoolLit(false),
215 _ => Token::Ident(word.to_string()),
216 };
217 tokens.push(SpannedToken { token, span });
218 pos = new_pos;
219 }
220 _ => {
221 return Err(LexError {
222 message: format!("unexpected character: {:?}", byte as char),
223 span,
224 });
225 }
226 }
227 }
228
229 Ok(tokens)
230}
231
#[cfg(test)]
mod tests {
    use super::*;

    /// Lexes `input` and returns just the token kinds, discarding spans.
    fn token_kinds(input: &str) -> Vec<Token> {
        lex(input).unwrap().into_iter().map(|t| t.token).collect()
    }

    #[test]
    fn lex_feature_keyword() {
        let toks = lex("Feature").unwrap();
        assert_eq!(toks.len(), 1);
        assert_eq!(toks[0].token, Token::Feature);
        assert_eq!(toks[0].span, Span { offset: 0, line: 1, column: 1 });
    }

    #[test]
    fn lex_variable_keyword() {
        assert_eq!(token_kinds("Variable"), vec![Token::Variable]);
    }

    #[test]
    fn lex_type_keywords() {
        assert_eq!(
            token_kinds("Boolean Number String"),
            vec![Token::BooleanType, Token::NumberType, Token::StringType]
        );
    }

    #[test]
    fn lex_bool_literals() {
        assert_eq!(
            token_kinds("true false"),
            vec![Token::BoolLit(true), Token::BoolLit(false)]
        );
    }

    #[test]
    fn lex_number_literals() {
        assert_eq!(
            token_kinds("42 3.14"),
            vec![Token::NumberLit(42.0), Token::NumberLit(3.14)]
        );
    }

    #[test]
    fn lex_string_literal() {
        assert_eq!(
            token_kinds(r#""hello""#),
            vec![Token::StringLit("hello".to_string())]
        );
    }

    #[test]
    fn lex_string_with_escapes() {
        assert_eq!(
            token_kinds(r#""hello\nworld""#),
            vec![Token::StringLit("hello\nworld".to_string())]
        );
    }

    #[test]
    fn lex_complete_feature_block() {
        let input = r#"Feature Checkout {
    Variable enabled Boolean = true
    Variable max_items Number = 50
    Variable header_text String = "Complete your purchase"
}"#;
        let toks = token_kinds(input);
        assert_eq!(toks.len(), 19);
        let head = [
            Token::Feature,
            Token::Ident("Checkout".to_string()),
            Token::LBrace,
            Token::Variable,
            Token::Ident("enabled".to_string()),
            Token::BooleanType,
            Token::Equals,
            Token::BoolLit(true),
        ];
        assert_eq!(&toks[..head.len()], &head[..]);
        assert_eq!(toks[18], Token::RBrace);
    }

    #[test]
    fn lex_error_unterminated_string() {
        let err = lex(r#""hello"#).unwrap_err();
        assert_eq!(err.message, "unterminated string");
    }

    #[test]
    fn lex_error_invalid_character() {
        let err = lex("@").unwrap_err();
        assert!(err.message.contains("unexpected character"));
    }

    #[test]
    fn lex_span_info_multiline() {
        let toks = lex("Feature\n  Checkout").unwrap();
        assert_eq!(toks[0].span, Span { offset: 0, line: 1, column: 1 });
        assert_eq!(toks[1].span, Span { offset: 10, line: 2, column: 3 });
    }
}