//! variable_core/lexer.rs — lexer for the Feature/Variable DSL.
/// Source location of a token: byte offset plus 1-based line/column.
///
/// All fields are plain `usize`, so the type is `Copy` and usable as a
/// hash-map key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct Span {
    /// Byte offset from the start of the input.
    pub offset: usize,
    /// 1-based line number.
    pub line: usize,
    /// 1-based column number.
    pub column: usize,
}
7
/// A token paired with the source location where it begins.
#[derive(Debug, Clone, PartialEq)]
pub struct SpannedToken {
    /// The lexed token.
    pub token: Token,
    /// Location of the token's first byte in the input.
    pub span: Span,
}
13
/// All tokens produced by the lexer.
///
/// Note: `NumberLit` holds an `f64`, which is why this enum derives
/// `PartialEq` but not `Eq`/`Hash`.
#[derive(Debug, Clone, PartialEq)]
pub enum Token {
    // Keywords
    /// The `Feature` keyword.
    Feature,
    /// The `Variable` keyword.
    Variable,

    // Type keywords
    /// The `Boolean` type keyword.
    BooleanType,
    /// The `Number` type keyword.
    NumberType,
    /// The `String` type keyword.
    StringType,

    // Literals
    /// `true` or `false`.
    BoolLit(bool),
    /// A numeric literal, e.g. `42` or `3.14`.
    NumberLit(f64),
    /// A double-quoted string literal, with escapes already resolved.
    StringLit(String),

    // Symbols
    /// `{`
    LBrace,
    /// `}`
    RBrace,
    /// `=`
    Equals,

    // Identifiers
    /// Any other word matching `[A-Za-z_][A-Za-z0-9_]*`.
    Ident(String),
}
38
39#[derive(Debug, Clone, PartialEq)]
40pub struct LexError {
41    pub message: String,
42    pub span: Span,
43}
44
45fn compute_span(full_input: &str, offset: usize) -> Span {
46    let consumed = &full_input[..offset];
47    let line = consumed.chars().filter(|&c| c == '\n').count() + 1;
48    let column = match consumed.rfind('\n') {
49        Some(pos) => offset - pos,
50        None => offset + 1,
51    };
52    Span {
53        offset,
54        line,
55        column,
56    }
57}
58
/// Advance past any run of spaces, tabs, newlines, and carriage
/// returns starting at `pos`, returning the index of the first
/// non-whitespace byte (or `input.len()` if only whitespace remains).
fn skip_whitespace(input: &str, pos: usize) -> usize {
    let is_ws = |b: u8| matches!(b, b' ' | b'\t' | b'\n' | b'\r');
    match input.as_bytes()[pos..].iter().position(|&b| !is_ws(b)) {
        Some(rel) => pos + rel,
        None => input.len(),
    }
}
67
/// Scan a keyword/identifier word (`[A-Za-z0-9_]*`) starting at `pos`.
/// Returns the index just past the word together with the word slice
/// (empty if `pos` does not start a word character).
fn lex_word(input: &str, pos: usize) -> (usize, &str) {
    let is_word_byte = |b: u8| b.is_ascii_alphanumeric() || b == b'_';
    let end = input.as_bytes()[pos..]
        .iter()
        .position(|&b| !is_word_byte(b))
        .map_or(input.len(), |rel| pos + rel);
    (end, &input[pos..end])
}
77
/// Scan a number literal starting at `pos`: an optional leading `-`,
/// one or more digits, and an optional fractional part (`.` followed by
/// one or more digits). Returns the index just past the literal and the
/// parsed `f64`, or an error message on malformed input.
fn lex_number_token(input: &str, pos: usize) -> Result<(usize, f64), String> {
    let bytes = input.as_bytes();
    // Consume a run of ASCII digits beginning at `i`; returns the index
    // of the first non-digit.
    let scan_digits = |mut i: usize| {
        while i < bytes.len() && bytes[i].is_ascii_digit() {
            i += 1;
        }
        i
    };

    let mut end = pos;
    if bytes.get(end) == Some(&b'-') {
        end += 1;
    }

    let after_int = scan_digits(end);
    if after_int == end {
        return Err("expected digit".to_string());
    }
    end = after_int;

    if bytes.get(end) == Some(&b'.') {
        let after_frac = scan_digits(end + 1);
        if after_frac == end + 1 {
            return Err("expected digit after decimal point".to_string());
        }
        end = after_frac;
    }

    input[pos..end]
        .parse::<f64>()
        .map(|n| (end, n))
        .map_err(|e| format!("{}", e))
}
109
/// Scan a double-quoted string literal whose opening `"` is at `pos`.
///
/// Resolves the escapes `\n`, `\t`, `\\`, and `\"`; any other escape is
/// kept verbatim (the backslash plus the following character). Returns
/// the index just past the closing quote and the decoded contents, or
/// an error if the input ends before the string is terminated.
fn lex_string_token(input: &str, pos: usize) -> Result<(usize, String), String> {
    let mut i = pos + 1; // skip opening "
    let mut result = String::new();
    let bytes = input.as_bytes();

    loop {
        if i >= bytes.len() {
            return Err("unterminated string".to_string());
        }
        match bytes[i] {
            b'"' => {
                return Ok((i + 1, result));
            }
            b'\\' => {
                i += 1;
                if i >= bytes.len() {
                    return Err("unterminated string".to_string());
                }
                match bytes[i] {
                    b'n' => {
                        result.push('\n');
                        i += 1;
                    }
                    b't' => {
                        result.push('\t');
                        i += 1;
                    }
                    b'\\' => {
                        result.push('\\');
                        i += 1;
                    }
                    b'"' => {
                        result.push('"');
                        i += 1;
                    }
                    _ => {
                        // Unknown escape: keep it verbatim. Read a full
                        // char rather than a single byte — the previous
                        // `bytes[i] as char` mangled multi-byte UTF-8
                        // characters and left `i` mid-character, which
                        // made the `input[i..]` slice below panic.
                        let ch = input[i..].chars().next().unwrap();
                        result.push('\\');
                        result.push(ch);
                        i += ch.len_utf8();
                    }
                }
            }
            _ => {
                // Copy one full UTF-8 character (i is always on a char
                // boundary here).
                let ch = input[i..].chars().next().unwrap();
                result.push(ch);
                i += ch.len_utf8();
            }
        }
    }
}
150
151pub fn lex(input: &str) -> Result<Vec<SpannedToken>, LexError> {
152    let mut tokens = Vec::new();
153    let mut pos = 0;
154
155    loop {
156        pos = skip_whitespace(input, pos);
157        if pos >= input.len() {
158            break;
159        }
160
161        let span = compute_span(input, pos);
162        let byte = input.as_bytes()[pos];
163
164        match byte {
165            b'{' => {
166                tokens.push(SpannedToken { token: Token::LBrace, span });
167                pos += 1;
168            }
169            b'}' => {
170                tokens.push(SpannedToken { token: Token::RBrace, span });
171                pos += 1;
172            }
173            b'=' => {
174                tokens.push(SpannedToken { token: Token::Equals, span });
175                pos += 1;
176            }
177            b'"' => {
178                match lex_string_token(input, pos) {
179                    Ok((new_pos, s)) => {
180                        tokens.push(SpannedToken {
181                            token: Token::StringLit(s),
182                            span,
183                        });
184                        pos = new_pos;
185                    }
186                    Err(msg) => {
187                        return Err(LexError { message: msg, span });
188                    }
189                }
190            }
191            b'0'..=b'9' => {
192                match lex_number_token(input, pos) {
193                    Ok((new_pos, n)) => {
194                        tokens.push(SpannedToken {
195                            token: Token::NumberLit(n),
196                            span,
197                        });
198                        pos = new_pos;
199                    }
200                    Err(msg) => {
201                        return Err(LexError { message: msg, span });
202                    }
203                }
204            }
205            c if c.is_ascii_alphabetic() || c == b'_' => {
206                let (new_pos, word) = lex_word(input, pos);
207                let token = match word {
208                    "Feature" => Token::Feature,
209                    "Variable" => Token::Variable,
210                    "Boolean" => Token::BooleanType,
211                    "Number" => Token::NumberType,
212                    "String" => Token::StringType,
213                    "true" => Token::BoolLit(true),
214                    "false" => Token::BoolLit(false),
215                    _ => Token::Ident(word.to_string()),
216                };
217                tokens.push(SpannedToken { token, span });
218                pos = new_pos;
219            }
220            _ => {
221                return Err(LexError {
222                    message: format!("unexpected character: {:?}", byte as char),
223                    span,
224                });
225            }
226        }
227    }
228
229    Ok(tokens)
230}
231
// Unit tests: each feeds a small source string to `lex` and checks the
// produced token stream (and, where relevant, the span positions).
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn lex_feature_keyword() {
        let tokens = lex("Feature").unwrap();
        assert_eq!(tokens.len(), 1);
        assert_eq!(tokens[0].token, Token::Feature);
        // The first token starts at the very beginning of the input.
        assert_eq!(tokens[0].span, Span { offset: 0, line: 1, column: 1 });
    }

    #[test]
    fn lex_variable_keyword() {
        let tokens = lex("Variable").unwrap();
        assert_eq!(tokens.len(), 1);
        assert_eq!(tokens[0].token, Token::Variable);
    }

    #[test]
    fn lex_type_keywords() {
        let tokens = lex("Boolean Number String").unwrap();
        assert_eq!(tokens.len(), 3);
        assert_eq!(tokens[0].token, Token::BooleanType);
        assert_eq!(tokens[1].token, Token::NumberType);
        assert_eq!(tokens[2].token, Token::StringType);
    }

    #[test]
    fn lex_bool_literals() {
        let tokens = lex("true false").unwrap();
        assert_eq!(tokens.len(), 2);
        assert_eq!(tokens[0].token, Token::BoolLit(true));
        assert_eq!(tokens[1].token, Token::BoolLit(false));
    }

    #[test]
    fn lex_number_literals() {
        // Integer and fractional forms both lex to NumberLit(f64).
        let tokens = lex("42 3.14").unwrap();
        assert_eq!(tokens.len(), 2);
        assert_eq!(tokens[0].token, Token::NumberLit(42.0));
        assert_eq!(tokens[1].token, Token::NumberLit(3.14));
    }

    #[test]
    fn lex_string_literal() {
        let tokens = lex(r#""hello""#).unwrap();
        assert_eq!(tokens.len(), 1);
        assert_eq!(tokens[0].token, Token::StringLit("hello".to_string()));
    }

    #[test]
    fn lex_string_with_escapes() {
        // The two-character escape `\n` in source decodes to a newline.
        let tokens = lex(r#""hello\nworld""#).unwrap();
        assert_eq!(tokens.len(), 1);
        assert_eq!(tokens[0].token, Token::StringLit("hello\nworld".to_string()));
    }

    #[test]
    fn lex_complete_feature_block() {
        let input = r#"Feature Checkout {
    Variable enabled Boolean = true
    Variable max_items Number = 50
    Variable header_text String = "Complete your purchase"
}"#;
        let tokens = lex(input).unwrap();
        // Feature Checkout { (3)
        // + 3 * (Variable name Type = value) = 3 * 5 = 15
        // + } (1) = 19
        assert_eq!(tokens.len(), 19);
        assert_eq!(tokens[0].token, Token::Feature);
        assert_eq!(tokens[1].token, Token::Ident("Checkout".to_string()));
        assert_eq!(tokens[2].token, Token::LBrace);
        assert_eq!(tokens[3].token, Token::Variable);
        assert_eq!(tokens[4].token, Token::Ident("enabled".to_string()));
        assert_eq!(tokens[5].token, Token::BooleanType);
        assert_eq!(tokens[6].token, Token::Equals);
        assert_eq!(tokens[7].token, Token::BoolLit(true));
        assert_eq!(tokens[18].token, Token::RBrace);
    }

    #[test]
    fn lex_error_unterminated_string() {
        let result = lex(r#""hello"#);
        assert!(result.is_err());
        let err = result.unwrap_err();
        assert_eq!(err.message, "unterminated string");
    }

    #[test]
    fn lex_error_invalid_character() {
        let result = lex("@");
        assert!(result.is_err());
        let err = result.unwrap_err();
        assert!(err.message.contains("unexpected character"));
    }

    #[test]
    fn lex_span_info_multiline() {
        // "Checkout" starts at byte 10: "Feature\n" is 8 bytes, then two
        // leading spaces on line 2 put it at column 3.
        let input = "Feature\n  Checkout";
        let tokens = lex(input).unwrap();
        assert_eq!(tokens[0].span, Span { offset: 0, line: 1, column: 1 });
        assert_eq!(tokens[1].span, Span { offset: 10, line: 2, column: 3 });
    }
}