bob_the/
lexer.rs

/// A single lexical unit emitted by [`tokenize`].
#[derive(Clone, Debug, PartialEq)]
pub enum Token {
    /// The `READ` keyword.
    Read,
    /// The `WRITE` keyword.
    Write,
    /// The `PRINT` keyword.
    Print,
    /// The `APPEND` keyword.
    Append,
    /// A bare word that is not a keyword; may contain letters, digits, `_` and `.`.
    Identifier(String),
    /// The contents of a double-quoted string, with escape sequences already resolved.
    StringLiteral(String),
    /// An integer literal.
    IntLiteral(i64),
    /// A literal containing a decimal point.
    FloatLiteral(f64),
    /// The `->` operator.
    Arrow,
    /// A line break (`\n`) in the source text.
    Eol,
}
14
15pub fn tokenize(source: &str) -> Result<Vec<Token>, String> {
16    let mut tokens = Vec::new();
17    let mut chars = source.chars().peekable();
18
19    while let Some(ch) = chars.next() {
20        let token = match ch {
21            ' ' | '\t' | '\r' => continue,
22            '\n' => {
23                tokens.push(Token::Eol);
24                continue;
25            }
26            '#' => {
27                while let Some(c) = chars.peek() {
28                    if *c == '\n' {
29                        break;
30                    }
31                    chars.next();
32                }
33                continue;
34            }
35            '-' => {
36                let next_char = chars.next();
37                if next_char == Some('>') {
38                    Token::Arrow
39                } else {
40                    return Err(format!("Unexpected character: '{:?}'", next_char));
41                }
42            }
43            '"' => {
44                let mut s = String::new();
45                while let Some(c) = chars.next() {
46                    match c {
47                        '\\' => {
48                            if let Some(next_char) = chars.next() {
49                                match next_char {
50                                    'n' => s.push('\n'),
51                                    't' => s.push('\t'),
52                                    'r' => s.push('\r'),
53                                    '"' => s.push('"'),
54                                    '\\' => s.push('\\'),
55                                    _ => {
56                                        return Err(format!(
57                                            "Invalid escape sequence: \\{}",
58                                            next_char
59                                        ))
60                                    }
61                                }
62                            } else {
63                                return Err("Unexpected end of input.".to_string());
64                            }
65                        }
66                        '"' => break,
67                        _ => s.push(c),
68                    }
69                }
70                tokens.push(Token::StringLiteral(s));
71                continue;
72            }
73            '.' => {
74                if let Some(&ch) = chars.peek() {
75                    if ch.is_numeric() {
76                        let mut number = String::new();
77                        number.push(ch);
78                        chars.next();
79
80                        while let Some(ch) = chars.peek() {
81                            if ch.is_numeric() {
82                                number.push(*ch);
83                                chars.next();
84                            } else {
85                                break;
86                            }
87                        }
88
89                        match number.parse::<f64>() {
90                            Ok(n) => Token::FloatLiteral(n),
91                            Err(_) => return Err(format!("Invalid number: {}", number)),
92                        }
93                    } else {
94                        Token::Identifier(".".to_string())
95                    }
96                } else {
97                    Token::Identifier(".".to_string())
98                }
99            }
100            '0'..='9' => {
101                let mut number = String::new();
102                number.push(ch);
103
104                while let Some(c) = chars.peek() {
105                    if c.is_numeric() {
106                        number.push(*c);
107                        chars.next();
108                    } else {
109                        break;
110                    }
111                }
112
113                let mut is_float = false;
114                if let Some('.') = chars.peek() {
115                    is_float = true;
116                    number.push('.');
117                    chars.next();
118
119                    while let Some(c) = chars.peek() {
120                        if c.is_numeric() {
121                            number.push(*c);
122                            chars.next();
123                        } else {
124                            break;
125                        }
126                    }
127                }
128
129                if is_float {
130                    tokens.push(Token::FloatLiteral(number.parse::<f64>().unwrap()));
131                } else {
132                    tokens.push(Token::IntLiteral(number.parse::<i64>().unwrap()));
133                }
134                continue;
135            }
136            'a'..='z' | 'A'..='Z' | '_' => {
137                let mut identifier = String::new();
138                identifier.push(ch);
139
140                while let Some(c) = chars.peek() {
141                    if c.is_alphanumeric() || c == &'_' || c == &'.' {
142                        identifier.push(*c);
143                        chars.next();
144                    } else {
145                        break;
146                    }
147                }
148                match identifier.as_str() {
149                    "READ" => Token::Read,
150                    "WRITE" => Token::Write,
151                    "PRINT" => Token::Print,
152                    "APPEND" => Token::Append,
153                    _ => Token::Identifier(identifier),
154                }
155            }
156            _ => return Err(format!("Unexpected character: '{}'.", ch)),
157        };
158        tokens.push(token);
159    }
160    Ok(tokens)
161}
162
#[cfg(test)]
mod tests {
    use super::*;

    /// Lexes one statement of each keyword kind and checks the exact token stream.
    #[test]
    fn test_lexer_basic() {
        // Small constructors keep the expected list readable.
        let ident = |name: &str| Token::Identifier(name.to_string());
        let text = |value: &str| Token::StringLiteral(value.to_string());

        let source = r#"READ input.txt -> content
WRITE output.txt "Hello, World!"
PRINT "Hello, World!"
APPEND var1 var2 -> result
"#;

        let expected = vec![
            // READ input.txt -> content
            Token::Read,
            ident("input.txt"),
            Token::Arrow,
            ident("content"),
            Token::Eol,
            // WRITE output.txt "Hello, World!"
            Token::Write,
            ident("output.txt"),
            text("Hello, World!"),
            Token::Eol,
            // PRINT "Hello, World!"
            Token::Print,
            text("Hello, World!"),
            Token::Eol,
            // APPEND var1 var2 -> result
            Token::Append,
            ident("var1"),
            ident("var2"),
            Token::Arrow,
            ident("result"),
            Token::Eol,
        ];

        assert_eq!(tokenize(source), Ok(expected));
    }
}