// irkki_core/lexer.rs

/// The kinds of token the lexer can produce.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum TokenType {
    /// A carriage return that is not immediately followed by a line feed.
    Illegal,
    /// End of input; its literal is the empty string.
    EOF,
    /// The two-character sequence `\r\n`.
    CrLf,
    /// A `:` at the very first position of the input.
    Colon,
    /// A single space character.
    Space,
    /// Any other run of characters, ending before the next space, carriage
    /// return, or end of input.
    Word,
}
10
11pub struct Token {
12    pub token_type: TokenType,
13    pub literal: String,
14}
15
/// Lexer state over a borrowed input string.
///
/// The lexer never copies the input; it only tracks where it currently is.
#[derive(Debug, Clone)]
pub struct Lexer<'a> {
    /// The complete text being lexed.
    input: &'a str,
    /// The character currently under examination, or `None` at end of input.
    current_char: Option<char>,
    /// Position in `input` of `current_char`.
    current_position: usize,
    /// Position in `input` of the next character to be read.
    read_position: usize,
}
22
23impl<'a> Lexer<'a> {
24    pub fn new(input: &'a str) -> Self {
25        let mut lexer = Lexer {
26            input,
27            current_char: None,
28            current_position: 0,
29            read_position: 0,
30        };
31
32        lexer.read_char();
33        lexer
34    }
35
36    fn read_char(&mut self) {
37        self.current_char = if self.read_position >= self.input.len() {
38            None
39        } else {
40            Some(self.input.chars().nth(self.read_position).unwrap())
41        };
42        self.current_position = self.read_position;
43        self.read_position += 1;
44    }
45
46    fn read_string(&mut self) -> String {
47        let start = self.current_position;
48        let mut next_char = self.peek_char();
49
50        while next_char != '\r' && next_char != ' ' && next_char != '\0' {
51            self.read_char();
52            next_char = self.peek_char();
53        }
54
55        self.input[start..=self.current_position].to_string()
56    }
57
58    fn peek_char(&self) -> char {
59        if self.read_position >= self.input.len() {
60            '\0'
61        } else {
62            self.input.chars().nth(self.read_position).unwrap()
63        }
64    }
65
66    pub fn next_token(&mut self) -> Token {
67        let token: Token;
68
69        match self.current_char {
70            Some(c) => match c {
71                ':' => {
72                    if self.current_position == 0 && self.read_position == 1 {
73                        // Special case for leading colon in prefix
74                        token = Token {
75                            token_type: TokenType::Colon,
76                            literal: ":".to_string(),
77                        };
78                    } else {
79                        token = Token {
80                            token_type: TokenType::Word,
81                            literal: self.read_string(),
82                        };
83                    }
84                }
85                ' ' => {
86                    token = Token {
87                        token_type: TokenType::Space,
88                        literal: c.to_string(),
89                    };
90                }
91                '\r' => {
92                    if self.peek_char() == '\n' {
93                        self.read_char();
94                        token = Token {
95                            token_type: TokenType::CrLf,
96                            literal: "\r\n".to_string(),
97                        };
98                    } else {
99                        token = Token {
100                            token_type: TokenType::Illegal,
101                            literal: c.to_string(),
102                        };
103                    }
104                }
105                _ => {
106                    token = Token {
107                        token_type: TokenType::Word,
108                        literal: self.read_string(),
109                    };
110                }
111            },
112            None => {
113                token = Token {
114                    token_type: TokenType::EOF,
115                    literal: "".to_string(),
116                };
117            }
118        }
119
120        self.read_char();
121
122        token
123    }
124}
125
#[cfg(test)]
mod tests {
    use super::*;

    /// Lexes `input` and asserts that the tokens produced match `expected`,
    /// in order, comparing both the token type and the literal text.
    fn assert_tokens(input: &str, expected: &[(TokenType, &str)]) {
        let mut lexer = Lexer::new(input);

        for (index, (expected_type, expected_literal)) in expected.iter().enumerate() {
            let token = lexer.next_token();
            assert_eq!(
                &token.token_type, expected_type,
                "unexpected token type at index {}",
                index
            );
            assert_eq!(
                token.literal, *expected_literal,
                "unexpected literal at index {}",
                index
            );
        }
    }

    #[test]
    fn test_next_token() {
        assert_tokens(
            ":prefix COMMAND arg1 arg2\r\n",
            &[
                (TokenType::Colon, ":"),
                (TokenType::Word, "prefix"),
                (TokenType::Space, " "),
                (TokenType::Word, "COMMAND"),
                (TokenType::Space, " "),
                (TokenType::Word, "arg1"),
                (TokenType::Space, " "),
                (TokenType::Word, "arg2"),
                (TokenType::CrLf, "\r\n"),
                (TokenType::EOF, ""),
            ],
        );
    }

    #[test]
    fn test_illegal_token() {
        // A carriage return with no following line feed is not a valid token.
        assert_tokens(
            "COMMAND arg1\r",
            &[
                (TokenType::Word, "COMMAND"),
                (TokenType::Space, " "),
                (TokenType::Word, "arg1"),
                (TokenType::Illegal, "\r"),
                (TokenType::EOF, ""),
            ],
        );
    }

    #[test]
    fn test_colon_after_start_is_part_of_word() {
        // Only a colon at the very start of the input is its own token.
        assert_tokens(
            "PRIVMSG #chan :hello\r\n",
            &[
                (TokenType::Word, "PRIVMSG"),
                (TokenType::Space, " "),
                (TokenType::Word, "#chan"),
                (TokenType::Space, " "),
                (TokenType::Word, ":hello"),
                (TokenType::CrLf, "\r\n"),
                (TokenType::EOF, ""),
            ],
        );
    }

    #[test]
    fn test_empty_input_is_eof() {
        // The lexer keeps reporting end of input once it is exhausted.
        assert_tokens("", &[(TokenType::EOF, ""), (TokenType::EOF, "")]);
    }
}