Skip to main content

oak_go/lexer/
mod.rs

1#![doc = include_str!("readme.md")]
2pub mod token_type;
3
4use crate::{language::GoLanguage, lexer::token_type::GoTokenType};
5use oak_core::{Lexer, LexerCache, LexerState, OakError, lexer::LexOutput, source::Source};
6
7type State<'a, S> = LexerState<'a, S, GoLanguage>;
8
/// Lexer that turns Go source text into a stream of [`GoTokenType`] tokens.
///
/// The lexer borrows its language configuration for the `'config` lifetime,
/// so cloning it only copies the reference.
#[derive(Clone)]
pub struct GoLexer<'config> {
    /// Language configuration supplied to [`GoLexer::new`]. It is stored but
    /// not read by any method visible in this file, hence the leading
    /// underscore.
    _config: &'config GoLanguage,
}
13
14impl<'config> Lexer<GoLanguage> for GoLexer<'config> {
15    fn lex<'a, S: Source + ?Sized>(&self, source: &'a S, _edits: &[oak_core::source::TextEdit], cache: &'a mut impl LexerCache<GoLanguage>) -> LexOutput<GoLanguage> {
16        let mut state = State::new_with_cache(source, 0, cache);
17        let result = self.run(&mut state);
18        if result.is_ok() {
19            state.add_eof()
20        }
21        state.finish_with_cache(result, cache)
22    }
23}
24
25impl<'config> GoLexer<'config> {
26    pub fn new(config: &'config GoLanguage) -> Self {
27        Self { _config: config }
28    }
29
30    fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
31        while state.not_at_end() {
32            let safe_point = state.get_position();
33
34            if self.skip_whitespace(state) {
35                continue;
36            }
37
38            if self.skip_comment(state) {
39                continue;
40            }
41
42            if self.lex_identifier_or_keyword(state) {
43                continue;
44            }
45
46            if self.lex_literal(state) {
47                continue;
48            }
49
50            if self.lex_operator_or_delimiter(state) {
51                continue;
52            }
53
54            // Fallback
55            let start_pos = state.get_position();
56            if let Some(ch) = state.peek() {
57                state.advance(ch.len_utf8());
58                state.add_token(GoTokenType::Error, start_pos, state.get_position());
59            }
60
61            state.advance_if_dead_lock(safe_point)
62        }
63
64        Ok(())
65    }
66
67    fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
68        let start = state.get_position();
69        while let Some(ch) = state.peek() {
70            if ch.is_whitespace() {
71                state.advance(ch.len_utf8());
72            }
73            else {
74                break;
75            }
76        }
77        if state.get_position() > start {
78            state.add_token(GoTokenType::Whitespace, start, state.get_position());
79            return true;
80        }
81        false
82    }
83
84    fn skip_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
85        let start = state.get_position();
86        if state.consume_if_starts_with("//") {
87            while let Some(ch) = state.peek() {
88                if ch == '\n' {
89                    break;
90                }
91                state.advance(ch.len_utf8());
92            }
93            state.add_token(GoTokenType::Comment, start, state.get_position());
94            return true;
95        }
96        if state.consume_if_starts_with("/*") {
97            while let Some(ch) = state.peek() {
98                if state.consume_if_starts_with("*/") {
99                    break;
100                }
101                state.advance(ch.len_utf8());
102            }
103            state.add_token(GoTokenType::Comment, start, state.get_position());
104            return true;
105        }
106        false
107    }
108
109    fn lex_identifier_or_keyword<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
110        let start = state.get_position();
111        if let Some(ch) = state.peek() {
112            if ch.is_alphabetic() || ch == '_' {
113                state.advance(ch.len_utf8());
114                while let Some(ch) = state.peek() {
115                    if ch.is_alphanumeric() || ch == '_' {
116                        state.advance(ch.len_utf8());
117                    }
118                    else {
119                        break;
120                    }
121                }
122
123                let text = state.get_text_in(oak_core::Range { start, end: state.get_position() });
124                let kind = match text.as_ref() {
125                    "package" => GoTokenType::Package,
126                    "import" => GoTokenType::Import,
127                    "func" => GoTokenType::Func,
128                    "var" => GoTokenType::Var,
129                    "const" => GoTokenType::Const,
130                    "type" => GoTokenType::Type,
131                    "struct" => GoTokenType::Struct,
132                    "interface" => GoTokenType::Interface,
133                    "map" => GoTokenType::Map,
134                    "chan" => GoTokenType::Chan,
135                    "if" => GoTokenType::If,
136                    "else" => GoTokenType::Else,
137                    "for" => GoTokenType::For,
138                    "range" => GoTokenType::Range,
139                    "return" => GoTokenType::Return,
140                    "break" => GoTokenType::Break,
141                    "continue" => GoTokenType::Continue,
142                    "goto" => GoTokenType::Goto,
143                    "switch" => GoTokenType::Switch,
144                    "case" => GoTokenType::Case,
145                    "default" => GoTokenType::Default,
146                    "defer" => GoTokenType::Defer,
147                    "go" => GoTokenType::Go,
148                    "select" => GoTokenType::Select,
149                    "fallthrough" => GoTokenType::Fallthrough,
150                    "true" => GoTokenType::BoolLiteral,
151                    "false" => GoTokenType::BoolLiteral,
152                    "nil" => GoTokenType::NilLiteral,
153                    _ => GoTokenType::Identifier,
154                };
155                state.add_token(kind, start, state.get_position());
156                return true;
157            }
158        }
159        false
160    }
161
162    fn lex_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
163        let start = state.get_position();
164        if let Some(ch) = state.peek() {
165            // String literal
166            if ch == '"' {
167                state.advance(ch.len_utf8());
168                while let Some(ch) = state.peek() {
169                    if ch == '"' {
170                        state.advance(ch.len_utf8());
171                        break;
172                    }
173                    if ch == '\\' {
174                        state.advance(ch.len_utf8());
175                        if let Some(next) = state.peek() {
176                            state.advance(next.len_utf8());
177                        }
178                    }
179                    else {
180                        state.advance(ch.len_utf8());
181                    }
182                }
183                state.add_token(GoTokenType::StringLiteral, start, state.get_position());
184                return true;
185            }
186            // Raw string literal
187            if ch == '`' {
188                state.advance(ch.len_utf8());
189                while let Some(ch) = state.peek() {
190                    if ch == '`' {
191                        state.advance(ch.len_utf8());
192                        break;
193                    }
194                    state.advance(ch.len_utf8());
195                }
196                state.add_token(GoTokenType::StringLiteral, start, state.get_position());
197                return true;
198            }
199            // Rune literal
200            if ch == '\'' {
201                state.advance(ch.len_utf8());
202                while let Some(ch) = state.peek() {
203                    if ch == '\'' {
204                        state.advance(ch.len_utf8());
205                        break;
206                    }
207                    if ch == '\\' {
208                        state.advance(ch.len_utf8());
209                        if let Some(next) = state.peek() {
210                            state.advance(next.len_utf8());
211                        }
212                    }
213                    else {
214                        state.advance(ch.len_utf8());
215                    }
216                }
217                state.add_token(GoTokenType::RuneLiteral, start, state.get_position());
218                return true;
219            }
220            // Number literal
221            if ch.is_ascii_digit() {
222                state.advance(ch.len_utf8());
223                let mut has_dot = false;
224                while let Some(ch) = state.peek() {
225                    if ch.is_ascii_digit() {
226                        state.advance(ch.len_utf8());
227                    }
228                    else if ch == '.' && !has_dot {
229                        has_dot = true;
230                        state.advance(ch.len_utf8());
231                    }
232                    else {
233                        break;
234                    }
235                }
236                let kind = if has_dot { GoTokenType::FloatLiteral } else { GoTokenType::IntLiteral };
237                state.add_token(kind, start, state.get_position());
238                return true;
239            }
240        }
241        false
242    }
243
244    fn lex_operator_or_delimiter<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
245        let start = state.get_position();
246        let kind = if state.consume_if_starts_with(":=") {
247            GoTokenType::ColonAssign
248        }
249        else if state.consume_if_starts_with("...") {
250            GoTokenType::Ellipsis
251        }
252        else if state.consume_if_starts_with("<<=") {
253            GoTokenType::LeftShiftAssign
254        }
255        else if state.consume_if_starts_with(">>=") {
256            GoTokenType::RightShiftAssign
257        }
258        else if state.consume_if_starts_with("&^=") {
259            GoTokenType::AmpersandCaretAssign
260        }
261        else if state.consume_if_starts_with("==") {
262            GoTokenType::Equal
263        }
264        else if state.consume_if_starts_with("!=") {
265            GoTokenType::NotEqual
266        }
267        else if state.consume_if_starts_with("<=") {
268            GoTokenType::LessEqual
269        }
270        else if state.consume_if_starts_with(">=") {
271            GoTokenType::GreaterEqual
272        }
273        else if state.consume_if_starts_with("&&") {
274            GoTokenType::LogicalAnd
275        }
276        else if state.consume_if_starts_with("||") {
277            GoTokenType::LogicalOr
278        }
279        else if state.consume_if_starts_with("<<") {
280            GoTokenType::LeftShift
281        }
282        else if state.consume_if_starts_with(">>") {
283            GoTokenType::RightShift
284        }
285        else if state.consume_if_starts_with("&^") {
286            GoTokenType::AmpersandCaret
287        }
288        else if state.consume_if_starts_with("++") {
289            GoTokenType::Increment
290        }
291        else if state.consume_if_starts_with("--") {
292            GoTokenType::Decrement
293        }
294        else if state.consume_if_starts_with("+=") {
295            GoTokenType::PlusAssign
296        }
297        else if state.consume_if_starts_with("-=") {
298            GoTokenType::MinusAssign
299        }
300        else if state.consume_if_starts_with("*=") {
301            GoTokenType::StarAssign
302        }
303        else if state.consume_if_starts_with("/=") {
304            GoTokenType::SlashAssign
305        }
306        else if state.consume_if_starts_with("%=") {
307            GoTokenType::PercentAssign
308        }
309        else if state.consume_if_starts_with("&=") {
310            GoTokenType::AmpersandAssign
311        }
312        else if state.consume_if_starts_with("|=") {
313            GoTokenType::PipeAssign
314        }
315        else if state.consume_if_starts_with("^=") {
316            GoTokenType::CaretAssign
317        }
318        else if state.consume_if_starts_with("<-") {
319            GoTokenType::Arrow
320        }
321        else if state.consume_if_starts_with("{") {
322            GoTokenType::LeftBrace
323        }
324        else if state.consume_if_starts_with("}") {
325            GoTokenType::RightBrace
326        }
327        else if state.consume_if_starts_with("(") {
328            GoTokenType::LeftParen
329        }
330        else if state.consume_if_starts_with(")") {
331            GoTokenType::RightParen
332        }
333        else if state.consume_if_starts_with("[") {
334            GoTokenType::LeftBracket
335        }
336        else if state.consume_if_starts_with("]") {
337            GoTokenType::RightBracket
338        }
339        else if state.consume_if_starts_with(".") {
340            GoTokenType::Dot
341        }
342        else if state.consume_if_starts_with(",") {
343            GoTokenType::Comma
344        }
345        else if state.consume_if_starts_with(";") {
346            GoTokenType::Semicolon
347        }
348        else if state.consume_if_starts_with(":") {
349            GoTokenType::Colon
350        }
351        else if state.consume_if_starts_with("+") {
352            GoTokenType::Plus
353        }
354        else if state.consume_if_starts_with("-") {
355            GoTokenType::Minus
356        }
357        else if state.consume_if_starts_with("*") {
358            GoTokenType::Star
359        }
360        else if state.consume_if_starts_with("/") {
361            GoTokenType::Slash
362        }
363        else if state.consume_if_starts_with("%") {
364            GoTokenType::Percent
365        }
366        else if state.consume_if_starts_with("&") {
367            GoTokenType::Ampersand
368        }
369        else if state.consume_if_starts_with("|") {
370            GoTokenType::Pipe
371        }
372        else if state.consume_if_starts_with("^") {
373            GoTokenType::Caret
374        }
375        else if state.consume_if_starts_with("!") {
376            GoTokenType::LogicalNot
377        }
378        else if state.consume_if_starts_with("<") {
379            GoTokenType::Less
380        }
381        else if state.consume_if_starts_with(">") {
382            GoTokenType::Greater
383        }
384        else if state.consume_if_starts_with("=") {
385            GoTokenType::Assign
386        }
387        else {
388            return false;
389        };
390
391        state.add_token(kind, start, state.get_position());
392        true
393    }
394}