1#![doc = include_str!("readme.md")]
2pub mod token_type;
4
5use crate::{language::GoLanguage, lexer::token_type::GoTokenType};
6use oak_core::{Lexer, LexerCache, LexerState, OakError, lexer::LexOutput, source::Source};
7
8pub(crate) type State<'a, S> = LexerState<'a, S, GoLanguage>;
10
11#[derive(Clone)]
13pub struct GoLexer<'config> {
14 config: &'config GoLanguage,
16}
17
18impl<'config> Lexer<GoLanguage> for GoLexer<'config> {
19 fn lex<'a, S: Source + ?Sized>(&self, source: &'a S, _edits: &[oak_core::source::TextEdit], cache: &'a mut impl LexerCache<GoLanguage>) -> LexOutput<GoLanguage> {
20 let mut state = State::new_with_cache(source, 0, cache);
21 let result = self.run(&mut state);
22 if result.is_ok() {
23 state.add_eof()
24 }
25 state.finish_with_cache(result, cache)
26 }
27}
28
29impl<'config> GoLexer<'config> {
30 pub fn new(config: &'config GoLanguage) -> Self {
32 Self { config }
33 }
34
35 fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
36 while state.not_at_end() {
37 let safe_point = state.get_position();
38
39 if self.skip_whitespace(state) {
40 continue;
41 }
42
43 if self.skip_comment(state) {
44 continue;
45 }
46
47 if self.lex_identifier_or_keyword(state) {
48 continue;
49 }
50
51 if self.lex_literal(state) {
52 continue;
53 }
54
55 if self.lex_operator_or_delimiter(state) {
56 continue;
57 }
58
59 let start_pos = state.get_position();
61 if let Some(ch) = state.peek() {
62 state.advance(ch.len_utf8());
63 state.add_token(GoTokenType::Error, start_pos, state.get_position());
64 }
65
66 state.advance_if_dead_lock(safe_point)
67 }
68
69 Ok(())
70 }
71
72 fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
73 let start = state.get_position();
74 while let Some(ch) = state.peek() {
75 if ch.is_whitespace() {
76 state.advance(ch.len_utf8());
77 }
78 else {
79 break;
80 }
81 }
82 if state.get_position() > start {
83 state.add_token(GoTokenType::Whitespace, start, state.get_position());
84 return true;
85 }
86 false
87 }
88
89 fn skip_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
90 let start = state.get_position();
91 if state.consume_if_starts_with("//") {
92 while let Some(ch) = state.peek() {
93 if ch == '\n' {
94 break;
95 }
96 state.advance(ch.len_utf8());
97 }
98 state.add_token(GoTokenType::Comment, start, state.get_position());
99 return true;
100 }
101 if state.consume_if_starts_with("/*") {
102 while let Some(ch) = state.peek() {
103 if state.consume_if_starts_with("*/") {
104 break;
105 }
106 state.advance(ch.len_utf8());
107 }
108 state.add_token(GoTokenType::Comment, start, state.get_position());
109 return true;
110 }
111 false
112 }
113
114 fn lex_identifier_or_keyword<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
115 let start = state.get_position();
116 if let Some(ch) = state.peek() {
117 if ch.is_alphabetic() || ch == '_' {
118 state.advance(ch.len_utf8());
119 while let Some(ch) = state.peek() {
120 if ch.is_alphanumeric() || ch == '_' {
121 state.advance(ch.len_utf8());
122 }
123 else {
124 break;
125 }
126 }
127
128 let text = state.get_text_in(oak_core::Range { start, end: state.get_position() });
129 let kind = match text.as_ref() {
130 "package" => GoTokenType::Package,
131 "import" => GoTokenType::Import,
132 "func" => GoTokenType::Func,
133 "var" => GoTokenType::Var,
134 "const" => GoTokenType::Const,
135 "type" => GoTokenType::Type,
136 "struct" => GoTokenType::Struct,
137 "interface" => GoTokenType::Interface,
138 "map" => GoTokenType::Map,
139 "chan" => GoTokenType::Chan,
140 "if" => GoTokenType::If,
141 "else" => GoTokenType::Else,
142 "for" => GoTokenType::For,
143 "range" => GoTokenType::Range,
144 "return" => GoTokenType::Return,
145 "break" => GoTokenType::Break,
146 "continue" => GoTokenType::Continue,
147 "goto" => GoTokenType::Goto,
148 "switch" => GoTokenType::Switch,
149 "case" => GoTokenType::Case,
150 "default" => GoTokenType::Default,
151 "defer" => GoTokenType::Defer,
152 "go" => GoTokenType::Go,
153 "select" => GoTokenType::Select,
154 "fallthrough" => GoTokenType::Fallthrough,
155 "true" => GoTokenType::BoolLiteral,
156 "false" => GoTokenType::BoolLiteral,
157 "nil" => GoTokenType::NilLiteral,
158 _ => GoTokenType::Identifier,
159 };
160 state.add_token(kind, start, state.get_position());
161 return true;
162 }
163 }
164 false
165 }
166
167 fn lex_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
168 let start = state.get_position();
169 if let Some(ch) = state.peek() {
170 if ch == '"' {
172 state.advance(ch.len_utf8());
173 while let Some(ch) = state.peek() {
174 if ch == '"' {
175 state.advance(ch.len_utf8());
176 break;
177 }
178 if ch == '\\' {
179 state.advance(ch.len_utf8());
180 if let Some(next) = state.peek() {
181 state.advance(next.len_utf8());
182 }
183 }
184 else {
185 state.advance(ch.len_utf8());
186 }
187 }
188 state.add_token(GoTokenType::StringLiteral, start, state.get_position());
189 return true;
190 }
191 if ch == '`' {
193 state.advance(ch.len_utf8());
194 while let Some(ch) = state.peek() {
195 if ch == '`' {
196 state.advance(ch.len_utf8());
197 break;
198 }
199 state.advance(ch.len_utf8());
200 }
201 state.add_token(GoTokenType::StringLiteral, start, state.get_position());
202 return true;
203 }
204 if ch == '\'' {
206 state.advance(ch.len_utf8());
207 while let Some(ch) = state.peek() {
208 if ch == '\'' {
209 state.advance(ch.len_utf8());
210 break;
211 }
212 if ch == '\\' {
213 state.advance(ch.len_utf8());
214 if let Some(next) = state.peek() {
215 state.advance(next.len_utf8());
216 }
217 }
218 else {
219 state.advance(ch.len_utf8());
220 }
221 }
222 state.add_token(GoTokenType::RuneLiteral, start, state.get_position());
223 return true;
224 }
225 if ch.is_ascii_digit() {
227 state.advance(ch.len_utf8());
228 let mut has_dot = false;
229 while let Some(ch) = state.peek() {
230 if ch.is_ascii_digit() {
231 state.advance(ch.len_utf8());
232 }
233 else if ch == '.' && !has_dot {
234 has_dot = true;
235 state.advance(ch.len_utf8());
236 }
237 else {
238 break;
239 }
240 }
241 let kind = if has_dot { GoTokenType::FloatLiteral } else { GoTokenType::IntLiteral };
242 state.add_token(kind, start, state.get_position());
243 return true;
244 }
245 }
246 false
247 }
248
249 fn lex_operator_or_delimiter<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
250 let start = state.get_position();
251 let kind = if state.consume_if_starts_with(":=") {
252 GoTokenType::ColonAssign
253 }
254 else if state.consume_if_starts_with("...") {
255 GoTokenType::Ellipsis
256 }
257 else if state.consume_if_starts_with("<<=") {
258 GoTokenType::LeftShiftAssign
259 }
260 else if state.consume_if_starts_with(">>=") {
261 GoTokenType::RightShiftAssign
262 }
263 else if state.consume_if_starts_with("&^=") {
264 GoTokenType::AmpersandCaretAssign
265 }
266 else if state.consume_if_starts_with("==") {
267 GoTokenType::Equal
268 }
269 else if state.consume_if_starts_with("!=") {
270 GoTokenType::NotEqual
271 }
272 else if state.consume_if_starts_with("<=") {
273 GoTokenType::LessEqual
274 }
275 else if state.consume_if_starts_with(">=") {
276 GoTokenType::GreaterEqual
277 }
278 else if state.consume_if_starts_with("&&") {
279 GoTokenType::LogicalAnd
280 }
281 else if state.consume_if_starts_with("||") {
282 GoTokenType::LogicalOr
283 }
284 else if state.consume_if_starts_with("<<") {
285 GoTokenType::LeftShift
286 }
287 else if state.consume_if_starts_with(">>") {
288 GoTokenType::RightShift
289 }
290 else if state.consume_if_starts_with("&^") {
291 GoTokenType::AmpersandCaret
292 }
293 else if state.consume_if_starts_with("++") {
294 GoTokenType::Increment
295 }
296 else if state.consume_if_starts_with("--") {
297 GoTokenType::Decrement
298 }
299 else if state.consume_if_starts_with("+=") {
300 GoTokenType::PlusAssign
301 }
302 else if state.consume_if_starts_with("-=") {
303 GoTokenType::MinusAssign
304 }
305 else if state.consume_if_starts_with("*=") {
306 GoTokenType::StarAssign
307 }
308 else if state.consume_if_starts_with("/=") {
309 GoTokenType::SlashAssign
310 }
311 else if state.consume_if_starts_with("%=") {
312 GoTokenType::PercentAssign
313 }
314 else if state.consume_if_starts_with("&=") {
315 GoTokenType::AmpersandAssign
316 }
317 else if state.consume_if_starts_with("|=") {
318 GoTokenType::PipeAssign
319 }
320 else if state.consume_if_starts_with("^=") {
321 GoTokenType::CaretAssign
322 }
323 else if state.consume_if_starts_with("<-") {
324 GoTokenType::Arrow
325 }
326 else if state.consume_if_starts_with("{") {
327 GoTokenType::LeftBrace
328 }
329 else if state.consume_if_starts_with("}") {
330 GoTokenType::RightBrace
331 }
332 else if state.consume_if_starts_with("(") {
333 GoTokenType::LeftParen
334 }
335 else if state.consume_if_starts_with(")") {
336 GoTokenType::RightParen
337 }
338 else if state.consume_if_starts_with("[") {
339 GoTokenType::LeftBracket
340 }
341 else if state.consume_if_starts_with("]") {
342 GoTokenType::RightBracket
343 }
344 else if state.consume_if_starts_with(".") {
345 GoTokenType::Dot
346 }
347 else if state.consume_if_starts_with(",") {
348 GoTokenType::Comma
349 }
350 else if state.consume_if_starts_with(";") {
351 GoTokenType::Semicolon
352 }
353 else if state.consume_if_starts_with(":") {
354 GoTokenType::Colon
355 }
356 else if state.consume_if_starts_with("+") {
357 GoTokenType::Plus
358 }
359 else if state.consume_if_starts_with("-") {
360 GoTokenType::Minus
361 }
362 else if state.consume_if_starts_with("*") {
363 GoTokenType::Star
364 }
365 else if state.consume_if_starts_with("/") {
366 GoTokenType::Slash
367 }
368 else if state.consume_if_starts_with("%") {
369 GoTokenType::Percent
370 }
371 else if state.consume_if_starts_with("&") {
372 GoTokenType::Ampersand
373 }
374 else if state.consume_if_starts_with("|") {
375 GoTokenType::Pipe
376 }
377 else if state.consume_if_starts_with("^") {
378 GoTokenType::Caret
379 }
380 else if state.consume_if_starts_with("!") {
381 GoTokenType::LogicalNot
382 }
383 else if state.consume_if_starts_with("<") {
384 GoTokenType::Less
385 }
386 else if state.consume_if_starts_with(">") {
387 GoTokenType::Greater
388 }
389 else if state.consume_if_starts_with("=") {
390 GoTokenType::Assign
391 }
392 else {
393 return false;
394 };
395
396 state.add_token(kind, start, state.get_position());
397 true
398 }
399}