1use crate::{kind::GoSyntaxKind, language::GoLanguage};
2use oak_core::{Lexer, LexerCache, LexerState, OakError, lexer::LexOutput, source::Source};
3
4type State<'a, S> = LexerState<'a, S, GoLanguage>;
5
6#[derive(Clone)]
7pub struct GoLexer<'config> {
8 _config: &'config GoLanguage,
9}
10
11impl<'config> Lexer<GoLanguage> for GoLexer<'config> {
12 fn lex<'a, S: Source + ?Sized>(&self, source: &'a S, _edits: &[oak_core::source::TextEdit], cache: &'a mut impl LexerCache<GoLanguage>) -> LexOutput<GoLanguage> {
13 let mut state = State::new_with_cache(source, 0, cache);
14 let result = self.run(&mut state);
15 if result.is_ok() {
16 state.add_eof();
17 }
18 state.finish_with_cache(result, cache)
19 }
20}
21
22impl<'config> GoLexer<'config> {
23 pub fn new(config: &'config GoLanguage) -> Self {
24 Self { _config: config }
25 }
26
27 fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
28 while state.not_at_end() {
29 let safe_point = state.get_position();
30
31 if self.skip_whitespace(state) {
32 continue;
33 }
34
35 if self.skip_comment(state) {
36 continue;
37 }
38
39 if self.lex_identifier_or_keyword(state) {
40 continue;
41 }
42
43 if self.lex_literal(state) {
44 continue;
45 }
46
47 if self.lex_operator_or_delimiter(state) {
48 continue;
49 }
50
51 let start_pos = state.get_position();
53 if let Some(ch) = state.peek() {
54 state.advance(ch.len_utf8());
55 state.add_token(GoSyntaxKind::Error, start_pos, state.get_position());
56 }
57
58 state.advance_if_dead_lock(safe_point);
59 }
60
61 Ok(())
62 }
63
64 fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
65 let start = state.get_position();
66 while let Some(ch) = state.peek() {
67 if ch.is_whitespace() {
68 state.advance(ch.len_utf8());
69 }
70 else {
71 break;
72 }
73 }
74 if state.get_position() > start {
75 state.add_token(GoSyntaxKind::Whitespace, start, state.get_position());
76 return true;
77 }
78 false
79 }
80
81 fn skip_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
82 let start = state.get_position();
83 if state.consume_if_starts_with("//") {
84 while let Some(ch) = state.peek() {
85 if ch == '\n' {
86 break;
87 }
88 state.advance(ch.len_utf8());
89 }
90 state.add_token(GoSyntaxKind::Comment, start, state.get_position());
91 return true;
92 }
93 if state.consume_if_starts_with("/*") {
94 while let Some(ch) = state.peek() {
95 if state.consume_if_starts_with("*/") {
96 break;
97 }
98 state.advance(ch.len_utf8());
99 }
100 state.add_token(GoSyntaxKind::Comment, start, state.get_position());
101 return true;
102 }
103 false
104 }
105
106 fn lex_identifier_or_keyword<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
107 let start = state.get_position();
108 if let Some(ch) = state.peek() {
109 if ch.is_alphabetic() || ch == '_' {
110 state.advance(ch.len_utf8());
111 while let Some(ch) = state.peek() {
112 if ch.is_alphanumeric() || ch == '_' {
113 state.advance(ch.len_utf8());
114 }
115 else {
116 break;
117 }
118 }
119
120 let text = state.get_text_in(oak_core::Range { start, end: state.get_position() });
121 let kind = match text.as_ref() {
122 "package" => GoSyntaxKind::Package,
123 "import" => GoSyntaxKind::Import,
124 "func" => GoSyntaxKind::Func,
125 "var" => GoSyntaxKind::Var,
126 "const" => GoSyntaxKind::Const,
127 "type" => GoSyntaxKind::Type,
128 "struct" => GoSyntaxKind::Struct,
129 "interface" => GoSyntaxKind::Interface,
130 "map" => GoSyntaxKind::Map,
131 "chan" => GoSyntaxKind::Chan,
132 "if" => GoSyntaxKind::If,
133 "else" => GoSyntaxKind::Else,
134 "for" => GoSyntaxKind::For,
135 "range" => GoSyntaxKind::Range,
136 "return" => GoSyntaxKind::Return,
137 "break" => GoSyntaxKind::Break,
138 "continue" => GoSyntaxKind::Continue,
139 "goto" => GoSyntaxKind::Goto,
140 "switch" => GoSyntaxKind::Switch,
141 "case" => GoSyntaxKind::Case,
142 "default" => GoSyntaxKind::Default,
143 "defer" => GoSyntaxKind::Defer,
144 "go" => GoSyntaxKind::Go,
145 "select" => GoSyntaxKind::Select,
146 "fallthrough" => GoSyntaxKind::Fallthrough,
147 "true" => GoSyntaxKind::BoolLiteral,
148 "false" => GoSyntaxKind::BoolLiteral,
149 "nil" => GoSyntaxKind::NilLiteral,
150 _ => GoSyntaxKind::Identifier,
151 };
152
153 state.add_token(kind, start, state.get_position());
154 return true;
155 }
156 }
157 false
158 }
159
160 fn lex_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
161 let start = state.get_position();
162 if let Some(ch) = state.peek() {
163 if ch == '"' {
165 state.advance(ch.len_utf8());
166 while let Some(ch) = state.peek() {
167 if ch == '"' {
168 state.advance(ch.len_utf8());
169 break;
170 }
171 if ch == '\\' {
172 state.advance(ch.len_utf8());
173 if let Some(next) = state.peek() {
174 state.advance(next.len_utf8());
175 }
176 }
177 else {
178 state.advance(ch.len_utf8());
179 }
180 }
181 state.add_token(GoSyntaxKind::StringLiteral, start, state.get_position());
182 return true;
183 }
184 if ch == '`' {
186 state.advance(ch.len_utf8());
187 while let Some(ch) = state.peek() {
188 if ch == '`' {
189 state.advance(ch.len_utf8());
190 break;
191 }
192 state.advance(ch.len_utf8());
193 }
194 state.add_token(GoSyntaxKind::StringLiteral, start, state.get_position());
195 return true;
196 }
197 if ch == '\'' {
199 state.advance(ch.len_utf8());
200 while let Some(ch) = state.peek() {
201 if ch == '\'' {
202 state.advance(ch.len_utf8());
203 break;
204 }
205 if ch == '\\' {
206 state.advance(ch.len_utf8());
207 if let Some(next) = state.peek() {
208 state.advance(next.len_utf8());
209 }
210 }
211 else {
212 state.advance(ch.len_utf8());
213 }
214 }
215 state.add_token(GoSyntaxKind::RuneLiteral, start, state.get_position());
216 return true;
217 }
218 if ch.is_ascii_digit() {
220 state.advance(ch.len_utf8());
221 let mut has_dot = false;
222 while let Some(ch) = state.peek() {
223 if ch.is_ascii_digit() {
224 state.advance(ch.len_utf8());
225 }
226 else if ch == '.' && !has_dot {
227 has_dot = true;
228 state.advance(ch.len_utf8());
229 }
230 else {
231 break;
232 }
233 }
234 let kind = if has_dot { GoSyntaxKind::FloatLiteral } else { GoSyntaxKind::IntLiteral };
235 state.add_token(kind, start, state.get_position());
236 return true;
237 }
238 }
239 false
240 }
241
242 fn lex_operator_or_delimiter<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
243 let start = state.get_position();
244 let kind = if state.consume_if_starts_with(":=") {
245 GoSyntaxKind::ColonAssign
246 }
247 else if state.consume_if_starts_with("...") {
248 GoSyntaxKind::Ellipsis
249 }
250 else if state.consume_if_starts_with("<<=") {
251 GoSyntaxKind::LeftShiftAssign
252 }
253 else if state.consume_if_starts_with(">>=") {
254 GoSyntaxKind::RightShiftAssign
255 }
256 else if state.consume_if_starts_with("&^=") {
257 GoSyntaxKind::AmpersandCaretAssign
258 }
259 else if state.consume_if_starts_with("==") {
260 GoSyntaxKind::Equal
261 }
262 else if state.consume_if_starts_with("!=") {
263 GoSyntaxKind::NotEqual
264 }
265 else if state.consume_if_starts_with("<=") {
266 GoSyntaxKind::LessEqual
267 }
268 else if state.consume_if_starts_with(">=") {
269 GoSyntaxKind::GreaterEqual
270 }
271 else if state.consume_if_starts_with("&&") {
272 GoSyntaxKind::LogicalAnd
273 }
274 else if state.consume_if_starts_with("||") {
275 GoSyntaxKind::LogicalOr
276 }
277 else if state.consume_if_starts_with("<<") {
278 GoSyntaxKind::LeftShift
279 }
280 else if state.consume_if_starts_with(">>") {
281 GoSyntaxKind::RightShift
282 }
283 else if state.consume_if_starts_with("&^") {
284 GoSyntaxKind::AmpersandCaret
285 }
286 else if state.consume_if_starts_with("++") {
287 GoSyntaxKind::Increment
288 }
289 else if state.consume_if_starts_with("--") {
290 GoSyntaxKind::Decrement
291 }
292 else if state.consume_if_starts_with("+=") {
293 GoSyntaxKind::PlusAssign
294 }
295 else if state.consume_if_starts_with("-=") {
296 GoSyntaxKind::MinusAssign
297 }
298 else if state.consume_if_starts_with("*=") {
299 GoSyntaxKind::StarAssign
300 }
301 else if state.consume_if_starts_with("/=") {
302 GoSyntaxKind::SlashAssign
303 }
304 else if state.consume_if_starts_with("%=") {
305 GoSyntaxKind::PercentAssign
306 }
307 else if state.consume_if_starts_with("&=") {
308 GoSyntaxKind::AmpersandAssign
309 }
310 else if state.consume_if_starts_with("|=") {
311 GoSyntaxKind::PipeAssign
312 }
313 else if state.consume_if_starts_with("^=") {
314 GoSyntaxKind::CaretAssign
315 }
316 else if state.consume_if_starts_with("<-") {
317 GoSyntaxKind::Arrow
318 }
319 else if state.consume_if_starts_with("{") {
320 GoSyntaxKind::LeftBrace
321 }
322 else if state.consume_if_starts_with("}") {
323 GoSyntaxKind::RightBrace
324 }
325 else if state.consume_if_starts_with("(") {
326 GoSyntaxKind::LeftParen
327 }
328 else if state.consume_if_starts_with(")") {
329 GoSyntaxKind::RightParen
330 }
331 else if state.consume_if_starts_with("[") {
332 GoSyntaxKind::LeftBracket
333 }
334 else if state.consume_if_starts_with("]") {
335 GoSyntaxKind::RightBracket
336 }
337 else if state.consume_if_starts_with(".") {
338 GoSyntaxKind::Dot
339 }
340 else if state.consume_if_starts_with(",") {
341 GoSyntaxKind::Comma
342 }
343 else if state.consume_if_starts_with(";") {
344 GoSyntaxKind::Semicolon
345 }
346 else if state.consume_if_starts_with(":") {
347 GoSyntaxKind::Colon
348 }
349 else if state.consume_if_starts_with("+") {
350 GoSyntaxKind::Plus
351 }
352 else if state.consume_if_starts_with("-") {
353 GoSyntaxKind::Minus
354 }
355 else if state.consume_if_starts_with("*") {
356 GoSyntaxKind::Star
357 }
358 else if state.consume_if_starts_with("/") {
359 GoSyntaxKind::Slash
360 }
361 else if state.consume_if_starts_with("%") {
362 GoSyntaxKind::Percent
363 }
364 else if state.consume_if_starts_with("&") {
365 GoSyntaxKind::Ampersand
366 }
367 else if state.consume_if_starts_with("|") {
368 GoSyntaxKind::Pipe
369 }
370 else if state.consume_if_starts_with("^") {
371 GoSyntaxKind::Caret
372 }
373 else if state.consume_if_starts_with("!") {
374 GoSyntaxKind::LogicalNot
375 }
376 else if state.consume_if_starts_with("<") {
377 GoSyntaxKind::Less
378 }
379 else if state.consume_if_starts_with(">") {
380 GoSyntaxKind::Greater
381 }
382 else if state.consume_if_starts_with("=") {
383 GoSyntaxKind::Assign
384 }
385 else {
386 return false;
387 };
388
389 state.add_token(kind, start, state.get_position());
390 true
391 }
392}