1#![doc = include_str!("readme.md")]
2pub mod token_type;
3
4use crate::{language::GoLanguage, lexer::token_type::GoTokenType};
5use oak_core::{Lexer, LexerCache, LexerState, OakError, lexer::LexOutput, source::Source};
6
7pub(crate) type State<'a, S> = LexerState<'a, S, GoLanguage>;
8
9#[derive(Clone)]
10pub struct GoLexer<'config> {
11 config: &'config GoLanguage,
12}
13
14impl<'config> Lexer<GoLanguage> for GoLexer<'config> {
15 fn lex<'a, S: Source + ?Sized>(&self, source: &'a S, _edits: &[oak_core::source::TextEdit], cache: &'a mut impl LexerCache<GoLanguage>) -> LexOutput<GoLanguage> {
16 let mut state = State::new_with_cache(source, 0, cache);
17 let result = self.run(&mut state);
18 if result.is_ok() {
19 state.add_eof()
20 }
21 state.finish_with_cache(result, cache)
22 }
23}
24
25impl<'config> GoLexer<'config> {
26 pub fn new(config: &'config GoLanguage) -> Self {
27 Self { config }
28 }
29
30 fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
31 while state.not_at_end() {
32 let safe_point = state.get_position();
33
34 if self.skip_whitespace(state) {
35 continue;
36 }
37
38 if self.skip_comment(state) {
39 continue;
40 }
41
42 if self.lex_identifier_or_keyword(state) {
43 continue;
44 }
45
46 if self.lex_literal(state) {
47 continue;
48 }
49
50 if self.lex_operator_or_delimiter(state) {
51 continue;
52 }
53
54 let start_pos = state.get_position();
56 if let Some(ch) = state.peek() {
57 state.advance(ch.len_utf8());
58 state.add_token(GoTokenType::Error, start_pos, state.get_position());
59 }
60
61 state.advance_if_dead_lock(safe_point)
62 }
63
64 Ok(())
65 }
66
67 fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
68 let start = state.get_position();
69 while let Some(ch) = state.peek() {
70 if ch.is_whitespace() {
71 state.advance(ch.len_utf8());
72 }
73 else {
74 break;
75 }
76 }
77 if state.get_position() > start {
78 state.add_token(GoTokenType::Whitespace, start, state.get_position());
79 return true;
80 }
81 false
82 }
83
84 fn skip_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
85 let start = state.get_position();
86 if state.consume_if_starts_with("//") {
87 while let Some(ch) = state.peek() {
88 if ch == '\n' {
89 break;
90 }
91 state.advance(ch.len_utf8());
92 }
93 state.add_token(GoTokenType::Comment, start, state.get_position());
94 return true;
95 }
96 if state.consume_if_starts_with("/*") {
97 while let Some(ch) = state.peek() {
98 if state.consume_if_starts_with("*/") {
99 break;
100 }
101 state.advance(ch.len_utf8());
102 }
103 state.add_token(GoTokenType::Comment, start, state.get_position());
104 return true;
105 }
106 false
107 }
108
109 fn lex_identifier_or_keyword<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
110 let start = state.get_position();
111 if let Some(ch) = state.peek() {
112 if ch.is_alphabetic() || ch == '_' {
113 state.advance(ch.len_utf8());
114 while let Some(ch) = state.peek() {
115 if ch.is_alphanumeric() || ch == '_' {
116 state.advance(ch.len_utf8());
117 }
118 else {
119 break;
120 }
121 }
122
123 let text = state.get_text_in(oak_core::Range { start, end: state.get_position() });
124 let kind = match text.as_ref() {
125 "package" => GoTokenType::Package,
126 "import" => GoTokenType::Import,
127 "func" => GoTokenType::Func,
128 "var" => GoTokenType::Var,
129 "const" => GoTokenType::Const,
130 "type" => GoTokenType::Type,
131 "struct" => GoTokenType::Struct,
132 "interface" => GoTokenType::Interface,
133 "map" => GoTokenType::Map,
134 "chan" => GoTokenType::Chan,
135 "if" => GoTokenType::If,
136 "else" => GoTokenType::Else,
137 "for" => GoTokenType::For,
138 "range" => GoTokenType::Range,
139 "return" => GoTokenType::Return,
140 "break" => GoTokenType::Break,
141 "continue" => GoTokenType::Continue,
142 "goto" => GoTokenType::Goto,
143 "switch" => GoTokenType::Switch,
144 "case" => GoTokenType::Case,
145 "default" => GoTokenType::Default,
146 "defer" => GoTokenType::Defer,
147 "go" => GoTokenType::Go,
148 "select" => GoTokenType::Select,
149 "fallthrough" => GoTokenType::Fallthrough,
150 "true" => GoTokenType::BoolLiteral,
151 "false" => GoTokenType::BoolLiteral,
152 "nil" => GoTokenType::NilLiteral,
153 _ => GoTokenType::Identifier,
154 };
155 state.add_token(kind, start, state.get_position());
156 return true;
157 }
158 }
159 false
160 }
161
162 fn lex_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
163 let start = state.get_position();
164 if let Some(ch) = state.peek() {
165 if ch == '"' {
167 state.advance(ch.len_utf8());
168 while let Some(ch) = state.peek() {
169 if ch == '"' {
170 state.advance(ch.len_utf8());
171 break;
172 }
173 if ch == '\\' {
174 state.advance(ch.len_utf8());
175 if let Some(next) = state.peek() {
176 state.advance(next.len_utf8());
177 }
178 }
179 else {
180 state.advance(ch.len_utf8());
181 }
182 }
183 state.add_token(GoTokenType::StringLiteral, start, state.get_position());
184 return true;
185 }
186 if ch == '`' {
188 state.advance(ch.len_utf8());
189 while let Some(ch) = state.peek() {
190 if ch == '`' {
191 state.advance(ch.len_utf8());
192 break;
193 }
194 state.advance(ch.len_utf8());
195 }
196 state.add_token(GoTokenType::StringLiteral, start, state.get_position());
197 return true;
198 }
199 if ch == '\'' {
201 state.advance(ch.len_utf8());
202 while let Some(ch) = state.peek() {
203 if ch == '\'' {
204 state.advance(ch.len_utf8());
205 break;
206 }
207 if ch == '\\' {
208 state.advance(ch.len_utf8());
209 if let Some(next) = state.peek() {
210 state.advance(next.len_utf8());
211 }
212 }
213 else {
214 state.advance(ch.len_utf8());
215 }
216 }
217 state.add_token(GoTokenType::RuneLiteral, start, state.get_position());
218 return true;
219 }
220 if ch.is_ascii_digit() {
222 state.advance(ch.len_utf8());
223 let mut has_dot = false;
224 while let Some(ch) = state.peek() {
225 if ch.is_ascii_digit() {
226 state.advance(ch.len_utf8());
227 }
228 else if ch == '.' && !has_dot {
229 has_dot = true;
230 state.advance(ch.len_utf8());
231 }
232 else {
233 break;
234 }
235 }
236 let kind = if has_dot { GoTokenType::FloatLiteral } else { GoTokenType::IntLiteral };
237 state.add_token(kind, start, state.get_position());
238 return true;
239 }
240 }
241 false
242 }
243
244 fn lex_operator_or_delimiter<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
245 let start = state.get_position();
246 let kind = if state.consume_if_starts_with(":=") {
247 GoTokenType::ColonAssign
248 }
249 else if state.consume_if_starts_with("...") {
250 GoTokenType::Ellipsis
251 }
252 else if state.consume_if_starts_with("<<=") {
253 GoTokenType::LeftShiftAssign
254 }
255 else if state.consume_if_starts_with(">>=") {
256 GoTokenType::RightShiftAssign
257 }
258 else if state.consume_if_starts_with("&^=") {
259 GoTokenType::AmpersandCaretAssign
260 }
261 else if state.consume_if_starts_with("==") {
262 GoTokenType::Equal
263 }
264 else if state.consume_if_starts_with("!=") {
265 GoTokenType::NotEqual
266 }
267 else if state.consume_if_starts_with("<=") {
268 GoTokenType::LessEqual
269 }
270 else if state.consume_if_starts_with(">=") {
271 GoTokenType::GreaterEqual
272 }
273 else if state.consume_if_starts_with("&&") {
274 GoTokenType::LogicalAnd
275 }
276 else if state.consume_if_starts_with("||") {
277 GoTokenType::LogicalOr
278 }
279 else if state.consume_if_starts_with("<<") {
280 GoTokenType::LeftShift
281 }
282 else if state.consume_if_starts_with(">>") {
283 GoTokenType::RightShift
284 }
285 else if state.consume_if_starts_with("&^") {
286 GoTokenType::AmpersandCaret
287 }
288 else if state.consume_if_starts_with("++") {
289 GoTokenType::Increment
290 }
291 else if state.consume_if_starts_with("--") {
292 GoTokenType::Decrement
293 }
294 else if state.consume_if_starts_with("+=") {
295 GoTokenType::PlusAssign
296 }
297 else if state.consume_if_starts_with("-=") {
298 GoTokenType::MinusAssign
299 }
300 else if state.consume_if_starts_with("*=") {
301 GoTokenType::StarAssign
302 }
303 else if state.consume_if_starts_with("/=") {
304 GoTokenType::SlashAssign
305 }
306 else if state.consume_if_starts_with("%=") {
307 GoTokenType::PercentAssign
308 }
309 else if state.consume_if_starts_with("&=") {
310 GoTokenType::AmpersandAssign
311 }
312 else if state.consume_if_starts_with("|=") {
313 GoTokenType::PipeAssign
314 }
315 else if state.consume_if_starts_with("^=") {
316 GoTokenType::CaretAssign
317 }
318 else if state.consume_if_starts_with("<-") {
319 GoTokenType::Arrow
320 }
321 else if state.consume_if_starts_with("{") {
322 GoTokenType::LeftBrace
323 }
324 else if state.consume_if_starts_with("}") {
325 GoTokenType::RightBrace
326 }
327 else if state.consume_if_starts_with("(") {
328 GoTokenType::LeftParen
329 }
330 else if state.consume_if_starts_with(")") {
331 GoTokenType::RightParen
332 }
333 else if state.consume_if_starts_with("[") {
334 GoTokenType::LeftBracket
335 }
336 else if state.consume_if_starts_with("]") {
337 GoTokenType::RightBracket
338 }
339 else if state.consume_if_starts_with(".") {
340 GoTokenType::Dot
341 }
342 else if state.consume_if_starts_with(",") {
343 GoTokenType::Comma
344 }
345 else if state.consume_if_starts_with(";") {
346 GoTokenType::Semicolon
347 }
348 else if state.consume_if_starts_with(":") {
349 GoTokenType::Colon
350 }
351 else if state.consume_if_starts_with("+") {
352 GoTokenType::Plus
353 }
354 else if state.consume_if_starts_with("-") {
355 GoTokenType::Minus
356 }
357 else if state.consume_if_starts_with("*") {
358 GoTokenType::Star
359 }
360 else if state.consume_if_starts_with("/") {
361 GoTokenType::Slash
362 }
363 else if state.consume_if_starts_with("%") {
364 GoTokenType::Percent
365 }
366 else if state.consume_if_starts_with("&") {
367 GoTokenType::Ampersand
368 }
369 else if state.consume_if_starts_with("|") {
370 GoTokenType::Pipe
371 }
372 else if state.consume_if_starts_with("^") {
373 GoTokenType::Caret
374 }
375 else if state.consume_if_starts_with("!") {
376 GoTokenType::LogicalNot
377 }
378 else if state.consume_if_starts_with("<") {
379 GoTokenType::Less
380 }
381 else if state.consume_if_starts_with(">") {
382 GoTokenType::Greater
383 }
384 else if state.consume_if_starts_with("=") {
385 GoTokenType::Assign
386 }
387 else {
388 return false;
389 };
390
391 state.add_token(kind, start, state.get_position());
392 true
393 }
394}