1use crate::{kind::ZigSyntaxKind, language::ZigLanguage};
2use oak_core::{
3 Lexer, LexerCache, LexerState, OakError,
4 lexer::{LexOutput, WhitespaceConfig},
5 source::Source,
6};
7use std::sync::LazyLock;
8
9type State<'a, S> = LexerState<'a, S, ZigLanguage>;
10
11static ZIG_WHITESPACE: LazyLock<WhitespaceConfig> = LazyLock::new(|| WhitespaceConfig { unicode_whitespace: true });
12
13#[derive(Clone)]
14pub struct ZigLexer<'config> {
15 _config: &'config ZigLanguage,
16}
17
18impl<'config> Lexer<ZigLanguage> for ZigLexer<'config> {
19 fn lex<'a, S: Source + ?Sized>(&self, source: &S, _edits: &[oak_core::source::TextEdit], cache: &'a mut impl LexerCache<ZigLanguage>) -> LexOutput<ZigLanguage> {
20 let mut state = State::new_with_cache(source, 0, cache);
21 let result = self.run(&mut state);
22 if result.is_ok() {
23 state.add_eof();
24 }
25 state.finish_with_cache(result, cache)
26 }
27}
28
29impl<'config> ZigLexer<'config> {
30 pub fn new(config: &'config ZigLanguage) -> Self {
31 Self { _config: config }
32 }
33
34 fn run<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> Result<(), OakError> {
36 while state.not_at_end() {
37 let safe_point = state.get_position();
38
39 if self.skip_whitespace(state) {
40 continue;
41 }
42
43 if self.skip_comment(state) {
44 continue;
45 }
46
47 if self.lex_string_literal(state) {
48 continue;
49 }
50
51 if self.lex_char_literal(state) {
52 continue;
53 }
54
55 if self.lex_number_literal(state) {
56 continue;
57 }
58
59 if self.lex_identifier_or_keyword(state) {
60 continue;
61 }
62
63 if self.lex_builtin(state) {
64 continue;
65 }
66
67 if self.lex_operators(state) {
68 continue;
69 }
70
71 if self.lex_single_char_tokens(state) {
72 continue;
73 }
74
75 let start_pos = state.get_position();
77 if let Some(ch) = state.peek() {
78 state.advance(ch.len_utf8());
79 state.add_token(ZigSyntaxKind::Error, start_pos, state.get_position());
80 }
81
82 state.advance_if_dead_lock(safe_point);
83 }
84
85 Ok(())
86 }
87
88 fn skip_whitespace<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
90 ZIG_WHITESPACE.scan(state, ZigSyntaxKind::Whitespace)
91 }
92
93 fn skip_comment<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
95 let start = state.get_position();
96 let rest = state.rest();
97
98 if rest.starts_with("//") {
100 state.advance(2);
101
102 let is_doc_comment = if state.peek() == Some('/') {
104 state.advance(1);
105 true
106 }
107 else {
108 false
109 };
110
111 while let Some(ch) = state.peek() {
112 if ch == '\n' || ch == '\r' {
113 break;
114 }
115 state.advance(ch.len_utf8());
116 }
117
118 let kind = if is_doc_comment { ZigSyntaxKind::DocComment } else { ZigSyntaxKind::Comment };
119 state.add_token(kind, start, state.get_position());
120 return true;
121 }
122
123 false
124 }
125
126 fn lex_string_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
128 let start = state.get_position();
129
130 if state.rest().starts_with("\\\\") {
132 state.advance(2);
133
134 while let Some(ch) = state.peek() {
136 if ch == '\n' {
137 state.advance(1);
138 break;
139 }
140 state.advance(ch.len_utf8());
141 }
142
143 while state.not_at_end() {
145 let _line_start = state.get_position();
146
147 if !state.rest().starts_with("\\\\") {
149 break;
150 }
151
152 state.advance(2);
153
154 while let Some(ch) = state.peek() {
156 if ch == '\n' {
157 state.advance(1);
158 break;
159 }
160 state.advance(ch.len_utf8());
161 }
162 }
163
164 state.add_token(ZigSyntaxKind::StringLiteral, start, state.get_position());
165 return true;
166 }
167
168 if state.current() == Some('"') {
170 state.advance(1);
171 while let Some(ch) = state.peek() {
172 if ch == '"' {
173 state.advance(1);
174 break;
175 }
176 if ch == '\\' {
177 state.advance(1);
178 if let Some(next) = state.peek() {
179 state.advance(next.len_utf8());
180 }
181 continue;
182 }
183 state.advance(ch.len_utf8());
184 }
185 state.add_token(ZigSyntaxKind::StringLiteral, start, state.get_position());
186 return true;
187 }
188
189 false
190 }
191
192 fn lex_char_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
194 let start = state.get_position();
195 if state.current() == Some('\'') {
196 state.advance(1);
197 while let Some(ch) = state.peek() {
198 if ch == '\'' {
199 state.advance(1);
200 break;
201 }
202 if ch == '\\' {
203 state.advance(1);
204 if let Some(next) = state.peek() {
205 state.advance(next.len_utf8());
206 }
207 continue;
208 }
209 state.advance(ch.len_utf8());
210 }
211 state.add_token(ZigSyntaxKind::CharLiteral, start, state.get_position());
212 return true;
213 }
214 false
215 }
216
217 fn lex_number_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
219 let start = state.get_position();
220 let ch = state.current();
221 let mut is_float = false;
222
223 if let Some(ch) = ch {
224 if ch.is_ascii_digit() {
225 state.advance(1);
226 if ch == '0' {
228 if let Some(next) = state.peek() {
229 match next {
230 'x' | 'X' => {
231 state.advance(1);
232 state.take_while(|c| c.is_ascii_hexdigit() || c == '_');
233 }
234 'b' | 'B' => {
235 state.advance(1);
236 state.take_while(|c| c == '0' || c == '1' || c == '_');
237 }
238 'o' | 'O' => {
239 state.advance(1);
240 state.take_while(|c| ('0'..='7').contains(&c) || c == '_');
241 }
242 _ => {
243 state.take_while(|c| c.is_ascii_digit() || c == '_');
244 }
245 }
246 }
247 }
248 else {
249 state.take_while(|c| c.is_ascii_digit() || c == '_');
250 }
251
252 if state.current() == Some('.') {
254 if let Some(next) = state.peek() {
255 if next.is_ascii_digit() {
256 is_float = true;
257 state.advance(1);
258 state.take_while(|c| c.is_ascii_digit() || c == '_');
259 }
260 }
261 }
262
263 if let Some(c) = state.current() {
265 if c == 'e' || c == 'E' || c == 'p' || c == 'P' {
266 is_float = true;
267 state.advance(1);
268 if let Some(next) = state.peek() {
269 if next == '+' || next == '-' {
270 state.advance(1);
271 }
272 }
273 state.take_while(|c| c.is_ascii_digit() || c == '_');
274 }
275 }
276
277 let kind = if is_float { ZigSyntaxKind::FloatLiteral } else { ZigSyntaxKind::IntegerLiteral };
278 state.add_token(kind, start, state.get_position());
279 return true;
280 }
281 }
282 false
283 }
284
285 fn lex_identifier_or_keyword<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
287 let start = state.get_position();
288 if let Some(ch) = state.current() {
289 if ch.is_ascii_alphabetic() || ch == '_' {
290 state.advance(ch.len_utf8());
291 state.take_while(|c| c.is_ascii_alphanumeric() || c == '_');
292
293 let end = state.get_position();
294 let text = state.get_text_in((start..end).into());
295 let kind = self.get_keyword_or_identifier(&text);
296 state.add_token(kind, start, state.get_position());
297 return true;
298 }
299 }
300 false
301 }
302
303 fn get_keyword_or_identifier(&self, text: &str) -> ZigSyntaxKind {
305 match text {
306 "const" => ZigSyntaxKind::Const,
308 "var" => ZigSyntaxKind::Var,
309 "fn" => ZigSyntaxKind::Fn,
310 "struct" => ZigSyntaxKind::Struct,
311 "union" => ZigSyntaxKind::Union,
312 "enum" => ZigSyntaxKind::Enum,
313 "opaque" => ZigSyntaxKind::Opaque,
314 "type" => ZigSyntaxKind::Type,
315 "comptime" => ZigSyntaxKind::Comptime,
316 "inline" => ZigSyntaxKind::Inline,
317 "noinline" => ZigSyntaxKind::NoInline,
318 "pub" => ZigSyntaxKind::Pub,
319 "export" => ZigSyntaxKind::Export,
320 "extern" => ZigSyntaxKind::Extern,
321 "packed" => ZigSyntaxKind::Packed,
322 "align" => ZigSyntaxKind::Align,
323 "callconv" => ZigSyntaxKind::CallConv,
324 "linksection" => ZigSyntaxKind::LinkSection,
325
326 "if" => ZigSyntaxKind::If,
328 "else" => ZigSyntaxKind::Else,
329 "switch" => ZigSyntaxKind::Switch,
330 "while" => ZigSyntaxKind::While,
331 "for" => ZigSyntaxKind::For,
332 "break" => ZigSyntaxKind::Break,
333 "continue" => ZigSyntaxKind::Continue,
334 "return" => ZigSyntaxKind::Return,
335 "defer" => ZigSyntaxKind::Defer,
336 "errdefer" => ZigSyntaxKind::ErrDefer,
337 "unreachable" => ZigSyntaxKind::Unreachable,
338 "noreturn" => ZigSyntaxKind::NoReturn,
339
340 "try" => ZigSyntaxKind::TryKeyword,
342 "catch" => ZigSyntaxKind::CatchKeyword,
343 "orelse" => ZigSyntaxKind::OrElse,
344 "error" => ZigSyntaxKind::ErrorKeyword,
345
346 "test" => ZigSyntaxKind::Test,
348 "async" => ZigSyntaxKind::Async,
349 "await" => ZigSyntaxKind::AwaitKeyword,
350 "suspend" => ZigSyntaxKind::Suspend,
351 "resume" => ZigSyntaxKind::Resume,
352 "cancel" => ZigSyntaxKind::Cancel,
353
354 "undefined" => ZigSyntaxKind::Undefined,
356 "null" => ZigSyntaxKind::Null,
357 "volatile" => ZigSyntaxKind::Volatile,
358 "allowzero" => ZigSyntaxKind::AllowZero,
359 "noalias" => ZigSyntaxKind::NoAlias,
360
361 "and" => ZigSyntaxKind::And,
363 "or" => ZigSyntaxKind::Or,
364
365 "anyframe" => ZigSyntaxKind::AnyFrame,
367 "anytype" => ZigSyntaxKind::AnyType,
368 "threadlocal" => ZigSyntaxKind::ThreadLocal,
369
370 "bool" => ZigSyntaxKind::Bool,
372 "i8" => ZigSyntaxKind::I8,
373 "i16" => ZigSyntaxKind::I16,
374 "i32" => ZigSyntaxKind::I32,
375 "i64" => ZigSyntaxKind::I64,
376 "i128" => ZigSyntaxKind::I128,
377 "isize" => ZigSyntaxKind::Isize,
378 "u8" => ZigSyntaxKind::U8,
379 "u16" => ZigSyntaxKind::U16,
380 "u32" => ZigSyntaxKind::U32,
381 "u64" => ZigSyntaxKind::U64,
382 "u128" => ZigSyntaxKind::U128,
383 "usize" => ZigSyntaxKind::Usize,
384 "f16" => ZigSyntaxKind::F16,
385 "f32" => ZigSyntaxKind::F32,
386 "f64" => ZigSyntaxKind::F64,
387 "f80" => ZigSyntaxKind::F80,
388 "f128" => ZigSyntaxKind::F128,
389 "c_short" => ZigSyntaxKind::CShort,
390 "c_ushort" => ZigSyntaxKind::CUshort,
391 "c_int" => ZigSyntaxKind::CInt,
392 "c_uint" => ZigSyntaxKind::CUint,
393 "c_long" => ZigSyntaxKind::CLong,
394 "c_ulong" => ZigSyntaxKind::CUlong,
395 "c_longlong" => ZigSyntaxKind::CLongLong,
396 "c_ulonglong" => ZigSyntaxKind::CUlongLong,
397 "c_longdouble" => ZigSyntaxKind::CLongDouble,
398 "c_void" => ZigSyntaxKind::CVoid,
399 "void" => ZigSyntaxKind::Void,
400 "comptime_int" => ZigSyntaxKind::ComptimeInt,
401 "comptime_float" => ZigSyntaxKind::ComptimeFloat,
402
403 "true" | "false" => ZigSyntaxKind::BooleanLiteral,
405
406 _ => ZigSyntaxKind::Identifier,
407 }
408 }
409
410 fn lex_builtin<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
412 let start = state.get_position();
413 if state.current() == Some('@') {
414 state.advance(1);
415 if let Some(ch) = state.current() {
416 if ch.is_ascii_alphabetic() || ch == '_' {
417 state.advance(ch.len_utf8());
418 state.take_while(|c| c.is_ascii_alphanumeric() || c == '_');
419 state.add_token(ZigSyntaxKind::BuiltinIdentifier, start, state.get_position());
420 return true;
421 }
422 }
423 }
424 false
425 }
426
427 fn lex_operators<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
429 let start = state.get_position();
430 let rest = state.rest();
431
432 let ops = [
434 ("<<=", ZigSyntaxKind::LessLessAssign),
435 (">>=", ZigSyntaxKind::GreaterGreaterAssign),
436 ("...", ZigSyntaxKind::DotDotDot),
437 ("==", ZigSyntaxKind::Equal),
438 ("!=", ZigSyntaxKind::NotEqual),
439 ("<=", ZigSyntaxKind::LessEqual),
440 (">=", ZigSyntaxKind::GreaterEqual),
441 ("&&", ZigSyntaxKind::AndAnd),
442 ("||", ZigSyntaxKind::OrOr),
443 ("+=", ZigSyntaxKind::PlusAssign),
444 ("-=", ZigSyntaxKind::MinusAssign),
445 ("*=", ZigSyntaxKind::StarAssign),
446 ("/=", ZigSyntaxKind::SlashAssign),
447 ("%=", ZigSyntaxKind::PercentAssign),
448 ("&=", ZigSyntaxKind::AmpersandAssign),
449 ("|=", ZigSyntaxKind::PipeAssign),
450 ("^=", ZigSyntaxKind::CaretAssign),
451 ("++", ZigSyntaxKind::PlusPlus),
452 ("--", ZigSyntaxKind::MinusMinus),
453 ("**", ZigSyntaxKind::StarStar),
454 ("->", ZigSyntaxKind::Arrow),
455 ("=>", ZigSyntaxKind::FatArrow),
456 ("<<", ZigSyntaxKind::LessLess),
457 (">>", ZigSyntaxKind::GreaterGreater),
458 (".?", ZigSyntaxKind::DotQuestion),
459 (".*", ZigSyntaxKind::DotStar),
460 ];
461
462 for (op, kind) in ops {
463 if rest.starts_with(op) {
464 state.advance(op.len());
465 state.add_token(kind, start, state.get_position());
466 return true;
467 }
468 }
469
470 false
471 }
472
473 fn lex_single_char_tokens<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
475 let start = state.get_position();
476 if let Some(ch) = state.current() {
477 let kind = match ch {
478 '(' => ZigSyntaxKind::LeftParen,
479 ')' => ZigSyntaxKind::RightParen,
480 '{' => ZigSyntaxKind::LeftBrace,
481 '}' => ZigSyntaxKind::RightBrace,
482 '[' => ZigSyntaxKind::LeftBracket,
483 ']' => ZigSyntaxKind::RightBracket,
484 ',' => ZigSyntaxKind::Comma,
485 '.' => ZigSyntaxKind::Dot,
486 ':' => ZigSyntaxKind::Colon,
487 ';' => ZigSyntaxKind::Semicolon,
488 '+' => ZigSyntaxKind::Plus,
489 '-' => ZigSyntaxKind::Minus,
490 '*' => ZigSyntaxKind::Star,
491 '/' => ZigSyntaxKind::Slash,
492 '%' => ZigSyntaxKind::Percent,
493 '&' => ZigSyntaxKind::Ampersand,
494 '|' => ZigSyntaxKind::Pipe,
495 '^' => ZigSyntaxKind::Caret,
496 '~' => ZigSyntaxKind::Tilde,
497 '!' => ZigSyntaxKind::Exclamation,
498 '?' => ZigSyntaxKind::Question,
499 '<' => ZigSyntaxKind::Less,
500 '>' => ZigSyntaxKind::Greater,
501 '=' => ZigSyntaxKind::Assign,
502 _ => return false,
503 };
504 state.advance(1);
505 state.add_token(kind, start, state.get_position());
506 return true;
507 }
508 false
509 }
510}