1use crate::{kind::ZigSyntaxKind, language::ZigLanguage};
2use oak_core::{
3 IncrementalCache, Lexer, LexerState, OakError,
4 lexer::{CommentLine, LexOutput, StringConfig, WhitespaceConfig},
5 source::Source,
6};
7use std::sync::LazyLock;
8
/// Lexer state specialised to [`ZigLanguage`], generic over the source type `S`.
type State<S> = LexerState<S, ZigLanguage>;

/// Whitespace scanner configuration (`unicode_whitespace` enabled); used by `ZigLexer::skip_whitespace`.
static ZIG_WHITESPACE: LazyLock<WhitespaceConfig> = LazyLock::new(|| WhitespaceConfig { unicode_whitespace: true });
/// Line-comment configuration whose only marker is `//`.
/// NOTE(review): not referenced in the visible part of this file; `ZigLexer::skip_comment` scans comments by hand.
static ZIG_COMMENT: LazyLock<CommentLine> = LazyLock::new(|| CommentLine { line_markers: &["//"] });
/// String configuration: `"` as the quote character and `\` as the escape character.
/// NOTE(review): not referenced in the visible part of this file.
static ZIG_STRING: LazyLock<StringConfig> = LazyLock::new(|| StringConfig { quotes: &['"'], escape: Some('\\') });
/// Character configuration: `'` as the quote character and `\` as the escape character.
/// NOTE(review): not referenced in the visible part of this file.
static ZIG_CHAR: LazyLock<StringConfig> = LazyLock::new(|| StringConfig { quotes: &['\''], escape: Some('\\') });
15
/// Lexer for Zig source text.
///
/// Borrows a [`ZigLanguage`] configuration for the lifetime `'config`.
#[derive(Clone)]
pub struct ZigLexer<'config> {
    /// Language configuration handed to [`ZigLexer::new`].
    /// NOTE(review): stored but never read in the visible part of this file.
    config: &'config ZigLanguage,
}
20
21impl<'config> Lexer<ZigLanguage> for ZigLexer<'config> {
22 fn lex_incremental(
23 &self,
24 source: impl Source,
25 changed: usize,
26 cache: IncrementalCache<ZigLanguage>,
27 ) -> LexOutput<ZigLanguage> {
28 let mut state = LexerState::new_with_cache(source, changed, cache);
29 let result = self.run(&mut state);
30 state.finish(result)
31 }
32}
33
34impl<'config> ZigLexer<'config> {
35 pub fn new(config: &'config ZigLanguage) -> Self {
36 Self { config }
37 }
38
39 fn run<S: Source>(&self, state: &mut State<S>) -> Result<(), OakError> {
41 while state.not_at_end() {
42 let safe_point = state.get_position();
43
44 if self.skip_whitespace(state) {
45 continue;
46 }
47
48 if self.skip_comment(state) {
49 continue;
50 }
51
52 if self.lex_string_literal(state) {
53 continue;
54 }
55
56 if self.lex_char_literal(state) {
57 continue;
58 }
59
60 if self.lex_number_literal(state) {
61 continue;
62 }
63
64 if self.lex_identifier_or_keyword(state) {
65 continue;
66 }
67
68 if self.lex_builtin(state) {
69 continue;
70 }
71
72 if self.lex_operators(state) {
73 continue;
74 }
75
76 if self.lex_single_char_tokens(state) {
77 continue;
78 }
79
80 state.safe_check(safe_point);
81 }
82
83 let eof_pos = state.get_position();
85 state.add_token(ZigSyntaxKind::Eof, eof_pos, eof_pos);
86 Ok(())
87 }
88
89 fn skip_whitespace<S: Source>(&self, state: &mut State<S>) -> bool {
91 match ZIG_WHITESPACE.scan(state.rest(), state.get_position(), ZigSyntaxKind::Whitespace) {
92 Some(token) => {
93 state.advance_with(token);
94 return true;
95 }
96 None => {}
97 }
98 false
99 }
100
101 fn skip_comment<S: Source>(&self, state: &mut State<S>) -> bool {
103 let start = state.get_position();
104 let rest = state.rest();
105
106 if rest.starts_with("//") {
108 state.advance(2);
109
110 let is_doc_comment = if state.peek() == Some('/') {
112 state.advance(1);
113 true
114 }
115 else {
116 false
117 };
118
119 while let Some(ch) = state.peek() {
120 if ch == '\n' || ch == '\r' {
121 break;
122 }
123 state.advance(ch.len_utf8());
124 }
125
126 let kind = if is_doc_comment { ZigSyntaxKind::DocComment } else { ZigSyntaxKind::Comment };
127 state.add_token(kind, start, state.get_position());
128 return true;
129 }
130
131 false
132 }
133
134 fn lex_string_literal<S: Source>(&self, state: &mut State<S>) -> bool {
136 let start = state.get_position();
137
138 if state.rest().starts_with("\\\\") {
140 state.advance(2);
141
142 while let Some(ch) = state.peek() {
144 if ch == '\n' {
145 state.advance(1);
146 break;
147 }
148 state.advance(ch.len_utf8());
149 }
150
151 while state.not_at_end() {
153 let _line_start = state.get_position();
154
155 if !state.rest().starts_with("\\\\") {
157 break;
158 }
159
160 state.advance(2);
161
162 while let Some(ch) = state.peek() {
164 if ch == '\n' {
165 state.advance(1);
166 break;
167 }
168 state.advance(ch.len_utf8());
169 }
170 }
171
172 state.add_token(ZigSyntaxKind::StringLiteral, start, state.get_position());
173 return true;
174 }
175
176 if state.current() == Some('"') {
178 state.advance(1);
179 let mut escaped = false;
180
181 while let Some(ch) = state.peek() {
182 if ch == '"' && !escaped {
183 state.advance(1); break;
185 }
186
187 state.advance(ch.len_utf8());
188
189 if escaped {
190 escaped = false;
191 continue;
192 }
193
194 if ch == '\\' {
195 escaped = true;
196 continue;
197 }
198
199 if ch == '\n' || ch == '\r' {
200 break;
201 }
202 }
203
204 state.add_token(ZigSyntaxKind::StringLiteral, start, state.get_position());
205 return true;
206 }
207
208 false
209 }
210
211 fn lex_char_literal<S: Source>(&self, state: &mut State<S>) -> bool {
213 let start = state.get_position();
214
215 if state.current() != Some('\'') {
216 return false;
217 }
218
219 state.advance(1); if let Some('\\') = state.peek() {
222 state.advance(1);
223 if let Some(c) = state.peek() {
224 state.advance(c.len_utf8());
225 }
226 }
227 else if let Some(c) = state.peek() {
228 state.advance(c.len_utf8());
229 }
230 else {
231 state.set_position(start);
232 return false;
233 }
234
235 if state.peek() == Some('\'') {
236 state.advance(1);
237 state.add_token(ZigSyntaxKind::CharLiteral, start, state.get_position());
238 return true;
239 }
240
241 state.set_position(start);
242 false
243 }
244
245 fn lex_number_literal<S: Source>(&self, state: &mut State<S>) -> bool {
247 let start = state.get_position();
248 let first = match state.current() {
249 Some(c) => c,
250 None => return false,
251 };
252
253 if !first.is_ascii_digit() {
254 return false;
255 }
256
257 let mut is_float = false;
258
259 if first == '0' {
261 match state.peek_next_n(1) {
262 Some('x') | Some('X') => {
263 state.advance(2);
264 while let Some(c) = state.peek() {
265 if c.is_ascii_hexdigit() || c == '_' {
266 state.advance(1);
267 }
268 else {
269 break;
270 }
271 }
272 }
273 Some('b') | Some('B') => {
274 state.advance(2);
275 while let Some(c) = state.peek() {
276 if c == '0' || c == '1' || c == '_' {
277 state.advance(1);
278 }
279 else {
280 break;
281 }
282 }
283 }
284 Some('o') | Some('O') => {
285 state.advance(2);
286 while let Some(c) = state.peek() {
287 if ('0'..='7').contains(&c) || c == '_' {
288 state.advance(1);
289 }
290 else {
291 break;
292 }
293 }
294 }
295 _ => {
296 state.advance(1);
297 while let Some(c) = state.peek() {
298 if c.is_ascii_digit() || c == '_' {
299 state.advance(1);
300 }
301 else {
302 break;
303 }
304 }
305 }
306 }
307 }
308 else {
309 state.advance(1);
310 while let Some(c) = state.peek() {
311 if c.is_ascii_digit() || c == '_' {
312 state.advance(1);
313 }
314 else {
315 break;
316 }
317 }
318 }
319
320 if state.peek() == Some('.') {
322 let n1 = state.peek_next_n(1);
323 if n1.map(|c| c.is_ascii_digit()).unwrap_or(false) {
324 is_float = true;
325 state.advance(1); while let Some(c) = state.peek() {
327 if c.is_ascii_digit() || c == '_' {
328 state.advance(1);
329 }
330 else {
331 break;
332 }
333 }
334 }
335 }
336
337 if let Some(c) = state.peek() {
339 if c == 'e' || c == 'E' {
340 let n1 = state.peek_next_n(1);
341 if n1 == Some('+') || n1 == Some('-') || n1.map(|d| d.is_ascii_digit()).unwrap_or(false) {
342 is_float = true;
343 state.advance(1);
344 if let Some(sign) = state.peek() {
345 if sign == '+' || sign == '-' {
346 state.advance(1);
347 }
348 }
349 while let Some(d) = state.peek() {
350 if d.is_ascii_digit() || d == '_' {
351 state.advance(1);
352 }
353 else {
354 break;
355 }
356 }
357 }
358 }
359 }
360
361 let end = state.get_position();
362 state.add_token(if is_float { ZigSyntaxKind::FloatLiteral } else { ZigSyntaxKind::IntegerLiteral }, start, end);
363 true
364 }
365
366 fn lex_identifier_or_keyword<S: Source>(&self, state: &mut State<S>) -> bool {
368 let start = state.get_position();
369 let ch = match state.current() {
370 Some(c) => c,
371 None => return false,
372 };
373
374 if !(ch.is_ascii_alphabetic() || ch == '_') {
375 return false;
376 }
377
378 state.advance(1);
379 while let Some(c) = state.current() {
380 if c.is_ascii_alphanumeric() || c == '_' {
381 state.advance(1);
382 }
383 else {
384 break;
385 }
386 }
387
388 let end = state.get_position();
389 let text = state.get_text_in((start..end).into());
390 let kind = self.get_keyword_or_identifier(text);
391 state.add_token(kind, start, state.get_position());
392 true
393 }
394
395 fn get_keyword_or_identifier(&self, text: &str) -> ZigSyntaxKind {
397 match text {
398 "const" => ZigSyntaxKind::Const,
400 "var" => ZigSyntaxKind::Var,
401 "fn" => ZigSyntaxKind::Fn,
402 "struct" => ZigSyntaxKind::Struct,
403 "union" => ZigSyntaxKind::Union,
404 "enum" => ZigSyntaxKind::Enum,
405 "opaque" => ZigSyntaxKind::Opaque,
406 "type" => ZigSyntaxKind::Type,
407 "comptime" => ZigSyntaxKind::Comptime,
408 "inline" => ZigSyntaxKind::Inline,
409 "noinline" => ZigSyntaxKind::NoInline,
410 "pub" => ZigSyntaxKind::Pub,
411 "export" => ZigSyntaxKind::Export,
412 "extern" => ZigSyntaxKind::Extern,
413 "packed" => ZigSyntaxKind::Packed,
414 "align" => ZigSyntaxKind::Align,
415 "callconv" => ZigSyntaxKind::CallConv,
416 "linksection" => ZigSyntaxKind::LinkSection,
417
418 "if" => ZigSyntaxKind::If,
420 "else" => ZigSyntaxKind::Else,
421 "switch" => ZigSyntaxKind::Switch,
422 "while" => ZigSyntaxKind::While,
423 "for" => ZigSyntaxKind::For,
424 "break" => ZigSyntaxKind::Break,
425 "continue" => ZigSyntaxKind::Continue,
426 "return" => ZigSyntaxKind::Return,
427 "defer" => ZigSyntaxKind::Defer,
428 "errdefer" => ZigSyntaxKind::ErrDefer,
429 "unreachable" => ZigSyntaxKind::Unreachable,
430 "noreturn" => ZigSyntaxKind::NoReturn,
431
432 "try" => ZigSyntaxKind::TryKeyword,
434 "catch" => ZigSyntaxKind::CatchKeyword,
435 "orelse" => ZigSyntaxKind::OrElse,
436 "error" => ZigSyntaxKind::ErrorKeyword,
437
438 "test" => ZigSyntaxKind::Test,
440 "async" => ZigSyntaxKind::Async,
441 "await" => ZigSyntaxKind::AwaitKeyword,
442 "suspend" => ZigSyntaxKind::Suspend,
443 "resume" => ZigSyntaxKind::Resume,
444 "cancel" => ZigSyntaxKind::Cancel,
445
446 "undefined" => ZigSyntaxKind::Undefined,
448 "null" => ZigSyntaxKind::Null,
449 "volatile" => ZigSyntaxKind::Volatile,
450 "allowzero" => ZigSyntaxKind::AllowZero,
451 "noalias" => ZigSyntaxKind::NoAlias,
452
453 "and" => ZigSyntaxKind::And,
455 "or" => ZigSyntaxKind::Or,
456
457 "anyframe" => ZigSyntaxKind::AnyFrame,
459 "anytype" => ZigSyntaxKind::AnyType,
460 "threadlocal" => ZigSyntaxKind::ThreadLocal,
461
462 "bool" => ZigSyntaxKind::Bool,
464 "i8" => ZigSyntaxKind::I8,
465 "i16" => ZigSyntaxKind::I16,
466 "i32" => ZigSyntaxKind::I32,
467 "i64" => ZigSyntaxKind::I64,
468 "i128" => ZigSyntaxKind::I128,
469 "isize" => ZigSyntaxKind::Isize,
470 "u8" => ZigSyntaxKind::U8,
471 "u16" => ZigSyntaxKind::U16,
472 "u32" => ZigSyntaxKind::U32,
473 "u64" => ZigSyntaxKind::U64,
474 "u128" => ZigSyntaxKind::U128,
475 "usize" => ZigSyntaxKind::Usize,
476 "f16" => ZigSyntaxKind::F16,
477 "f32" => ZigSyntaxKind::F32,
478 "f64" => ZigSyntaxKind::F64,
479 "f80" => ZigSyntaxKind::F80,
480 "f128" => ZigSyntaxKind::F128,
481 "c_short" => ZigSyntaxKind::C_Short,
482 "c_ushort" => ZigSyntaxKind::C_UShort,
483 "c_int" => ZigSyntaxKind::C_Int,
484 "c_uint" => ZigSyntaxKind::C_UInt,
485 "c_long" => ZigSyntaxKind::C_Long,
486 "c_ulong" => ZigSyntaxKind::C_ULong,
487 "c_longlong" => ZigSyntaxKind::C_LongLong,
488 "c_ulonglong" => ZigSyntaxKind::C_ULongLong,
489 "c_longdouble" => ZigSyntaxKind::C_LongDouble,
490 "c_void" => ZigSyntaxKind::C_Void,
491 "void" => ZigSyntaxKind::Void,
492 "comptime_int" => ZigSyntaxKind::Comptime_Int,
493 "comptime_float" => ZigSyntaxKind::Comptime_Float,
494
495 "true" | "false" => ZigSyntaxKind::BooleanLiteral,
497
498 _ => ZigSyntaxKind::Identifier,
499 }
500 }
501
502 fn lex_builtin<S: Source>(&self, state: &mut State<S>) -> bool {
504 let start = state.get_position();
505
506 if state.current() != Some('@') {
507 return false;
508 }
509
510 state.advance(1); while let Some(c) = state.peek() {
514 if c.is_ascii_alphanumeric() || c == '_' {
515 state.advance(1);
516 }
517 else {
518 break;
519 }
520 }
521
522 state.add_token(ZigSyntaxKind::At, start, state.get_position());
523 true
524 }
525
526 fn lex_operators<S: Source>(&self, state: &mut State<S>) -> bool {
528 let start = state.get_position();
529 let rest = state.rest();
530
531 let patterns: &[(&str, ZigSyntaxKind)] = &[
533 ("**", ZigSyntaxKind::StarStar),
534 ("+%", ZigSyntaxKind::PlusPercent),
535 ("-%", ZigSyntaxKind::MinusPercent),
536 ("*%", ZigSyntaxKind::StarPercent),
537 ("++", ZigSyntaxKind::PlusPlus),
538 ("<<", ZigSyntaxKind::LessLess),
539 (">>", ZigSyntaxKind::GreaterGreater),
540 ("==", ZigSyntaxKind::Equal),
541 ("!=", ZigSyntaxKind::NotEqual),
542 ("<=", ZigSyntaxKind::LessEqual),
543 (">=", ZigSyntaxKind::GreaterEqual),
544 ("+=", ZigSyntaxKind::PlusAssign),
545 ("-=", ZigSyntaxKind::MinusAssign),
546 ("*=", ZigSyntaxKind::StarAssign),
547 ("/=", ZigSyntaxKind::SlashAssign),
548 ("%=", ZigSyntaxKind::PercentAssign),
549 ("&=", ZigSyntaxKind::AmpersandAssign),
550 ("|=", ZigSyntaxKind::PipeAssign),
551 ("^=", ZigSyntaxKind::CaretAssign),
552 ("<<=", ZigSyntaxKind::LessLessAssign),
553 (">>=", ZigSyntaxKind::GreaterGreaterAssign),
554 ("...", ZigSyntaxKind::DotDotDot),
555 ("..", ZigSyntaxKind::DotDot),
556 ("=>", ZigSyntaxKind::FatArrow),
557 ];
558
559 for (pat, kind) in patterns {
560 if rest.starts_with(pat) {
561 state.advance(pat.len());
562 state.add_token(*kind, start, state.get_position());
563 return true;
564 }
565 }
566
567 if let Some(ch) = state.current() {
569 let kind = match ch {
570 '+' => Some(ZigSyntaxKind::Plus),
571 '-' => Some(ZigSyntaxKind::Minus),
572 '*' => Some(ZigSyntaxKind::Star),
573 '/' => Some(ZigSyntaxKind::Slash),
574 '%' => Some(ZigSyntaxKind::Percent),
575 '&' => Some(ZigSyntaxKind::Ampersand),
576 '|' => Some(ZigSyntaxKind::Pipe),
577 '^' => Some(ZigSyntaxKind::Caret),
578 '~' => Some(ZigSyntaxKind::Tilde),
579 '=' => Some(ZigSyntaxKind::Assign),
580 '<' => Some(ZigSyntaxKind::Less),
581 '>' => Some(ZigSyntaxKind::Greater),
582 '.' => Some(ZigSyntaxKind::Dot),
583 '!' => Some(ZigSyntaxKind::Exclamation),
584 '?' => Some(ZigSyntaxKind::Question),
585 _ => None,
586 };
587
588 if let Some(k) = kind {
589 state.advance(ch.len_utf8());
590 state.add_token(k, start, state.get_position());
591 return true;
592 }
593 }
594
595 false
596 }
597
598 fn lex_single_char_tokens<S: Source>(&self, state: &mut State<S>) -> bool {
600 let start = state.get_position();
601
602 if let Some(ch) = state.current() {
603 let kind = match ch {
604 '(' => ZigSyntaxKind::LeftParen,
605 ')' => ZigSyntaxKind::RightParen,
606 '{' => ZigSyntaxKind::LeftBrace,
607 '}' => ZigSyntaxKind::RightBrace,
608 '[' => ZigSyntaxKind::LeftBracket,
609 ']' => ZigSyntaxKind::RightBracket,
610 ',' => ZigSyntaxKind::Comma,
611 ';' => ZigSyntaxKind::Semicolon,
612 ':' => ZigSyntaxKind::Colon,
613 _ => return false,
614 };
615
616 state.advance(ch.len_utf8());
617 state.add_token(kind, start, state.get_position());
618 return true;
619 }
620
621 false
622 }
623}