1use crate::{kind::ZigSyntaxKind, language::ZigLanguage};
2use oak_core::{
3 Lexer, LexerCache, LexerState, OakError,
4 lexer::{LexOutput, WhitespaceConfig},
5 source::Source,
6};
7use std::sync::LazyLock;
8
9type State<'a, S> = LexerState<'a, S, ZigLanguage>;
10
/// Whitespace scanning configuration shared by every `ZigLexer`, built lazily
/// on first use.
// NOTE(review): `unicode_whitespace: true` presumably makes the scanner accept
// non-ASCII whitespace as well — confirm against `WhitespaceConfig`.
static ZIG_WHITESPACE: LazyLock<WhitespaceConfig> = LazyLock::new(|| WhitespaceConfig { unicode_whitespace: true });
12
/// Lexer for Zig source text.
///
/// The type carries no data; all lexing state lives in the `LexerState`
/// created for each `lex` call.
#[derive(Clone)]
pub struct ZigLexer;
15
16impl Lexer<ZigLanguage> for ZigLexer {
17 fn lex<'a, S: Source + ?Sized>(&self, source: &S, _edits: &[oak_core::TextEdit], cache: &'a mut impl LexerCache<ZigLanguage>) -> LexOutput<ZigLanguage> {
18 let mut state = LexerState::new(source);
19 let result = self.run(&mut state);
20 if result.is_ok() {
21 state.add_eof();
22 }
23 state.finish_with_cache(result, cache)
24 }
25}
26
27impl ZigLexer {
28 pub fn new(_config: &ZigLanguage) -> Self {
29 Self
30 }
31
32 fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
34 while state.not_at_end() {
35 let safe_point = state.get_position();
36
37 if self.skip_whitespace(state) {
38 continue;
39 }
40
41 if self.skip_comment(state) {
42 continue;
43 }
44
45 if self.lex_string_literal(state) {
46 continue;
47 }
48
49 if self.lex_char_literal(state) {
50 continue;
51 }
52
53 if self.lex_number_literal(state) {
54 continue;
55 }
56
57 if self.lex_identifier_or_keyword(state) {
58 continue;
59 }
60
61 if self.lex_builtin(state) {
62 continue;
63 }
64
65 if self.lex_operators(state) {
66 continue;
67 }
68
69 if self.lex_single_char_tokens(state) {
70 continue;
71 }
72
73 let start_pos = state.get_position();
75 if let Some(ch) = state.peek() {
76 state.advance(ch.len_utf8());
77 state.add_token(ZigSyntaxKind::Error, start_pos, state.get_position());
78 }
79
80 state.advance_if_dead_lock(safe_point);
81 }
82
83 Ok(())
84 }
85
86 fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
88 ZIG_WHITESPACE.scan(state, ZigSyntaxKind::Whitespace)
89 }
90
91 fn skip_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
93 let start = state.get_position();
94 let rest = state.rest();
95
96 if rest.starts_with("//") {
98 state.advance(2);
99
100 let is_doc_comment = if state.peek() == Some('/') {
102 state.advance(1);
103 true
104 }
105 else {
106 false
107 };
108
109 while let Some(ch) = state.peek() {
110 if ch == '\n' || ch == '\r' {
111 break;
112 }
113 state.advance(ch.len_utf8());
114 }
115
116 let kind = if is_doc_comment { ZigSyntaxKind::DocComment } else { ZigSyntaxKind::Comment };
117 state.add_token(kind, start, state.get_position());
118 return true;
119 }
120
121 false
122 }
123
124 fn lex_string_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
126 let start = state.get_position();
127
128 if state.rest().starts_with("\\\\") {
130 state.advance(2);
131
132 while let Some(ch) = state.peek() {
134 if ch == '\n' {
135 state.advance(1);
136 break;
137 }
138 state.advance(ch.len_utf8());
139 }
140
141 while state.not_at_end() {
143 let _line_start = state.get_position();
144
145 if !state.rest().starts_with("\\\\") {
147 break;
148 }
149
150 state.advance(2);
151
152 while let Some(ch) = state.peek() {
154 if ch == '\n' {
155 state.advance(1);
156 break;
157 }
158 state.advance(ch.len_utf8());
159 }
160 }
161
162 state.add_token(ZigSyntaxKind::StringLiteral, start, state.get_position());
163 return true;
164 }
165
166 if state.current() == Some('"') {
168 state.advance(1);
169 while let Some(ch) = state.peek() {
170 if ch == '"' {
171 state.advance(1);
172 break;
173 }
174 if ch == '\\' {
175 state.advance(1);
176 if let Some(next) = state.peek() {
177 state.advance(next.len_utf8());
178 }
179 continue;
180 }
181 state.advance(ch.len_utf8());
182 }
183 state.add_token(ZigSyntaxKind::StringLiteral, start, state.get_position());
184 return true;
185 }
186
187 false
188 }
189
190 fn lex_char_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
192 let start = state.get_position();
193 if state.current() == Some('\'') {
194 state.advance(1);
195 while let Some(ch) = state.peek() {
196 if ch == '\'' {
197 state.advance(1);
198 break;
199 }
200 if ch == '\\' {
201 state.advance(1);
202 if let Some(next) = state.peek() {
203 state.advance(next.len_utf8());
204 }
205 continue;
206 }
207 state.advance(ch.len_utf8());
208 }
209 state.add_token(ZigSyntaxKind::CharLiteral, start, state.get_position());
210 return true;
211 }
212 false
213 }
214
215 fn lex_number_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
217 let start = state.get_position();
218 let ch = state.current();
219 let mut is_float = false;
220
221 if let Some(ch) = ch {
222 if ch.is_ascii_digit() {
223 state.advance(1);
224 if ch == '0' {
226 if let Some(next) = state.peek() {
227 match next {
228 'x' | 'X' => {
229 state.advance(1);
230 state.take_while(|c| c.is_ascii_hexdigit() || c == '_');
231 }
232 'b' | 'B' => {
233 state.advance(1);
234 state.take_while(|c| c == '0' || c == '1' || c == '_');
235 }
236 'o' | 'O' => {
237 state.advance(1);
238 state.take_while(|c| ('0'..='7').contains(&c) || c == '_');
239 }
240 _ => {
241 state.take_while(|c| c.is_ascii_digit() || c == '_');
242 }
243 }
244 }
245 }
246 else {
247 state.take_while(|c| c.is_ascii_digit() || c == '_');
248 }
249
250 if state.current() == Some('.') {
252 if let Some(next) = state.peek() {
253 if next.is_ascii_digit() {
254 is_float = true;
255 state.advance(1);
256 state.take_while(|c| c.is_ascii_digit() || c == '_');
257 }
258 }
259 }
260
261 if let Some(c) = state.current() {
263 if c == 'e' || c == 'E' || c == 'p' || c == 'P' {
264 is_float = true;
265 state.advance(1);
266 if let Some(next) = state.peek() {
267 if next == '+' || next == '-' {
268 state.advance(1);
269 }
270 }
271 state.take_while(|c| c.is_ascii_digit() || c == '_');
272 }
273 }
274
275 let kind = if is_float { ZigSyntaxKind::FloatLiteral } else { ZigSyntaxKind::IntegerLiteral };
276 state.add_token(kind, start, state.get_position());
277 return true;
278 }
279 }
280 false
281 }
282
283 fn lex_identifier_or_keyword<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
285 let start = state.get_position();
286 if let Some(ch) = state.current() {
287 if ch.is_ascii_alphabetic() || ch == '_' {
288 state.advance(ch.len_utf8());
289 state.take_while(|c| c.is_ascii_alphanumeric() || c == '_');
290
291 let end = state.get_position();
292 let text = state.get_text_in((start..end).into());
293 let kind = self.get_keyword_or_identifier(&text);
294 state.add_token(kind, start, state.get_position());
295 return true;
296 }
297 }
298 false
299 }
300
301 fn get_keyword_or_identifier(&self, text: &str) -> ZigSyntaxKind {
303 match text {
304 "const" => ZigSyntaxKind::Const,
306 "var" => ZigSyntaxKind::Var,
307 "fn" => ZigSyntaxKind::Fn,
308 "struct" => ZigSyntaxKind::Struct,
309 "union" => ZigSyntaxKind::Union,
310 "enum" => ZigSyntaxKind::Enum,
311 "opaque" => ZigSyntaxKind::Opaque,
312 "type" => ZigSyntaxKind::Type,
313 "comptime" => ZigSyntaxKind::Comptime,
314 "inline" => ZigSyntaxKind::Inline,
315 "noinline" => ZigSyntaxKind::NoInline,
316 "pub" => ZigSyntaxKind::Pub,
317 "export" => ZigSyntaxKind::Export,
318 "extern" => ZigSyntaxKind::Extern,
319 "packed" => ZigSyntaxKind::Packed,
320 "align" => ZigSyntaxKind::Align,
321 "callconv" => ZigSyntaxKind::CallConv,
322 "linksection" => ZigSyntaxKind::LinkSection,
323
324 "if" => ZigSyntaxKind::If,
326 "else" => ZigSyntaxKind::Else,
327 "switch" => ZigSyntaxKind::Switch,
328 "while" => ZigSyntaxKind::While,
329 "for" => ZigSyntaxKind::For,
330 "break" => ZigSyntaxKind::Break,
331 "continue" => ZigSyntaxKind::Continue,
332 "return" => ZigSyntaxKind::Return,
333 "defer" => ZigSyntaxKind::Defer,
334 "errdefer" => ZigSyntaxKind::ErrDefer,
335 "unreachable" => ZigSyntaxKind::Unreachable,
336 "noreturn" => ZigSyntaxKind::NoReturn,
337
338 "try" => ZigSyntaxKind::TryKeyword,
340 "catch" => ZigSyntaxKind::CatchKeyword,
341 "orelse" => ZigSyntaxKind::OrElse,
342 "error" => ZigSyntaxKind::ErrorKeyword,
343
344 "test" => ZigSyntaxKind::Test,
346 "async" => ZigSyntaxKind::Async,
347 "await" => ZigSyntaxKind::AwaitKeyword,
348 "suspend" => ZigSyntaxKind::Suspend,
349 "resume" => ZigSyntaxKind::Resume,
350 "cancel" => ZigSyntaxKind::Cancel,
351
352 "undefined" => ZigSyntaxKind::Undefined,
354 "null" => ZigSyntaxKind::Null,
355 "volatile" => ZigSyntaxKind::Volatile,
356 "allowzero" => ZigSyntaxKind::AllowZero,
357 "noalias" => ZigSyntaxKind::NoAlias,
358
359 "and" => ZigSyntaxKind::And,
361 "or" => ZigSyntaxKind::Or,
362
363 "anyframe" => ZigSyntaxKind::AnyFrame,
365 "anytype" => ZigSyntaxKind::AnyType,
366 "threadlocal" => ZigSyntaxKind::ThreadLocal,
367
368 "bool" => ZigSyntaxKind::Bool,
370 "i8" => ZigSyntaxKind::I8,
371 "i16" => ZigSyntaxKind::I16,
372 "i32" => ZigSyntaxKind::I32,
373 "i64" => ZigSyntaxKind::I64,
374 "i128" => ZigSyntaxKind::I128,
375 "isize" => ZigSyntaxKind::Isize,
376 "u8" => ZigSyntaxKind::U8,
377 "u16" => ZigSyntaxKind::U16,
378 "u32" => ZigSyntaxKind::U32,
379 "u64" => ZigSyntaxKind::U64,
380 "u128" => ZigSyntaxKind::U128,
381 "usize" => ZigSyntaxKind::Usize,
382 "f16" => ZigSyntaxKind::F16,
383 "f32" => ZigSyntaxKind::F32,
384 "f64" => ZigSyntaxKind::F64,
385 "f80" => ZigSyntaxKind::F80,
386 "f128" => ZigSyntaxKind::F128,
387 "c_short" => ZigSyntaxKind::CShort,
388 "c_ushort" => ZigSyntaxKind::CUshort,
389 "c_int" => ZigSyntaxKind::CInt,
390 "c_uint" => ZigSyntaxKind::CUint,
391 "c_long" => ZigSyntaxKind::CLong,
392 "c_ulong" => ZigSyntaxKind::CUlong,
393 "c_longlong" => ZigSyntaxKind::CLongLong,
394 "c_ulonglong" => ZigSyntaxKind::CUlongLong,
395 "c_longdouble" => ZigSyntaxKind::CLongDouble,
396 "c_void" => ZigSyntaxKind::CVoid,
397 "void" => ZigSyntaxKind::Void,
398 "comptime_int" => ZigSyntaxKind::ComptimeInt,
399 "comptime_float" => ZigSyntaxKind::ComptimeFloat,
400
401 "true" | "false" => ZigSyntaxKind::BooleanLiteral,
403
404 _ => ZigSyntaxKind::Identifier,
405 }
406 }
407
408 fn lex_builtin<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
410 let start = state.get_position();
411 if state.current() == Some('@') {
412 state.advance(1);
413 if let Some(ch) = state.current() {
414 if ch.is_ascii_alphabetic() || ch == '_' {
415 state.advance(ch.len_utf8());
416 state.take_while(|c| c.is_ascii_alphanumeric() || c == '_');
417 state.add_token(ZigSyntaxKind::BuiltinIdentifier, start, state.get_position());
418 return true;
419 }
420 }
421 }
422 false
423 }
424
425 fn lex_operators<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
427 let start = state.get_position();
428 let rest = state.rest();
429
430 let ops = [
432 ("<<=", ZigSyntaxKind::LessLessAssign),
433 (">>=", ZigSyntaxKind::GreaterGreaterAssign),
434 ("...", ZigSyntaxKind::DotDotDot),
435 ("==", ZigSyntaxKind::Equal),
436 ("!=", ZigSyntaxKind::NotEqual),
437 ("<=", ZigSyntaxKind::LessEqual),
438 (">=", ZigSyntaxKind::GreaterEqual),
439 ("&&", ZigSyntaxKind::AndAnd),
440 ("||", ZigSyntaxKind::OrOr),
441 ("+=", ZigSyntaxKind::PlusAssign),
442 ("-=", ZigSyntaxKind::MinusAssign),
443 ("*=", ZigSyntaxKind::StarAssign),
444 ("/=", ZigSyntaxKind::SlashAssign),
445 ("%=", ZigSyntaxKind::PercentAssign),
446 ("&=", ZigSyntaxKind::AmpersandAssign),
447 ("|=", ZigSyntaxKind::PipeAssign),
448 ("^=", ZigSyntaxKind::CaretAssign),
449 ("++", ZigSyntaxKind::PlusPlus),
450 ("--", ZigSyntaxKind::MinusMinus),
451 ("**", ZigSyntaxKind::StarStar),
452 ("->", ZigSyntaxKind::Arrow),
453 ("=>", ZigSyntaxKind::FatArrow),
454 ("<<", ZigSyntaxKind::LessLess),
455 (">>", ZigSyntaxKind::GreaterGreater),
456 (".?", ZigSyntaxKind::DotQuestion),
457 (".*", ZigSyntaxKind::DotStar),
458 ];
459
460 for (op, kind) in ops {
461 if rest.starts_with(op) {
462 state.advance(op.len());
463 state.add_token(kind, start, state.get_position());
464 return true;
465 }
466 }
467
468 false
469 }
470
471 fn lex_single_char_tokens<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
473 let start = state.get_position();
474 if let Some(ch) = state.current() {
475 let kind = match ch {
476 '(' => ZigSyntaxKind::LeftParen,
477 ')' => ZigSyntaxKind::RightParen,
478 '{' => ZigSyntaxKind::LeftBrace,
479 '}' => ZigSyntaxKind::RightBrace,
480 '[' => ZigSyntaxKind::LeftBracket,
481 ']' => ZigSyntaxKind::RightBracket,
482 ',' => ZigSyntaxKind::Comma,
483 '.' => ZigSyntaxKind::Dot,
484 ':' => ZigSyntaxKind::Colon,
485 ';' => ZigSyntaxKind::Semicolon,
486 '+' => ZigSyntaxKind::Plus,
487 '-' => ZigSyntaxKind::Minus,
488 '*' => ZigSyntaxKind::Star,
489 '/' => ZigSyntaxKind::Slash,
490 '%' => ZigSyntaxKind::Percent,
491 '&' => ZigSyntaxKind::Ampersand,
492 '|' => ZigSyntaxKind::Pipe,
493 '^' => ZigSyntaxKind::Caret,
494 '~' => ZigSyntaxKind::Tilde,
495 '!' => ZigSyntaxKind::Exclamation,
496 '?' => ZigSyntaxKind::Question,
497 '<' => ZigSyntaxKind::Less,
498 '>' => ZigSyntaxKind::Greater,
499 '=' => ZigSyntaxKind::Assign,
500 _ => return false,
501 };
502 state.advance(1);
503 state.add_token(kind, start, state.get_position());
504 return true;
505 }
506 false
507 }
508}