1#![doc = include_str!("readme.md")]
2use crate::{language::ZigLanguage, lexer::token_type::ZigTokenType};
3pub mod token_type;
4use oak_core::{
5 Lexer, LexerCache, LexerState, OakError, Source,
6 lexer::{LexOutput, WhitespaceConfig},
7};
8use std::sync::LazyLock;
9
10type State<'a, S> = LexerState<'a, S, ZigLanguage>;
11
12static ZIG_WHITESPACE: LazyLock<WhitespaceConfig> = LazyLock::new(|| WhitespaceConfig { unicode_whitespace: true });
13
14#[derive(Clone)]
15pub struct ZigLexer<'config> {
16 _config: &'config ZigLanguage,
17}
18
19impl<'config> Lexer<ZigLanguage> for ZigLexer<'config> {
20 fn lex<'a, S: Source + ?Sized>(&self, source: &S, _edits: &[oak_core::source::TextEdit], cache: &'a mut impl LexerCache<ZigLanguage>) -> LexOutput<ZigLanguage> {
21 let mut state = State::new_with_cache(source, 0, cache);
22 let result = self.run(&mut state);
23 if result.is_ok() {
24 state.add_eof()
25 }
26 state.finish_with_cache(result, cache)
27 }
28}
29
30impl<'config> ZigLexer<'config> {
31 pub fn new(config: &'config ZigLanguage) -> Self {
32 Self { _config: config }
33 }
34
35 fn run<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> Result<(), OakError> {
37 while state.not_at_end() {
38 let safe_point = state.get_position();
39
40 if self.skip_whitespace(state) {
41 continue;
42 }
43
44 if self.skip_comment(state) {
45 continue;
46 }
47
48 if self.lex_string_literal(state) {
49 continue;
50 }
51
52 if self.lex_char_literal(state) {
53 continue;
54 }
55
56 if self.lex_number_literal(state) {
57 continue;
58 }
59
60 if self.lex_identifier_or_keyword(state) {
61 continue;
62 }
63
64 if self.lex_builtin(state) {
65 continue;
66 }
67
68 if self.lex_operators(state) {
69 continue;
70 }
71
72 if self.lex_single_char_tokens(state) {
73 continue;
74 }
75
76 let start_pos = state.get_position();
78 if let Some(ch) = state.peek() {
79 state.advance(ch.len_utf8());
80 state.add_token(ZigTokenType::Error, start_pos, state.get_position())
81 }
82
83 state.advance_if_dead_lock(safe_point)
84 }
85
86 Ok(())
87 }
88
89 fn skip_whitespace<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
91 ZIG_WHITESPACE.scan(state, ZigTokenType::Whitespace)
92 }
93
94 fn skip_comment<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
96 let start = state.get_position();
97 let rest = state.rest();
98
99 if rest.starts_with("//") {
101 state.advance(2);
102
103 let is_doc_comment = if state.peek() == Some('/') {
105 state.advance(1);
106 true
107 }
108 else {
109 false
110 };
111
112 while let Some(ch) = state.peek() {
113 if ch == '\n' || ch == '\r' {
114 break;
115 }
116 state.advance(ch.len_utf8())
117 }
118
119 let kind = if is_doc_comment { ZigTokenType::DocComment } else { ZigTokenType::Comment };
120 state.add_token(kind, start, state.get_position());
121 return true;
122 }
123
124 false
125 }
126
127 fn lex_string_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
129 let start = state.get_position();
130
131 if state.rest().starts_with("\\\\") {
133 state.advance(2);
134
135 while let Some(ch) = state.peek() {
137 if ch == '\n' {
138 state.advance(1);
139 break;
140 }
141 state.advance(ch.len_utf8())
142 }
143
144 while state.not_at_end() {
146 let _line_start = state.get_position();
147
148 if !state.rest().starts_with("\\\\") {
150 break;
151 }
152
153 state.advance(2);
154
155 while let Some(ch) = state.peek() {
157 if ch == '\n' {
158 state.advance(1);
159 break;
160 }
161 state.advance(ch.len_utf8())
162 }
163 }
164
165 state.add_token(ZigTokenType::StringLiteral, start, state.get_position());
166 return true;
167 }
168
169 if state.current() == Some('"') {
171 state.advance(1);
172 while let Some(ch) = state.peek() {
173 if ch == '"' {
174 state.advance(1);
175 break;
176 }
177 if ch == '\\' {
178 state.advance(1);
179 if let Some(next) = state.peek() {
180 state.advance(next.len_utf8())
181 }
182 continue;
183 }
184 state.advance(ch.len_utf8())
185 }
186 state.add_token(ZigTokenType::StringLiteral, start, state.get_position());
187 return true;
188 }
189
190 false
191 }
192
193 fn lex_char_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
195 let start = state.get_position();
196 if state.current() == Some('\'') {
197 state.advance(1);
198 while let Some(ch) = state.peek() {
199 if ch == '\'' {
200 state.advance(1);
201 break;
202 }
203 if ch == '\\' {
204 state.advance(1);
205 if let Some(next) = state.peek() {
206 state.advance(next.len_utf8())
207 }
208 continue;
209 }
210 state.advance(ch.len_utf8())
211 }
212 state.add_token(ZigTokenType::CharLiteral, start, state.get_position());
213 return true;
214 }
215 false
216 }
217
218 fn lex_number_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
220 let start = state.get_position();
221 let ch = state.current();
222 let mut is_float = false;
223
224 if let Some(ch) = ch {
225 if ch.is_ascii_digit() {
226 state.advance(1);
227 if ch == '0' {
229 if let Some(next) = state.peek() {
230 match next {
231 'x' | 'X' => {
232 state.advance(1);
233 state.take_while(|c| c.is_ascii_hexdigit() || c == '_');
234 }
235 'b' | 'B' => {
236 state.advance(1);
237 state.take_while(|c| c == '0' || c == '1' || c == '_');
238 }
239 'o' | 'O' => {
240 state.advance(1);
241 state.take_while(|c| ('0'..='7').contains(&c) || c == '_');
242 }
243 _ => {
244 state.take_while(|c| c.is_ascii_digit() || c == '_');
245 }
246 }
247 }
248 }
249 else {
250 state.take_while(|c| c.is_ascii_digit() || c == '_');
251 }
252
253 if state.current() == Some('.') {
255 if let Some(next) = state.peek() {
256 if next.is_ascii_digit() {
257 is_float = true;
258 state.advance(1);
259 state.take_while(|c| c.is_ascii_digit() || c == '_');
260 }
261 }
262 }
263
264 if let Some(c) = state.current() {
266 if c == 'e' || c == 'E' || c == 'p' || c == 'P' {
267 is_float = true;
268 state.advance(1);
269 if let Some(next) = state.peek() {
270 if next == '+' || next == '-' {
271 state.advance(1);
272 }
273 }
274 state.take_while(|c| c.is_ascii_digit() || c == '_');
275 }
276 }
277
278 let kind = if is_float { ZigTokenType::FloatLiteral } else { ZigTokenType::IntegerLiteral };
279 state.add_token(kind, start, state.get_position());
280 return true;
281 }
282 }
283 false
284 }
285
286 fn lex_identifier_or_keyword<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
288 let start = state.get_position();
289 if let Some(ch) = state.current() {
290 if ch.is_ascii_alphabetic() || ch == '_' {
291 state.advance(ch.len_utf8());
292 state.take_while(|c| c.is_ascii_alphanumeric() || c == '_');
293
294 let end = state.get_position();
295 let text = state.get_text_in((start..end).into());
296 let kind = self.get_keyword_or_identifier(&text);
297 state.add_token(kind, start, state.get_position());
298 return true;
299 }
300 }
301 false
302 }
303
304 fn get_keyword_or_identifier(&self, text: &str) -> ZigTokenType {
306 match text {
307 "const" => ZigTokenType::Const,
309 "var" => ZigTokenType::Var,
310 "fn" => ZigTokenType::Fn,
311 "struct" => ZigTokenType::Struct,
312 "union" => ZigTokenType::Union,
313 "enum" => ZigTokenType::Enum,
314 "opaque" => ZigTokenType::Opaque,
315 "type" => ZigTokenType::Type,
316 "comptime" => ZigTokenType::Comptime,
317 "inline" => ZigTokenType::Inline,
318 "noinline" => ZigTokenType::NoInline,
319 "pub" => ZigTokenType::Pub,
320 "export" => ZigTokenType::Export,
321 "extern" => ZigTokenType::Extern,
322 "packed" => ZigTokenType::Packed,
323 "align" => ZigTokenType::Align,
324 "callconv" => ZigTokenType::CallConv,
325 "linksection" => ZigTokenType::LinkSection,
326
327 "if" => ZigTokenType::If,
329 "else" => ZigTokenType::Else,
330 "switch" => ZigTokenType::Switch,
331 "while" => ZigTokenType::While,
332 "for" => ZigTokenType::For,
333 "break" => ZigTokenType::Break,
334 "continue" => ZigTokenType::Continue,
335 "return" => ZigTokenType::Return,
336 "defer" => ZigTokenType::Defer,
337 "errdefer" => ZigTokenType::ErrDefer,
338 "unreachable" => ZigTokenType::Unreachable,
339 "noreturn" => ZigTokenType::NoReturn,
340
341 "try" => ZigTokenType::TryKeyword,
343 "catch" => ZigTokenType::CatchKeyword,
344 "orelse" => ZigTokenType::OrElse,
345 "error" => ZigTokenType::ErrorKeyword,
346
347 "test" => ZigTokenType::Test,
349 "async" => ZigTokenType::Async,
350 "await" => ZigTokenType::AwaitKeyword,
351 "suspend" => ZigTokenType::Suspend,
352 "resume" => ZigTokenType::Resume,
353 "cancel" => ZigTokenType::Cancel,
354
355 "undefined" => ZigTokenType::Undefined,
357 "null" => ZigTokenType::Null,
358 "volatile" => ZigTokenType::Volatile,
359 "allowzero" => ZigTokenType::AllowZero,
360 "noalias" => ZigTokenType::NoAlias,
361
362 "and" => ZigTokenType::And,
364 "or" => ZigTokenType::Or,
365
366 "anyframe" => ZigTokenType::AnyFrame,
368 "anytype" => ZigTokenType::AnyType,
369 "threadlocal" => ZigTokenType::ThreadLocal,
370
371 "bool" => ZigTokenType::Bool,
373 "i8" => ZigTokenType::I8,
374 "i16" => ZigTokenType::I16,
375 "i32" => ZigTokenType::I32,
376 "i64" => ZigTokenType::I64,
377 "i128" => ZigTokenType::I128,
378 "isize" => ZigTokenType::Isize,
379 "u8" => ZigTokenType::U8,
380 "u16" => ZigTokenType::U16,
381 "u32" => ZigTokenType::U32,
382 "u64" => ZigTokenType::U64,
383 "u128" => ZigTokenType::U128,
384 "usize" => ZigTokenType::Usize,
385 "f16" => ZigTokenType::F16,
386 "f32" => ZigTokenType::F32,
387 "f64" => ZigTokenType::F64,
388 "f80" => ZigTokenType::F80,
389 "f128" => ZigTokenType::F128,
390 "c_short" => ZigTokenType::CShort,
391 "c_ushort" => ZigTokenType::CUshort,
392 "c_int" => ZigTokenType::CInt,
393 "c_uint" => ZigTokenType::CUint,
394 "c_long" => ZigTokenType::CLong,
395 "c_ulong" => ZigTokenType::CUlong,
396 "c_longlong" => ZigTokenType::CLongLong,
397 "c_ulonglong" => ZigTokenType::CUlongLong,
398 "c_longdouble" => ZigTokenType::CLongDouble,
399 "c_void" => ZigTokenType::CVoid,
400 "void" => ZigTokenType::Void,
401 "comptime_int" => ZigTokenType::ComptimeInt,
402 "comptime_float" => ZigTokenType::ComptimeFloat,
403
404 "true" | "false" => ZigTokenType::BooleanLiteral,
406
407 _ => ZigTokenType::Identifier,
408 }
409 }
410
411 fn lex_builtin<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
413 let start = state.get_position();
414 if state.current() == Some('↯') {
415 state.advance(1);
416 if let Some(ch) = state.current() {
417 if ch.is_ascii_alphabetic() || ch == '_' {
418 state.advance(ch.len_utf8());
419 state.take_while(|c| c.is_ascii_alphanumeric() || c == '_');
420 state.add_token(ZigTokenType::BuiltinIdentifier, start, state.get_position());
421 return true;
422 }
423 }
424 }
425 false
426 }
427
428 fn lex_operators<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
430 let start = state.get_position();
431 let rest = state.rest();
432
433 let ops = [
435 ("<<=", ZigTokenType::LessLessAssign),
436 (">>=", ZigTokenType::GreaterGreaterAssign),
437 ("...", ZigTokenType::DotDotDot),
438 ("==", ZigTokenType::Equal),
439 ("!=", ZigTokenType::NotEqual),
440 ("<=", ZigTokenType::LessEqual),
441 (">=", ZigTokenType::GreaterEqual),
442 ("&&", ZigTokenType::AndAnd),
443 ("||", ZigTokenType::OrOr),
444 ("+=", ZigTokenType::PlusAssign),
445 ("-=", ZigTokenType::MinusAssign),
446 ("*=", ZigTokenType::StarAssign),
447 ("/=", ZigTokenType::SlashAssign),
448 ("%=", ZigTokenType::PercentAssign),
449 ("&=", ZigTokenType::AmpersandAssign),
450 ("|=", ZigTokenType::PipeAssign),
451 ("^=", ZigTokenType::CaretAssign),
452 ("++", ZigTokenType::PlusPlus),
453 ("--", ZigTokenType::MinusMinus),
454 ("**", ZigTokenType::StarStar),
455 ("->", ZigTokenType::Arrow),
456 ("=>", ZigTokenType::FatArrow),
457 ("<<", ZigTokenType::LessLess),
458 (">>", ZigTokenType::GreaterGreater),
459 (".?", ZigTokenType::DotQuestion),
460 (".*", ZigTokenType::DotStar),
461 ];
462
463 for (op, kind) in ops {
464 if rest.starts_with(op) {
465 state.advance(op.len());
466 state.add_token(kind, start, state.get_position());
467 return true;
468 }
469 }
470
471 false
472 }
473
474 fn lex_single_char_tokens<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
476 let start = state.get_position();
477 if let Some(ch) = state.current() {
478 let kind = match ch {
479 '(' => ZigTokenType::LeftParen,
480 ')' => ZigTokenType::RightParen,
481 '{' => ZigTokenType::LeftBrace,
482 '}' => ZigTokenType::RightBrace,
483 '[' => ZigTokenType::LeftBracket,
484 ']' => ZigTokenType::RightBracket,
485 ',' => ZigTokenType::Comma,
486 '.' => ZigTokenType::Dot,
487 ':' => ZigTokenType::Colon,
488 ';' => ZigTokenType::Semicolon,
489 '+' => ZigTokenType::Plus,
490 '-' => ZigTokenType::Minus,
491 '*' => ZigTokenType::Star,
492 '/' => ZigTokenType::Slash,
493 '%' => ZigTokenType::Percent,
494 '&' => ZigTokenType::Ampersand,
495 '|' => ZigTokenType::Pipe,
496 '^' => ZigTokenType::Caret,
497 '~' => ZigTokenType::Tilde,
498 '!' => ZigTokenType::Exclamation,
499 '?' => ZigTokenType::Question,
500 '<' => ZigTokenType::Less,
501 '>' => ZigTokenType::Greater,
502 '=' => ZigTokenType::Assign,
503 _ => return false,
504 };
505 state.advance(1);
506 state.add_token(kind, start, state.get_position());
507 return true;
508 }
509 false
510 }
511}