1#![doc = include_str!("readme.md")]
2use crate::{language::ZigLanguage, lexer::token_type::ZigTokenType};
3pub mod token_type;
4use oak_core::{
5 Lexer, LexerCache, LexerState, OakError, Source,
6 lexer::{LexOutput, WhitespaceConfig},
7};
8use std::sync::LazyLock;
9
/// Lexer state specialised for [`ZigLanguage`]; the scanners in this module all
/// operate on this type.
pub(crate) type State<'a, S> = LexerState<'a, S, ZigLanguage>;

/// Whitespace-scanning configuration used by the lexer. It is built lazily on
/// first access with `unicode_whitespace` enabled.
static ZIG_WHITESPACE: LazyLock<WhitespaceConfig> = LazyLock::new(|| WhitespaceConfig { unicode_whitespace: true });
13
/// Lexer for Zig source text, producing `ZigTokenType` tokens through the
/// `Lexer<ZigLanguage>` implementation.
#[derive(Clone)]
pub struct ZigLexer<'config> {
    /// Language configuration this lexer was constructed with.
    config: &'config ZigLanguage,
}
19
20impl<'config> Lexer<ZigLanguage> for ZigLexer<'config> {
21 fn lex<'a, S: Source + ?Sized>(&self, source: &S, _edits: &[oak_core::source::TextEdit], cache: &'a mut impl LexerCache<ZigLanguage>) -> LexOutput<ZigLanguage> {
22 let mut state = State::new_with_cache(source, 0, cache);
23 let result = self.run(&mut state);
24 if result.is_ok() {
25 state.add_eof()
26 }
27 state.finish_with_cache(result, cache)
28 }
29}
30
31impl<'config> ZigLexer<'config> {
32 pub fn new(config: &'config ZigLanguage) -> Self {
34 Self { config }
35 }
36
37 fn run<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> Result<(), OakError> {
39 while state.not_at_end() {
40 let safe_point = state.get_position();
41
42 if self.skip_whitespace(state) {
43 continue;
44 }
45
46 if self.skip_comment(state) {
47 continue;
48 }
49
50 if self.lex_string_literal(state) {
51 continue;
52 }
53
54 if self.lex_char_literal(state) {
55 continue;
56 }
57
58 if self.lex_number_literal(state) {
59 continue;
60 }
61
62 if self.lex_identifier_or_keyword(state) {
63 continue;
64 }
65
66 if self.lex_builtin(state) {
67 continue;
68 }
69
70 if self.lex_operators(state) {
71 continue;
72 }
73
74 if self.lex_single_char_tokens(state) {
75 continue;
76 }
77
78 let start_pos = state.get_position();
80 if let Some(ch) = state.peek() {
81 state.advance(ch.len_utf8());
82 state.add_token(ZigTokenType::Error, start_pos, state.get_position())
83 }
84
85 state.advance_if_dead_lock(safe_point)
86 }
87
88 Ok(())
89 }
90
91 fn skip_whitespace<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
93 ZIG_WHITESPACE.scan(state, ZigTokenType::Whitespace)
94 }
95
96 fn skip_comment<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
98 let start = state.get_position();
99 let rest = state.rest();
100
101 if rest.starts_with("//") {
103 state.advance(2);
104
105 let is_doc_comment = if state.peek() == Some('/') {
107 state.advance(1);
108 true
109 }
110 else {
111 false
112 };
113
114 while let Some(ch) = state.peek() {
115 if ch == '\n' || ch == '\r' {
116 break;
117 }
118 state.advance(ch.len_utf8())
119 }
120
121 let kind = if is_doc_comment { ZigTokenType::DocComment } else { ZigTokenType::Comment };
122 state.add_token(kind, start, state.get_position());
123 return true;
124 }
125
126 false
127 }
128
129 fn lex_string_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
131 let start = state.get_position();
132
133 if state.rest().starts_with("\\\\") {
135 state.advance(2);
136
137 while let Some(ch) = state.peek() {
139 if ch == '\n' {
140 state.advance(1);
141 break;
142 }
143 state.advance(ch.len_utf8())
144 }
145
146 while state.not_at_end() {
148 let _line_start = state.get_position();
149
150 if !state.rest().starts_with("\\\\") {
152 break;
153 }
154
155 state.advance(2);
156
157 while let Some(ch) = state.peek() {
159 if ch == '\n' {
160 state.advance(1);
161 break;
162 }
163 state.advance(ch.len_utf8())
164 }
165 }
166
167 state.add_token(ZigTokenType::StringLiteral, start, state.get_position());
168 return true;
169 }
170
171 if state.current() == Some('"') {
173 state.advance(1);
174 while let Some(ch) = state.peek() {
175 if ch == '"' {
176 state.advance(1);
177 break;
178 }
179 if ch == '\\' {
180 state.advance(1);
181 if let Some(next) = state.peek() {
182 state.advance(next.len_utf8())
183 }
184 continue;
185 }
186 state.advance(ch.len_utf8())
187 }
188 state.add_token(ZigTokenType::StringLiteral, start, state.get_position());
189 return true;
190 }
191
192 false
193 }
194
195 fn lex_char_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
197 let start = state.get_position();
198 if state.current() == Some('\'') {
199 state.advance(1);
200 while let Some(ch) = state.peek() {
201 if ch == '\'' {
202 state.advance(1);
203 break;
204 }
205 if ch == '\\' {
206 state.advance(1);
207 if let Some(next) = state.peek() {
208 state.advance(next.len_utf8())
209 }
210 continue;
211 }
212 state.advance(ch.len_utf8())
213 }
214 state.add_token(ZigTokenType::CharLiteral, start, state.get_position());
215 return true;
216 }
217 false
218 }
219
220 fn lex_number_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
222 let start = state.get_position();
223 let ch = state.current();
224 let mut is_float = false;
225
226 if let Some(ch) = ch {
227 if ch.is_ascii_digit() {
228 state.advance(1);
229 if ch == '0' {
231 if let Some(next) = state.peek() {
232 match next {
233 'x' | 'X' => {
234 state.advance(1);
235 state.take_while(|c| c.is_ascii_hexdigit() || c == '_');
236 }
237 'b' | 'B' => {
238 state.advance(1);
239 state.take_while(|c| c == '0' || c == '1' || c == '_');
240 }
241 'o' | 'O' => {
242 state.advance(1);
243 state.take_while(|c| ('0'..='7').contains(&c) || c == '_');
244 }
245 _ => {
246 state.take_while(|c| c.is_ascii_digit() || c == '_');
247 }
248 }
249 }
250 }
251 else {
252 state.take_while(|c| c.is_ascii_digit() || c == '_');
253 }
254
255 if state.current() == Some('.') {
257 if let Some(next) = state.peek() {
258 if next.is_ascii_digit() {
259 is_float = true;
260 state.advance(1);
261 state.take_while(|c| c.is_ascii_digit() || c == '_');
262 }
263 }
264 }
265
266 if let Some(c) = state.current() {
268 if c == 'e' || c == 'E' || c == 'p' || c == 'P' {
269 is_float = true;
270 state.advance(1);
271 if let Some(next) = state.peek() {
272 if next == '+' || next == '-' {
273 state.advance(1);
274 }
275 }
276 state.take_while(|c| c.is_ascii_digit() || c == '_');
277 }
278 }
279
280 let kind = if is_float { ZigTokenType::FloatLiteral } else { ZigTokenType::IntegerLiteral };
281 state.add_token(kind, start, state.get_position());
282 return true;
283 }
284 }
285 false
286 }
287
288 fn lex_identifier_or_keyword<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
290 let start = state.get_position();
291 if let Some(ch) = state.current() {
292 if ch.is_ascii_alphabetic() || ch == '_' {
293 state.advance(ch.len_utf8());
294 state.take_while(|c| c.is_ascii_alphanumeric() || c == '_');
295
296 let end = state.get_position();
297 let text = state.get_text_in((start..end).into());
298 let kind = self.get_keyword_or_identifier(&text);
299 state.add_token(kind, start, state.get_position());
300 return true;
301 }
302 }
303 false
304 }
305
306 fn get_keyword_or_identifier(&self, text: &str) -> ZigTokenType {
308 match text {
309 "const" => ZigTokenType::Const,
311 "var" => ZigTokenType::Var,
312 "fn" => ZigTokenType::Fn,
313 "struct" => ZigTokenType::Struct,
314 "union" => ZigTokenType::Union,
315 "enum" => ZigTokenType::Enum,
316 "opaque" => ZigTokenType::Opaque,
317 "type" => ZigTokenType::Type,
318 "comptime" => ZigTokenType::Comptime,
319 "inline" => ZigTokenType::Inline,
320 "noinline" => ZigTokenType::NoInline,
321 "pub" => ZigTokenType::Pub,
322 "export" => ZigTokenType::Export,
323 "extern" => ZigTokenType::Extern,
324 "packed" => ZigTokenType::Packed,
325 "align" => ZigTokenType::Align,
326 "callconv" => ZigTokenType::CallConv,
327 "linksection" => ZigTokenType::LinkSection,
328
329 "if" => ZigTokenType::If,
331 "else" => ZigTokenType::Else,
332 "switch" => ZigTokenType::Switch,
333 "while" => ZigTokenType::While,
334 "for" => ZigTokenType::For,
335 "break" => ZigTokenType::Break,
336 "continue" => ZigTokenType::Continue,
337 "return" => ZigTokenType::Return,
338 "defer" => ZigTokenType::Defer,
339 "errdefer" => ZigTokenType::ErrDefer,
340 "unreachable" => ZigTokenType::Unreachable,
341 "noreturn" => ZigTokenType::NoReturn,
342
343 "try" => ZigTokenType::TryKeyword,
345 "catch" => ZigTokenType::CatchKeyword,
346 "orelse" => ZigTokenType::OrElse,
347 "error" => ZigTokenType::ErrorKeyword,
348
349 "test" => ZigTokenType::Test,
351 "async" => ZigTokenType::Async,
352 "await" => ZigTokenType::AwaitKeyword,
353 "suspend" => ZigTokenType::Suspend,
354 "resume" => ZigTokenType::Resume,
355 "cancel" => ZigTokenType::Cancel,
356
357 "undefined" => ZigTokenType::Undefined,
359 "null" => ZigTokenType::Null,
360 "volatile" => ZigTokenType::Volatile,
361 "allowzero" => ZigTokenType::AllowZero,
362 "noalias" => ZigTokenType::NoAlias,
363
364 "and" => ZigTokenType::And,
366 "or" => ZigTokenType::Or,
367
368 "anyframe" => ZigTokenType::AnyFrame,
370 "anytype" => ZigTokenType::AnyType,
371 "threadlocal" => ZigTokenType::ThreadLocal,
372
373 "bool" => ZigTokenType::Bool,
375 "i8" => ZigTokenType::I8,
376 "i16" => ZigTokenType::I16,
377 "i32" => ZigTokenType::I32,
378 "i64" => ZigTokenType::I64,
379 "i128" => ZigTokenType::I128,
380 "isize" => ZigTokenType::Isize,
381 "u8" => ZigTokenType::U8,
382 "u16" => ZigTokenType::U16,
383 "u32" => ZigTokenType::U32,
384 "u64" => ZigTokenType::U64,
385 "u128" => ZigTokenType::U128,
386 "usize" => ZigTokenType::Usize,
387 "f16" => ZigTokenType::F16,
388 "f32" => ZigTokenType::F32,
389 "f64" => ZigTokenType::F64,
390 "f80" => ZigTokenType::F80,
391 "f128" => ZigTokenType::F128,
392 "c_short" => ZigTokenType::CShort,
393 "c_ushort" => ZigTokenType::CUshort,
394 "c_int" => ZigTokenType::CInt,
395 "c_uint" => ZigTokenType::CUint,
396 "c_long" => ZigTokenType::CLong,
397 "c_ulong" => ZigTokenType::CUlong,
398 "c_longlong" => ZigTokenType::CLongLong,
399 "c_ulonglong" => ZigTokenType::CUlongLong,
400 "c_longdouble" => ZigTokenType::CLongDouble,
401 "c_void" => ZigTokenType::CVoid,
402 "void" => ZigTokenType::Void,
403 "comptime_int" => ZigTokenType::ComptimeInt,
404 "comptime_float" => ZigTokenType::ComptimeFloat,
405
406 "true" | "false" => ZigTokenType::BooleanLiteral,
408
409 _ => ZigTokenType::Identifier,
410 }
411 }
412
413 fn lex_builtin<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
415 let start = state.get_position();
416 if state.current() == Some('@') {
417 state.advance(1);
418 if let Some(ch) = state.current() {
419 if ch.is_ascii_alphabetic() || ch == '_' {
420 state.advance(ch.len_utf8());
421 state.take_while(|c| c.is_ascii_alphanumeric() || c == '_');
422 state.add_token(ZigTokenType::BuiltinIdentifier, start, state.get_position());
423 return true;
424 }
425 }
426 }
427 false
428 }
429
430 fn lex_operators<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
432 let start = state.get_position();
433 let rest = state.rest();
434
435 let ops = [
437 ("<<=", ZigTokenType::LessLessAssign),
438 (">>=", ZigTokenType::GreaterGreaterAssign),
439 ("...", ZigTokenType::DotDotDot),
440 ("==", ZigTokenType::Equal),
441 ("!=", ZigTokenType::NotEqual),
442 ("<=", ZigTokenType::LessEqual),
443 (">=", ZigTokenType::GreaterEqual),
444 ("&&", ZigTokenType::AndAnd),
445 ("||", ZigTokenType::OrOr),
446 ("+=", ZigTokenType::PlusAssign),
447 ("-=", ZigTokenType::MinusAssign),
448 ("*=", ZigTokenType::StarAssign),
449 ("/=", ZigTokenType::SlashAssign),
450 ("%=", ZigTokenType::PercentAssign),
451 ("&=", ZigTokenType::AmpersandAssign),
452 ("|=", ZigTokenType::PipeAssign),
453 ("^=", ZigTokenType::CaretAssign),
454 ("++", ZigTokenType::PlusPlus),
455 ("--", ZigTokenType::MinusMinus),
456 ("**", ZigTokenType::StarStar),
457 ("->", ZigTokenType::Arrow),
458 ("=>", ZigTokenType::FatArrow),
459 ("<<", ZigTokenType::LessLess),
460 (">>", ZigTokenType::GreaterGreater),
461 (".?", ZigTokenType::DotQuestion),
462 (".*", ZigTokenType::DotStar),
463 ];
464
465 for (op, kind) in ops {
466 if rest.starts_with(op) {
467 state.advance(op.len());
468 state.add_token(kind, start, state.get_position());
469 return true;
470 }
471 }
472
473 false
474 }
475
476 fn lex_single_char_tokens<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
478 let start = state.get_position();
479 if let Some(ch) = state.current() {
480 let kind = match ch {
481 '(' => ZigTokenType::LeftParen,
482 ')' => ZigTokenType::RightParen,
483 '{' => ZigTokenType::LeftBrace,
484 '}' => ZigTokenType::RightBrace,
485 '[' => ZigTokenType::LeftBracket,
486 ']' => ZigTokenType::RightBracket,
487 ',' => ZigTokenType::Comma,
488 '.' => ZigTokenType::Dot,
489 ':' => ZigTokenType::Colon,
490 ';' => ZigTokenType::Semicolon,
491 '+' => ZigTokenType::Plus,
492 '-' => ZigTokenType::Minus,
493 '*' => ZigTokenType::Star,
494 '/' => ZigTokenType::Slash,
495 '%' => ZigTokenType::Percent,
496 '&' => ZigTokenType::Ampersand,
497 '|' => ZigTokenType::Pipe,
498 '^' => ZigTokenType::Caret,
499 '~' => ZigTokenType::Tilde,
500 '!' => ZigTokenType::Exclamation,
501 '?' => ZigTokenType::Question,
502 '<' => ZigTokenType::Less,
503 '>' => ZigTokenType::Greater,
504 '=' => ZigTokenType::Assign,
505 _ => return false,
506 };
507 state.advance(1);
508 state.add_token(kind, start, state.get_position());
509 return true;
510 }
511 false
512 }
513}