1#![doc = include_str!("readme.md")]
2use crate::language::CSharpLanguage;
5
6pub mod token_type;
8
9use oak_core::{
10 Lexer, LexerCache, LexerState,
11 lexer::LexOutput,
12 source::{Source, TextEdit},
13};
14pub use token_type::CSharpTokenType;
15
/// Shorthand for the `oak_core` lexer state specialised to [`CSharpLanguage`];
/// every lexing routine in this module takes a `&mut State`.
pub(crate) type State<'a, S> = LexerState<'a, S, CSharpLanguage>;
17
/// Lexer that turns C# source text into a stream of [`CSharpTokenType`] tokens.
///
/// The lexer borrows its language configuration for the lifetime `'config`.
pub struct CSharpLexer<'config> {
    /// Language configuration supplied to [`CSharpLexer::new`].
    config: &'config CSharpLanguage,
}
22
23impl<'config> CSharpLexer<'config> {
24 pub fn new(config: &'config CSharpLanguage) -> Self {
26 Self { config }
27 }
28
29 fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
31 let start_pos = state.get_position();
32
33 while let Some(ch) = state.peek() {
34 if ch == ' ' || ch == '\t' {
35 state.advance(ch.len_utf8());
36 }
37 else {
38 break;
39 }
40 }
41
42 if state.get_position() > start_pos {
43 state.add_token(CSharpTokenType::Whitespace, start_pos, state.get_position());
44 true
45 }
46 else {
47 false
48 }
49 }
50
51 fn lex_newline<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
53 let start_pos = state.get_position();
54
55 if let Some('\n') = state.peek() {
56 state.advance(1);
57 state.add_token(CSharpTokenType::Newline, start_pos, state.get_position());
58 true
59 }
60 else if let Some('\r') = state.peek() {
61 state.advance(1);
62 if let Some('\n') = state.peek() {
63 state.advance(1);
64 }
65 state.add_token(CSharpTokenType::Newline, start_pos, state.get_position());
66 true
67 }
68 else {
69 false
70 }
71 }
72
73 fn lex_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
75 let start_pos = state.get_position();
76
77 if let Some('/') = state.peek() {
78 state.advance(1);
79 if let Some('/') = state.peek() {
80 state.advance(1);
82 while let Some(ch) = state.peek() {
83 if ch == '\n' || ch == '\r' {
84 break;
85 }
86 state.advance(ch.len_utf8());
87 }
88 state.add_token(CSharpTokenType::Comment, start_pos, state.get_position());
89 return true;
90 }
91 else if let Some('*') = state.peek() {
92 state.advance(1);
94 while let Some(ch) = state.peek() {
95 if ch == '*' {
96 state.advance(1);
97 if let Some('/') = state.peek() {
98 state.advance(1);
99 break;
100 }
101 }
102 else {
103 state.advance(ch.len_utf8());
104 }
105 }
106 state.add_token(CSharpTokenType::Comment, start_pos, state.get_position());
107 return true;
108 }
109 else {
110 state.set_position(start_pos);
112 return false;
113 }
114 }
115 false
116 }
117
118 fn lex_string<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
121 let start_pos = state.get_position();
122
123 if let Some('"') = state.peek() {
124 state.advance(1);
125 while let Some(ch) = state.peek() {
126 if ch == '"' {
127 state.advance(1);
128 break;
129 }
130 else if ch == '\\' {
131 state.advance(1);
132 if let Some(_) = state.peek() {
133 state.advance(1)
134 }
135 }
136 else {
137 state.advance(ch.len_utf8())
138 }
139 }
140 state.add_token(CSharpTokenType::StringLiteral, start_pos, state.get_position());
141 true
142 }
143 else if let Some('\'') = state.peek() {
144 state.advance(1);
146 while let Some(ch) = state.peek() {
147 if ch == '\'' {
148 state.advance(1);
149 break;
150 }
151 else if ch == '\\' {
152 state.advance(1);
153 if let Some(_) = state.peek() {
154 state.advance(1)
155 }
156 }
157 else {
158 state.advance(ch.len_utf8())
159 }
160 }
161 state.add_token(CSharpTokenType::CharLiteral, start_pos, state.get_position());
162 true
163 }
164 else {
165 false
166 }
167 }
168
169 fn lex_number<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
177 let start_pos = state.get_position();
178
179 if let Some(ch) = state.peek() {
180 if ch.is_ascii_digit() {
181 state.advance(ch.len_utf8());
182
183 while let Some(ch) = state.peek() {
184 if ch.is_ascii_digit() || ch == '.' || ch == '_' { state.advance(ch.len_utf8()) } else { break }
185 }
186
187 if let Some(ch) = state.peek() {
189 if ch.is_ascii_alphabetic() {
190 state.advance(ch.len_utf8());
191 if let Some(ch2) = state.peek() {
192 if ch2.is_ascii_alphabetic() {
193 state.advance(ch2.len_utf8())
194 }
195 }
196 }
197 }
198
199 state.add_token(CSharpTokenType::NumberLiteral, start_pos, state.get_position());
200 true
201 }
202 else {
203 false
204 }
205 }
206 else {
207 false
208 }
209 }
210
211 fn lex_keyword_or_identifier<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
219 let start_pos = state.get_position();
220
221 if let Some(ch) = state.peek() {
222 if ch.is_ascii_alphabetic() || ch == '_' || ch == '@' {
223 state.advance(ch.len_utf8());
224
225 while let Some(ch) = state.peek() {
226 if ch.is_ascii_alphanumeric() || ch == '_' { state.advance(ch.len_utf8()) } else { break }
227 }
228
229 let text = state.get_text_in((start_pos..state.get_position()).into());
230 let token_kind = match text.as_ref() {
231 "abstract" => CSharpTokenType::Abstract,
233 "as" => CSharpTokenType::As,
234 "async" => CSharpTokenType::AsyncKeyword,
235 "await" => CSharpTokenType::AwaitKeyword,
236 "base" => CSharpTokenType::Base,
237 "bool" => CSharpTokenType::Bool,
238 "break" => CSharpTokenType::Break,
239 "byte" => CSharpTokenType::Byte,
240 "case" => CSharpTokenType::Case,
241 "catch" => CSharpTokenType::Catch,
242 "char" => CSharpTokenType::Char,
243 "checked" => CSharpTokenType::Checked,
244 "class" => CSharpTokenType::Class,
245 "const" => CSharpTokenType::Const,
246 "continue" => CSharpTokenType::Continue,
247 "decimal" => CSharpTokenType::Decimal,
248 "default" => CSharpTokenType::Default,
249 "delegate" => CSharpTokenType::Delegate,
250 "do" => CSharpTokenType::Do,
251 "double" => CSharpTokenType::Double,
252 "else" => CSharpTokenType::Else,
253 "enum" => CSharpTokenType::Enum,
254 "event" => CSharpTokenType::Event,
255 "explicit" => CSharpTokenType::Explicit,
256 "extern" => CSharpTokenType::Extern,
257 "false" => CSharpTokenType::False,
258 "finally" => CSharpTokenType::Finally,
259 "fixed" => CSharpTokenType::Fixed,
260 "float" => CSharpTokenType::Float,
261 "for" => CSharpTokenType::For,
262 "foreach" => CSharpTokenType::Foreach,
263 "goto" => CSharpTokenType::Goto,
264 "if" => CSharpTokenType::If,
265 "implicit" => CSharpTokenType::Implicit,
266 "in" => CSharpTokenType::In,
267 "int" => CSharpTokenType::Int,
268 "interface" => CSharpTokenType::Interface,
269 "internal" => CSharpTokenType::Internal,
270 "is" => CSharpTokenType::Is,
271 "lock" => CSharpTokenType::Lock,
272 "long" => CSharpTokenType::Long,
273 "namespace" => CSharpTokenType::Namespace,
274 "new" => CSharpTokenType::New,
275 "null" => CSharpTokenType::Null,
276 "object" => CSharpTokenType::Object,
277 "operator" => CSharpTokenType::Operator,
278 "out" => CSharpTokenType::Out,
279 "override" => CSharpTokenType::Override,
280 "params" => CSharpTokenType::Params,
281 "private" => CSharpTokenType::Private,
282 "protected" => CSharpTokenType::Protected,
283 "public" => CSharpTokenType::Public,
284 "readonly" => CSharpTokenType::Readonly,
285 "record" => CSharpTokenType::Record,
286 "ref" => CSharpTokenType::Ref,
287 "return" => CSharpTokenType::Return,
288 "sbyte" => CSharpTokenType::Sbyte,
289 "sealed" => CSharpTokenType::Sealed,
290 "short" => CSharpTokenType::Short,
291 "sizeof" => CSharpTokenType::Sizeof,
292 "stackalloc" => CSharpTokenType::Stackalloc,
293 "static" => CSharpTokenType::Static,
294 "string" => CSharpTokenType::String,
295 "struct" => CSharpTokenType::Struct,
296 "switch" => CSharpTokenType::Switch,
297 "this" => CSharpTokenType::This,
298 "throw" => CSharpTokenType::Throw,
299 "true" => CSharpTokenType::True,
300 "try" => CSharpTokenType::Try,
301 "typeof" => CSharpTokenType::Typeof,
302 "uint" => CSharpTokenType::Uint,
303 "ulong" => CSharpTokenType::Ulong,
304 "unchecked" => CSharpTokenType::Unchecked,
305 "unsafe" => CSharpTokenType::Unsafe,
306 "ushort" => CSharpTokenType::Ushort,
307 "using" => CSharpTokenType::Using,
308 "virtual" => CSharpTokenType::Virtual,
309 "void" => CSharpTokenType::Void,
310 "volatile" => CSharpTokenType::Volatile,
311 "while" => CSharpTokenType::While,
312 _ => CSharpTokenType::Identifier,
313 };
314
315 state.add_token(token_kind, start_pos, state.get_position());
316 true
317 }
318 else {
319 false
320 }
321 }
322 else {
323 false
324 }
325 }
326
327 fn lex_operator<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
337 let start_pos = state.get_position();
338
339 if let Some(ch) = state.peek() {
340 let token_kind = match ch {
341 '+' => {
342 state.advance(1);
343 if let Some('=') = state.peek() {
344 state.advance(1);
345 CSharpTokenType::PlusAssign
346 }
347 else if let Some('+') = state.peek() {
348 state.advance(1);
349 CSharpTokenType::Increment
350 }
351 else {
352 CSharpTokenType::Plus
353 }
354 }
355 '-' => {
356 state.advance(1);
357 if let Some('=') = state.peek() {
358 state.advance(1);
359 CSharpTokenType::MinusAssign
360 }
361 else if let Some('-') = state.peek() {
362 state.advance(1);
363 CSharpTokenType::Decrement
364 }
365 else {
366 CSharpTokenType::Minus
367 }
368 }
369 '*' => {
370 state.advance(1);
371 if let Some('=') = state.peek() {
372 state.advance(1);
373 CSharpTokenType::StarAssign
374 }
375 else {
376 CSharpTokenType::Star
377 }
378 }
379 '/' => {
380 state.advance(1);
382 if let Some('=') = state.peek() {
383 state.advance(1);
384 CSharpTokenType::SlashAssign
385 }
386 else {
387 CSharpTokenType::Slash
388 }
389 }
390 '%' => {
391 state.advance(1);
392 if let Some('=') = state.peek() {
393 state.advance(1);
394 CSharpTokenType::PercentAssign
395 }
396 else {
397 CSharpTokenType::Percent
398 }
399 }
400 '=' => {
401 state.advance(1);
402 if let Some('=') = state.peek() {
403 state.advance(1);
404 CSharpTokenType::Equal
405 }
406 else {
407 CSharpTokenType::Assign
408 }
409 }
410 '!' => {
411 state.advance(1);
412 if let Some('=') = state.peek() {
413 state.advance(1);
414 CSharpTokenType::NotEqual
415 }
416 else {
417 CSharpTokenType::LogicalNot
418 }
419 }
420 '<' => {
421 state.advance(1);
422 if let Some('=') = state.peek() {
423 state.advance(1);
424 CSharpTokenType::LessEqual
425 }
426 else if let Some('<') = state.peek() {
427 state.advance(1);
428 CSharpTokenType::LeftShift
429 }
430 else {
431 CSharpTokenType::Less
432 }
433 }
434 '>' => {
435 state.advance(1);
436 if let Some('=') = state.peek() {
437 state.advance(1);
438 CSharpTokenType::GreaterEqual
439 }
440 else if let Some('>') = state.peek() {
441 state.advance(1);
442 CSharpTokenType::RightShift
443 }
444 else {
445 CSharpTokenType::Greater
446 }
447 }
448 '&' => {
449 state.advance(1);
450 if let Some('&') = state.peek() {
451 state.advance(1);
452 CSharpTokenType::LogicalAnd
453 }
454 else {
455 CSharpTokenType::Ampersand
456 }
457 }
458 '|' => {
459 state.advance(1);
460 if let Some('|') = state.peek() {
461 state.advance(1);
462 CSharpTokenType::LogicalOr
463 }
464 else {
465 CSharpTokenType::Pipe
466 }
467 }
468 '^' => {
469 state.advance(1);
470 CSharpTokenType::Caret
471 }
472 '~' => {
473 state.advance(1);
474 CSharpTokenType::Tilde
475 }
476 _ => return false,
477 };
478
479 state.add_token(token_kind, start_pos, state.get_position());
480 true
481 }
482 else {
483 false
484 }
485 }
486
487 fn lex_delimiter<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
495 let start_pos = state.get_position();
496
497 if let Some(ch) = state.peek() {
498 let token_kind = match ch {
499 '(' => CSharpTokenType::LeftParen,
500 ')' => CSharpTokenType::RightParen,
501 '[' => CSharpTokenType::LeftBracket,
502 ']' => CSharpTokenType::RightBracket,
503 '{' => CSharpTokenType::LeftBrace,
504 '}' => CSharpTokenType::RightBrace,
505 ';' => CSharpTokenType::Semicolon,
506 ',' => CSharpTokenType::Comma,
507 '.' => CSharpTokenType::Dot,
508 ':' => CSharpTokenType::Colon,
509 '?' => CSharpTokenType::Question,
510 _ => return false,
511 };
512
513 state.advance(ch.len_utf8());
514 state.add_token(token_kind, start_pos, state.get_position());
515 true
516 }
517 else {
518 false
519 }
520 }
521
522 fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), oak_core::OakError> {
528 while state.not_at_end() {
529 let safe_point = state.get_position();
530
531 if self.skip_whitespace(state) {
532 continue;
533 }
534
535 if self.lex_newline(state) {
536 continue;
537 }
538
539 if self.lex_comment(state) {
540 continue;
541 }
542
543 if self.lex_string(state) {
544 continue;
545 }
546
547 if self.lex_number(state) {
548 continue;
549 }
550
551 if self.lex_keyword_or_identifier(state) {
552 continue;
553 }
554
555 if self.lex_operator(state) {
556 continue;
557 }
558
559 if self.lex_delimiter(state) {
560 continue;
561 }
562
563 let start_pos = state.get_position();
565 if let Some(ch) = state.peek() {
566 state.advance(ch.len_utf8());
567 state.add_token(CSharpTokenType::Error, start_pos, state.get_position());
568 }
569
570 state.advance_if_dead_lock(safe_point)
571 }
572 Ok(())
573 }
574}
575
576impl<'config> Lexer<CSharpLanguage> for CSharpLexer<'config> {
577 fn lex<'a, S: Source + ?Sized>(&self, text: &S, _edits: &[TextEdit], mut cache: &'a mut impl LexerCache<CSharpLanguage>) -> LexOutput<CSharpLanguage> {
578 let mut state = LexerState::new(text);
579 let result = self.run(&mut state);
580 if result.is_ok() {
581 state.add_eof();
582 }
583 state.finish_with_cache(result, &mut cache)
584 }
585}