1#![doc = include_str!("readme.md")]
2
3use crate::language::CSharpLanguage;
4
5pub mod token_type;
7
8use oak_core::{
9 Lexer, LexerCache, LexerState,
10 lexer::LexOutput,
11 source::{Source, TextEdit},
12};
13pub use token_type::CSharpTokenType;
14
/// Lexer state specialised to [`CSharpLanguage`]; the cursor type taken by every
/// lexing routine in this module.
pub(crate) type State<'a, S> = LexerState<'a, S, CSharpLanguage>;
16
/// Lexer for C# source text.
///
/// Borrows a [`CSharpLanguage`] configuration for the lifetime `'config`.
pub struct CSharpLexer<'config> {
    /// Language configuration supplied to [`CSharpLexer::new`].
    config: &'config CSharpLanguage,
}
21
22impl<'config> CSharpLexer<'config> {
23 pub fn new(config: &'config CSharpLanguage) -> Self {
25 Self { config }
26 }
27
28 fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
30 let start_pos = state.get_position();
31
32 while let Some(ch) = state.peek() {
33 if ch == ' ' || ch == '\t' {
34 state.advance(ch.len_utf8());
35 }
36 else {
37 break;
38 }
39 }
40
41 if state.get_position() > start_pos {
42 state.add_token(CSharpTokenType::Whitespace, start_pos, state.get_position());
43 true
44 }
45 else {
46 false
47 }
48 }
49
50 fn lex_newline<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
52 let start_pos = state.get_position();
53
54 if let Some('\n') = state.peek() {
55 state.advance(1);
56 state.add_token(CSharpTokenType::Newline, start_pos, state.get_position());
57 true
58 }
59 else if let Some('\r') = state.peek() {
60 state.advance(1);
61 if let Some('\n') = state.peek() {
62 state.advance(1);
63 }
64 state.add_token(CSharpTokenType::Newline, start_pos, state.get_position());
65 true
66 }
67 else {
68 false
69 }
70 }
71
72 fn lex_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
74 let start_pos = state.get_position();
75
76 if let Some('/') = state.peek() {
77 state.advance(1);
78 if let Some('/') = state.peek() {
79 state.advance(1);
81 while let Some(ch) = state.peek() {
82 if ch == '\n' || ch == '\r' {
83 break;
84 }
85 state.advance(ch.len_utf8());
86 }
87 state.add_token(CSharpTokenType::Comment, start_pos, state.get_position());
88 return true;
89 }
90 else if let Some('*') = state.peek() {
91 state.advance(1);
93 while let Some(ch) = state.peek() {
94 if ch == '*' {
95 state.advance(1);
96 if let Some('/') = state.peek() {
97 state.advance(1);
98 break;
99 }
100 }
101 else {
102 state.advance(ch.len_utf8());
103 }
104 }
105 state.add_token(CSharpTokenType::Comment, start_pos, state.get_position());
106 return true;
107 }
108 else {
109 state.set_position(start_pos);
111 return false;
112 }
113 }
114 false
115 }
116
117 fn lex_string<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
120 let start_pos = state.get_position();
121
122 if let Some('"') = state.peek() {
123 state.advance(1);
124 while let Some(ch) = state.peek() {
125 if ch == '"' {
126 state.advance(1);
127 break;
128 }
129 else if ch == '\\' {
130 state.advance(1);
131 if let Some(_) = state.peek() {
132 state.advance(1)
133 }
134 }
135 else {
136 state.advance(ch.len_utf8())
137 }
138 }
139 state.add_token(CSharpTokenType::StringLiteral, start_pos, state.get_position());
140 true
141 }
142 else if let Some('\'') = state.peek() {
143 state.advance(1);
145 while let Some(ch) = state.peek() {
146 if ch == '\'' {
147 state.advance(1);
148 break;
149 }
150 else if ch == '\\' {
151 state.advance(1);
152 if let Some(_) = state.peek() {
153 state.advance(1)
154 }
155 }
156 else {
157 state.advance(ch.len_utf8())
158 }
159 }
160 state.add_token(CSharpTokenType::CharLiteral, start_pos, state.get_position());
161 true
162 }
163 else {
164 false
165 }
166 }
167
168 fn lex_number<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
176 let start_pos = state.get_position();
177
178 if let Some(ch) = state.peek() {
179 if ch.is_ascii_digit() {
180 state.advance(ch.len_utf8());
181
182 while let Some(ch) = state.peek() {
183 if ch.is_ascii_digit() || ch == '.' || ch == '_' { state.advance(ch.len_utf8()) } else { break }
184 }
185
186 if let Some(ch) = state.peek() {
188 if ch.is_ascii_alphabetic() {
189 state.advance(ch.len_utf8());
190 if let Some(ch2) = state.peek() {
191 if ch2.is_ascii_alphabetic() {
192 state.advance(ch2.len_utf8())
193 }
194 }
195 }
196 }
197
198 state.add_token(CSharpTokenType::NumberLiteral, start_pos, state.get_position());
199 true
200 }
201 else {
202 false
203 }
204 }
205 else {
206 false
207 }
208 }
209
210 fn lex_keyword_or_identifier<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
218 let start_pos = state.get_position();
219
220 if let Some(ch) = state.peek() {
221 if ch.is_ascii_alphabetic() || ch == '_' || ch == '@' {
222 state.advance(ch.len_utf8());
223
224 while let Some(ch) = state.peek() {
225 if ch.is_ascii_alphanumeric() || ch == '_' { state.advance(ch.len_utf8()) } else { break }
226 }
227
228 let text = state.get_text_in((start_pos..state.get_position()).into());
229 let token_kind = match text.as_ref() {
230 "abstract" => CSharpTokenType::Abstract,
232 "as" => CSharpTokenType::As,
233 "async" => CSharpTokenType::AsyncKeyword,
234 "await" => CSharpTokenType::AwaitKeyword,
235 "base" => CSharpTokenType::Base,
236 "bool" => CSharpTokenType::Bool,
237 "break" => CSharpTokenType::Break,
238 "byte" => CSharpTokenType::Byte,
239 "case" => CSharpTokenType::Case,
240 "catch" => CSharpTokenType::Catch,
241 "char" => CSharpTokenType::Char,
242 "checked" => CSharpTokenType::Checked,
243 "class" => CSharpTokenType::Class,
244 "const" => CSharpTokenType::Const,
245 "continue" => CSharpTokenType::Continue,
246 "decimal" => CSharpTokenType::Decimal,
247 "default" => CSharpTokenType::Default,
248 "delegate" => CSharpTokenType::Delegate,
249 "do" => CSharpTokenType::Do,
250 "double" => CSharpTokenType::Double,
251 "else" => CSharpTokenType::Else,
252 "enum" => CSharpTokenType::Enum,
253 "event" => CSharpTokenType::Event,
254 "explicit" => CSharpTokenType::Explicit,
255 "extern" => CSharpTokenType::Extern,
256 "false" => CSharpTokenType::False,
257 "finally" => CSharpTokenType::Finally,
258 "fixed" => CSharpTokenType::Fixed,
259 "float" => CSharpTokenType::Float,
260 "for" => CSharpTokenType::For,
261 "foreach" => CSharpTokenType::Foreach,
262 "goto" => CSharpTokenType::Goto,
263 "if" => CSharpTokenType::If,
264 "implicit" => CSharpTokenType::Implicit,
265 "in" => CSharpTokenType::In,
266 "int" => CSharpTokenType::Int,
267 "interface" => CSharpTokenType::Interface,
268 "internal" => CSharpTokenType::Internal,
269 "is" => CSharpTokenType::Is,
270 "lock" => CSharpTokenType::Lock,
271 "long" => CSharpTokenType::Long,
272 "namespace" => CSharpTokenType::Namespace,
273 "new" => CSharpTokenType::New,
274 "null" => CSharpTokenType::Null,
275 "object" => CSharpTokenType::Object,
276 "operator" => CSharpTokenType::Operator,
277 "out" => CSharpTokenType::Out,
278 "override" => CSharpTokenType::Override,
279 "params" => CSharpTokenType::Params,
280 "private" => CSharpTokenType::Private,
281 "protected" => CSharpTokenType::Protected,
282 "public" => CSharpTokenType::Public,
283 "readonly" => CSharpTokenType::Readonly,
284 "record" => CSharpTokenType::Record,
285 "ref" => CSharpTokenType::Ref,
286 "return" => CSharpTokenType::Return,
287 "sbyte" => CSharpTokenType::Sbyte,
288 "sealed" => CSharpTokenType::Sealed,
289 "short" => CSharpTokenType::Short,
290 "sizeof" => CSharpTokenType::Sizeof,
291 "stackalloc" => CSharpTokenType::Stackalloc,
292 "static" => CSharpTokenType::Static,
293 "string" => CSharpTokenType::String,
294 "struct" => CSharpTokenType::Struct,
295 "switch" => CSharpTokenType::Switch,
296 "this" => CSharpTokenType::This,
297 "throw" => CSharpTokenType::Throw,
298 "true" => CSharpTokenType::True,
299 "try" => CSharpTokenType::Try,
300 "typeof" => CSharpTokenType::Typeof,
301 "uint" => CSharpTokenType::Uint,
302 "ulong" => CSharpTokenType::Ulong,
303 "unchecked" => CSharpTokenType::Unchecked,
304 "unsafe" => CSharpTokenType::Unsafe,
305 "ushort" => CSharpTokenType::Ushort,
306 "using" => CSharpTokenType::Using,
307 "virtual" => CSharpTokenType::Virtual,
308 "void" => CSharpTokenType::Void,
309 "volatile" => CSharpTokenType::Volatile,
310 "while" => CSharpTokenType::While,
311 _ => CSharpTokenType::Identifier,
312 };
313
314 state.add_token(token_kind, start_pos, state.get_position());
315 true
316 }
317 else {
318 false
319 }
320 }
321 else {
322 false
323 }
324 }
325
326 fn lex_operator<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
336 let start_pos = state.get_position();
337
338 if let Some(ch) = state.peek() {
339 let token_kind = match ch {
340 '+' => {
341 state.advance(1);
342 if let Some('=') = state.peek() {
343 state.advance(1);
344 CSharpTokenType::PlusAssign
345 }
346 else if let Some('+') = state.peek() {
347 state.advance(1);
348 CSharpTokenType::Increment
349 }
350 else {
351 CSharpTokenType::Plus
352 }
353 }
354 '-' => {
355 state.advance(1);
356 if let Some('=') = state.peek() {
357 state.advance(1);
358 CSharpTokenType::MinusAssign
359 }
360 else if let Some('-') = state.peek() {
361 state.advance(1);
362 CSharpTokenType::Decrement
363 }
364 else {
365 CSharpTokenType::Minus
366 }
367 }
368 '*' => {
369 state.advance(1);
370 if let Some('=') = state.peek() {
371 state.advance(1);
372 CSharpTokenType::StarAssign
373 }
374 else {
375 CSharpTokenType::Star
376 }
377 }
378 '/' => {
379 state.advance(1);
381 if let Some('=') = state.peek() {
382 state.advance(1);
383 CSharpTokenType::SlashAssign
384 }
385 else {
386 CSharpTokenType::Slash
387 }
388 }
389 '%' => {
390 state.advance(1);
391 if let Some('=') = state.peek() {
392 state.advance(1);
393 CSharpTokenType::PercentAssign
394 }
395 else {
396 CSharpTokenType::Percent
397 }
398 }
399 '=' => {
400 state.advance(1);
401 if let Some('=') = state.peek() {
402 state.advance(1);
403 CSharpTokenType::Equal
404 }
405 else {
406 CSharpTokenType::Assign
407 }
408 }
409 '!' => {
410 state.advance(1);
411 if let Some('=') = state.peek() {
412 state.advance(1);
413 CSharpTokenType::NotEqual
414 }
415 else {
416 CSharpTokenType::LogicalNot
417 }
418 }
419 '<' => {
420 state.advance(1);
421 if let Some('=') = state.peek() {
422 state.advance(1);
423 CSharpTokenType::LessEqual
424 }
425 else if let Some('<') = state.peek() {
426 state.advance(1);
427 CSharpTokenType::LeftShift
428 }
429 else {
430 CSharpTokenType::Less
431 }
432 }
433 '>' => {
434 state.advance(1);
435 if let Some('=') = state.peek() {
436 state.advance(1);
437 CSharpTokenType::GreaterEqual
438 }
439 else if let Some('>') = state.peek() {
440 state.advance(1);
441 CSharpTokenType::RightShift
442 }
443 else {
444 CSharpTokenType::Greater
445 }
446 }
447 '&' => {
448 state.advance(1);
449 if let Some('&') = state.peek() {
450 state.advance(1);
451 CSharpTokenType::LogicalAnd
452 }
453 else {
454 CSharpTokenType::Ampersand
455 }
456 }
457 '|' => {
458 state.advance(1);
459 if let Some('|') = state.peek() {
460 state.advance(1);
461 CSharpTokenType::LogicalOr
462 }
463 else {
464 CSharpTokenType::Pipe
465 }
466 }
467 '^' => {
468 state.advance(1);
469 CSharpTokenType::Caret
470 }
471 '~' => {
472 state.advance(1);
473 CSharpTokenType::Tilde
474 }
475 _ => return false,
476 };
477
478 state.add_token(token_kind, start_pos, state.get_position());
479 true
480 }
481 else {
482 false
483 }
484 }
485
486 fn lex_delimiter<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
494 let start_pos = state.get_position();
495
496 if let Some(ch) = state.peek() {
497 let token_kind = match ch {
498 '(' => CSharpTokenType::LeftParen,
499 ')' => CSharpTokenType::RightParen,
500 '[' => CSharpTokenType::LeftBracket,
501 ']' => CSharpTokenType::RightBracket,
502 '{' => CSharpTokenType::LeftBrace,
503 '}' => CSharpTokenType::RightBrace,
504 ';' => CSharpTokenType::Semicolon,
505 ',' => CSharpTokenType::Comma,
506 '.' => CSharpTokenType::Dot,
507 ':' => CSharpTokenType::Colon,
508 '?' => CSharpTokenType::Question,
509 _ => return false,
510 };
511
512 state.advance(ch.len_utf8());
513 state.add_token(token_kind, start_pos, state.get_position());
514 true
515 }
516 else {
517 false
518 }
519 }
520
521 fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), oak_core::OakError> {
527 while state.not_at_end() {
528 let safe_point = state.get_position();
529
530 if self.skip_whitespace(state) {
531 continue;
532 }
533
534 if self.lex_newline(state) {
535 continue;
536 }
537
538 if self.lex_comment(state) {
539 continue;
540 }
541
542 if self.lex_string(state) {
543 continue;
544 }
545
546 if self.lex_number(state) {
547 continue;
548 }
549
550 if self.lex_keyword_or_identifier(state) {
551 continue;
552 }
553
554 if self.lex_operator(state) {
555 continue;
556 }
557
558 if self.lex_delimiter(state) {
559 continue;
560 }
561
562 let start_pos = state.get_position();
564 if let Some(ch) = state.peek() {
565 state.advance(ch.len_utf8());
566 state.add_token(CSharpTokenType::Error, start_pos, state.get_position());
567 }
568
569 state.advance_if_dead_lock(safe_point)
570 }
571 Ok(())
572 }
573}
574
575impl<'config> Lexer<CSharpLanguage> for CSharpLexer<'config> {
576 fn lex<'a, S: Source + ?Sized>(&self, text: &S, _edits: &[TextEdit], mut cache: &'a mut impl LexerCache<CSharpLanguage>) -> LexOutput<CSharpLanguage> {
577 let mut state = LexerState::new(text);
578 let result = self.run(&mut state);
579 if result.is_ok() {
580 state.add_eof();
581 }
582 state.finish_with_cache(result, &mut cache)
583 }
584}