1use crate::language::CSharpLanguage;
2pub mod token_type;
3use oak_core::{
4 Lexer, LexerCache, LexerState,
5 lexer::LexOutput,
6 source::{Source, TextEdit},
7};
8pub use token_type::CSharpTokenType;
9
/// Shorthand for the `oak_core` lexer state specialised to [`CSharpLanguage`];
/// `S` is the source type being scanned.
type State<'a, S> = LexerState<'a, S, CSharpLanguage>;
11
/// Lexer that turns C# source text into a stream of [`CSharpTokenType`] tokens.
///
/// The lexer borrows its language configuration for the lifetime `'config`.
pub struct CSharpLexer<'config> {
    /// Language configuration. It is stored at construction time; none of the
    /// lexing code visible in this file reads it.
    _config: &'config CSharpLanguage,
}
15
16impl<'config> CSharpLexer<'config> {
17 pub fn new(config: &'config CSharpLanguage) -> Self {
18 Self { _config: config }
19 }
20
21 fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
23 let start_pos = state.get_position();
24
25 while let Some(ch) = state.peek() {
26 if ch == ' ' || ch == '\t' {
27 state.advance(ch.len_utf8());
28 }
29 else {
30 break;
31 }
32 }
33
34 if state.get_position() > start_pos {
35 state.add_token(CSharpTokenType::Whitespace, start_pos, state.get_position());
36 true
37 }
38 else {
39 false
40 }
41 }
42
43 fn lex_newline<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
45 let start_pos = state.get_position();
46
47 if let Some('\n') = state.peek() {
48 state.advance(1);
49 state.add_token(CSharpTokenType::Newline, start_pos, state.get_position());
50 true
51 }
52 else if let Some('\r') = state.peek() {
53 state.advance(1);
54 if let Some('\n') = state.peek() {
55 state.advance(1);
56 }
57 state.add_token(CSharpTokenType::Newline, start_pos, state.get_position());
58 true
59 }
60 else {
61 false
62 }
63 }
64
65 fn lex_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
67 let start_pos = state.get_position();
68
69 if let Some('/') = state.peek() {
70 state.advance(1);
71 if let Some('/') = state.peek() {
72 state.advance(1);
74 while let Some(ch) = state.peek() {
75 if ch == '\n' || ch == '\r' {
76 break;
77 }
78 state.advance(ch.len_utf8());
79 }
80 state.add_token(CSharpTokenType::Comment, start_pos, state.get_position());
81 return true;
82 }
83 else if let Some('*') = state.peek() {
84 state.advance(1);
86 while let Some(ch) = state.peek() {
87 if ch == '*' {
88 state.advance(1);
89 if let Some('/') = state.peek() {
90 state.advance(1);
91 break;
92 }
93 }
94 else {
95 state.advance(ch.len_utf8());
96 }
97 }
98 state.add_token(CSharpTokenType::Comment, start_pos, state.get_position());
99 return true;
100 }
101 else {
102 state.set_position(start_pos);
104 return false;
105 }
106 }
107 false
108 }
109
110 fn lex_string<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
112 let start_pos = state.get_position();
113
114 if let Some('"') = state.peek() {
115 state.advance(1);
116 while let Some(ch) = state.peek() {
117 if ch == '"' {
118 state.advance(1);
119 break;
120 }
121 else if ch == '\\' {
122 state.advance(1);
123 if let Some(_) = state.peek() {
124 state.advance(1);
125 }
126 }
127 else {
128 state.advance(ch.len_utf8());
129 }
130 }
131 state.add_token(CSharpTokenType::StringLiteral, start_pos, state.get_position());
132 true
133 }
134 else if let Some('\'') = state.peek() {
135 state.advance(1);
137 while let Some(ch) = state.peek() {
138 if ch == '\'' {
139 state.advance(1);
140 break;
141 }
142 else if ch == '\\' {
143 state.advance(1);
144 if let Some(_) = state.peek() {
145 state.advance(1);
146 }
147 }
148 else {
149 state.advance(ch.len_utf8());
150 }
151 }
152 state.add_token(CSharpTokenType::CharLiteral, start_pos, state.get_position());
153 true
154 }
155 else {
156 false
157 }
158 }
159
160 fn lex_number<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
162 let start_pos = state.get_position();
163
164 if let Some(ch) = state.peek() {
165 if ch.is_ascii_digit() {
166 state.advance(ch.len_utf8());
167
168 while let Some(ch) = state.peek() {
169 if ch.is_ascii_digit() || ch == '.' || ch == '_' {
170 state.advance(ch.len_utf8());
171 }
172 else {
173 break;
174 }
175 }
176
177 if let Some(ch) = state.peek() {
179 if ch.is_ascii_alphabetic() {
180 state.advance(ch.len_utf8());
181 if let Some(ch2) = state.peek() {
182 if ch2.is_ascii_alphabetic() {
183 state.advance(ch2.len_utf8());
184 }
185 }
186 }
187 }
188
189 state.add_token(CSharpTokenType::NumberLiteral, start_pos, state.get_position());
190 true
191 }
192 else {
193 false
194 }
195 }
196 else {
197 false
198 }
199 }
200
201 fn lex_keyword_or_identifier<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
203 let start_pos = state.get_position();
204
205 if let Some(ch) = state.peek() {
206 if ch.is_ascii_alphabetic() || ch == '_' || ch == '@' {
207 state.advance(ch.len_utf8());
208
209 while let Some(ch) = state.peek() {
210 if ch.is_ascii_alphanumeric() || ch == '_' {
211 state.advance(ch.len_utf8());
212 }
213 else {
214 break;
215 }
216 }
217
218 let text = state.get_text_in((start_pos..state.get_position()).into());
219 let token_kind = match text.as_ref() {
220 "abstract" => CSharpTokenType::Abstract,
222 "as" => CSharpTokenType::As,
223 "base" => CSharpTokenType::Base,
224 "bool" => CSharpTokenType::Bool,
225 "break" => CSharpTokenType::Break,
226 "byte" => CSharpTokenType::Byte,
227 "case" => CSharpTokenType::Case,
228 "catch" => CSharpTokenType::Catch,
229 "char" => CSharpTokenType::Char,
230 "checked" => CSharpTokenType::Checked,
231 "class" => CSharpTokenType::Class,
232 "const" => CSharpTokenType::Const,
233 "continue" => CSharpTokenType::Continue,
234 "decimal" => CSharpTokenType::Decimal,
235 "default" => CSharpTokenType::Default,
236 "delegate" => CSharpTokenType::Delegate,
237 "do" => CSharpTokenType::Do,
238 "double" => CSharpTokenType::Double,
239 "else" => CSharpTokenType::Else,
240 "enum" => CSharpTokenType::Enum,
241 "event" => CSharpTokenType::Event,
242 "explicit" => CSharpTokenType::Explicit,
243 "extern" => CSharpTokenType::Extern,
244 "false" => CSharpTokenType::False,
245 "finally" => CSharpTokenType::Finally,
246 "fixed" => CSharpTokenType::Fixed,
247 "float" => CSharpTokenType::Float,
248 "for" => CSharpTokenType::For,
249 "foreach" => CSharpTokenType::Foreach,
250 "goto" => CSharpTokenType::Goto,
251 "if" => CSharpTokenType::If,
252 "implicit" => CSharpTokenType::Implicit,
253 "in" => CSharpTokenType::In,
254 "int" => CSharpTokenType::Int,
255 "interface" => CSharpTokenType::Interface,
256 "internal" => CSharpTokenType::Internal,
257 "is" => CSharpTokenType::Is,
258 "lock" => CSharpTokenType::Lock,
259 "long" => CSharpTokenType::Long,
260 "namespace" => CSharpTokenType::Namespace,
261 "new" => CSharpTokenType::New,
262 "null" => CSharpTokenType::Null,
263 "object" => CSharpTokenType::Object,
264 "operator" => CSharpTokenType::Operator,
265 "out" => CSharpTokenType::Out,
266 "override" => CSharpTokenType::Override,
267 "params" => CSharpTokenType::Params,
268 "private" => CSharpTokenType::Private,
269 "protected" => CSharpTokenType::Protected,
270 "public" => CSharpTokenType::Public,
271 "readonly" => CSharpTokenType::Readonly,
272 "ref" => CSharpTokenType::Ref,
273 "return" => CSharpTokenType::Return,
274 "sbyte" => CSharpTokenType::Sbyte,
275 "sealed" => CSharpTokenType::Sealed,
276 "short" => CSharpTokenType::Short,
277 "sizeof" => CSharpTokenType::Sizeof,
278 "stackalloc" => CSharpTokenType::Stackalloc,
279 "static" => CSharpTokenType::Static,
280 "string" => CSharpTokenType::String,
281 "struct" => CSharpTokenType::Struct,
282 "switch" => CSharpTokenType::Switch,
283 "this" => CSharpTokenType::This,
284 "throw" => CSharpTokenType::Throw,
285 "true" => CSharpTokenType::True,
286 "try" => CSharpTokenType::Try,
287 "typeof" => CSharpTokenType::Typeof,
288 "uint" => CSharpTokenType::Uint,
289 "ulong" => CSharpTokenType::Ulong,
290 "unchecked" => CSharpTokenType::Unchecked,
291 "unsafe" => CSharpTokenType::Unsafe,
292 "ushort" => CSharpTokenType::Ushort,
293 "using" => CSharpTokenType::Using,
294 "virtual" => CSharpTokenType::Virtual,
295 "void" => CSharpTokenType::Void,
296 "volatile" => CSharpTokenType::Volatile,
297 "while" => CSharpTokenType::While,
298 _ => CSharpTokenType::Identifier,
299 };
300
301 state.add_token(token_kind, start_pos, state.get_position());
302 true
303 }
304 else {
305 false
306 }
307 }
308 else {
309 false
310 }
311 }
312
313 fn lex_operator<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
315 let start_pos = state.get_position();
316
317 if let Some(ch) = state.peek() {
318 let token_kind = match ch {
319 '+' => {
320 state.advance(1);
321 if let Some('=') = state.peek() {
322 state.advance(1);
323 CSharpTokenType::PlusAssign
324 }
325 else if let Some('+') = state.peek() {
326 state.advance(1);
327 CSharpTokenType::Increment
328 }
329 else {
330 CSharpTokenType::Plus
331 }
332 }
333 '-' => {
334 state.advance(1);
335 if let Some('=') = state.peek() {
336 state.advance(1);
337 CSharpTokenType::MinusAssign
338 }
339 else if let Some('-') = state.peek() {
340 state.advance(1);
341 CSharpTokenType::Decrement
342 }
343 else {
344 CSharpTokenType::Minus
345 }
346 }
347 '*' => {
348 state.advance(1);
349 if let Some('=') = state.peek() {
350 state.advance(1);
351 CSharpTokenType::StarAssign
352 }
353 else {
354 CSharpTokenType::Star
355 }
356 }
357 '/' => {
358 state.advance(1);
360 if let Some('=') = state.peek() {
361 state.advance(1);
362 CSharpTokenType::SlashAssign
363 }
364 else {
365 CSharpTokenType::Slash
366 }
367 }
368 '%' => {
369 state.advance(1);
370 if let Some('=') = state.peek() {
371 state.advance(1);
372 CSharpTokenType::PercentAssign
373 }
374 else {
375 CSharpTokenType::Percent
376 }
377 }
378 '=' => {
379 state.advance(1);
380 if let Some('=') = state.peek() {
381 state.advance(1);
382 CSharpTokenType::Equal
383 }
384 else {
385 CSharpTokenType::Assign
386 }
387 }
388 '!' => {
389 state.advance(1);
390 if let Some('=') = state.peek() {
391 state.advance(1);
392 CSharpTokenType::NotEqual
393 }
394 else {
395 CSharpTokenType::LogicalNot
396 }
397 }
398 '<' => {
399 state.advance(1);
400 if let Some('=') = state.peek() {
401 state.advance(1);
402 CSharpTokenType::LessEqual
403 }
404 else if let Some('<') = state.peek() {
405 state.advance(1);
406 CSharpTokenType::LeftShift
407 }
408 else {
409 CSharpTokenType::Less
410 }
411 }
412 '>' => {
413 state.advance(1);
414 if let Some('=') = state.peek() {
415 state.advance(1);
416 CSharpTokenType::GreaterEqual
417 }
418 else if let Some('>') = state.peek() {
419 state.advance(1);
420 CSharpTokenType::RightShift
421 }
422 else {
423 CSharpTokenType::Greater
424 }
425 }
426 '&' => {
427 state.advance(1);
428 if let Some('&') = state.peek() {
429 state.advance(1);
430 CSharpTokenType::LogicalAnd
431 }
432 else {
433 CSharpTokenType::Ampersand
434 }
435 }
436 '|' => {
437 state.advance(1);
438 if let Some('|') = state.peek() {
439 state.advance(1);
440 CSharpTokenType::LogicalOr
441 }
442 else {
443 CSharpTokenType::Pipe
444 }
445 }
446 '^' => {
447 state.advance(1);
448 CSharpTokenType::Caret
449 }
450 '~' => {
451 state.advance(1);
452 CSharpTokenType::Tilde
453 }
454 _ => return false,
455 };
456
457 state.add_token(token_kind, start_pos, state.get_position());
458 true
459 }
460 else {
461 false
462 }
463 }
464
465 fn lex_delimiter<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
467 let start_pos = state.get_position();
468
469 if let Some(ch) = state.peek() {
470 let token_kind = match ch {
471 '(' => CSharpTokenType::LeftParen,
472 ')' => CSharpTokenType::RightParen,
473 '[' => CSharpTokenType::LeftBracket,
474 ']' => CSharpTokenType::RightBracket,
475 '{' => CSharpTokenType::LeftBrace,
476 '}' => CSharpTokenType::RightBrace,
477 ';' => CSharpTokenType::Semicolon,
478 ',' => CSharpTokenType::Comma,
479 '.' => CSharpTokenType::Dot,
480 ':' => CSharpTokenType::Colon,
481 '?' => CSharpTokenType::Question,
482 _ => return false,
483 };
484
485 state.advance(ch.len_utf8());
486 state.add_token(token_kind, start_pos, state.get_position());
487 true
488 }
489 else {
490 false
491 }
492 }
493
494 fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), oak_core::OakError> {
495 while state.not_at_end() {
496 let safe_point = state.get_position();
497
498 if self.skip_whitespace(state) {
499 continue;
500 }
501
502 if self.lex_newline(state) {
503 continue;
504 }
505
506 if self.lex_comment(state) {
507 continue;
508 }
509
510 if self.lex_string(state) {
511 continue;
512 }
513
514 if self.lex_number(state) {
515 continue;
516 }
517
518 if self.lex_keyword_or_identifier(state) {
519 continue;
520 }
521
522 if self.lex_operator(state) {
523 continue;
524 }
525
526 if self.lex_delimiter(state) {
527 continue;
528 }
529
530 let start_pos = state.get_position();
532 if let Some(ch) = state.peek() {
533 state.advance(ch.len_utf8());
534 state.add_token(CSharpTokenType::Error, start_pos, state.get_position());
535 }
536
537 state.advance_if_dead_lock(safe_point);
538 }
539 Ok(())
540 }
541}
542
543impl<'config> Lexer<CSharpLanguage> for CSharpLexer<'config> {
544 fn lex<'a, S: Source + ?Sized>(&self, text: &S, _edits: &[TextEdit], mut cache: &'a mut impl LexerCache<CSharpLanguage>) -> LexOutput<CSharpLanguage> {
545 let mut state = LexerState::new(text);
546 let result = self.run(&mut state);
547 if result.is_ok() {
548 state.add_eof();
549 }
550 state.finish_with_cache(result, &mut cache)
551 }
552}