1#![doc = include_str!("readme.md")]
2use crate::language::CSharpLanguage;
3pub mod token_type;
4use oak_core::{
5 Lexer, LexerCache, LexerState,
6 lexer::LexOutput,
7 source::{Source, TextEdit},
8};
9pub use token_type::CSharpTokenType;
10
11type State<'a, S> = LexerState<'a, S, CSharpLanguage>;
12
/// Hand-written lexer that splits C# source text into [`CSharpTokenType`] tokens.
///
/// The lexer borrows its language configuration for the lifetime `'config`.
pub struct CSharpLexer<'config> {
    /// Language configuration supplied at construction. It is stored but not
    /// read by any lexing routine in this file (hence the leading underscore).
    _config: &'config CSharpLanguage,
}
16
17impl<'config> CSharpLexer<'config> {
18 pub fn new(config: &'config CSharpLanguage) -> Self {
19 Self { _config: config }
20 }
21
22 fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
24 let start_pos = state.get_position();
25
26 while let Some(ch) = state.peek() {
27 if ch == ' ' || ch == '\t' {
28 state.advance(ch.len_utf8());
29 }
30 else {
31 break;
32 }
33 }
34
35 if state.get_position() > start_pos {
36 state.add_token(CSharpTokenType::Whitespace, start_pos, state.get_position());
37 true
38 }
39 else {
40 false
41 }
42 }
43
44 fn lex_newline<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
46 let start_pos = state.get_position();
47
48 if let Some('\n') = state.peek() {
49 state.advance(1);
50 state.add_token(CSharpTokenType::Newline, start_pos, state.get_position());
51 true
52 }
53 else if let Some('\r') = state.peek() {
54 state.advance(1);
55 if let Some('\n') = state.peek() {
56 state.advance(1);
57 }
58 state.add_token(CSharpTokenType::Newline, start_pos, state.get_position());
59 true
60 }
61 else {
62 false
63 }
64 }
65
66 fn lex_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
68 let start_pos = state.get_position();
69
70 if let Some('/') = state.peek() {
71 state.advance(1);
72 if let Some('/') = state.peek() {
73 state.advance(1);
75 while let Some(ch) = state.peek() {
76 if ch == '\n' || ch == '\r' {
77 break;
78 }
79 state.advance(ch.len_utf8());
80 }
81 state.add_token(CSharpTokenType::Comment, start_pos, state.get_position());
82 return true;
83 }
84 else if let Some('*') = state.peek() {
85 state.advance(1);
87 while let Some(ch) = state.peek() {
88 if ch == '*' {
89 state.advance(1);
90 if let Some('/') = state.peek() {
91 state.advance(1);
92 break;
93 }
94 }
95 else {
96 state.advance(ch.len_utf8());
97 }
98 }
99 state.add_token(CSharpTokenType::Comment, start_pos, state.get_position());
100 return true;
101 }
102 else {
103 state.set_position(start_pos);
105 return false;
106 }
107 }
108 false
109 }
110
111 fn lex_string<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
113 let start_pos = state.get_position();
114
115 if let Some('"') = state.peek() {
116 state.advance(1);
117 while let Some(ch) = state.peek() {
118 if ch == '"' {
119 state.advance(1);
120 break;
121 }
122 else if ch == '\\' {
123 state.advance(1);
124 if let Some(_) = state.peek() {
125 state.advance(1)
126 }
127 }
128 else {
129 state.advance(ch.len_utf8())
130 }
131 }
132 state.add_token(CSharpTokenType::StringLiteral, start_pos, state.get_position());
133 true
134 }
135 else if let Some('\'') = state.peek() {
136 state.advance(1);
138 while let Some(ch) = state.peek() {
139 if ch == '\'' {
140 state.advance(1);
141 break;
142 }
143 else if ch == '\\' {
144 state.advance(1);
145 if let Some(_) = state.peek() {
146 state.advance(1)
147 }
148 }
149 else {
150 state.advance(ch.len_utf8())
151 }
152 }
153 state.add_token(CSharpTokenType::CharLiteral, start_pos, state.get_position());
154 true
155 }
156 else {
157 false
158 }
159 }
160
161 fn lex_number<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
163 let start_pos = state.get_position();
164
165 if let Some(ch) = state.peek() {
166 if ch.is_ascii_digit() {
167 state.advance(ch.len_utf8());
168
169 while let Some(ch) = state.peek() {
170 if ch.is_ascii_digit() || ch == '.' || ch == '_' { state.advance(ch.len_utf8()) } else { break }
171 }
172
173 if let Some(ch) = state.peek() {
175 if ch.is_ascii_alphabetic() {
176 state.advance(ch.len_utf8());
177 if let Some(ch2) = state.peek() {
178 if ch2.is_ascii_alphabetic() {
179 state.advance(ch2.len_utf8())
180 }
181 }
182 }
183 }
184
185 state.add_token(CSharpTokenType::NumberLiteral, start_pos, state.get_position());
186 true
187 }
188 else {
189 false
190 }
191 }
192 else {
193 false
194 }
195 }
196
197 fn lex_keyword_or_identifier<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
199 let start_pos = state.get_position();
200
201 if let Some(ch) = state.peek() {
202 if ch.is_ascii_alphabetic() || ch == '_' || ch == '@' {
203 state.advance(ch.len_utf8());
204
205 while let Some(ch) = state.peek() {
206 if ch.is_ascii_alphanumeric() || ch == '_' { state.advance(ch.len_utf8()) } else { break }
207 }
208
209 let text = state.get_text_in((start_pos..state.get_position()).into());
210 let token_kind = match text.as_ref() {
211 "abstract" => CSharpTokenType::Abstract,
213 "as" => CSharpTokenType::As,
214 "async" => CSharpTokenType::AsyncKeyword,
215 "await" => CSharpTokenType::AwaitKeyword,
216 "base" => CSharpTokenType::Base,
217 "bool" => CSharpTokenType::Bool,
218 "break" => CSharpTokenType::Break,
219 "byte" => CSharpTokenType::Byte,
220 "case" => CSharpTokenType::Case,
221 "catch" => CSharpTokenType::Catch,
222 "char" => CSharpTokenType::Char,
223 "checked" => CSharpTokenType::Checked,
224 "class" => CSharpTokenType::Class,
225 "const" => CSharpTokenType::Const,
226 "continue" => CSharpTokenType::Continue,
227 "decimal" => CSharpTokenType::Decimal,
228 "default" => CSharpTokenType::Default,
229 "delegate" => CSharpTokenType::Delegate,
230 "do" => CSharpTokenType::Do,
231 "double" => CSharpTokenType::Double,
232 "else" => CSharpTokenType::Else,
233 "enum" => CSharpTokenType::Enum,
234 "event" => CSharpTokenType::Event,
235 "explicit" => CSharpTokenType::Explicit,
236 "extern" => CSharpTokenType::Extern,
237 "false" => CSharpTokenType::False,
238 "finally" => CSharpTokenType::Finally,
239 "fixed" => CSharpTokenType::Fixed,
240 "float" => CSharpTokenType::Float,
241 "for" => CSharpTokenType::For,
242 "foreach" => CSharpTokenType::Foreach,
243 "goto" => CSharpTokenType::Goto,
244 "if" => CSharpTokenType::If,
245 "implicit" => CSharpTokenType::Implicit,
246 "in" => CSharpTokenType::In,
247 "int" => CSharpTokenType::Int,
248 "interface" => CSharpTokenType::Interface,
249 "internal" => CSharpTokenType::Internal,
250 "is" => CSharpTokenType::Is,
251 "lock" => CSharpTokenType::Lock,
252 "long" => CSharpTokenType::Long,
253 "namespace" => CSharpTokenType::Namespace,
254 "new" => CSharpTokenType::New,
255 "null" => CSharpTokenType::Null,
256 "object" => CSharpTokenType::Object,
257 "operator" => CSharpTokenType::Operator,
258 "out" => CSharpTokenType::Out,
259 "override" => CSharpTokenType::Override,
260 "params" => CSharpTokenType::Params,
261 "private" => CSharpTokenType::Private,
262 "protected" => CSharpTokenType::Protected,
263 "public" => CSharpTokenType::Public,
264 "readonly" => CSharpTokenType::Readonly,
265 "record" => CSharpTokenType::Record,
266 "ref" => CSharpTokenType::Ref,
267 "return" => CSharpTokenType::Return,
268 "sbyte" => CSharpTokenType::Sbyte,
269 "sealed" => CSharpTokenType::Sealed,
270 "short" => CSharpTokenType::Short,
271 "sizeof" => CSharpTokenType::Sizeof,
272 "stackalloc" => CSharpTokenType::Stackalloc,
273 "static" => CSharpTokenType::Static,
274 "string" => CSharpTokenType::String,
275 "struct" => CSharpTokenType::Struct,
276 "switch" => CSharpTokenType::Switch,
277 "this" => CSharpTokenType::This,
278 "throw" => CSharpTokenType::Throw,
279 "true" => CSharpTokenType::True,
280 "try" => CSharpTokenType::Try,
281 "typeof" => CSharpTokenType::Typeof,
282 "uint" => CSharpTokenType::Uint,
283 "ulong" => CSharpTokenType::Ulong,
284 "unchecked" => CSharpTokenType::Unchecked,
285 "unsafe" => CSharpTokenType::Unsafe,
286 "ushort" => CSharpTokenType::Ushort,
287 "using" => CSharpTokenType::Using,
288 "virtual" => CSharpTokenType::Virtual,
289 "void" => CSharpTokenType::Void,
290 "volatile" => CSharpTokenType::Volatile,
291 "while" => CSharpTokenType::While,
292 _ => CSharpTokenType::Identifier,
293 };
294
295 state.add_token(token_kind, start_pos, state.get_position());
296 true
297 }
298 else {
299 false
300 }
301 }
302 else {
303 false
304 }
305 }
306
307 fn lex_operator<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
309 let start_pos = state.get_position();
310
311 if let Some(ch) = state.peek() {
312 let token_kind = match ch {
313 '+' => {
314 state.advance(1);
315 if let Some('=') = state.peek() {
316 state.advance(1);
317 CSharpTokenType::PlusAssign
318 }
319 else if let Some('+') = state.peek() {
320 state.advance(1);
321 CSharpTokenType::Increment
322 }
323 else {
324 CSharpTokenType::Plus
325 }
326 }
327 '-' => {
328 state.advance(1);
329 if let Some('=') = state.peek() {
330 state.advance(1);
331 CSharpTokenType::MinusAssign
332 }
333 else if let Some('-') = state.peek() {
334 state.advance(1);
335 CSharpTokenType::Decrement
336 }
337 else {
338 CSharpTokenType::Minus
339 }
340 }
341 '*' => {
342 state.advance(1);
343 if let Some('=') = state.peek() {
344 state.advance(1);
345 CSharpTokenType::StarAssign
346 }
347 else {
348 CSharpTokenType::Star
349 }
350 }
351 '/' => {
352 state.advance(1);
354 if let Some('=') = state.peek() {
355 state.advance(1);
356 CSharpTokenType::SlashAssign
357 }
358 else {
359 CSharpTokenType::Slash
360 }
361 }
362 '%' => {
363 state.advance(1);
364 if let Some('=') = state.peek() {
365 state.advance(1);
366 CSharpTokenType::PercentAssign
367 }
368 else {
369 CSharpTokenType::Percent
370 }
371 }
372 '=' => {
373 state.advance(1);
374 if let Some('=') = state.peek() {
375 state.advance(1);
376 CSharpTokenType::Equal
377 }
378 else {
379 CSharpTokenType::Assign
380 }
381 }
382 '!' => {
383 state.advance(1);
384 if let Some('=') = state.peek() {
385 state.advance(1);
386 CSharpTokenType::NotEqual
387 }
388 else {
389 CSharpTokenType::LogicalNot
390 }
391 }
392 '<' => {
393 state.advance(1);
394 if let Some('=') = state.peek() {
395 state.advance(1);
396 CSharpTokenType::LessEqual
397 }
398 else if let Some('<') = state.peek() {
399 state.advance(1);
400 CSharpTokenType::LeftShift
401 }
402 else {
403 CSharpTokenType::Less
404 }
405 }
406 '>' => {
407 state.advance(1);
408 if let Some('=') = state.peek() {
409 state.advance(1);
410 CSharpTokenType::GreaterEqual
411 }
412 else if let Some('>') = state.peek() {
413 state.advance(1);
414 CSharpTokenType::RightShift
415 }
416 else {
417 CSharpTokenType::Greater
418 }
419 }
420 '&' => {
421 state.advance(1);
422 if let Some('&') = state.peek() {
423 state.advance(1);
424 CSharpTokenType::LogicalAnd
425 }
426 else {
427 CSharpTokenType::Ampersand
428 }
429 }
430 '|' => {
431 state.advance(1);
432 if let Some('|') = state.peek() {
433 state.advance(1);
434 CSharpTokenType::LogicalOr
435 }
436 else {
437 CSharpTokenType::Pipe
438 }
439 }
440 '^' => {
441 state.advance(1);
442 CSharpTokenType::Caret
443 }
444 '~' => {
445 state.advance(1);
446 CSharpTokenType::Tilde
447 }
448 _ => return false,
449 };
450
451 state.add_token(token_kind, start_pos, state.get_position());
452 true
453 }
454 else {
455 false
456 }
457 }
458
459 fn lex_delimiter<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
461 let start_pos = state.get_position();
462
463 if let Some(ch) = state.peek() {
464 let token_kind = match ch {
465 '(' => CSharpTokenType::LeftParen,
466 ')' => CSharpTokenType::RightParen,
467 '[' => CSharpTokenType::LeftBracket,
468 ']' => CSharpTokenType::RightBracket,
469 '{' => CSharpTokenType::LeftBrace,
470 '}' => CSharpTokenType::RightBrace,
471 ';' => CSharpTokenType::Semicolon,
472 ',' => CSharpTokenType::Comma,
473 '.' => CSharpTokenType::Dot,
474 ':' => CSharpTokenType::Colon,
475 '?' => CSharpTokenType::Question,
476 _ => return false,
477 };
478
479 state.advance(ch.len_utf8());
480 state.add_token(token_kind, start_pos, state.get_position());
481 true
482 }
483 else {
484 false
485 }
486 }
487
488 fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), oak_core::OakError> {
489 while state.not_at_end() {
490 let safe_point = state.get_position();
491
492 if self.skip_whitespace(state) {
493 continue;
494 }
495
496 if self.lex_newline(state) {
497 continue;
498 }
499
500 if self.lex_comment(state) {
501 continue;
502 }
503
504 if self.lex_string(state) {
505 continue;
506 }
507
508 if self.lex_number(state) {
509 continue;
510 }
511
512 if self.lex_keyword_or_identifier(state) {
513 continue;
514 }
515
516 if self.lex_operator(state) {
517 continue;
518 }
519
520 if self.lex_delimiter(state) {
521 continue;
522 }
523
524 let start_pos = state.get_position();
526 if let Some(ch) = state.peek() {
527 state.advance(ch.len_utf8());
528 state.add_token(CSharpTokenType::Error, start_pos, state.get_position());
529 }
530
531 state.advance_if_dead_lock(safe_point)
532 }
533 Ok(())
534 }
535}
536
537impl<'config> Lexer<CSharpLanguage> for CSharpLexer<'config> {
538 fn lex<'a, S: Source + ?Sized>(&self, text: &S, _edits: &[TextEdit], mut cache: &'a mut impl LexerCache<CSharpLanguage>) -> LexOutput<CSharpLanguage> {
539 let mut state = LexerState::new(text);
540 let result = self.run(&mut state);
541 if result.is_ok() {
542 state.add_eof();
543 }
544 state.finish_with_cache(result, &mut cache)
545 }
546}