1use crate::{CSharpSyntaxKind, language::CSharpLanguage};
2use oak_core::{IncrementalCache, Lexer, LexerState, lexer::LexOutput, source::Source};
3
4type State<S> = LexerState<S, CSharpLanguage>;
5
/// Lexer that splits C# source text into `CSharpSyntaxKind` tokens.
///
/// The type carries no data, so it is free to copy, compare by debug output,
/// and construct via [`Default`].
#[derive(Debug, Clone, Copy, Default)]
pub struct CSharpLexer;
7
8impl CSharpLexer {
9 pub fn new(_config: &CSharpLanguage) -> Self {
10 Self
11 }
12
13 fn skip_whitespace<S: Source>(&self, state: &mut State<S>) -> bool {
15 let start_pos = state.get_position();
16
17 while let Some(ch) = state.peek() {
18 if ch == ' ' || ch == '\t' {
19 state.advance(ch.len_utf8());
20 }
21 else {
22 break;
23 }
24 }
25
26 if state.get_position() > start_pos {
27 state.add_token(CSharpSyntaxKind::Whitespace, start_pos, state.get_position());
28 true
29 }
30 else {
31 false
32 }
33 }
34
35 fn lex_newline<S: Source>(&self, state: &mut State<S>) -> bool {
37 let start_pos = state.get_position();
38
39 if let Some('\n') = state.peek() {
40 state.advance(1);
41 state.add_token(CSharpSyntaxKind::Newline, start_pos, state.get_position());
42 true
43 }
44 else if let Some('\r') = state.peek() {
45 state.advance(1);
46 if let Some('\n') = state.peek() {
47 state.advance(1);
48 }
49 state.add_token(CSharpSyntaxKind::Newline, start_pos, state.get_position());
50 true
51 }
52 else {
53 false
54 }
55 }
56
57 fn lex_comment<S: Source>(&self, state: &mut State<S>) -> bool {
59 let start_pos = state.get_position();
60
61 if let Some('/') = state.peek() {
62 state.advance(1);
63 if let Some('/') = state.peek() {
64 state.advance(1);
66 while let Some(ch) = state.peek() {
67 if ch == '\n' || ch == '\r' {
68 break;
69 }
70 state.advance(ch.len_utf8());
71 }
72 state.add_token(CSharpSyntaxKind::Comment, start_pos, state.get_position());
73 return true;
74 }
75 else if let Some('*') = state.peek() {
76 state.advance(1);
78 while let Some(ch) = state.peek() {
79 if ch == '*' {
80 state.advance(1);
81 if let Some('/') = state.peek() {
82 state.advance(1);
83 break;
84 }
85 }
86 else {
87 state.advance(ch.len_utf8());
88 }
89 }
90 state.add_token(CSharpSyntaxKind::Comment, start_pos, state.get_position());
91 return true;
92 }
93 else {
94 state.set_position(start_pos);
96 return false;
97 }
98 }
99 false
100 }
101
102 fn lex_string<S: Source>(&self, state: &mut State<S>) -> bool {
104 let start_pos = state.get_position();
105
106 if let Some('"') = state.peek() {
107 state.advance(1);
108 while let Some(ch) = state.peek() {
109 if ch == '"' {
110 state.advance(1);
111 break;
112 }
113 else if ch == '\\' {
114 state.advance(1);
115 if let Some(_) = state.peek() {
116 state.advance(1);
117 }
118 }
119 else {
120 state.advance(ch.len_utf8());
121 }
122 }
123 state.add_token(CSharpSyntaxKind::StringLiteral, start_pos, state.get_position());
124 true
125 }
126 else if let Some('\'') = state.peek() {
127 state.advance(1);
129 while let Some(ch) = state.peek() {
130 if ch == '\'' {
131 state.advance(1);
132 break;
133 }
134 else if ch == '\\' {
135 state.advance(1);
136 if let Some(_) = state.peek() {
137 state.advance(1);
138 }
139 }
140 else {
141 state.advance(ch.len_utf8());
142 }
143 }
144 state.add_token(CSharpSyntaxKind::CharLiteral, start_pos, state.get_position());
145 true
146 }
147 else {
148 false
149 }
150 }
151
152 fn lex_number<S: Source>(&self, state: &mut State<S>) -> bool {
154 let start_pos = state.get_position();
155
156 if let Some(ch) = state.peek() {
157 if ch.is_ascii_digit() {
158 state.advance(ch.len_utf8());
159
160 while let Some(ch) = state.peek() {
161 if ch.is_ascii_digit() || ch == '.' || ch == '_' {
162 state.advance(ch.len_utf8());
163 }
164 else {
165 break;
166 }
167 }
168
169 if let Some(ch) = state.peek() {
171 if ch.is_ascii_alphabetic() {
172 state.advance(ch.len_utf8());
173 if let Some(ch2) = state.peek() {
174 if ch2.is_ascii_alphabetic() {
175 state.advance(ch2.len_utf8());
176 }
177 }
178 }
179 }
180
181 state.add_token(CSharpSyntaxKind::NumberLiteral, start_pos, state.get_position());
182 true
183 }
184 else {
185 false
186 }
187 }
188 else {
189 false
190 }
191 }
192
193 fn lex_keyword_or_identifier<S: Source>(&self, state: &mut State<S>) -> bool {
195 let start_pos = state.get_position();
196
197 if let Some(ch) = state.peek() {
198 if ch.is_ascii_alphabetic() || ch == '_' || ch == '@' {
199 state.advance(ch.len_utf8());
200
201 while let Some(ch) = state.peek() {
202 if ch.is_ascii_alphanumeric() || ch == '_' {
203 state.advance(ch.len_utf8());
204 }
205 else {
206 break;
207 }
208 }
209
210 let text = state.get_text_in((start_pos..state.get_position()).into());
211 let token_kind = match text {
212 "abstract" => CSharpSyntaxKind::Abstract,
214 "as" => CSharpSyntaxKind::As,
215 "base" => CSharpSyntaxKind::Base,
216 "bool" => CSharpSyntaxKind::Bool,
217 "break" => CSharpSyntaxKind::Break,
218 "byte" => CSharpSyntaxKind::Byte,
219 "case" => CSharpSyntaxKind::Case,
220 "catch" => CSharpSyntaxKind::Catch,
221 "char" => CSharpSyntaxKind::Char,
222 "checked" => CSharpSyntaxKind::Checked,
223 "class" => CSharpSyntaxKind::Class,
224 "const" => CSharpSyntaxKind::Const,
225 "continue" => CSharpSyntaxKind::Continue,
226 "decimal" => CSharpSyntaxKind::Decimal,
227 "default" => CSharpSyntaxKind::Default,
228 "delegate" => CSharpSyntaxKind::Delegate,
229 "do" => CSharpSyntaxKind::Do,
230 "double" => CSharpSyntaxKind::Double,
231 "else" => CSharpSyntaxKind::Else,
232 "enum" => CSharpSyntaxKind::Enum,
233 "event" => CSharpSyntaxKind::Event,
234 "explicit" => CSharpSyntaxKind::Explicit,
235 "extern" => CSharpSyntaxKind::Extern,
236 "false" => CSharpSyntaxKind::False,
237 "finally" => CSharpSyntaxKind::Finally,
238 "fixed" => CSharpSyntaxKind::Fixed,
239 "float" => CSharpSyntaxKind::Float,
240 "for" => CSharpSyntaxKind::For,
241 "foreach" => CSharpSyntaxKind::Foreach,
242 "goto" => CSharpSyntaxKind::Goto,
243 "if" => CSharpSyntaxKind::If,
244 "implicit" => CSharpSyntaxKind::Implicit,
245 "in" => CSharpSyntaxKind::In,
246 "int" => CSharpSyntaxKind::Int,
247 "interface" => CSharpSyntaxKind::Interface,
248 "internal" => CSharpSyntaxKind::Internal,
249 "is" => CSharpSyntaxKind::Is,
250 "lock" => CSharpSyntaxKind::Lock,
251 "long" => CSharpSyntaxKind::Long,
252 "namespace" => CSharpSyntaxKind::Namespace,
253 "new" => CSharpSyntaxKind::New,
254 "null" => CSharpSyntaxKind::Null,
255 "object" => CSharpSyntaxKind::Object,
256 "operator" => CSharpSyntaxKind::Operator,
257 "out" => CSharpSyntaxKind::Out,
258 "override" => CSharpSyntaxKind::Override,
259 "params" => CSharpSyntaxKind::Params,
260 "private" => CSharpSyntaxKind::Private,
261 "protected" => CSharpSyntaxKind::Protected,
262 "public" => CSharpSyntaxKind::Public,
263 "readonly" => CSharpSyntaxKind::Readonly,
264 "ref" => CSharpSyntaxKind::Ref,
265 "return" => CSharpSyntaxKind::Return,
266 "sbyte" => CSharpSyntaxKind::Sbyte,
267 "sealed" => CSharpSyntaxKind::Sealed,
268 "short" => CSharpSyntaxKind::Short,
269 "sizeof" => CSharpSyntaxKind::Sizeof,
270 "stackalloc" => CSharpSyntaxKind::Stackalloc,
271 "static" => CSharpSyntaxKind::Static,
272 "string" => CSharpSyntaxKind::String,
273 "struct" => CSharpSyntaxKind::Struct,
274 "switch" => CSharpSyntaxKind::Switch,
275 "this" => CSharpSyntaxKind::This,
276 "throw" => CSharpSyntaxKind::Throw,
277 "true" => CSharpSyntaxKind::True,
278 "try" => CSharpSyntaxKind::Try,
279 "typeof" => CSharpSyntaxKind::Typeof,
280 "uint" => CSharpSyntaxKind::Uint,
281 "ulong" => CSharpSyntaxKind::Ulong,
282 "unchecked" => CSharpSyntaxKind::Unchecked,
283 "unsafe" => CSharpSyntaxKind::Unsafe,
284 "ushort" => CSharpSyntaxKind::Ushort,
285 "using" => CSharpSyntaxKind::Using,
286 "virtual" => CSharpSyntaxKind::Virtual,
287 "void" => CSharpSyntaxKind::Void,
288 "volatile" => CSharpSyntaxKind::Volatile,
289 "while" => CSharpSyntaxKind::While,
290 _ => CSharpSyntaxKind::Identifier,
291 };
292
293 state.add_token(token_kind, start_pos, state.get_position());
294 true
295 }
296 else {
297 false
298 }
299 }
300 else {
301 false
302 }
303 }
304
305 fn lex_operator<S: Source>(&self, state: &mut State<S>) -> bool {
307 let start_pos = state.get_position();
308
309 if let Some(ch) = state.peek() {
310 let token_kind = match ch {
311 '+' => {
312 state.advance(1);
313 if let Some('=') = state.peek() {
314 state.advance(1);
315 CSharpSyntaxKind::PlusAssign
316 }
317 else if let Some('+') = state.peek() {
318 state.advance(1);
319 CSharpSyntaxKind::Increment
320 }
321 else {
322 CSharpSyntaxKind::Plus
323 }
324 }
325 '-' => {
326 state.advance(1);
327 if let Some('=') = state.peek() {
328 state.advance(1);
329 CSharpSyntaxKind::MinusAssign
330 }
331 else if let Some('-') = state.peek() {
332 state.advance(1);
333 CSharpSyntaxKind::Decrement
334 }
335 else {
336 CSharpSyntaxKind::Minus
337 }
338 }
339 '*' => {
340 state.advance(1);
341 if let Some('=') = state.peek() {
342 state.advance(1);
343 CSharpSyntaxKind::StarAssign
344 }
345 else {
346 CSharpSyntaxKind::Star
347 }
348 }
349 '/' => {
350 state.advance(1);
352 if let Some('=') = state.peek() {
353 state.advance(1);
354 CSharpSyntaxKind::SlashAssign
355 }
356 else {
357 CSharpSyntaxKind::Slash
358 }
359 }
360 '%' => {
361 state.advance(1);
362 if let Some('=') = state.peek() {
363 state.advance(1);
364 CSharpSyntaxKind::PercentAssign
365 }
366 else {
367 CSharpSyntaxKind::Percent
368 }
369 }
370 '=' => {
371 state.advance(1);
372 if let Some('=') = state.peek() {
373 state.advance(1);
374 CSharpSyntaxKind::Equal
375 }
376 else {
377 CSharpSyntaxKind::Assign
378 }
379 }
380 '!' => {
381 state.advance(1);
382 if let Some('=') = state.peek() {
383 state.advance(1);
384 CSharpSyntaxKind::NotEqual
385 }
386 else {
387 CSharpSyntaxKind::LogicalNot
388 }
389 }
390 '<' => {
391 state.advance(1);
392 if let Some('=') = state.peek() {
393 state.advance(1);
394 CSharpSyntaxKind::LessEqual
395 }
396 else if let Some('<') = state.peek() {
397 state.advance(1);
398 CSharpSyntaxKind::LeftShift
399 }
400 else {
401 CSharpSyntaxKind::Less
402 }
403 }
404 '>' => {
405 state.advance(1);
406 if let Some('=') = state.peek() {
407 state.advance(1);
408 CSharpSyntaxKind::GreaterEqual
409 }
410 else if let Some('>') = state.peek() {
411 state.advance(1);
412 CSharpSyntaxKind::RightShift
413 }
414 else {
415 CSharpSyntaxKind::Greater
416 }
417 }
418 '&' => {
419 state.advance(1);
420 if let Some('&') = state.peek() {
421 state.advance(1);
422 CSharpSyntaxKind::LogicalAnd
423 }
424 else {
425 CSharpSyntaxKind::Ampersand
426 }
427 }
428 '|' => {
429 state.advance(1);
430 if let Some('|') = state.peek() {
431 state.advance(1);
432 CSharpSyntaxKind::LogicalOr
433 }
434 else {
435 CSharpSyntaxKind::Pipe
436 }
437 }
438 '^' => {
439 state.advance(1);
440 CSharpSyntaxKind::Caret
441 }
442 '~' => {
443 state.advance(1);
444 CSharpSyntaxKind::Tilde
445 }
446 _ => return false,
447 };
448
449 state.add_token(token_kind, start_pos, state.get_position());
450 true
451 }
452 else {
453 false
454 }
455 }
456
457 fn lex_delimiter<S: Source>(&self, state: &mut State<S>) -> bool {
459 let start_pos = state.get_position();
460
461 if let Some(ch) = state.peek() {
462 let token_kind = match ch {
463 '(' => CSharpSyntaxKind::LeftParen,
464 ')' => CSharpSyntaxKind::RightParen,
465 '[' => CSharpSyntaxKind::LeftBracket,
466 ']' => CSharpSyntaxKind::RightBracket,
467 '{' => CSharpSyntaxKind::LeftBrace,
468 '}' => CSharpSyntaxKind::RightBrace,
469 ';' => CSharpSyntaxKind::Semicolon,
470 ',' => CSharpSyntaxKind::Comma,
471 '.' => CSharpSyntaxKind::Dot,
472 ':' => CSharpSyntaxKind::Colon,
473 '?' => CSharpSyntaxKind::Question,
474 _ => return false,
475 };
476
477 state.advance(ch.len_utf8());
478 state.add_token(token_kind, start_pos, state.get_position());
479 true
480 }
481 else {
482 false
483 }
484 }
485}
486
487impl Lexer<CSharpLanguage> for CSharpLexer {
488 fn lex_incremental(
489 &self,
490 source: impl Source,
491 _changed: usize,
492 _cache: IncrementalCache<CSharpLanguage>,
493 ) -> LexOutput<CSharpLanguage> {
494 let mut state = LexerState::new_with_cache(source, _changed, _cache);
495
496 while state.not_at_end() {
497 if self.skip_whitespace(&mut state) {
498 continue;
499 }
500
501 if self.lex_newline(&mut state) {
502 continue;
503 }
504
505 if self.lex_comment(&mut state) {
506 continue;
507 }
508
509 if self.lex_string(&mut state) {
510 continue;
511 }
512
513 if self.lex_number(&mut state) {
514 continue;
515 }
516
517 if self.lex_keyword_or_identifier(&mut state) {
518 continue;
519 }
520
521 if self.lex_operator(&mut state) {
522 continue;
523 }
524
525 if self.lex_delimiter(&mut state) {
526 continue;
527 }
528
529 let start_pos = state.get_position();
531 if let Some(ch) = state.peek() {
532 state.advance(ch.len_utf8());
533 state.add_token(CSharpSyntaxKind::Error, start_pos, state.get_position());
534 }
535 else {
536 break;
537 }
538 }
539
540 state.finish(Ok(()))
541 }
542}