1use crate::{kind::PowerShellSyntaxKind, language::PowerShellLanguage};
2use oak_core::{
3 Lexer, LexerCache, LexerState, OakError,
4 lexer::LexOutput,
5 source::{Source, TextEdit},
6};
7
8type State<'a, S> = LexerState<'a, S, PowerShellLanguage>;
9
10#[derive(Clone)]
11pub struct PowerShellLexer<'config> {
12 _config: &'config PowerShellLanguage,
13}
14
15impl<'config> PowerShellLexer<'config> {
16 pub fn new(config: &'config PowerShellLanguage) -> Self {
17 Self { _config: config }
18 }
19
20 fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
21 while state.not_at_end() {
22 if self.skip_whitespace(state) {
23 continue;
24 }
25
26 if self.lex_newline(state) {
27 continue;
28 }
29
30 if self.lex_comment(state) {
31 continue;
32 }
33
34 if self.lex_string(state) {
35 continue;
36 }
37
38 if self.lex_number(state) {
39 continue;
40 }
41
42 if self.lex_variable(state) {
43 continue;
44 }
45
46 if self.lex_identifier_or_keyword(state) {
47 continue;
48 }
49
50 if self.lex_operators_and_punctuation(state) {
51 continue;
52 }
53
54 if let Some(ch) = state.peek() {
56 let start_pos = state.get_position();
57 state.advance(ch.len_utf8());
58 state.add_token(PowerShellSyntaxKind::Error, start_pos, state.get_position());
59 }
60 else {
61 break;
63 }
64 }
65
66 let pos = state.get_position();
68 state.add_token(PowerShellSyntaxKind::Eof, pos, pos);
69
70 Ok(())
71 }
72
73 fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
74 let start_pos = state.get_position();
75
76 while let Some(ch) = state.peek() {
77 if ch == ' ' || ch == '\t' {
78 state.advance(ch.len_utf8());
79 }
80 else {
81 break;
82 }
83 }
84
85 if state.get_position() > start_pos {
86 state.add_token(PowerShellSyntaxKind::Whitespace, start_pos, state.get_position());
87 true
88 }
89 else {
90 false
91 }
92 }
93
94 fn lex_newline<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
95 let start_pos = state.get_position();
96
97 if let Some('\n') = state.peek() {
98 state.advance(1);
99 state.add_token(PowerShellSyntaxKind::Newline, start_pos, state.get_position());
100 true
101 }
102 else if let Some('\r') = state.peek() {
103 state.advance(1);
104 if let Some('\n') = state.peek() {
105 state.advance(1);
106 }
107 state.add_token(PowerShellSyntaxKind::Newline, start_pos, state.get_position());
108 true
109 }
110 else {
111 false
112 }
113 }
114
115 fn lex_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
116 let start_pos = state.get_position();
117
118 if let Some('#') = state.peek() {
119 state.advance(1);
120 while let Some(ch) = state.peek() {
122 if ch == '\n' || ch == '\r' {
123 break;
124 }
125 state.advance(ch.len_utf8());
126 }
127 state.add_token(PowerShellSyntaxKind::Comment, start_pos, state.get_position());
128 true
129 }
130 else if let Some('<') = state.peek() {
131 state.advance(1);
132 if let Some('#') = state.peek() {
133 state.advance(1);
134 let mut depth = 1;
136 while let Some(ch) = state.peek() {
137 if depth == 0 {
138 break;
139 }
140 if ch == '<' {
141 state.advance(1);
142 if let Some('#') = state.peek() {
143 state.advance(1);
144 depth += 1;
145 }
146 }
147 else if ch == '#' {
148 state.advance(1);
149 if let Some('>') = state.peek() {
150 state.advance(1);
151 depth -= 1;
152 }
153 }
154 else {
155 state.advance(ch.len_utf8());
156 }
157 }
158 state.add_token(PowerShellSyntaxKind::Comment, start_pos, state.get_position());
159 true
160 }
161 else {
162 state.set_position(start_pos);
164 false
165 }
166 }
167 else {
168 false
169 }
170 }
171
172 fn lex_string<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
173 let start_pos = state.get_position();
174
175 if let Some(quote_char) = state.peek() {
176 if quote_char == '"' || quote_char == '\'' {
177 state.advance(1); let mut escaped = false;
180 while let Some(ch) = state.peek() {
181 if escaped {
182 escaped = false;
183 state.advance(ch.len_utf8());
184 }
185 else if ch == '`' {
186 escaped = true;
188 state.advance(1);
189 }
190 else if ch == quote_char {
191 state.advance(1); break;
193 }
194 else if ch == '\n' || ch == '\r' {
195 state.advance(ch.len_utf8());
197 }
198 else {
199 state.advance(ch.len_utf8());
200 }
201 }
202
203 state.add_token(PowerShellSyntaxKind::StringLiteral, start_pos, state.get_position());
204 true
205 }
206 else {
207 false
208 }
209 }
210 else {
211 false
212 }
213 }
214
215 fn lex_number<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
216 if let Some(ch) = state.peek() {
217 if ch.is_ascii_digit() {
218 let start_pos = state.get_position();
219
220 while let Some(ch) = state.peek() {
222 if ch.is_ascii_digit() {
223 state.advance(1);
224 }
225 else {
226 break;
227 }
228 }
229
230 if let Some('.') = state.peek() {
232 state.advance(1);
233 while let Some(ch) = state.peek() {
235 if ch.is_ascii_digit() {
236 state.advance(1);
237 }
238 else {
239 break;
240 }
241 }
242 }
243
244 if let Some(ch) = state.peek() {
246 if ch == 'e' || ch == 'E' {
247 state.advance(1);
248 if let Some(ch) = state.peek() {
249 if ch == '+' || ch == '-' {
250 state.advance(1);
251 }
252 }
253 while let Some(ch) = state.peek() {
254 if ch.is_ascii_digit() {
255 state.advance(1);
256 }
257 else {
258 break;
259 }
260 }
261 }
262 }
263
264 state.add_token(PowerShellSyntaxKind::NumberLiteral, start_pos, state.get_position());
265 true
266 }
267 else {
268 false
269 }
270 }
271 else {
272 false
273 }
274 }
275
276 fn lex_variable<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
277 let start_pos = state.get_position();
278
279 if let Some('$') = state.peek() {
280 state.advance(1);
281
282 if let Some(ch) = state.peek() {
284 if ch.is_alphabetic() || ch == '_' {
285 state.advance(ch.len_utf8());
286
287 while let Some(ch) = state.peek() {
289 if ch.is_alphanumeric() || ch == '_' {
290 state.advance(ch.len_utf8());
291 }
292 else {
293 break;
294 }
295 }
296
297 state.add_token(PowerShellSyntaxKind::Variable, start_pos, state.get_position());
298 true
299 }
300 else {
301 state.add_token(PowerShellSyntaxKind::Dollar, start_pos, state.get_position());
303 true
304 }
305 }
306 else {
307 state.add_token(PowerShellSyntaxKind::Dollar, start_pos, state.get_position());
308 true
309 }
310 }
311 else {
312 false
313 }
314 }
315
316 fn lex_identifier_or_keyword<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
317 if let Some(ch) = state.peek() {
318 if ch.is_alphabetic() || ch == '_' {
319 let start_pos = state.get_position();
320 let mut text = String::new();
321
322 while let Some(ch) = state.peek() {
324 if ch.is_alphanumeric() || ch == '_' || ch == '-' {
325 text.push(ch);
326 state.advance(ch.len_utf8());
327 }
328 else {
329 break;
330 }
331 }
332
333 let kind = match text.as_str() {
335 "begin" => PowerShellSyntaxKind::Begin,
336 "break" => PowerShellSyntaxKind::Break,
337 "catch" => PowerShellSyntaxKind::Catch,
338 "class" => PowerShellSyntaxKind::Class,
339 "continue" => PowerShellSyntaxKind::Continue,
340 "data" => PowerShellSyntaxKind::Data,
341 "define" => PowerShellSyntaxKind::Define,
342 "do" => PowerShellSyntaxKind::Do,
343 "dynamicparam" => PowerShellSyntaxKind::DynamicParam,
344 "else" => PowerShellSyntaxKind::Else,
345 "elseif" => PowerShellSyntaxKind::ElseIf,
346 "end" => PowerShellSyntaxKind::End,
347 "exit" => PowerShellSyntaxKind::Exit,
348 "filter" => PowerShellSyntaxKind::Filter,
349 "finally" => PowerShellSyntaxKind::Finally,
350 "for" => PowerShellSyntaxKind::For,
351 "foreach" => PowerShellSyntaxKind::ForEach,
352 "from" => PowerShellSyntaxKind::From,
353 "function" => PowerShellSyntaxKind::Function,
354 "if" => PowerShellSyntaxKind::If,
355 "in" => PowerShellSyntaxKind::In,
356 "param" => PowerShellSyntaxKind::Param,
357 "process" => PowerShellSyntaxKind::Process,
358 "return" => PowerShellSyntaxKind::Return,
359 "switch" => PowerShellSyntaxKind::Switch,
360 "throw" => PowerShellSyntaxKind::Throw,
361 "trap" => PowerShellSyntaxKind::Trap,
362 "try" => PowerShellSyntaxKind::Try,
363 "until" => PowerShellSyntaxKind::Until,
364 "using" => PowerShellSyntaxKind::Using,
365 "var" => PowerShellSyntaxKind::Var,
366 "while" => PowerShellSyntaxKind::While,
367 "workflow" => PowerShellSyntaxKind::Workflow,
368 "true" => PowerShellSyntaxKind::BooleanLiteral,
369 "false" => PowerShellSyntaxKind::BooleanLiteral,
370 "null" => PowerShellSyntaxKind::NullLiteral,
371 _ => PowerShellSyntaxKind::Identifier,
372 };
373
374 state.add_token(kind, start_pos, state.get_position());
375 true
376 }
377 else {
378 false
379 }
380 }
381 else {
382 false
383 }
384 }
385
386 fn lex_operators_and_punctuation<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
387 if let Some(ch) = state.peek() {
388 let start_pos = state.get_position();
389
390 let kind = match ch {
391 '+' => {
392 state.advance(1);
393 if let Some('+') = state.peek() {
394 state.advance(1);
395 PowerShellSyntaxKind::Plus
396 }
397 else if let Some('=') = state.peek() {
398 state.advance(1);
399 PowerShellSyntaxKind::Equal
400 }
401 else {
402 PowerShellSyntaxKind::Plus
403 }
404 }
405 '-' => {
406 state.advance(1);
407 if let Some('-') = state.peek() {
408 state.advance(1);
409 PowerShellSyntaxKind::Minus
410 }
411 else if let Some('=') = state.peek() {
412 state.advance(1);
413 PowerShellSyntaxKind::Equal
414 }
415 else {
416 PowerShellSyntaxKind::Minus
417 }
418 }
419 '*' => {
420 state.advance(1);
421 if let Some('=') = state.peek() {
422 state.advance(1);
423 PowerShellSyntaxKind::Equal
424 }
425 else {
426 PowerShellSyntaxKind::Multiply
427 }
428 }
429 '/' => {
430 state.advance(1);
431 if let Some('=') = state.peek() {
432 state.advance(1);
433 PowerShellSyntaxKind::Equal
434 }
435 else {
436 PowerShellSyntaxKind::Divide
437 }
438 }
439 '%' => {
440 state.advance(1);
441 if let Some('=') = state.peek() {
442 state.advance(1);
443 PowerShellSyntaxKind::Equal
444 }
445 else {
446 PowerShellSyntaxKind::Modulo
447 }
448 }
449 '=' => {
450 state.advance(1);
451 if let Some('=') = state.peek() {
452 state.advance(1);
453 PowerShellSyntaxKind::Equal
454 }
455 else {
456 PowerShellSyntaxKind::Equal
457 }
458 }
459 '!' => {
460 state.advance(1);
461 if let Some('=') = state.peek() {
462 state.advance(1);
463 PowerShellSyntaxKind::NotEqual
464 }
465 else {
466 PowerShellSyntaxKind::Exclamation
467 }
468 }
469 '<' => {
470 state.advance(1);
471 if let Some('=') = state.peek() {
472 state.advance(1);
473 PowerShellSyntaxKind::LessEqual
474 }
475 else {
476 PowerShellSyntaxKind::LessThan
477 }
478 }
479 '>' => {
480 state.advance(1);
481 if let Some('=') = state.peek() {
482 state.advance(1);
483 PowerShellSyntaxKind::GreaterEqual
484 }
485 else {
486 PowerShellSyntaxKind::GreaterThan
487 }
488 }
489 '&' => {
490 state.advance(1);
491 if let Some('&') = state.peek() {
492 state.advance(1);
493 PowerShellSyntaxKind::And
494 }
495 else {
496 PowerShellSyntaxKind::Ampersand
497 }
498 }
499 '|' => {
500 state.advance(1);
501 if let Some('|') = state.peek() {
502 state.advance(1);
503 PowerShellSyntaxKind::Or
504 }
505 else {
506 PowerShellSyntaxKind::Pipe
507 }
508 }
509 '^' => {
510 state.advance(1);
511 PowerShellSyntaxKind::Xor
512 }
513 '~' => {
514 state.advance(1);
515 PowerShellSyntaxKind::Not
516 }
517 '?' => {
518 state.advance(1);
519 PowerShellSyntaxKind::Question
520 }
521 ':' => {
522 state.advance(1);
523 if let Some(':') = state.peek() {
524 state.advance(1);
525 PowerShellSyntaxKind::DoubleColon
526 }
527 else {
528 PowerShellSyntaxKind::Colon
529 }
530 }
531 ';' => {
532 state.advance(1);
533 PowerShellSyntaxKind::Semicolon
534 }
535 ',' => {
536 state.advance(1);
537 PowerShellSyntaxKind::Comma
538 }
539 '.' => {
540 state.advance(1);
541 if let Some('.') = state.peek() {
542 state.advance(1);
543 PowerShellSyntaxKind::DotDot
544 }
545 else {
546 PowerShellSyntaxKind::Dot
547 }
548 }
549 '(' => {
550 state.advance(1);
551 PowerShellSyntaxKind::LeftParen
552 }
553 ')' => {
554 state.advance(1);
555 PowerShellSyntaxKind::RightParen
556 }
557 '[' => {
558 state.advance(1);
559 PowerShellSyntaxKind::LeftBracket
560 }
561 ']' => {
562 state.advance(1);
563 PowerShellSyntaxKind::RightBracket
564 }
565 '{' => {
566 state.advance(1);
567 PowerShellSyntaxKind::LeftBrace
568 }
569 '}' => {
570 state.advance(1);
571 PowerShellSyntaxKind::RightBrace
572 }
573 '@' => {
574 state.advance(1);
575 PowerShellSyntaxKind::At
576 }
577 '`' => {
578 state.advance(1);
579 PowerShellSyntaxKind::Backtick
580 }
581 _ => return false,
582 };
583
584 state.add_token(kind, start_pos, state.get_position());
585 true
586 }
587 else {
588 false
589 }
590 }
591}
592
593impl<'config> Lexer<PowerShellLanguage> for PowerShellLexer<'config> {
594 fn lex<'a, S: Source + ?Sized>(&self, source: &S, _edits: &[TextEdit], cache: &'a mut impl LexerCache<PowerShellLanguage>) -> LexOutput<PowerShellLanguage> {
595 let mut state = LexerState::new(source);
596 let result = self.run(&mut state);
597 state.finish_with_cache(result, cache)
598 }
599}