1use crate::{kind::PowerShellSyntaxKind, language::PowerShellLanguage};
2use oak_core::{IncrementalCache, Lexer, LexerState, OakError, lexer::LexOutput, source::Source};
3
/// Shorthand for the `oak_core` lexer state specialised to [`PowerShellLanguage`]
/// over an arbitrary source type `S`.
4type State<S> = LexerState<S, PowerShellLanguage>;
5
/// Lexer that splits PowerShell source text into `PowerShellSyntaxKind` tokens.
///
/// It only borrows its language configuration, so cloning it is cheap.
6#[derive(Clone)]
7pub struct PowerShellLexer<'config> {
 // Borrowed language configuration. None of the methods visible in this
 // file read it; it is stored by `new` only.
8 config: &'config PowerShellLanguage,
9}
10
11impl<'config> PowerShellLexer<'config> {
12 pub fn new(config: &'config PowerShellLanguage) -> Self {
13 Self { config }
14 }
15
16 fn run<S: Source>(&self, state: &mut State<S>) -> Result<(), OakError> {
17 while state.not_at_end() {
18 if self.skip_whitespace(state) {
19 continue;
20 }
21
22 if self.lex_newline(state) {
23 continue;
24 }
25
26 if self.lex_comment(state) {
27 continue;
28 }
29
30 if self.lex_string(state) {
31 continue;
32 }
33
34 if self.lex_number(state) {
35 continue;
36 }
37
38 if self.lex_variable(state) {
39 continue;
40 }
41
42 if self.lex_identifier_or_keyword(state) {
43 continue;
44 }
45
46 if self.lex_operators_and_punctuation(state) {
47 continue;
48 }
49
50 if let Some(ch) = state.peek() {
52 let start_pos = state.get_position();
53 state.advance(ch.len_utf8());
54 state.add_token(PowerShellSyntaxKind::Error, start_pos, state.get_position());
55 }
56 else {
57 break;
59 }
60 }
61
62 let pos = state.get_position();
64 state.add_token(PowerShellSyntaxKind::Eof, pos, pos);
65
66 Ok(())
67 }
68
69 fn skip_whitespace<S: Source>(&self, state: &mut State<S>) -> bool {
70 let start_pos = state.get_position();
71
72 while let Some(ch) = state.peek() {
73 if ch == ' ' || ch == '\t' {
74 state.advance(ch.len_utf8());
75 }
76 else {
77 break;
78 }
79 }
80
81 if state.get_position() > start_pos {
82 state.add_token(PowerShellSyntaxKind::Whitespace, start_pos, state.get_position());
83 true
84 }
85 else {
86 false
87 }
88 }
89
90 fn lex_newline<S: Source>(&self, state: &mut State<S>) -> bool {
91 let start_pos = state.get_position();
92
93 if let Some('\n') = state.peek() {
94 state.advance(1);
95 state.add_token(PowerShellSyntaxKind::Newline, start_pos, state.get_position());
96 true
97 }
98 else if let Some('\r') = state.peek() {
99 state.advance(1);
100 if let Some('\n') = state.peek() {
101 state.advance(1);
102 }
103 state.add_token(PowerShellSyntaxKind::Newline, start_pos, state.get_position());
104 true
105 }
106 else {
107 false
108 }
109 }
110
111 fn lex_comment<S: Source>(&self, state: &mut State<S>) -> bool {
112 let start_pos = state.get_position();
113
114 if let Some('#') = state.peek() {
115 state.advance(1);
116 while let Some(ch) = state.peek() {
118 if ch == '\n' || ch == '\r' {
119 break;
120 }
121 state.advance(ch.len_utf8());
122 }
123 state.add_token(PowerShellSyntaxKind::Comment, start_pos, state.get_position());
124 true
125 }
126 else if let Some('<') = state.peek() {
127 state.advance(1);
128 if let Some('#') = state.peek() {
129 state.advance(1);
130 let mut depth = 1;
132 while let Some(ch) = state.peek()
133 && depth > 0
134 {
135 if ch == '<' {
136 state.advance(1);
137 if let Some('#') = state.peek() {
138 state.advance(1);
139 depth += 1;
140 }
141 }
142 else if ch == '#' {
143 state.advance(1);
144 if let Some('>') = state.peek() {
145 state.advance(1);
146 depth -= 1;
147 }
148 }
149 else {
150 state.advance(ch.len_utf8());
151 }
152 }
153 state.add_token(PowerShellSyntaxKind::Comment, start_pos, state.get_position());
154 true
155 }
156 else {
157 state.set_position(start_pos);
159 false
160 }
161 }
162 else {
163 false
164 }
165 }
166
167 fn lex_string<S: Source>(&self, state: &mut State<S>) -> bool {
168 let start_pos = state.get_position();
169
170 if let Some(quote_char) = state.peek() {
171 if quote_char == '"' || quote_char == '\'' {
172 state.advance(1); let mut escaped = false;
175 while let Some(ch) = state.peek() {
176 if escaped {
177 escaped = false;
178 state.advance(ch.len_utf8());
179 }
180 else if ch == '`' {
181 escaped = true;
183 state.advance(1);
184 }
185 else if ch == quote_char {
186 state.advance(1); break;
188 }
189 else if ch == '\n' || ch == '\r' {
190 state.advance(ch.len_utf8());
192 }
193 else {
194 state.advance(ch.len_utf8());
195 }
196 }
197
198 state.add_token(PowerShellSyntaxKind::StringLiteral, start_pos, state.get_position());
199 true
200 }
201 else {
202 false
203 }
204 }
205 else {
206 false
207 }
208 }
209
210 fn lex_number<S: Source>(&self, state: &mut State<S>) -> bool {
211 if let Some(ch) = state.peek() {
212 if ch.is_ascii_digit() {
213 let start_pos = state.get_position();
214
215 while let Some(ch) = state.peek() {
217 if ch.is_ascii_digit() {
218 state.advance(1);
219 }
220 else {
221 break;
222 }
223 }
224
225 if let Some('.') = state.peek() {
227 state.advance(1);
228 while let Some(ch) = state.peek() {
230 if ch.is_ascii_digit() {
231 state.advance(1);
232 }
233 else {
234 break;
235 }
236 }
237 }
238
239 if let Some(ch) = state.peek() {
241 if ch == 'e' || ch == 'E' {
242 state.advance(1);
243 if let Some(ch) = state.peek() {
244 if ch == '+' || ch == '-' {
245 state.advance(1);
246 }
247 }
248 while let Some(ch) = state.peek() {
249 if ch.is_ascii_digit() {
250 state.advance(1);
251 }
252 else {
253 break;
254 }
255 }
256 }
257 }
258
259 state.add_token(PowerShellSyntaxKind::NumberLiteral, start_pos, state.get_position());
260 true
261 }
262 else {
263 false
264 }
265 }
266 else {
267 false
268 }
269 }
270
271 fn lex_variable<S: Source>(&self, state: &mut State<S>) -> bool {
272 let start_pos = state.get_position();
273
274 if let Some('$') = state.peek() {
275 state.advance(1);
276
277 if let Some(ch) = state.peek() {
279 if ch.is_alphabetic() || ch == '_' {
280 state.advance(ch.len_utf8());
281
282 while let Some(ch) = state.peek() {
284 if ch.is_alphanumeric() || ch == '_' {
285 state.advance(ch.len_utf8());
286 }
287 else {
288 break;
289 }
290 }
291
292 state.add_token(PowerShellSyntaxKind::Variable, start_pos, state.get_position());
293 true
294 }
295 else {
296 state.add_token(PowerShellSyntaxKind::Dollar, start_pos, state.get_position());
298 true
299 }
300 }
301 else {
302 state.add_token(PowerShellSyntaxKind::Dollar, start_pos, state.get_position());
303 true
304 }
305 }
306 else {
307 false
308 }
309 }
310
311 fn lex_identifier_or_keyword<S: Source>(&self, state: &mut State<S>) -> bool {
312 if let Some(ch) = state.peek() {
313 if ch.is_alphabetic() || ch == '_' {
314 let start_pos = state.get_position();
315 let mut text = String::new();
316
317 while let Some(ch) = state.peek() {
319 if ch.is_alphanumeric() || ch == '_' || ch == '-' {
320 text.push(ch);
321 state.advance(ch.len_utf8());
322 }
323 else {
324 break;
325 }
326 }
327
328 let kind = match text.as_str() {
330 "begin" => PowerShellSyntaxKind::Begin,
331 "break" => PowerShellSyntaxKind::Break,
332 "catch" => PowerShellSyntaxKind::Catch,
333 "class" => PowerShellSyntaxKind::Class,
334 "continue" => PowerShellSyntaxKind::Continue,
335 "data" => PowerShellSyntaxKind::Data,
336 "define" => PowerShellSyntaxKind::Define,
337 "do" => PowerShellSyntaxKind::Do,
338 "dynamicparam" => PowerShellSyntaxKind::DynamicParam,
339 "else" => PowerShellSyntaxKind::Else,
340 "elseif" => PowerShellSyntaxKind::ElseIf,
341 "end" => PowerShellSyntaxKind::End,
342 "exit" => PowerShellSyntaxKind::Exit,
343 "filter" => PowerShellSyntaxKind::Filter,
344 "finally" => PowerShellSyntaxKind::Finally,
345 "for" => PowerShellSyntaxKind::For,
346 "foreach" => PowerShellSyntaxKind::ForEach,
347 "from" => PowerShellSyntaxKind::From,
348 "function" => PowerShellSyntaxKind::Function,
349 "if" => PowerShellSyntaxKind::If,
350 "in" => PowerShellSyntaxKind::In,
351 "param" => PowerShellSyntaxKind::Param,
352 "process" => PowerShellSyntaxKind::Process,
353 "return" => PowerShellSyntaxKind::Return,
354 "switch" => PowerShellSyntaxKind::Switch,
355 "throw" => PowerShellSyntaxKind::Throw,
356 "trap" => PowerShellSyntaxKind::Trap,
357 "try" => PowerShellSyntaxKind::Try,
358 "until" => PowerShellSyntaxKind::Until,
359 "using" => PowerShellSyntaxKind::Using,
360 "var" => PowerShellSyntaxKind::Var,
361 "while" => PowerShellSyntaxKind::While,
362 "workflow" => PowerShellSyntaxKind::Workflow,
363 "true" => PowerShellSyntaxKind::BooleanLiteral,
364 "false" => PowerShellSyntaxKind::BooleanLiteral,
365 "null" => PowerShellSyntaxKind::NullLiteral,
366 _ => PowerShellSyntaxKind::Identifier,
367 };
368
369 state.add_token(kind, start_pos, state.get_position());
370 true
371 }
372 else {
373 false
374 }
375 }
376 else {
377 false
378 }
379 }
380
381 fn lex_operators_and_punctuation<S: Source>(&self, state: &mut State<S>) -> bool {
382 if let Some(ch) = state.peek() {
383 let start_pos = state.get_position();
384
385 let kind = match ch {
386 '+' => {
387 state.advance(1);
388 if let Some('+') = state.peek() {
389 state.advance(1);
390 PowerShellSyntaxKind::Plus
391 }
392 else if let Some('=') = state.peek() {
393 state.advance(1);
394 PowerShellSyntaxKind::Equal
395 }
396 else {
397 PowerShellSyntaxKind::Plus
398 }
399 }
400 '-' => {
401 state.advance(1);
402 if let Some('-') = state.peek() {
403 state.advance(1);
404 PowerShellSyntaxKind::Minus
405 }
406 else if let Some('=') = state.peek() {
407 state.advance(1);
408 PowerShellSyntaxKind::Equal
409 }
410 else {
411 PowerShellSyntaxKind::Minus
412 }
413 }
414 '*' => {
415 state.advance(1);
416 if let Some('=') = state.peek() {
417 state.advance(1);
418 PowerShellSyntaxKind::Equal
419 }
420 else {
421 PowerShellSyntaxKind::Multiply
422 }
423 }
424 '/' => {
425 state.advance(1);
426 if let Some('=') = state.peek() {
427 state.advance(1);
428 PowerShellSyntaxKind::Equal
429 }
430 else {
431 PowerShellSyntaxKind::Divide
432 }
433 }
434 '%' => {
435 state.advance(1);
436 if let Some('=') = state.peek() {
437 state.advance(1);
438 PowerShellSyntaxKind::Equal
439 }
440 else {
441 PowerShellSyntaxKind::Modulo
442 }
443 }
444 '=' => {
445 state.advance(1);
446 if let Some('=') = state.peek() {
447 state.advance(1);
448 PowerShellSyntaxKind::Equal
449 }
450 else {
451 PowerShellSyntaxKind::Equal
452 }
453 }
454 '!' => {
455 state.advance(1);
456 if let Some('=') = state.peek() {
457 state.advance(1);
458 PowerShellSyntaxKind::NotEqual
459 }
460 else {
461 PowerShellSyntaxKind::Exclamation
462 }
463 }
464 '<' => {
465 state.advance(1);
466 if let Some('=') = state.peek() {
467 state.advance(1);
468 PowerShellSyntaxKind::LessEqual
469 }
470 else {
471 PowerShellSyntaxKind::LessThan
472 }
473 }
474 '>' => {
475 state.advance(1);
476 if let Some('=') = state.peek() {
477 state.advance(1);
478 PowerShellSyntaxKind::GreaterEqual
479 }
480 else {
481 PowerShellSyntaxKind::GreaterThan
482 }
483 }
484 '&' => {
485 state.advance(1);
486 if let Some('&') = state.peek() {
487 state.advance(1);
488 PowerShellSyntaxKind::And
489 }
490 else {
491 PowerShellSyntaxKind::Ampersand
492 }
493 }
494 '|' => {
495 state.advance(1);
496 if let Some('|') = state.peek() {
497 state.advance(1);
498 PowerShellSyntaxKind::Or
499 }
500 else {
501 PowerShellSyntaxKind::Pipe
502 }
503 }
504 '^' => {
505 state.advance(1);
506 PowerShellSyntaxKind::Xor
507 }
508 '~' => {
509 state.advance(1);
510 PowerShellSyntaxKind::Not
511 }
512 '?' => {
513 state.advance(1);
514 PowerShellSyntaxKind::Question
515 }
516 ':' => {
517 state.advance(1);
518 if let Some(':') = state.peek() {
519 state.advance(1);
520 PowerShellSyntaxKind::DoubleColon
521 }
522 else {
523 PowerShellSyntaxKind::Colon
524 }
525 }
526 ';' => {
527 state.advance(1);
528 PowerShellSyntaxKind::Semicolon
529 }
530 ',' => {
531 state.advance(1);
532 PowerShellSyntaxKind::Comma
533 }
534 '.' => {
535 state.advance(1);
536 if let Some('.') = state.peek() {
537 state.advance(1);
538 PowerShellSyntaxKind::DotDot
539 }
540 else {
541 PowerShellSyntaxKind::Dot
542 }
543 }
544 '(' => {
545 state.advance(1);
546 PowerShellSyntaxKind::LeftParen
547 }
548 ')' => {
549 state.advance(1);
550 PowerShellSyntaxKind::RightParen
551 }
552 '[' => {
553 state.advance(1);
554 PowerShellSyntaxKind::LeftBracket
555 }
556 ']' => {
557 state.advance(1);
558 PowerShellSyntaxKind::RightBracket
559 }
560 '{' => {
561 state.advance(1);
562 PowerShellSyntaxKind::LeftBrace
563 }
564 '}' => {
565 state.advance(1);
566 PowerShellSyntaxKind::RightBrace
567 }
568 '@' => {
569 state.advance(1);
570 PowerShellSyntaxKind::At
571 }
572 '`' => {
573 state.advance(1);
574 PowerShellSyntaxKind::Backtick
575 }
576 _ => return false,
577 };
578
579 state.add_token(kind, start_pos, state.get_position());
580 true
581 }
582 else {
583 false
584 }
585 }
586}
587
588impl<'config> Lexer<PowerShellLanguage> for PowerShellLexer<'config> {
589 fn lex_incremental(
590 &self,
591 source: impl Source,
592 _changed: usize,
593 _cache: IncrementalCache<PowerShellLanguage>,
594 ) -> LexOutput<PowerShellLanguage> {
595 let mut state = LexerState::new_with_cache(source, _changed, _cache);
596 let result = self.run(&mut state);
597 state.finish(result)
598 }
599}