1#![doc = include_str!("readme.md")]
2pub mod token_type;
4
5use crate::{language::PowerShellLanguage, lexer::token_type::PowerShellTokenType};
6use oak_core::{
7 Lexer, LexerCache, LexerState, OakError,
8 lexer::LexOutput,
9 source::{Source, TextEdit},
10};
11
/// Crate-internal shorthand for `oak_core`'s [`LexerState`] specialised to
/// [`PowerShellLanguage`]; every lexing routine in this module takes a
/// `&mut State<'a, S>`.
pub(crate) type State<'a, S> = LexerState<'a, S, PowerShellLanguage>;
13
/// Lexer that splits PowerShell source text into tokens.
///
/// The lexer itself holds no mutable state; it only borrows the language
/// configuration it was created with.
#[derive(Clone)]
pub struct PowerShellLexer<'config> {
    /// Language configuration handed to [`PowerShellLexer::new`].
    /// NOTE(review): none of the lexing routines in this file read it — confirm
    /// whether it is consumed elsewhere.
    pub config: &'config PowerShellLanguage,
}
20
21impl<'config> PowerShellLexer<'config> {
22 pub fn new(config: &'config PowerShellLanguage) -> Self {
24 Self { config }
25 }
26
27 fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
28 while state.not_at_end() {
29 if self.skip_whitespace(state) {
30 continue;
31 }
32
33 if self.lex_newline(state) {
34 continue;
35 }
36
37 if self.lex_comment(state) {
38 continue;
39 }
40
41 if self.lex_string(state) {
42 continue;
43 }
44
45 if self.lex_number(state) {
46 continue;
47 }
48
49 if self.lex_variable(state) {
50 continue;
51 }
52
53 if self.lex_identifier_or_keyword(state) {
54 continue;
55 }
56
57 if self.lex_operators_and_punctuation(state) {
58 continue;
59 }
60
61 if let Some(ch) = state.peek() {
63 let start_pos = state.get_position();
64 state.advance(ch.len_utf8());
65 state.add_token(PowerShellTokenType::Error, start_pos, state.get_position());
66 }
67 else {
68 break;
70 }
71 }
72
73 let pos = state.get_position();
75 state.add_token(PowerShellTokenType::Eof, pos, pos);
76
77 Ok(())
78 }
79
80 fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
81 let start_pos = state.get_position();
82
83 while let Some(ch) = state.peek() {
84 if ch == ' ' || ch == '\t' {
85 state.advance(ch.len_utf8());
86 }
87 else {
88 break;
89 }
90 }
91
92 if state.get_position() > start_pos {
93 state.add_token(PowerShellTokenType::Whitespace, start_pos, state.get_position());
94 true
95 }
96 else {
97 false
98 }
99 }
100
101 fn lex_newline<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
102 let start_pos = state.get_position();
103
104 if let Some('\n') = state.peek() {
105 state.advance(1);
106 state.add_token(PowerShellTokenType::Newline, start_pos, state.get_position());
107 true
108 }
109 else if let Some('\r') = state.peek() {
110 state.advance(1);
111 if let Some('\n') = state.peek() {
112 state.advance(1);
113 }
114 state.add_token(PowerShellTokenType::Newline, start_pos, state.get_position());
115 true
116 }
117 else {
118 false
119 }
120 }
121
122 fn lex_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
123 let start_pos = state.get_position();
124
125 if let Some('#') = state.peek() {
126 state.advance(1);
127 while let Some(ch) = state.peek() {
129 if ch == '\n' || ch == '\r' {
130 break;
131 }
132 state.advance(ch.len_utf8());
133 }
134 state.add_token(PowerShellTokenType::Comment, start_pos, state.get_position());
135 true
136 }
137 else if let Some('<') = state.peek() {
138 state.advance(1);
139 if let Some('#') = state.peek() {
140 state.advance(1);
141 let mut depth = 1;
143 while let Some(ch) = state.peek() {
144 if depth == 0 {
145 break;
146 }
147 if ch == '<' {
148 state.advance(1);
149 if let Some('#') = state.peek() {
150 state.advance(1);
151 depth += 1;
152 }
153 }
154 else if ch == '#' {
155 state.advance(1);
156 if let Some('>') = state.peek() {
157 state.advance(1);
158 depth -= 1;
159 }
160 }
161 else {
162 state.advance(ch.len_utf8());
163 }
164 }
165 state.add_token(PowerShellTokenType::Comment, start_pos, state.get_position());
166 true
167 }
168 else {
169 state.set_position(start_pos);
171 false
172 }
173 }
174 else {
175 false
176 }
177 }
178
179 fn lex_string<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
180 let start_pos = state.get_position();
181
182 if let Some(quote_char) = state.peek() {
183 if quote_char == '"' || quote_char == '\'' {
184 state.advance(1); let mut escaped = false;
187 while let Some(ch) = state.peek() {
188 if escaped {
189 escaped = false;
190 state.advance(ch.len_utf8());
191 }
192 else if ch == '`' {
193 escaped = true;
195 state.advance(1);
196 }
197 else if ch == quote_char {
198 state.advance(1); break;
200 }
201 else if ch == '\n' || ch == '\r' {
202 state.advance(ch.len_utf8());
204 }
205 else {
206 state.advance(ch.len_utf8());
207 }
208 }
209
210 state.add_token(PowerShellTokenType::StringLiteral, start_pos, state.get_position());
211 true
212 }
213 else {
214 false
215 }
216 }
217 else {
218 false
219 }
220 }
221
222 fn lex_number<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
223 if let Some(ch) = state.peek() {
224 if ch.is_ascii_digit() {
225 let start_pos = state.get_position();
226
227 while let Some(ch) = state.peek() {
229 if ch.is_ascii_digit() {
230 state.advance(1);
231 }
232 else {
233 break;
234 }
235 }
236
237 if let Some('.') = state.peek() {
239 state.advance(1);
240 while let Some(ch) = state.peek() {
242 if ch.is_ascii_digit() {
243 state.advance(1);
244 }
245 else {
246 break;
247 }
248 }
249 }
250
251 if let Some(ch) = state.peek() {
253 if ch == 'e' || ch == 'E' {
254 state.advance(1);
255 if let Some(ch) = state.peek() {
256 if ch == '+' || ch == '-' {
257 state.advance(1);
258 }
259 }
260 while let Some(ch) = state.peek() {
261 if ch.is_ascii_digit() {
262 state.advance(1);
263 }
264 else {
265 break;
266 }
267 }
268 }
269 }
270
271 state.add_token(PowerShellTokenType::NumberLiteral, start_pos, state.get_position());
272 true
273 }
274 else {
275 false
276 }
277 }
278 else {
279 false
280 }
281 }
282
283 fn lex_variable<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
284 let start_pos = state.get_position();
285
286 if let Some('$') = state.peek() {
287 state.advance(1);
288
289 if let Some(ch) = state.peek() {
291 if ch.is_alphabetic() || ch == '_' {
292 state.advance(ch.len_utf8());
293
294 while let Some(ch) = state.peek() {
296 if ch.is_alphanumeric() || ch == '_' {
297 state.advance(ch.len_utf8());
298 }
299 else {
300 break;
301 }
302 }
303
304 state.add_token(PowerShellTokenType::Variable, start_pos, state.get_position());
305 true
306 }
307 else {
308 state.add_token(PowerShellTokenType::Dollar, start_pos, state.get_position());
310 true
311 }
312 }
313 else {
314 state.add_token(PowerShellTokenType::Dollar, start_pos, state.get_position());
315 true
316 }
317 }
318 else {
319 false
320 }
321 }
322
323 fn lex_identifier_or_keyword<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
324 if let Some(ch) = state.peek() {
325 if ch.is_alphabetic() || ch == '_' {
326 let start_pos = state.get_position();
327 let mut text = String::new();
328
329 while let Some(ch) = state.peek() {
331 if ch.is_alphanumeric() || ch == '_' || ch == '-' {
332 text.push(ch);
333 state.advance(ch.len_utf8());
334 }
335 else {
336 break;
337 }
338 }
339
340 let kind = match text.as_str() {
342 "begin" => PowerShellTokenType::Begin,
343 "break" => PowerShellTokenType::Break,
344 "catch" => PowerShellTokenType::Catch,
345 "class" => PowerShellTokenType::Class,
346 "continue" => PowerShellTokenType::Continue,
347 "data" => PowerShellTokenType::Data,
348 "define" => PowerShellTokenType::Define,
349 "do" => PowerShellTokenType::Do,
350 "dynamicparam" => PowerShellTokenType::DynamicParam,
351 "else" => PowerShellTokenType::Else,
352 "elseif" => PowerShellTokenType::ElseIf,
353 "end" => PowerShellTokenType::End,
354 "exit" => PowerShellTokenType::Exit,
355 "filter" => PowerShellTokenType::Filter,
356 "finally" => PowerShellTokenType::Finally,
357 "for" => PowerShellTokenType::For,
358 "foreach" => PowerShellTokenType::ForEach,
359 "from" => PowerShellTokenType::From,
360 "function" => PowerShellTokenType::Function,
361 "if" => PowerShellTokenType::If,
362 "in" => PowerShellTokenType::In,
363 "param" => PowerShellTokenType::Param,
364 "process" => PowerShellTokenType::Process,
365 "return" => PowerShellTokenType::Return,
366 "switch" => PowerShellTokenType::Switch,
367 "throw" => PowerShellTokenType::Throw,
368 "trap" => PowerShellTokenType::Trap,
369 "try" => PowerShellTokenType::Try,
370 "until" => PowerShellTokenType::Until,
371 "using" => PowerShellTokenType::Using,
372 "var" => PowerShellTokenType::Var,
373 "while" => PowerShellTokenType::While,
374 "workflow" => PowerShellTokenType::Workflow,
375 "true" => PowerShellTokenType::BooleanLiteral,
376 "false" => PowerShellTokenType::BooleanLiteral,
377 "null" => PowerShellTokenType::NullLiteral,
378 _ => PowerShellTokenType::Identifier,
379 };
380
381 state.add_token(kind, start_pos, state.get_position());
382 true
383 }
384 else {
385 false
386 }
387 }
388 else {
389 false
390 }
391 }
392
393 fn lex_operators_and_punctuation<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
394 if let Some(ch) = state.peek() {
395 let start_pos = state.get_position();
396
397 let kind = match ch {
398 '+' => {
399 state.advance(1);
400 if let Some('+') = state.peek() {
401 state.advance(1);
402 PowerShellTokenType::Plus
403 }
404 else if let Some('=') = state.peek() {
405 state.advance(1);
406 PowerShellTokenType::Equal
407 }
408 else {
409 PowerShellTokenType::Plus
410 }
411 }
412 '-' => {
413 state.advance(1);
414 if let Some('-') = state.peek() {
415 state.advance(1);
416 PowerShellTokenType::Minus
417 }
418 else if let Some('=') = state.peek() {
419 state.advance(1);
420 PowerShellTokenType::Equal
421 }
422 else {
423 PowerShellTokenType::Minus
424 }
425 }
426 '*' => {
427 state.advance(1);
428 if let Some('=') = state.peek() {
429 state.advance(1);
430 PowerShellTokenType::Equal
431 }
432 else {
433 PowerShellTokenType::Multiply
434 }
435 }
436 '/' => {
437 state.advance(1);
438 if let Some('=') = state.peek() {
439 state.advance(1);
440 PowerShellTokenType::Equal
441 }
442 else {
443 PowerShellTokenType::Divide
444 }
445 }
446 '%' => {
447 state.advance(1);
448 if let Some('=') = state.peek() {
449 state.advance(1);
450 PowerShellTokenType::Equal
451 }
452 else {
453 PowerShellTokenType::Modulo
454 }
455 }
456 '=' => {
457 state.advance(1);
458 if let Some('=') = state.peek() {
459 state.advance(1);
460 PowerShellTokenType::Equal
461 }
462 else {
463 PowerShellTokenType::Equal
464 }
465 }
466 '!' => {
467 state.advance(1);
468 if let Some('=') = state.peek() {
469 state.advance(1);
470 PowerShellTokenType::NotEqual
471 }
472 else {
473 PowerShellTokenType::Exclamation
474 }
475 }
476 '<' => {
477 state.advance(1);
478 if let Some('=') = state.peek() {
479 state.advance(1);
480 PowerShellTokenType::LessEqual
481 }
482 else {
483 PowerShellTokenType::LessThan
484 }
485 }
486 '>' => {
487 state.advance(1);
488 if let Some('=') = state.peek() {
489 state.advance(1);
490 PowerShellTokenType::GreaterEqual
491 }
492 else {
493 PowerShellTokenType::GreaterThan
494 }
495 }
496 '&' => {
497 state.advance(1);
498 if let Some('&') = state.peek() {
499 state.advance(1);
500 PowerShellTokenType::And
501 }
502 else {
503 PowerShellTokenType::Ampersand
504 }
505 }
506 '|' => {
507 state.advance(1);
508 if let Some('|') = state.peek() {
509 state.advance(1);
510 PowerShellTokenType::Or
511 }
512 else {
513 PowerShellTokenType::Pipe
514 }
515 }
516 '^' => {
517 state.advance(1);
518 PowerShellTokenType::Xor
519 }
520 '~' => {
521 state.advance(1);
522 PowerShellTokenType::Not
523 }
524 '?' => {
525 state.advance(1);
526 PowerShellTokenType::Question
527 }
528 ':' => {
529 state.advance(1);
530 if let Some(':') = state.peek() {
531 state.advance(1);
532 PowerShellTokenType::DoubleColon
533 }
534 else {
535 PowerShellTokenType::Colon
536 }
537 }
538 ';' => {
539 state.advance(1);
540 PowerShellTokenType::Semicolon
541 }
542 ',' => {
543 state.advance(1);
544 PowerShellTokenType::Comma
545 }
546 '.' => {
547 state.advance(1);
548 if let Some('.') = state.peek() {
549 state.advance(1);
550 PowerShellTokenType::DotDot
551 }
552 else {
553 PowerShellTokenType::Dot
554 }
555 }
556 '(' => {
557 state.advance(1);
558 PowerShellTokenType::LeftParen
559 }
560 ')' => {
561 state.advance(1);
562 PowerShellTokenType::RightParen
563 }
564 '[' => {
565 state.advance(1);
566 PowerShellTokenType::LeftBracket
567 }
568 ']' => {
569 state.advance(1);
570 PowerShellTokenType::RightBracket
571 }
572 '{' => {
573 state.advance(1);
574 PowerShellTokenType::LeftBrace
575 }
576 '}' => {
577 state.advance(1);
578 PowerShellTokenType::RightBrace
579 }
580 '@' => {
581 state.advance(1);
582 PowerShellTokenType::At
583 }
584 '`' => {
585 state.advance(1);
586 PowerShellTokenType::Backtick
587 }
588 _ => return false,
589 };
590
591 state.add_token(kind, start_pos, state.get_position());
592 true
593 }
594 else {
595 false
596 }
597 }
598}
599
600impl<'config> Lexer<PowerShellLanguage> for PowerShellLexer<'config> {
601 fn lex<'a, S: Source + ?Sized>(&self, source: &S, _edits: &[TextEdit], cache: &'a mut impl LexerCache<PowerShellLanguage>) -> LexOutput<PowerShellLanguage> {
602 let mut state = LexerState::new(source);
603 let result = self.run(&mut state);
604 state.finish_with_cache(result, cache)
605 }
606}