1use crate::{kind::PhpSyntaxKind, language::PhpLanguage};
2use oak_core::{Lexer, LexerCache, LexerState, OakError, lexer::LexOutput, source::Source};
3
4type State<'s, S> = LexerState<'s, S, PhpLanguage>;
5
6#[derive(Clone, Debug)]
7pub struct PhpLexer<'config> {
8 _config: &'config PhpLanguage,
9}
10
11impl<'config> Lexer<PhpLanguage> for PhpLexer<'config> {
12 fn lex<'a, S: Source + ?Sized>(&self, source: &'a S, _edits: &[oak_core::source::TextEdit], cache: &'a mut impl LexerCache<PhpLanguage>) -> LexOutput<PhpLanguage> {
13 let mut state = State::new_with_cache(source, 0, cache);
14 let result = self.run(&mut state);
15 if result.is_ok() {
16 state.add_eof();
17 }
18 state.finish_with_cache(result, cache)
19 }
20}
21
22impl<'config> PhpLexer<'config> {
23 pub fn new(config: &'config PhpLanguage) -> Self {
24 Self { _config: config }
25 }
26
27 fn run<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> Result<(), OakError> {
28 while state.not_at_end() {
29 if self.skip_whitespace(state) {
30 continue;
31 }
32
33 if self.lex_newline(state) {
34 continue;
35 }
36
37 if self.lex_comment(state) {
38 continue;
39 }
40
41 if self.lex_string(state) {
42 continue;
43 }
44
45 if self.lex_number(state) {
46 continue;
47 }
48
49 if self.lex_identifier_or_keyword(state) {
50 continue;
51 }
52
53 if self.lex_operators_and_punctuation(state) {
54 continue;
55 }
56
57 if let Some(ch) = state.peek() {
59 let start_pos = state.get_position();
60 state.advance(ch.len_utf8());
61 state.add_token(PhpSyntaxKind::Error, start_pos, state.get_position());
62 }
63 else {
64 break;
66 }
67 }
68
69 Ok(())
70 }
71
72 fn skip_whitespace<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
73 let start_pos = state.get_position();
74
75 while let Some(ch) = state.peek() {
76 if ch == ' ' || ch == '\t' {
77 state.advance(ch.len_utf8());
78 }
79 else {
80 break;
81 }
82 }
83
84 if state.get_position() > start_pos {
85 state.add_token(PhpSyntaxKind::Whitespace, start_pos, state.get_position());
86 true
87 }
88 else {
89 false
90 }
91 }
92
93 fn lex_newline<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
94 let start_pos = state.get_position();
95
96 if let Some('\n') = state.peek() {
97 state.advance(1);
98 state.add_token(PhpSyntaxKind::Newline, start_pos, state.get_position());
99 true
100 }
101 else if let Some('\r') = state.peek() {
102 state.advance(1);
103 if let Some('\n') = state.peek() {
104 state.advance(1);
105 }
106 state.add_token(PhpSyntaxKind::Newline, start_pos, state.get_position());
107 true
108 }
109 else {
110 false
111 }
112 }
113
114 fn lex_comment<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
115 let start_pos = state.get_position();
116
117 if let Some('/') = state.peek() {
118 state.advance(1);
119 if let Some('/') = state.peek() {
120 state.advance(1);
121 while let Some(ch) = state.peek() {
123 if ch == '\n' || ch == '\r' {
124 break;
125 }
126 state.advance(ch.len_utf8());
127 }
128 state.add_token(PhpSyntaxKind::Comment, start_pos, state.get_position());
129 return true;
130 }
131 else if let Some('*') = state.peek() {
132 state.advance(1);
133 while let Some(ch) = state.peek() {
135 if ch == '*' {
136 state.advance(1);
137 if let Some('/') = state.peek() {
138 state.advance(1);
139 break;
140 }
141 }
142 else {
143 state.advance(ch.len_utf8());
144 }
145 }
146 state.add_token(PhpSyntaxKind::Comment, start_pos, state.get_position());
147 return true;
148 }
149 else {
150 state.set_position(start_pos);
152 return false;
153 }
154 }
155 else if let Some('#') = state.peek() {
156 state.advance(1);
157 while let Some(ch) = state.peek() {
159 if ch == '\n' || ch == '\r' {
160 break;
161 }
162 state.advance(ch.len_utf8());
163 }
164 state.add_token(PhpSyntaxKind::Comment, start_pos, state.get_position());
165 true
166 }
167 else {
168 false
169 }
170 }
171
172 fn lex_string<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
173 let start_pos = state.get_position();
174
175 if let Some(quote_char) = state.peek() {
176 if quote_char == '"' || quote_char == '\'' {
177 state.advance(1); let mut escaped = false;
180 while let Some(ch) = state.peek() {
181 if escaped {
182 escaped = false;
183 state.advance(ch.len_utf8());
184 }
185 else if ch == '\\' {
186 escaped = true;
187 state.advance(1);
188 }
189 else if ch == quote_char {
190 state.advance(1); break;
192 }
193 else if ch == '\n' || ch == '\r' {
194 break;
196 }
197 else {
198 state.advance(ch.len_utf8());
199 }
200 }
201
202 state.add_token(PhpSyntaxKind::StringLiteral, start_pos, state.get_position());
203 true
204 }
205 else {
206 false
207 }
208 }
209 else {
210 false
211 }
212 }
213
214 fn lex_number<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
215 if let Some(ch) = state.peek() {
216 if ch.is_ascii_digit() {
217 let start_pos = state.get_position();
218
219 while let Some(ch) = state.peek() {
221 if ch.is_ascii_digit() {
222 state.advance(1);
223 }
224 else {
225 break;
226 }
227 }
228
229 if let Some('.') = state.peek() {
231 state.advance(1);
232 while let Some(ch) = state.peek() {
234 if ch.is_ascii_digit() {
235 state.advance(1);
236 }
237 else {
238 break;
239 }
240 }
241 }
242
243 if let Some(ch) = state.peek() {
245 if ch == 'e' || ch == 'E' {
246 state.advance(1);
247 if let Some(ch) = state.peek() {
248 if ch == '+' || ch == '-' {
249 state.advance(1);
250 }
251 }
252 while let Some(ch) = state.peek() {
253 if ch.is_ascii_digit() {
254 state.advance(1);
255 }
256 else {
257 break;
258 }
259 }
260 }
261 }
262
263 state.add_token(PhpSyntaxKind::NumberLiteral, start_pos, state.get_position());
264 true
265 }
266 else {
267 false
268 }
269 }
270 else {
271 false
272 }
273 }
274
275 fn lex_identifier_or_keyword<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
276 if let Some(ch) = state.peek() {
277 if ch.is_alphabetic() || ch == '_' || ch == '$' {
278 let start_pos = state.get_position();
279
280 while let Some(ch) = state.peek() {
282 if ch.is_alphanumeric() || ch == '_' || ch == '$' {
283 state.advance(ch.len_utf8());
284 }
285 else {
286 break;
287 }
288 }
289
290 let end_pos = state.get_position();
291 let text = state.source().get_text_in(oak_core::Range { start: start_pos, end: end_pos });
292
293 let kind = match text.as_ref() {
295 "abstract" => PhpSyntaxKind::Abstract,
296 "and" => PhpSyntaxKind::And,
297 "array" => PhpSyntaxKind::Array,
298 "as" => PhpSyntaxKind::As,
299 "break" => PhpSyntaxKind::Break,
300 "callable" => PhpSyntaxKind::Callable,
301 "case" => PhpSyntaxKind::Case,
302 "catch" => PhpSyntaxKind::Catch,
303 "class" => PhpSyntaxKind::Class,
304 "clone" => PhpSyntaxKind::Clone,
305 "const" => PhpSyntaxKind::Const,
306 "continue" => PhpSyntaxKind::Continue,
307 "declare" => PhpSyntaxKind::Declare,
308 "default" => PhpSyntaxKind::Default,
309 "die" => PhpSyntaxKind::Exit,
310 "do" => PhpSyntaxKind::Do,
311 "echo" => PhpSyntaxKind::Echo,
312 "else" => PhpSyntaxKind::Else,
313 "elseif" => PhpSyntaxKind::Elseif,
314 "empty" => PhpSyntaxKind::Empty,
315 "enddeclare" => PhpSyntaxKind::Enddeclare,
316 "endfor" => PhpSyntaxKind::Endfor,
317 "endforeach" => PhpSyntaxKind::Endforeach,
318 "endif" => PhpSyntaxKind::Endif,
319 "endswitch" => PhpSyntaxKind::Endswitch,
320 "endwhile" => PhpSyntaxKind::Endwhile,
321 "eval" => PhpSyntaxKind::Eval,
322 "exit" => PhpSyntaxKind::Exit,
323 "extends" => PhpSyntaxKind::Extends,
324 "final" => PhpSyntaxKind::Final,
325 "finally" => PhpSyntaxKind::Finally,
326 "for" => PhpSyntaxKind::For,
327 "foreach" => PhpSyntaxKind::Foreach,
328 "function" => PhpSyntaxKind::Function,
329 "global" => PhpSyntaxKind::Global,
330 "goto" => PhpSyntaxKind::Goto,
331 "if" => PhpSyntaxKind::If,
332 "implements" => PhpSyntaxKind::Implements,
333 "include" => PhpSyntaxKind::Include,
334 "include_once" => PhpSyntaxKind::IncludeOnce,
335 "instanceof" => PhpSyntaxKind::Instanceof,
336 "insteadof" => PhpSyntaxKind::Insteadof,
337 "interface" => PhpSyntaxKind::Interface,
338 "isset" => PhpSyntaxKind::Isset,
339 "list" => PhpSyntaxKind::List,
340 "namespace" => PhpSyntaxKind::Namespace,
341 "new" => PhpSyntaxKind::New,
342 "or" => PhpSyntaxKind::Or,
343 "print" => PhpSyntaxKind::Print,
344 "private" => PhpSyntaxKind::Private,
345 "protected" => PhpSyntaxKind::Protected,
346 "public" => PhpSyntaxKind::Public,
347 "require" => PhpSyntaxKind::Require,
348 "require_once" => PhpSyntaxKind::RequireOnce,
349 "return" => PhpSyntaxKind::Return,
350 "static" => PhpSyntaxKind::Static,
351 "switch" => PhpSyntaxKind::Switch,
352 "throw" => PhpSyntaxKind::Throw,
353 "trait" => PhpSyntaxKind::Trait,
354 "try" => PhpSyntaxKind::Try,
355 "unset" => PhpSyntaxKind::Unset,
356 "use" => PhpSyntaxKind::Use,
357 "var" => PhpSyntaxKind::Var,
358 "while" => PhpSyntaxKind::While,
359 "xor" => PhpSyntaxKind::Xor,
360 "yield" => PhpSyntaxKind::Yield,
361 "true" => PhpSyntaxKind::BooleanLiteral,
362 "false" => PhpSyntaxKind::BooleanLiteral,
363 "null" => PhpSyntaxKind::NullLiteral,
364 _ => PhpSyntaxKind::Identifier,
365 };
366
367 state.add_token(kind, start_pos, state.get_position());
368 true
369 }
370 else {
371 false
372 }
373 }
374 else {
375 false
376 }
377 }
378
379 fn lex_operators_and_punctuation<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
380 if let Some(ch) = state.peek() {
381 let start_pos = state.get_position();
382
383 let kind = match ch {
384 '+' => {
385 state.advance(1);
386 if let Some('+') = state.peek() {
387 state.advance(1);
388 PhpSyntaxKind::Increment
389 }
390 else if let Some('=') = state.peek() {
391 state.advance(1);
392 PhpSyntaxKind::PlusAssign
393 }
394 else {
395 PhpSyntaxKind::Plus
396 }
397 }
398 '-' => {
399 state.advance(1);
400 if let Some('-') = state.peek() {
401 state.advance(1);
402 PhpSyntaxKind::Decrement
403 }
404 else if let Some('=') = state.peek() {
405 state.advance(1);
406 PhpSyntaxKind::MinusAssign
407 }
408 else if let Some('>') = state.peek() {
409 state.advance(1);
410 PhpSyntaxKind::Arrow
411 }
412 else {
413 PhpSyntaxKind::Minus
414 }
415 }
416 '*' => {
417 state.advance(1);
418 if let Some('*') = state.peek() {
419 state.advance(1);
420 PhpSyntaxKind::Power
421 }
422 else if let Some('=') = state.peek() {
423 state.advance(1);
424 PhpSyntaxKind::MultiplyAssign
425 }
426 else {
427 PhpSyntaxKind::Multiply
428 }
429 }
430 '/' => {
431 state.advance(1);
432 if let Some('=') = state.peek() {
433 state.advance(1);
434 PhpSyntaxKind::DivideAssign
435 }
436 else {
437 PhpSyntaxKind::Divide
438 }
439 }
440 '%' => {
441 state.advance(1);
442 if let Some('=') = state.peek() {
443 state.advance(1);
444 PhpSyntaxKind::ModuloAssign
445 }
446 else {
447 PhpSyntaxKind::Modulo
448 }
449 }
450 '=' => {
451 state.advance(1);
452 if let Some('=') = state.peek() {
453 state.advance(1);
454 if let Some('=') = state.peek() {
455 state.advance(1);
456 PhpSyntaxKind::Identical
457 }
458 else {
459 PhpSyntaxKind::Equal
460 }
461 }
462 else if let Some('>') = state.peek() {
463 state.advance(1);
464 PhpSyntaxKind::DoubleArrow
465 }
466 else {
467 PhpSyntaxKind::Assign
468 }
469 }
470 '!' => {
471 state.advance(1);
472 if let Some('=') = state.peek() {
473 state.advance(1);
474 if let Some('=') = state.peek() {
475 state.advance(1);
476 PhpSyntaxKind::NotIdentical
477 }
478 else {
479 PhpSyntaxKind::NotEqual
480 }
481 }
482 else {
483 PhpSyntaxKind::LogicalNot
484 }
485 }
486 '<' => {
487 state.advance(1);
488 if let Some('=') = state.peek() {
489 state.advance(1);
490 PhpSyntaxKind::LessEqual
491 }
492 else if let Some('<') = state.peek() {
493 state.advance(1);
494 if let Some('=') = state.peek() {
495 state.advance(1);
496 PhpSyntaxKind::LeftShiftAssign
497 }
498 else {
499 PhpSyntaxKind::LeftShift
500 }
501 }
502 else if let Some('>') = state.peek() {
503 state.advance(1);
504 PhpSyntaxKind::Spaceship
505 }
506 else {
507 PhpSyntaxKind::Less
508 }
509 }
510 '>' => {
511 state.advance(1);
512 if let Some('=') = state.peek() {
513 state.advance(1);
514 PhpSyntaxKind::GreaterEqual
515 }
516 else if let Some('>') = state.peek() {
517 state.advance(1);
518 if let Some('=') = state.peek() {
519 state.advance(1);
520 PhpSyntaxKind::RightShiftAssign
521 }
522 else {
523 PhpSyntaxKind::RightShift
524 }
525 }
526 else {
527 PhpSyntaxKind::Greater
528 }
529 }
530 '&' => {
531 state.advance(1);
532 if let Some('&') = state.peek() {
533 state.advance(1);
534 PhpSyntaxKind::LogicalAnd
535 }
536 else if let Some('=') = state.peek() {
537 state.advance(1);
538 PhpSyntaxKind::BitwiseAndAssign
539 }
540 else {
541 PhpSyntaxKind::BitwiseAnd
542 }
543 }
544 '|' => {
545 state.advance(1);
546 if let Some('|') = state.peek() {
547 state.advance(1);
548 PhpSyntaxKind::LogicalOr
549 }
550 else if let Some('=') = state.peek() {
551 state.advance(1);
552 PhpSyntaxKind::BitwiseOrAssign
553 }
554 else {
555 PhpSyntaxKind::BitwiseOr
556 }
557 }
558 '^' => {
559 state.advance(1);
560 if let Some('=') = state.peek() {
561 state.advance(1);
562 PhpSyntaxKind::BitwiseXorAssign
563 }
564 else {
565 PhpSyntaxKind::BitwiseXor
566 }
567 }
568 '~' => {
569 state.advance(1);
570 PhpSyntaxKind::BitwiseNot
571 }
572 '?' => {
573 state.advance(1);
574 if let Some('?') = state.peek() {
575 state.advance(1);
576 PhpSyntaxKind::NullCoalesce
577 }
578 else {
579 PhpSyntaxKind::Question
580 }
581 }
582 ':' => {
583 state.advance(1);
584 if let Some(':') = state.peek() {
585 state.advance(1);
586 PhpSyntaxKind::DoubleColon
587 }
588 else {
589 PhpSyntaxKind::Colon
590 }
591 }
592 ';' => {
593 state.advance(1);
594 PhpSyntaxKind::Semicolon
595 }
596 ',' => {
597 state.advance(1);
598 PhpSyntaxKind::Comma
599 }
600 '.' => {
601 state.advance(1);
602 if let Some('=') = state.peek() {
603 state.advance(1);
604 PhpSyntaxKind::ConcatAssign
605 }
606 else {
607 PhpSyntaxKind::Dot
608 }
609 }
610 '(' => {
611 state.advance(1);
612 PhpSyntaxKind::LeftParen
613 }
614 ')' => {
615 state.advance(1);
616 PhpSyntaxKind::RightParen
617 }
618 '[' => {
619 state.advance(1);
620 PhpSyntaxKind::LeftBracket
621 }
622 ']' => {
623 state.advance(1);
624 PhpSyntaxKind::RightBracket
625 }
626 '{' => {
627 state.advance(1);
628 PhpSyntaxKind::LeftBrace
629 }
630 '}' => {
631 state.advance(1);
632 PhpSyntaxKind::RightBrace
633 }
634 '$' => {
635 state.advance(1);
636 PhpSyntaxKind::Dollar
637 }
638 '@' => {
639 state.advance(1);
640 PhpSyntaxKind::At
641 }
642 _ => return false,
643 };
644
645 state.add_token(kind, start_pos, state.get_position());
646 true
647 }
648 else {
649 false
650 }
651 }
652}