1use crate::{kind::PhpSyntaxKind, language::PhpLanguage};
2use oak_core::{Lexer, LexerCache, LexerState, OakError, lexer::LexOutput, source::Source};
3
4type State<'s, S> = LexerState<'s, S, PhpLanguage>;
5
6#[derive(Clone)]
7pub struct PhpLexer<'config> {
8 _config: &'config PhpLanguage,
9}
10
11impl<'config> PhpLexer<'config> {
12 pub fn new(config: &'config PhpLanguage) -> Self {
13 Self { _config: config }
14 }
15
16 fn run<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> Result<(), OakError> {
17 while state.not_at_end() {
18 if self.skip_whitespace(state) {
19 continue;
20 }
21
22 if self.lex_newline(state) {
23 continue;
24 }
25
26 if self.lex_comment(state) {
27 continue;
28 }
29
30 if self.lex_string(state) {
31 continue;
32 }
33
34 if self.lex_number(state) {
35 continue;
36 }
37
38 if self.lex_identifier_or_keyword(state) {
39 continue;
40 }
41
42 if self.lex_operators_and_punctuation(state) {
43 continue;
44 }
45
46 if let Some(ch) = state.peek() {
48 let start_pos = state.get_position();
49 state.advance(ch.len_utf8());
50 state.add_token(PhpSyntaxKind::Error, start_pos, state.get_position());
51 }
52 else {
53 break;
55 }
56 }
57
58 state.add_eof();
60
61 Ok(())
62 }
63
64 fn skip_whitespace<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
65 let start_pos = state.get_position();
66
67 while let Some(ch) = state.peek() {
68 if ch == ' ' || ch == '\t' {
69 state.advance(ch.len_utf8());
70 }
71 else {
72 break;
73 }
74 }
75
76 if state.get_position() > start_pos {
77 state.add_token(PhpSyntaxKind::Whitespace, start_pos, state.get_position());
78 true
79 }
80 else {
81 false
82 }
83 }
84
85 fn lex_newline<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
86 let start_pos = state.get_position();
87
88 if let Some('\n') = state.peek() {
89 state.advance(1);
90 state.add_token(PhpSyntaxKind::Newline, start_pos, state.get_position());
91 true
92 }
93 else if let Some('\r') = state.peek() {
94 state.advance(1);
95 if let Some('\n') = state.peek() {
96 state.advance(1);
97 }
98 state.add_token(PhpSyntaxKind::Newline, start_pos, state.get_position());
99 true
100 }
101 else {
102 false
103 }
104 }
105
106 fn lex_comment<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
107 let start_pos = state.get_position();
108
109 if let Some('/') = state.peek() {
110 state.advance(1);
111 if let Some('/') = state.peek() {
112 state.advance(1);
113 while let Some(ch) = state.peek() {
115 if ch == '\n' || ch == '\r' {
116 break;
117 }
118 state.advance(ch.len_utf8());
119 }
120 state.add_token(PhpSyntaxKind::Comment, start_pos, state.get_position());
121 return true;
122 }
123 else if let Some('*') = state.peek() {
124 state.advance(1);
125 while let Some(ch) = state.peek() {
127 if ch == '*' {
128 state.advance(1);
129 if let Some('/') = state.peek() {
130 state.advance(1);
131 break;
132 }
133 }
134 else {
135 state.advance(ch.len_utf8());
136 }
137 }
138 state.add_token(PhpSyntaxKind::Comment, start_pos, state.get_position());
139 return true;
140 }
141 else {
142 state.set_position(start_pos);
144 return false;
145 }
146 }
147 else if let Some('#') = state.peek() {
148 state.advance(1);
149 while let Some(ch) = state.peek() {
151 if ch == '\n' || ch == '\r' {
152 break;
153 }
154 state.advance(ch.len_utf8());
155 }
156 state.add_token(PhpSyntaxKind::Comment, start_pos, state.get_position());
157 true
158 }
159 else {
160 false
161 }
162 }
163
164 fn lex_string<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
165 let start_pos = state.get_position();
166
167 if let Some(quote_char) = state.peek() {
168 if quote_char == '"' || quote_char == '\'' {
169 state.advance(1); let mut escaped = false;
172 while let Some(ch) = state.peek() {
173 if escaped {
174 escaped = false;
175 state.advance(ch.len_utf8());
176 }
177 else if ch == '\\' {
178 escaped = true;
179 state.advance(1);
180 }
181 else if ch == quote_char {
182 state.advance(1); break;
184 }
185 else if ch == '\n' || ch == '\r' {
186 break;
188 }
189 else {
190 state.advance(ch.len_utf8());
191 }
192 }
193
194 state.add_token(PhpSyntaxKind::StringLiteral, start_pos, state.get_position());
195 true
196 }
197 else {
198 false
199 }
200 }
201 else {
202 false
203 }
204 }
205
206 fn lex_number<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
207 if let Some(ch) = state.peek() {
208 if ch.is_ascii_digit() {
209 let start_pos = state.get_position();
210
211 while let Some(ch) = state.peek() {
213 if ch.is_ascii_digit() {
214 state.advance(1);
215 }
216 else {
217 break;
218 }
219 }
220
221 if let Some('.') = state.peek() {
223 state.advance(1);
224 while let Some(ch) = state.peek() {
226 if ch.is_ascii_digit() {
227 state.advance(1);
228 }
229 else {
230 break;
231 }
232 }
233 }
234
235 if let Some(ch) = state.peek() {
237 if ch == 'e' || ch == 'E' {
238 state.advance(1);
239 if let Some(ch) = state.peek() {
240 if ch == '+' || ch == '-' {
241 state.advance(1);
242 }
243 }
244 while let Some(ch) = state.peek() {
245 if ch.is_ascii_digit() {
246 state.advance(1);
247 }
248 else {
249 break;
250 }
251 }
252 }
253 }
254
255 state.add_token(PhpSyntaxKind::NumberLiteral, start_pos, state.get_position());
256 true
257 }
258 else {
259 false
260 }
261 }
262 else {
263 false
264 }
265 }
266
267 fn lex_identifier_or_keyword<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
268 if let Some(ch) = state.peek() {
269 if ch.is_alphabetic() || ch == '_' || ch == '$' {
270 let start_pos = state.get_position();
271
272 while let Some(ch) = state.peek() {
274 if ch.is_alphanumeric() || ch == '_' || ch == '$' {
275 state.advance(ch.len_utf8());
276 }
277 else {
278 break;
279 }
280 }
281
282 let end_pos = state.get_position();
283 let text = state.source().get_text_in(oak_core::Range { start: start_pos, end: end_pos });
284
285 let kind = match text.as_ref() {
287 "abstract" => PhpSyntaxKind::Abstract,
288 "and" => PhpSyntaxKind::And,
289 "array" => PhpSyntaxKind::Array,
290 "as" => PhpSyntaxKind::As,
291 "break" => PhpSyntaxKind::Break,
292 "callable" => PhpSyntaxKind::Callable,
293 "case" => PhpSyntaxKind::Case,
294 "catch" => PhpSyntaxKind::Catch,
295 "class" => PhpSyntaxKind::Class,
296 "clone" => PhpSyntaxKind::Clone,
297 "const" => PhpSyntaxKind::Const,
298 "continue" => PhpSyntaxKind::Continue,
299 "declare" => PhpSyntaxKind::Declare,
300 "default" => PhpSyntaxKind::Default,
301 "die" => PhpSyntaxKind::Exit,
302 "do" => PhpSyntaxKind::Do,
303 "echo" => PhpSyntaxKind::Echo,
304 "else" => PhpSyntaxKind::Else,
305 "elseif" => PhpSyntaxKind::Elseif,
306 "empty" => PhpSyntaxKind::Empty,
307 "enddeclare" => PhpSyntaxKind::Enddeclare,
308 "endfor" => PhpSyntaxKind::Endfor,
309 "endforeach" => PhpSyntaxKind::Endforeach,
310 "endif" => PhpSyntaxKind::Endif,
311 "endswitch" => PhpSyntaxKind::Endswitch,
312 "endwhile" => PhpSyntaxKind::Endwhile,
313 "eval" => PhpSyntaxKind::Eval,
314 "exit" => PhpSyntaxKind::Exit,
315 "extends" => PhpSyntaxKind::Extends,
316 "final" => PhpSyntaxKind::Final,
317 "finally" => PhpSyntaxKind::Finally,
318 "for" => PhpSyntaxKind::For,
319 "foreach" => PhpSyntaxKind::Foreach,
320 "function" => PhpSyntaxKind::Function,
321 "global" => PhpSyntaxKind::Global,
322 "goto" => PhpSyntaxKind::Goto,
323 "if" => PhpSyntaxKind::If,
324 "implements" => PhpSyntaxKind::Implements,
325 "include" => PhpSyntaxKind::Include,
326 "include_once" => PhpSyntaxKind::IncludeOnce,
327 "instanceof" => PhpSyntaxKind::Instanceof,
328 "insteadof" => PhpSyntaxKind::Insteadof,
329 "interface" => PhpSyntaxKind::Interface,
330 "isset" => PhpSyntaxKind::Isset,
331 "list" => PhpSyntaxKind::List,
332 "namespace" => PhpSyntaxKind::Namespace,
333 "new" => PhpSyntaxKind::New,
334 "or" => PhpSyntaxKind::Or,
335 "print" => PhpSyntaxKind::Print,
336 "private" => PhpSyntaxKind::Private,
337 "protected" => PhpSyntaxKind::Protected,
338 "public" => PhpSyntaxKind::Public,
339 "require" => PhpSyntaxKind::Require,
340 "require_once" => PhpSyntaxKind::RequireOnce,
341 "return" => PhpSyntaxKind::Return,
342 "static" => PhpSyntaxKind::Static,
343 "switch" => PhpSyntaxKind::Switch,
344 "throw" => PhpSyntaxKind::Throw,
345 "trait" => PhpSyntaxKind::Trait,
346 "try" => PhpSyntaxKind::Try,
347 "unset" => PhpSyntaxKind::Unset,
348 "use" => PhpSyntaxKind::Use,
349 "var" => PhpSyntaxKind::Var,
350 "while" => PhpSyntaxKind::While,
351 "xor" => PhpSyntaxKind::Xor,
352 "yield" => PhpSyntaxKind::Yield,
353 "true" => PhpSyntaxKind::BooleanLiteral,
354 "false" => PhpSyntaxKind::BooleanLiteral,
355 "null" => PhpSyntaxKind::NullLiteral,
356 _ => PhpSyntaxKind::Identifier,
357 };
358
359 state.add_token(kind, start_pos, state.get_position());
360 true
361 }
362 else {
363 false
364 }
365 }
366 else {
367 false
368 }
369 }
370
371 fn lex_operators_and_punctuation<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
372 if let Some(ch) = state.peek() {
373 let start_pos = state.get_position();
374
375 let kind = match ch {
376 '+' => {
377 state.advance(1);
378 if let Some('+') = state.peek() {
379 state.advance(1);
380 PhpSyntaxKind::Increment
381 }
382 else if let Some('=') = state.peek() {
383 state.advance(1);
384 PhpSyntaxKind::PlusAssign
385 }
386 else {
387 PhpSyntaxKind::Plus
388 }
389 }
390 '-' => {
391 state.advance(1);
392 if let Some('-') = state.peek() {
393 state.advance(1);
394 PhpSyntaxKind::Decrement
395 }
396 else if let Some('=') = state.peek() {
397 state.advance(1);
398 PhpSyntaxKind::MinusAssign
399 }
400 else if let Some('>') = state.peek() {
401 state.advance(1);
402 PhpSyntaxKind::Arrow
403 }
404 else {
405 PhpSyntaxKind::Minus
406 }
407 }
408 '*' => {
409 state.advance(1);
410 if let Some('*') = state.peek() {
411 state.advance(1);
412 PhpSyntaxKind::Power
413 }
414 else if let Some('=') = state.peek() {
415 state.advance(1);
416 PhpSyntaxKind::MultiplyAssign
417 }
418 else {
419 PhpSyntaxKind::Multiply
420 }
421 }
422 '/' => {
423 state.advance(1);
424 if let Some('=') = state.peek() {
425 state.advance(1);
426 PhpSyntaxKind::DivideAssign
427 }
428 else {
429 PhpSyntaxKind::Divide
430 }
431 }
432 '%' => {
433 state.advance(1);
434 if let Some('=') = state.peek() {
435 state.advance(1);
436 PhpSyntaxKind::ModuloAssign
437 }
438 else {
439 PhpSyntaxKind::Modulo
440 }
441 }
442 '=' => {
443 state.advance(1);
444 if let Some('=') = state.peek() {
445 state.advance(1);
446 if let Some('=') = state.peek() {
447 state.advance(1);
448 PhpSyntaxKind::Identical
449 }
450 else {
451 PhpSyntaxKind::Equal
452 }
453 }
454 else if let Some('>') = state.peek() {
455 state.advance(1);
456 PhpSyntaxKind::DoubleArrow
457 }
458 else {
459 PhpSyntaxKind::Assign
460 }
461 }
462 '!' => {
463 state.advance(1);
464 if let Some('=') = state.peek() {
465 state.advance(1);
466 if let Some('=') = state.peek() {
467 state.advance(1);
468 PhpSyntaxKind::NotIdentical
469 }
470 else {
471 PhpSyntaxKind::NotEqual
472 }
473 }
474 else {
475 PhpSyntaxKind::LogicalNot
476 }
477 }
478 '<' => {
479 state.advance(1);
480 if let Some('=') = state.peek() {
481 state.advance(1);
482 PhpSyntaxKind::LessEqual
483 }
484 else if let Some('<') = state.peek() {
485 state.advance(1);
486 if let Some('=') = state.peek() {
487 state.advance(1);
488 PhpSyntaxKind::LeftShiftAssign
489 }
490 else {
491 PhpSyntaxKind::LeftShift
492 }
493 }
494 else if let Some('>') = state.peek() {
495 state.advance(1);
496 PhpSyntaxKind::Spaceship
497 }
498 else {
499 PhpSyntaxKind::Less
500 }
501 }
502 '>' => {
503 state.advance(1);
504 if let Some('=') = state.peek() {
505 state.advance(1);
506 PhpSyntaxKind::GreaterEqual
507 }
508 else if let Some('>') = state.peek() {
509 state.advance(1);
510 if let Some('=') = state.peek() {
511 state.advance(1);
512 PhpSyntaxKind::RightShiftAssign
513 }
514 else {
515 PhpSyntaxKind::RightShift
516 }
517 }
518 else {
519 PhpSyntaxKind::Greater
520 }
521 }
522 '&' => {
523 state.advance(1);
524 if let Some('&') = state.peek() {
525 state.advance(1);
526 PhpSyntaxKind::LogicalAnd
527 }
528 else if let Some('=') = state.peek() {
529 state.advance(1);
530 PhpSyntaxKind::BitwiseAndAssign
531 }
532 else {
533 PhpSyntaxKind::BitwiseAnd
534 }
535 }
536 '|' => {
537 state.advance(1);
538 if let Some('|') = state.peek() {
539 state.advance(1);
540 PhpSyntaxKind::LogicalOr
541 }
542 else if let Some('=') = state.peek() {
543 state.advance(1);
544 PhpSyntaxKind::BitwiseOrAssign
545 }
546 else {
547 PhpSyntaxKind::BitwiseOr
548 }
549 }
550 '^' => {
551 state.advance(1);
552 if let Some('=') = state.peek() {
553 state.advance(1);
554 PhpSyntaxKind::BitwiseXorAssign
555 }
556 else {
557 PhpSyntaxKind::BitwiseXor
558 }
559 }
560 '~' => {
561 state.advance(1);
562 PhpSyntaxKind::BitwiseNot
563 }
564 '?' => {
565 state.advance(1);
566 if let Some('?') = state.peek() {
567 state.advance(1);
568 PhpSyntaxKind::NullCoalesce
569 }
570 else {
571 PhpSyntaxKind::Question
572 }
573 }
574 ':' => {
575 state.advance(1);
576 if let Some(':') = state.peek() {
577 state.advance(1);
578 PhpSyntaxKind::DoubleColon
579 }
580 else {
581 PhpSyntaxKind::Colon
582 }
583 }
584 ';' => {
585 state.advance(1);
586 PhpSyntaxKind::Semicolon
587 }
588 ',' => {
589 state.advance(1);
590 PhpSyntaxKind::Comma
591 }
592 '.' => {
593 state.advance(1);
594 if let Some('=') = state.peek() {
595 state.advance(1);
596 PhpSyntaxKind::ConcatAssign
597 }
598 else {
599 PhpSyntaxKind::Dot
600 }
601 }
602 '(' => {
603 state.advance(1);
604 PhpSyntaxKind::LeftParen
605 }
606 ')' => {
607 state.advance(1);
608 PhpSyntaxKind::RightParen
609 }
610 '[' => {
611 state.advance(1);
612 PhpSyntaxKind::LeftBracket
613 }
614 ']' => {
615 state.advance(1);
616 PhpSyntaxKind::RightBracket
617 }
618 '{' => {
619 state.advance(1);
620 PhpSyntaxKind::LeftBrace
621 }
622 '}' => {
623 state.advance(1);
624 PhpSyntaxKind::RightBrace
625 }
626 '$' => {
627 state.advance(1);
628 PhpSyntaxKind::Dollar
629 }
630 '@' => {
631 state.advance(1);
632 PhpSyntaxKind::At
633 }
634 _ => return false,
635 };
636
637 state.add_token(kind, start_pos, state.get_position());
638 true
639 }
640 else {
641 false
642 }
643 }
644}
645
646impl<'config> Lexer<PhpLanguage> for PhpLexer<'config> {
647 fn lex<'a, S: Source + ?Sized>(&self, source: &S, _edits: &[oak_core::TextEdit], cache: &'a mut impl LexerCache<PhpLanguage>) -> LexOutput<PhpLanguage> {
648 let mut state: State<'_, S> = LexerState::new(source);
649 let result = self.run(&mut state);
650 state.finish_with_cache(result, cache)
651 }
652}