1#![doc = include_str!("readme.md")]
2pub mod token_type;
3use crate::language::PhpLanguage;
4use oak_core::{Lexer, LexerCache, LexerState, OakError, lexer::LexOutput, source::Source};
5pub use token_type::{PhpToken, PhpTokenType};
6
7type State<'s, S> = LexerState<'s, S, PhpLanguage>;
8
/// Lexer that turns PHP source text into a stream of [`PhpTokenType`] tokens.
///
/// The lexer itself is stateless between runs: it only carries a borrowed
/// [`PhpLanguage`] configuration, which makes it cheap to clone.
#[derive(Clone, Debug)]
pub struct PhpLexer<'config> {
    /// Language configuration this lexer was created with.
    ///
    /// The leading underscore marks it as currently unused: none of the
    /// tokenizing methods visible in this file read it.
    _config: &'config PhpLanguage,
}
16
17impl<'config> Lexer<PhpLanguage> for PhpLexer<'config> {
18 fn lex<'a, S: Source + ?Sized>(&self, source: &'a S, _edits: &[oak_core::source::TextEdit], cache: &'a mut impl LexerCache<PhpLanguage>) -> LexOutput<PhpLanguage> {
19 let mut state = State::new_with_cache(source, 0, cache);
20 let result = self.run(&mut state);
21 if result.is_ok() {
22 state.add_eof();
23 }
24 state.finish_with_cache(result, cache)
25 }
26}
27
28impl<'config> PhpLexer<'config> {
29 pub fn new(config: &'config PhpLanguage) -> Self {
31 Self { _config: config }
32 }
33
34 fn run<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> Result<(), OakError> {
35 while state.not_at_end() {
36 if self.skip_whitespace(state) {
37 continue;
38 }
39
40 if self.lex_newline(state) {
41 continue;
42 }
43
44 if self.lex_comment(state) {
45 continue;
46 }
47
48 if self.lex_string(state) {
49 continue;
50 }
51
52 if self.lex_number(state) {
53 continue;
54 }
55
56 if self.lex_identifier_or_keyword(state) {
57 continue;
58 }
59
60 if self.lex_operators_and_punctuation(state) {
61 continue;
62 }
63
64 if let Some(ch) = state.peek() {
66 let start_pos = state.get_position();
67 state.advance(ch.len_utf8());
68 state.add_token(PhpTokenType::Error, start_pos, state.get_position())
69 }
70 else {
71 break;
73 }
74 }
75
76 Ok(())
77 }
78
79 fn skip_whitespace<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
80 let start_pos = state.get_position();
81
82 while let Some(ch) = state.peek() {
83 if ch == ' ' || ch == '\t' {
84 state.advance(ch.len_utf8())
85 }
86 else {
87 break;
88 }
89 }
90
91 if state.get_position() > start_pos {
92 state.add_token(PhpTokenType::Whitespace, start_pos, state.get_position());
93 true
94 }
95 else {
96 false
97 }
98 }
99
100 fn lex_newline<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
101 let start_pos = state.get_position();
102
103 if let Some('\n') = state.peek() {
104 state.advance(1);
105 state.add_token(PhpTokenType::Newline, start_pos, state.get_position());
106 true
107 }
108 else if let Some('\r') = state.peek() {
109 state.advance(1);
110 if let Some('\n') = state.peek() {
111 state.advance(1)
112 }
113 state.add_token(PhpTokenType::Newline, start_pos, state.get_position());
114 true
115 }
116 else {
117 false
118 }
119 }
120
121 fn lex_comment<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
122 let start_pos = state.get_position();
123
124 if let Some('/') = state.peek() {
125 state.advance(1);
126 if let Some('/') = state.peek() {
127 state.advance(1);
128 while let Some(ch) = state.peek() {
130 if ch == '\n' || ch == '\r' {
131 break;
132 }
133 state.advance(ch.len_utf8())
134 }
135 state.add_token(PhpTokenType::Comment, start_pos, state.get_position());
136 return true;
137 }
138 else if let Some('*') = state.peek() {
139 state.advance(1);
140 while let Some(ch) = state.peek() {
142 if ch == '*' {
143 state.advance(1);
144 if let Some('/') = state.peek() {
145 state.advance(1);
146 break;
147 }
148 }
149 else {
150 state.advance(ch.len_utf8())
151 }
152 }
153 state.add_token(PhpTokenType::Comment, start_pos, state.get_position());
154 return true;
155 }
156 else {
157 state.set_position(start_pos);
159 return false;
160 }
161 }
162 else if let Some('#') = state.peek() {
163 state.advance(1);
164 while let Some(ch) = state.peek() {
166 if ch == '\n' || ch == '\r' {
167 break;
168 }
169 state.advance(ch.len_utf8())
170 }
171 state.add_token(PhpTokenType::Comment, start_pos, state.get_position());
172 true
173 }
174 else {
175 false
176 }
177 }
178
179 fn lex_string<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
180 let start_pos = state.get_position();
181
182 if let Some(quote_char) = state.peek() {
183 if quote_char == '"' || quote_char == '\'' {
184 state.advance(1); let mut escaped = false;
187 while let Some(ch) = state.peek() {
188 if escaped {
189 escaped = false;
190 state.advance(ch.len_utf8())
191 }
192 else if ch == '\\' {
193 escaped = true;
194 state.advance(1)
195 }
196 else if ch == quote_char {
197 state.advance(1); break;
199 }
200 else if ch == '\n' || ch == '\r' {
201 break;
203 }
204 else {
205 state.advance(ch.len_utf8())
206 }
207 }
208
209 state.add_token(PhpTokenType::StringLiteral, start_pos, state.get_position());
210 true
211 }
212 else {
213 false
214 }
215 }
216 else {
217 false
218 }
219 }
220
221 fn lex_number<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
222 if let Some(ch) = state.peek() {
223 if ch.is_ascii_digit() {
224 let start_pos = state.get_position();
225
226 while let Some(ch) = state.peek() {
228 if ch.is_ascii_digit() {
229 state.advance(1)
230 }
231 else {
232 break;
233 }
234 }
235
236 if let Some('.') = state.peek() {
238 state.advance(1);
239 while let Some(ch) = state.peek() {
241 if ch.is_ascii_digit() {
242 state.advance(1)
243 }
244 else {
245 break;
246 }
247 }
248 }
249
250 if let Some(ch) = state.peek() {
252 if ch == 'e' || ch == 'E' {
253 state.advance(1);
254 if let Some(ch) = state.peek() {
255 if ch == '+' || ch == '-' {
256 state.advance(1)
257 }
258 }
259 while let Some(ch) = state.peek() {
260 if ch.is_ascii_digit() {
261 state.advance(1)
262 }
263 else {
264 break;
265 }
266 }
267 }
268 }
269
270 state.add_token(PhpTokenType::NumberLiteral, start_pos, state.get_position());
271 true
272 }
273 else {
274 false
275 }
276 }
277 else {
278 false
279 }
280 }
281
282 fn lex_identifier_or_keyword<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
283 if let Some(ch) = state.peek() {
284 if ch.is_alphabetic() || ch == '_' || ch == '$' {
285 let start_pos = state.get_position();
286
287 while let Some(ch) = state.peek() {
289 if ch.is_alphanumeric() || ch == '_' || ch == '$' {
290 state.advance(ch.len_utf8())
291 }
292 else {
293 break;
294 }
295 }
296
297 let end_pos = state.get_position();
298 let text = state.source().get_text_in(oak_core::Range { start: start_pos, end: end_pos });
299
300 let kind = match text.as_ref() {
302 "abstract" => PhpTokenType::Abstract,
303 "and" => PhpTokenType::And,
304 "array" => PhpTokenType::Array,
305 "as" => PhpTokenType::As,
306 "break" => PhpTokenType::Break,
307 "callable" => PhpTokenType::Callable,
308 "case" => PhpTokenType::Case,
309 "catch" => PhpTokenType::Catch,
310 "class" => PhpTokenType::Class,
311 "clone" => PhpTokenType::Clone,
312 "const" => PhpTokenType::Const,
313 "continue" => PhpTokenType::Continue,
314 "declare" => PhpTokenType::Declare,
315 "default" => PhpTokenType::Default,
316 "die" => PhpTokenType::Exit,
317 "do" => PhpTokenType::Do,
318 "echo" => PhpTokenType::Echo,
319 "else" => PhpTokenType::Else,
320 "elseif" => PhpTokenType::Elseif,
321 "empty" => PhpTokenType::Empty,
322 "enddeclare" => PhpTokenType::Enddeclare,
323 "endfor" => PhpTokenType::Endfor,
324 "endforeach" => PhpTokenType::Endforeach,
325 "endif" => PhpTokenType::Endif,
326 "endswitch" => PhpTokenType::Endswitch,
327 "endwhile" => PhpTokenType::Endwhile,
328 "eval" => PhpTokenType::Eval,
329 "exit" => PhpTokenType::Exit,
330 "extends" => PhpTokenType::Extends,
331 "final" => PhpTokenType::Final,
332 "finally" => PhpTokenType::Finally,
333 "for" => PhpTokenType::For,
334 "foreach" => PhpTokenType::Foreach,
335 "function" => PhpTokenType::Function,
336 "global" => PhpTokenType::Global,
337 "goto" => PhpTokenType::Goto,
338 "if" => PhpTokenType::If,
339 "implements" => PhpTokenType::Implements,
340 "include" => PhpTokenType::Include,
341 "include_once" => PhpTokenType::IncludeOnce,
342 "instanceof" => PhpTokenType::Instanceof,
343 "insteadof" => PhpTokenType::Insteadof,
344 "interface" => PhpTokenType::Interface,
345 "isset" => PhpTokenType::Isset,
346 "list" => PhpTokenType::List,
347 "namespace" => PhpTokenType::Namespace,
348 "new" => PhpTokenType::New,
349 "or" => PhpTokenType::Or,
350 "print" => PhpTokenType::Print,
351 "private" => PhpTokenType::Private,
352 "protected" => PhpTokenType::Protected,
353 "public" => PhpTokenType::Public,
354 "require" => PhpTokenType::Require,
355 "require_once" => PhpTokenType::RequireOnce,
356 "return" => PhpTokenType::Return,
357 "static" => PhpTokenType::Static,
358 "switch" => PhpTokenType::Switch,
359 "throw" => PhpTokenType::Throw,
360 "trait" => PhpTokenType::Trait,
361 "try" => PhpTokenType::Try,
362 "unset" => PhpTokenType::Unset,
363 "use" => PhpTokenType::Use,
364 "var" => PhpTokenType::Var,
365 "while" => PhpTokenType::While,
366 "xor" => PhpTokenType::Xor,
367 "yield" => PhpTokenType::Yield,
368 "true" => PhpTokenType::BooleanLiteral,
369 "false" => PhpTokenType::BooleanLiteral,
370 "null" => PhpTokenType::NullLiteral,
371 _ => PhpTokenType::Identifier,
372 };
373
374 state.add_token(kind, start_pos, state.get_position());
375 true
376 }
377 else {
378 false
379 }
380 }
381 else {
382 false
383 }
384 }
385
386 fn lex_operators_and_punctuation<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
387 if let Some(ch) = state.peek() {
388 let start_pos = state.get_position();
389
390 let kind = match ch {
391 '+' => {
392 state.advance(1);
393 if let Some('+') = state.peek() {
394 state.advance(1);
395 PhpTokenType::Increment
396 }
397 else if let Some('=') = state.peek() {
398 state.advance(1);
399 PhpTokenType::PlusAssign
400 }
401 else {
402 PhpTokenType::Plus
403 }
404 }
405 '-' => {
406 state.advance(1);
407 if let Some('-') = state.peek() {
408 state.advance(1);
409 PhpTokenType::Decrement
410 }
411 else if let Some('=') = state.peek() {
412 state.advance(1);
413 PhpTokenType::MinusAssign
414 }
415 else if let Some('>') = state.peek() {
416 state.advance(1);
417 PhpTokenType::Arrow
418 }
419 else {
420 PhpTokenType::Minus
421 }
422 }
423 '*' => {
424 state.advance(1);
425 if let Some('*') = state.peek() {
426 state.advance(1);
427 PhpTokenType::Power
428 }
429 else if let Some('=') = state.peek() {
430 state.advance(1);
431 PhpTokenType::MultiplyAssign
432 }
433 else {
434 PhpTokenType::Multiply
435 }
436 }
437 '/' => {
438 state.advance(1);
439 if let Some('=') = state.peek() {
440 state.advance(1);
441 PhpTokenType::DivideAssign
442 }
443 else {
444 PhpTokenType::Divide
445 }
446 }
447 '%' => {
448 state.advance(1);
449 if let Some('=') = state.peek() {
450 state.advance(1);
451 PhpTokenType::ModuloAssign
452 }
453 else {
454 PhpTokenType::Modulo
455 }
456 }
457 '=' => {
458 state.advance(1);
459 if let Some('=') = state.peek() {
460 state.advance(1);
461 if let Some('=') = state.peek() {
462 state.advance(1);
463 PhpTokenType::Identical
464 }
465 else {
466 PhpTokenType::Equal
467 }
468 }
469 else if let Some('>') = state.peek() {
470 state.advance(1);
471 PhpTokenType::DoubleArrow
472 }
473 else {
474 PhpTokenType::Assign
475 }
476 }
477 '!' => {
478 state.advance(1);
479 if let Some('=') = state.peek() {
480 state.advance(1);
481 if let Some('=') = state.peek() {
482 state.advance(1);
483 PhpTokenType::NotIdentical
484 }
485 else {
486 PhpTokenType::NotEqual
487 }
488 }
489 else {
490 PhpTokenType::LogicalNot
491 }
492 }
493 '<' => {
494 state.advance(1);
495 if let Some('=') = state.peek() {
496 state.advance(1);
497 PhpTokenType::LessEqual
498 }
499 else if let Some('<') = state.peek() {
500 state.advance(1);
501 if let Some('=') = state.peek() {
502 state.advance(1);
503 PhpTokenType::LeftShiftAssign
504 }
505 else {
506 PhpTokenType::LeftShift
507 }
508 }
509 else if let Some('>') = state.peek() {
510 state.advance(1);
511 PhpTokenType::Spaceship
512 }
513 else {
514 PhpTokenType::Less
515 }
516 }
517 '>' => {
518 state.advance(1);
519 if let Some('=') = state.peek() {
520 state.advance(1);
521 PhpTokenType::GreaterEqual
522 }
523 else if let Some('>') = state.peek() {
524 state.advance(1);
525 if let Some('=') = state.peek() {
526 state.advance(1);
527 PhpTokenType::RightShiftAssign
528 }
529 else {
530 PhpTokenType::RightShift
531 }
532 }
533 else {
534 PhpTokenType::Greater
535 }
536 }
537 '&' => {
538 state.advance(1);
539 if let Some('&') = state.peek() {
540 state.advance(1);
541 PhpTokenType::LogicalAnd
542 }
543 else if let Some('=') = state.peek() {
544 state.advance(1);
545 PhpTokenType::BitwiseAndAssign
546 }
547 else {
548 PhpTokenType::BitwiseAnd
549 }
550 }
551 '|' => {
552 state.advance(1);
553 if let Some('|') = state.peek() {
554 state.advance(1);
555 PhpTokenType::LogicalOr
556 }
557 else if let Some('=') = state.peek() {
558 state.advance(1);
559 PhpTokenType::BitwiseOrAssign
560 }
561 else {
562 PhpTokenType::BitwiseOr
563 }
564 }
565 '^' => {
566 state.advance(1);
567 if let Some('=') = state.peek() {
568 state.advance(1);
569 PhpTokenType::BitwiseXorAssign
570 }
571 else {
572 PhpTokenType::BitwiseXor
573 }
574 }
575 '~' => {
576 state.advance(1);
577 PhpTokenType::BitwiseNot
578 }
579 '?' => {
580 state.advance(1);
581 if let Some('?') = state.peek() {
582 state.advance(1);
583 PhpTokenType::NullCoalesce
584 }
585 else {
586 PhpTokenType::Question
587 }
588 }
589 ':' => {
590 state.advance(1);
591 if let Some(':') = state.peek() {
592 state.advance(1);
593 PhpTokenType::DoubleColon
594 }
595 else {
596 PhpTokenType::Colon
597 }
598 }
599 ';' => {
600 state.advance(1);
601 PhpTokenType::Semicolon
602 }
603 ',' => {
604 state.advance(1);
605 PhpTokenType::Comma
606 }
607 '.' => {
608 state.advance(1);
609 if let Some('=') = state.peek() {
610 state.advance(1);
611 PhpTokenType::ConcatAssign
612 }
613 else {
614 PhpTokenType::Dot
615 }
616 }
617 '(' => {
618 state.advance(1);
619 PhpTokenType::LeftParen
620 }
621 ')' => {
622 state.advance(1);
623 PhpTokenType::RightParen
624 }
625 '[' => {
626 state.advance(1);
627 PhpTokenType::LeftBracket
628 }
629 ']' => {
630 state.advance(1);
631 PhpTokenType::RightBracket
632 }
633 '{' => {
634 state.advance(1);
635 PhpTokenType::LeftBrace
636 }
637 '}' => {
638 state.advance(1);
639 PhpTokenType::RightBrace
640 }
641 '$' => {
642 state.advance(1);
643 PhpTokenType::Dollar
644 }
645 '@' => {
646 state.advance(1);
647 PhpTokenType::At
648 }
649 _ => return false,
650 };
651
652 state.add_token(kind, start_pos, state.get_position());
653 true
654 }
655 else {
656 false
657 }
658 }
659}