1use mago_interner::ThreadedInterner;
2use mago_span::Span;
3use mago_syntax::ast::Trivia;
4use mago_syntax::ast::TriviaKind;
5
6use crate::document::Document;
7use crate::error::ParseError;
8
9mod internal;
10
11pub mod document;
12pub mod error;
13
14#[inline]
15pub fn parse_trivia(interner: &ThreadedInterner, trivia: &Trivia) -> Result<Document, ParseError> {
16 if TriviaKind::DocBlockComment != trivia.kind {
17 return Err(ParseError::InvalidTrivia(trivia.span));
18 }
19
20 parse_phpdoc_with_span(interner, interner.lookup(&trivia.value), trivia.span)
21}
22
23#[inline]
24pub fn parse_phpdoc_with_span(interner: &ThreadedInterner, content: &str, span: Span) -> Result<Document, ParseError> {
25 let tokens = internal::lexer::tokenize(content, span)?;
26
27 internal::parser::parse_document(tokens.as_slice(), interner)
28}
29
30#[cfg(test)]
31mod tests {
32 use super::*;
33
34 use mago_interner::ThreadedInterner;
35 use mago_span::Position;
36 use mago_span::Span;
37
38 use crate::document::*;
39
40 #[test]
41 fn test_parse_all_elements() {
42 let interner = ThreadedInterner::new();
43 let phpdoc = r#"/**
44 * This is a simple description.
45 *
46 * This text contains an inline code `echo "Hello, World!";`.
47 *
48 * This text contains an inline tag {@see \Some\Class}.
49 *
50 * ```php
51 * echo "Hello, World!";
52 * ```
53 *
54 * $foo = "bar";
55 * echo "Hello, World!";
56 *
57 * @param string $foo
58 * @param array{
59 * bar: string,
60 * baz: int
61 * } $bar
62 * @return void
63 */"#;
64
65 let span = Span::new(Position::dummy(0), Position::dummy(phpdoc.len()));
66 let document = parse_phpdoc_with_span(&interner, phpdoc, span).expect("Failed to parse PHPDoc");
67 assert_eq!(document.elements.len(), 12);
68
69 let Element::Text(text) = &document.elements[0] else {
70 panic!("Expected Element::Text, got {:?}", document.elements[0]);
71 };
72
73 assert_eq!(text.segments.len(), 1);
74
75 let TextSegment::Paragraph { span, content } = &text.segments[0] else {
76 panic!("Expected TextSegment::Paragraph, got {:?}", text.segments[0]);
77 };
78
79 let content = interner.lookup(content);
80 assert_eq!(content, "This is a simple description.");
81 assert_eq!(&phpdoc[span.start.offset..span.end.offset], "This is a simple description.");
82
83 let Element::Line(_) = &document.elements[1] else {
84 panic!("Expected Element::Line, got {:?}", document.elements[1]);
85 };
86
87 let Element::Text(text) = &document.elements[2] else {
88 panic!("Expected Element::Text, got {:?}", document.elements[2]);
89 };
90
91 assert_eq!(text.segments.len(), 3);
92
93 let TextSegment::Paragraph { span, content } = &text.segments[0] else {
94 panic!("Expected TextSegment::Paragraph, got {:?}", text.segments[0]);
95 };
96
97 let content = interner.lookup(content);
98 assert_eq!(content, "This text contains an inline code ");
99 assert_eq!(&phpdoc[span.start.offset..span.end.offset], "This text contains an inline code ");
100
101 let TextSegment::InlineCode(code) = &text.segments[1] else {
102 panic!("Expected TextSegment::InlineCode, got {:?}", text.segments[1]);
103 };
104
105 let content = interner.lookup(&code.content);
106 assert_eq!(content, "echo \"Hello, World!\";");
107 assert_eq!(&phpdoc[code.span.start.offset..code.span.end.offset], "`echo \"Hello, World!\";`");
108
109 let TextSegment::Paragraph { span, content } = &text.segments[2] else {
110 panic!("Expected TextSegment::Paragraph, got {:?}", text.segments[2]);
111 };
112
113 let content = interner.lookup(content);
114 assert_eq!(content, ".");
115 assert_eq!(&phpdoc[span.start.offset..span.end.offset], ".");
116
117 let Element::Line(_) = &document.elements[3] else {
118 panic!("Expected Element::Line, got {:?}", document.elements[3]);
119 };
120
121 let Element::Text(text) = &document.elements[4] else {
122 panic!("Expected Element::Text, got {:?}", document.elements[4]);
123 };
124
125 assert_eq!(text.segments.len(), 3);
126
127 let TextSegment::Paragraph { span, content } = &text.segments[0] else {
128 panic!("Expected TextSegment::Paragraph, got {:?}", text.segments[0]);
129 };
130
131 let content = interner.lookup(content);
132 assert_eq!(content, "This text contains an inline tag ");
133 assert_eq!(&phpdoc[span.start.offset..span.end.offset], "This text contains an inline tag ");
134
135 let TextSegment::InlineTag(tag) = &text.segments[1] else {
136 panic!("Expected TextSegment::InlineTag, got {:?}", text.segments[1]);
137 };
138
139 let name = interner.lookup(&tag.name);
140 let description = interner.lookup(&tag.description);
141 assert_eq!(name, "see");
142 assert_eq!(description, "\\Some\\Class");
143 assert_eq!(tag.kind, TagKind::See);
144 assert_eq!(&phpdoc[tag.span.start.offset..tag.span.end.offset], "{@see \\Some\\Class}");
145
146 let TextSegment::Paragraph { span, content } = &text.segments[2] else {
147 panic!("Expected TextSegment::Paragraph, got {:?}", text.segments[2]);
148 };
149
150 let content = interner.lookup(content);
151 assert_eq!(content, ".");
152 assert_eq!(&phpdoc[span.start.offset..span.end.offset], ".");
153
154 let Element::Line(_) = &document.elements[5] else {
155 panic!("Expected Element::Line, got {:?}", document.elements[5]);
156 };
157
158 let Element::Code(code) = &document.elements[6] else {
159 panic!("Expected Element::CodeBlock, got {:?}", document.elements[6]);
160 };
161
162 let content = interner.lookup(&code.content);
163 let directives = code.directives.iter().map(|d| interner.lookup(d)).collect::<Vec<_>>();
164 assert_eq!(directives, &["php"]);
165 assert_eq!(content, "echo \"Hello, World!\";");
166 assert_eq!(
167 &phpdoc[code.span.start.offset..code.span.end.offset],
168 "```php\n * echo \"Hello, World!\";\n * ```"
169 );
170
171 let Element::Line(_) = &document.elements[7] else {
172 panic!("Expected Element::Line, got {:?}", document.elements[7]);
173 };
174
175 let Element::Code(code) = &document.elements[8] else {
176 panic!("Expected Element::CodeBlock, got {:?}", document.elements[8]);
177 };
178
179 let content = interner.lookup(&code.content);
180 assert!(code.directives.is_empty());
181 assert_eq!(content, "$foo = \"bar\";\necho \"Hello, World!\";\n");
182 assert_eq!(
183 &phpdoc[code.span.start.offset..code.span.end.offset],
184 " $foo = \"bar\";\n * echo \"Hello, World!\";\n"
185 );
186
187 let Element::Tag(tag) = &document.elements[9] else {
188 panic!("Expected Element::Tag, got {:?}", document.elements[9]);
189 };
190
191 let name = interner.lookup(&tag.name);
192 let description = interner.lookup(&tag.description);
193 assert_eq!(name, "param");
194 assert_eq!(tag.kind, TagKind::Param);
195 assert_eq!(description, "string $foo");
196 assert_eq!(&phpdoc[tag.span.start.offset..tag.span.end.offset], "@param string $foo");
197
198 let Element::Tag(tag) = &document.elements[10] else {
199 panic!("Expected Element::Tag, got {:?}", document.elements[10]);
200 };
201
202 let name = interner.lookup(&tag.name);
203 let description = interner.lookup(&tag.description);
204 assert_eq!(name, "param");
205 assert_eq!(tag.kind, TagKind::Param);
206 assert_eq!(description, "array{\n bar: string,\n baz: int\n} $bar");
207 assert_eq!(
208 &phpdoc[tag.span.start.offset..tag.span.end.offset],
209 "@param array{\n * bar: string,\n * baz: int\n * } $bar"
210 );
211
212 let Element::Tag(tag) = &document.elements[11] else {
213 panic!("Expected Element::Tag, got {:?}", document.elements[11]);
214 };
215
216 let name = interner.lookup(&tag.name);
217 let description = interner.lookup(&tag.description);
218 assert_eq!(name, "return");
219 assert_eq!(tag.kind, TagKind::Return);
220 assert_eq!(description, "void");
221 assert_eq!(&phpdoc[tag.span.start.offset..tag.span.end.offset], "@return void");
222 }
223
224 #[test]
225 fn test_unclosed_inline_tag() {
226 let interner = ThreadedInterner::new();
228 let phpdoc = "/** This is a doc block with an unclosed inline tag {@see Class */";
229 let span = Span::new(Position::dummy(0), Position::dummy(phpdoc.len()));
230
231 let result = parse_phpdoc_with_span(&interner, phpdoc, span);
232
233 match result {
234 Err(ParseError::UnclosedInlineTag(error_span)) => {
235 let expected_start = phpdoc.find("{@see").unwrap();
236 let expected_span = span.subspan(expected_start, phpdoc.len() - 3);
237 assert_eq!(error_span, expected_span);
238 }
239 _ => {
240 panic!("Expected ParseError::UnclosedInlineTag");
241 }
242 }
243 }
244
245 #[test]
246 fn test_unclosed_inline_code() {
247 let interner = ThreadedInterner::new();
249 let phpdoc = "/** This is a doc block with unclosed inline code `code sample */";
250 let span = Span::new(Position::dummy(0), Position::dummy(phpdoc.len()));
251
252 let result = parse_phpdoc_with_span(&interner, phpdoc, span);
253
254 match result {
255 Err(ParseError::UnclosedInlineCode(error_span)) => {
256 let expected_start = phpdoc.find('`').unwrap();
257 let expected_span = span.subspan(expected_start, phpdoc.len() - 3);
258 assert_eq!(error_span, expected_span);
259 }
260 _ => {
261 panic!("Expected ParseError::UnclosedInlineCode");
262 }
263 }
264 }
265
266 #[test]
267 fn test_unclosed_code_block() {
268 let interner = ThreadedInterner::new();
269 let phpdoc = r#"/**
270 * This is a doc block with unclosed code block
271 * ```
272 * Some code here
273 */"#;
274 let span = Span::new(Position::dummy(0), Position::dummy(phpdoc.len()));
275
276 let result = parse_phpdoc_with_span(&interner, phpdoc, span);
277
278 match result {
279 Err(ParseError::UnclosedCodeBlock(error_span)) => {
280 let code_block_start = phpdoc.find("```").unwrap();
281 let expected_span = span.subspan(code_block_start, 109);
282 assert_eq!(error_span, expected_span);
283 }
284 _ => {
285 panic!("Expected ParseError::UnclosedCodeBlock");
286 }
287 }
288 }
289
290 #[test]
291 fn test_invalid_tag_name() {
292 let interner = ThreadedInterner::new();
294 let phpdoc = "/** @invalid_tag_name Description */";
295 let span = Span::new(Position::dummy(0), Position::dummy(phpdoc.len()));
296
297 let result = parse_phpdoc_with_span(&interner, phpdoc, span);
298
299 match result {
300 Err(ParseError::InvalidTagName(error_span)) => {
301 let tag_start = phpdoc.find("@invalid_tag_name").unwrap();
302 let tag_end = tag_start + "@invalid_tag_name".len();
303 let expected_span = span.subspan(tag_start, tag_end);
304 assert_eq!(error_span, expected_span);
305 }
306 _ => {
307 panic!("Expected ParseError::InvalidTagName");
308 }
309 }
310 }
311
312 #[test]
313 fn test_malformed_code_block() {
314 let interner = ThreadedInterner::new();
315 let phpdoc = r#"/**
316 * ```
317 * Some code here
318 * Incorrect closing
319 */"#;
320 let span = Span::new(Position::dummy(0), Position::dummy(phpdoc.len()));
321
322 let result = parse_phpdoc_with_span(&interner, phpdoc, span);
323
324 match result {
325 Ok(document) => {
326 panic!("Expected the parser to return an error, got {document:#?}");
327 }
328 Err(ParseError::UnclosedCodeBlock(error_span)) => {
329 let code_block_start = phpdoc.find("```").unwrap();
330 let expected_span = span.subspan(code_block_start, 82);
331 assert_eq!(error_span, expected_span);
332 }
333 _ => {
334 panic!("Expected ParseError::UnclosedCodeBlock");
335 }
336 }
337 }
338
339 #[test]
340 fn test_invalid_comment() {
341 let interner = ThreadedInterner::new();
343 let phpdoc = "/* Not a valid doc block */";
344 let span = Span::new(Position::dummy(0), Position::dummy(phpdoc.len()));
345
346 let result = parse_phpdoc_with_span(&interner, phpdoc, span);
347
348 match result {
349 Err(ParseError::InvalidComment(error_span)) => {
350 assert_eq!(error_span, span);
351 }
352 _ => {
353 panic!("Expected ParseError::InvalidComment");
354 }
355 }
356 }
357
358 #[test]
359 fn test_inconsistent_indentation() {
360 let interner = ThreadedInterner::new();
362 let phpdoc = r#"/**
363 * This is a doc block
364 * With inconsistent indentation
365 */"#;
366 let span = Span::new(Position::dummy(0), Position::dummy(phpdoc.len()));
367
368 let result = parse_phpdoc_with_span(&interner, phpdoc, span);
369
370 match result {
371 Err(ParseError::InconsistentIndentation(error_span, expected, found)) => {
372 assert_eq!(expected, 4); assert_eq!(found, 6); let inconsistent_line = " * With inconsistent indentation";
377 let line_start = phpdoc.find(inconsistent_line).unwrap();
378 let indent_length = inconsistent_line.chars().take_while(|c| c.is_whitespace()).count();
379 let expected_span = span.subspan(line_start, line_start + indent_length);
380 assert_eq!(error_span, expected_span);
381 }
382 _ => {
383 panic!("Expected ParseError::InconsistentIndentation");
384 }
385 }
386 }
387
388 #[test]
389 fn test_missing_asterisk() {
390 let interner = ThreadedInterner::new();
392 let phpdoc = r#"/**
393 This line is missing an asterisk
394 * This line is fine
395 */"#;
396 let span = Span::new(Position::dummy(0), Position::dummy(phpdoc.len()));
397
398 let result = parse_phpdoc_with_span(&interner, phpdoc, span);
399
400 match result {
401 Err(ParseError::MissingAsterisk(error_span)) => {
402 let problematic_line = " This line is missing an asterisk";
404 let line_start = phpdoc.find(problematic_line).unwrap();
405 let indent_length = problematic_line.chars().take_while(|c| c.is_whitespace()).count();
406 let expected_span = span.subspan(line_start + indent_length, line_start + indent_length + 1);
407 assert_eq!(error_span, expected_span);
408 }
409 _ => {
410 panic!("Expected ParseError::MissingAsterisk");
411 }
412 }
413 }
414
415 #[test]
416 fn test_missing_whitespace_after_asterisk() {
417 let interner = ThreadedInterner::new();
419 let phpdoc = r#"/**
420 *This line is missing a space after asterisk
421 */"#;
422 let span = Span::new(Position::dummy(0), Position::dummy(phpdoc.len()));
423
424 let result = parse_phpdoc_with_span(&interner, phpdoc, span);
425
426 match result {
427 Err(ParseError::MissingWhitespaceAfterAsterisk(error_span)) => {
428 let problematic_line = "*This line is missing a space after asterisk";
430 let line_start = phpdoc.find(problematic_line).unwrap();
431 let asterisk_pos = line_start;
432 let expected_span = span.subspan(asterisk_pos + 1, asterisk_pos + 2);
433 assert_eq!(error_span, expected_span);
434 }
435 _ => {
436 panic!("Expected ParseError::MissingWhitespaceAfterAsterisk");
437 }
438 }
439 }
440
441 #[test]
442 fn test_missing_whitespace_after_opening_asterisk() {
443 let interner = ThreadedInterner::new();
445 let phpdoc = "/**This is a doc block without space after /** */";
446 let span = Span::new(Position::dummy(0), Position::dummy(phpdoc.len()));
447
448 let result = parse_phpdoc_with_span(&interner, phpdoc, span);
449
450 match result {
451 Err(ParseError::MissingWhitespaceAfterOpeningAsterisk(error_span)) => {
452 let expected_span = span.subspan(3, 4);
454 assert_eq!(error_span, expected_span);
455 }
456 _ => {
457 panic!("Expected ParseError::MissingWhitespaceAfterOpeningAsterisk");
458 }
459 }
460 }
461
462 #[test]
463 fn test_missing_whitespace_before_closing_asterisk() {
464 let interner = ThreadedInterner::new();
466 let phpdoc = "/** This is a doc block without space before */*/";
467 let span = Span::new(Position::dummy(0), Position::dummy(phpdoc.len()));
468
469 let result = parse_phpdoc_with_span(&interner, phpdoc, span);
470
471 match result {
472 Err(ParseError::MissingWhitespaceBeforeClosingAsterisk(error_span)) => {
473 let expected_span = span.subspan(phpdoc.len() - 3, phpdoc.len() - 2);
475 assert_eq!(error_span, expected_span);
476 }
477 _ => {
478 panic!("Expected ParseError::MissingWhitespaceBeforeClosingAsterisk");
479 }
480 }
481 }
482
483 #[test]
484 fn test_utf8_characters() {
485 let interner = ThreadedInterner::new();
486 let phpdoc = r#"/**
487 * هذا نص باللغة العربية.
488 * 这是一段中文。
489 * Here are some mathematical symbols: ∑, ∆, π, θ.
490 *
491 * ```php
492 * // Arabic comment
493 * echo "مرحبا بالعالم";
494 * // Chinese comment
495 * echo "你好,世界";
496 * // Math symbols in code
497 * $sum = $a + $b; // ∑
498 * ```
499 *
500 * @param string $مثال A parameter with an Arabic variable name.
501 * @return int 返回值是整数类型。
502 */"#;
503
504 let span = Span::new(Position::dummy(0), Position::dummy(phpdoc.len()));
505 let document = parse_phpdoc_with_span(&interner, phpdoc, span).expect("Failed to parse PHPDoc");
506
507 assert_eq!(document.elements.len(), 6);
509
510 let Element::Text(text) = &document.elements[0] else {
512 panic!("Expected Element::Text, got {:?}", document.elements[0]);
513 };
514
515 assert_eq!(text.segments.len(), 1);
516
517 let TextSegment::Paragraph { span, content } = &text.segments[0] else {
518 panic!("Expected TextSegment::Paragraph, got {:?}", text.segments[0]);
519 };
520
521 let content_str = interner.lookup(content);
522 assert_eq!(
523 content_str,
524 "هذا نص باللغة العربية.\n这是一段中文。\nHere are some mathematical symbols: ∑, ∆, π, θ."
525 );
526
527 assert_eq!(
528 &phpdoc[span.start.offset..span.end.offset],
529 "هذا نص باللغة العربية.\n * 这是一段中文。\n * Here are some mathematical symbols: ∑, ",
530 );
531
532 let Element::Line(_) = &document.elements[1] else {
534 panic!("Expected Element::Line, got {:?}", document.elements[3]);
535 };
536
537 let Element::Code(code) = &document.elements[2] else {
539 panic!("Expected Element::Code, got {:?}", document.elements[2]);
540 };
541
542 let content_str = interner.lookup(&code.content);
543 let expected_code = "// Arabic comment\necho \"مرحبا بالعالم\";\n// Chinese comment\necho \"你好,世界\";\n// Math symbols in code\n$sum = $a + $b; // ∑";
544 assert_eq!(content_str, expected_code);
545 assert_eq!(
546 &phpdoc[code.span.start.offset..code.span.end.offset],
547 "```php\n * // Arabic comment\n * echo \"مرحبا بالعالم\";\n * // Chinese comment\n * echo \"你好,世界\";\n * // Math symbols in code\n * $sum = $a + $b; // ∑\n * ```"
548 );
549
550 let Element::Line(_) = &document.elements[3] else {
552 panic!("Expected Element::Line, got {:?}", document.elements[3]);
553 };
554
555 let Element::Tag(tag) = &document.elements[4] else {
557 panic!("Expected Element::Tag, got {:?}", document.elements[4]);
558 };
559
560 let name = interner.lookup(&tag.name);
561 let description = interner.lookup(&tag.description);
562 assert_eq!(name, "param");
563 assert_eq!(tag.kind, TagKind::Param);
564 assert_eq!(description, "string $مثال A parameter with an Arabic variable name.");
565 assert_eq!(
566 &phpdoc[tag.span.start.offset..tag.span.end.offset],
567 "@param string $مثال A parameter with an Arabic variable name."
568 );
569
570 let Element::Tag(tag) = &document.elements[5] else {
572 panic!("Expected Element::Tag, got {:?}", document.elements[5]);
573 };
574
575 let name = interner.lookup(&tag.name);
576 let description = interner.lookup(&tag.description);
577 assert_eq!(name, "return");
578 assert_eq!(tag.kind, TagKind::Return);
579 assert_eq!(description, "int 返回值是整数类型。");
580 assert_eq!(&phpdoc[tag.span.start.offset..tag.span.end.offset], "@return int 返回值是整数类型。");
581 }
582
583 #[test]
584 fn test_annotation_parsing() {
585 let interner = ThreadedInterner::new();
586 let phpdoc = r#"/**
587 * @Event("Symfony\Component\Workflow\Event\CompletedEvent")
588 * @AnotherAnnotation({
589 * "key": "value",
590 * "list": [1, 2, 3]
591 * })
592 * @SimpleAnnotation
593 */"#;
594 let span = Span::new(Position::dummy(0), Position::dummy(phpdoc.len()));
595 let document = parse_phpdoc_with_span(&interner, phpdoc, span).expect("Failed to parse PHPDoc");
596
597 assert_eq!(document.elements.len(), 3);
599
600 let Element::Annotation(annotation) = &document.elements[0] else {
602 panic!("Expected Element::Annotation, got {:?}", document.elements[0]);
603 };
604
605 let name = interner.lookup(&annotation.name);
606 assert_eq!(name, "Event");
607 let arguments = interner.lookup(&annotation.arguments.unwrap());
608 assert_eq!(arguments, "(\"Symfony\\Component\\Workflow\\Event\\CompletedEvent\")");
609
610 let Element::Annotation(annotation) = &document.elements[1] else {
612 panic!("Expected Element::Annotation, got {:?}", document.elements[1]);
613 };
614
615 let name = interner.lookup(&annotation.name);
616 assert_eq!(name, "AnotherAnnotation");
617 let arguments = interner.lookup(&annotation.arguments.unwrap());
618 let expected_arguments = "({\n \"key\": \"value\",\n \"list\": [1, 2, 3]\n})";
619 assert_eq!(arguments, expected_arguments);
620
621 let Element::Annotation(annotation) = &document.elements[2] else {
623 panic!("Expected Element::Annotation, got {:?}", document.elements[2]);
624 };
625
626 let name = interner.lookup(&annotation.name);
627 assert_eq!(name, "SimpleAnnotation");
628 assert!(annotation.arguments.is_none());
629 }
630}