1use mago_interner::ThreadedInterner;
2use mago_span::Span;
3use mago_syntax::ast::Trivia;
4use mago_syntax::ast::TriviaKind;
5
6use crate::document::Document;
7use crate::error::ParseError;
8
9mod internal;
10
11pub mod document;
12pub mod error;
13pub mod tag;
14
15#[inline]
16pub fn parse_trivia(interner: &ThreadedInterner, trivia: &Trivia) -> Result<Document, ParseError> {
17 if TriviaKind::DocBlockComment != trivia.kind {
18 return Err(ParseError::InvalidTrivia(trivia.span));
19 }
20
21 parse_phpdoc_with_span(interner, interner.lookup(&trivia.value), trivia.span)
22}
23
24#[inline]
25pub fn parse_phpdoc_with_span(interner: &ThreadedInterner, content: &str, span: Span) -> Result<Document, ParseError> {
26 let tokens = internal::lexer::tokenize(content, span)?;
27
28 internal::parser::parse_document(span, tokens.as_slice(), interner)
29}
30
31#[cfg(test)]
32mod tests {
33 use super::*;
34
35 use mago_interner::ThreadedInterner;
36 use mago_span::Position;
37 use mago_span::Span;
38
39 use crate::document::*;
40
41 #[test]
42 fn test_parse_all_elements() {
43 let interner = ThreadedInterner::new();
44 let phpdoc = r#"/**
45 * This is a simple description.
46 *
47 * This text contains an inline code `echo "Hello, World!";`.
48 *
49 * This text contains an inline tag {@see \Some\Class}.
50 *
51 * ```php
52 * echo "Hello, World!";
53 * ```
54 *
55 * $foo = "bar";
56 * echo "Hello, World!";
57 *
58 * @param string $foo
59 * @param array{
60 * bar: string,
61 * baz: int
62 * } $bar
63 * @return void
64 */"#;
65
66 let span = Span::new(Position::dummy(0), Position::dummy(phpdoc.len()));
67 let document = parse_phpdoc_with_span(&interner, phpdoc, span).expect("Failed to parse PHPDoc");
68 assert_eq!(document.elements.len(), 12);
69
70 let Element::Text(text) = &document.elements[0] else {
71 panic!("Expected Element::Text, got {:?}", document.elements[0]);
72 };
73
74 assert_eq!(text.segments.len(), 1);
75
76 let TextSegment::Paragraph { span, content } = &text.segments[0] else {
77 panic!("Expected TextSegment::Paragraph, got {:?}", text.segments[0]);
78 };
79
80 let content = interner.lookup(content);
81 assert_eq!(content, "This is a simple description.");
82 assert_eq!(&phpdoc[span.start.offset..span.end.offset], "This is a simple description.");
83
84 let Element::Line(_) = &document.elements[1] else {
85 panic!("Expected Element::Line, got {:?}", document.elements[1]);
86 };
87
88 let Element::Text(text) = &document.elements[2] else {
89 panic!("Expected Element::Text, got {:?}", document.elements[2]);
90 };
91
92 assert_eq!(text.segments.len(), 3);
93
94 let TextSegment::Paragraph { span, content } = &text.segments[0] else {
95 panic!("Expected TextSegment::Paragraph, got {:?}", text.segments[0]);
96 };
97
98 let content = interner.lookup(content);
99 assert_eq!(content, "This text contains an inline code ");
100 assert_eq!(&phpdoc[span.start.offset..span.end.offset], "This text contains an inline code ");
101
102 let TextSegment::InlineCode(code) = &text.segments[1] else {
103 panic!("Expected TextSegment::InlineCode, got {:?}", text.segments[1]);
104 };
105
106 let content = interner.lookup(&code.content);
107 assert_eq!(content, "echo \"Hello, World!\";");
108 assert_eq!(&phpdoc[code.span.start.offset..code.span.end.offset], "`echo \"Hello, World!\";`");
109
110 let TextSegment::Paragraph { span, content } = &text.segments[2] else {
111 panic!("Expected TextSegment::Paragraph, got {:?}", text.segments[2]);
112 };
113
114 let content = interner.lookup(content);
115 assert_eq!(content, ".");
116 assert_eq!(&phpdoc[span.start.offset..span.end.offset], ".");
117
118 let Element::Line(_) = &document.elements[3] else {
119 panic!("Expected Element::Line, got {:?}", document.elements[3]);
120 };
121
122 let Element::Text(text) = &document.elements[4] else {
123 panic!("Expected Element::Text, got {:?}", document.elements[4]);
124 };
125
126 assert_eq!(text.segments.len(), 3);
127
128 let TextSegment::Paragraph { span, content } = &text.segments[0] else {
129 panic!("Expected TextSegment::Paragraph, got {:?}", text.segments[0]);
130 };
131
132 let content = interner.lookup(content);
133 assert_eq!(content, "This text contains an inline tag ");
134 assert_eq!(&phpdoc[span.start.offset..span.end.offset], "This text contains an inline tag ");
135
136 let TextSegment::InlineTag(tag) = &text.segments[1] else {
137 panic!("Expected TextSegment::InlineTag, got {:?}", text.segments[1]);
138 };
139
140 let name = interner.lookup(&tag.name);
141 let description = interner.lookup(&tag.description);
142 assert_eq!(name, "see");
143 assert_eq!(description, "\\Some\\Class");
144 assert_eq!(tag.kind, TagKind::See);
145 assert_eq!(&phpdoc[tag.span.start.offset..tag.span.end.offset], "{@see \\Some\\Class}");
146
147 let TextSegment::Paragraph { span, content } = &text.segments[2] else {
148 panic!("Expected TextSegment::Paragraph, got {:?}", text.segments[2]);
149 };
150
151 let content = interner.lookup(content);
152 assert_eq!(content, ".");
153 assert_eq!(&phpdoc[span.start.offset..span.end.offset], ".");
154
155 let Element::Line(_) = &document.elements[5] else {
156 panic!("Expected Element::Line, got {:?}", document.elements[5]);
157 };
158
159 let Element::Code(code) = &document.elements[6] else {
160 panic!("Expected Element::CodeBlock, got {:?}", document.elements[6]);
161 };
162
163 let content = interner.lookup(&code.content);
164 let directives = code.directives.iter().map(|d| interner.lookup(d)).collect::<Vec<_>>();
165 assert_eq!(directives, &["php"]);
166 assert_eq!(content, "echo \"Hello, World!\";");
167 assert_eq!(
168 &phpdoc[code.span.start.offset..code.span.end.offset],
169 "```php\n * echo \"Hello, World!\";\n * ```"
170 );
171
172 let Element::Line(_) = &document.elements[7] else {
173 panic!("Expected Element::Line, got {:?}", document.elements[7]);
174 };
175
176 let Element::Code(code) = &document.elements[8] else {
177 panic!("Expected Element::CodeBlock, got {:?}", document.elements[8]);
178 };
179
180 let content = interner.lookup(&code.content);
181 assert!(code.directives.is_empty());
182 assert_eq!(content, "$foo = \"bar\";\necho \"Hello, World!\";\n");
183 assert_eq!(
184 &phpdoc[code.span.start.offset..code.span.end.offset],
185 " $foo = \"bar\";\n * echo \"Hello, World!\";\n"
186 );
187
188 let Element::Tag(tag) = &document.elements[9] else {
189 panic!("Expected Element::Tag, got {:?}", document.elements[9]);
190 };
191
192 let name = interner.lookup(&tag.name);
193 let description = interner.lookup(&tag.description);
194 assert_eq!(name, "param");
195 assert_eq!(tag.kind, TagKind::Param);
196 assert_eq!(description, "string $foo");
197 assert_eq!(&phpdoc[tag.span.start.offset..tag.span.end.offset], "@param string $foo");
198
199 let Element::Tag(tag) = &document.elements[10] else {
200 panic!("Expected Element::Tag, got {:?}", document.elements[10]);
201 };
202
203 let name = interner.lookup(&tag.name);
204 let description = interner.lookup(&tag.description);
205 assert_eq!(name, "param");
206 assert_eq!(tag.kind, TagKind::Param);
207 assert_eq!(description, "array{\n bar: string,\n baz: int\n} $bar");
208 assert_eq!(
209 &phpdoc[tag.span.start.offset..tag.span.end.offset],
210 "@param array{\n * bar: string,\n * baz: int\n * } $bar"
211 );
212
213 let Element::Tag(tag) = &document.elements[11] else {
214 panic!("Expected Element::Tag, got {:?}", document.elements[11]);
215 };
216
217 let name = interner.lookup(&tag.name);
218 let description = interner.lookup(&tag.description);
219 assert_eq!(name, "return");
220 assert_eq!(tag.kind, TagKind::Return);
221 assert_eq!(description, "void");
222 assert_eq!(&phpdoc[tag.span.start.offset..tag.span.end.offset], "@return void");
223 }
224
225 #[test]
226 fn test_unclosed_inline_tag() {
227 let interner = ThreadedInterner::new();
229 let phpdoc = "/** This is a doc block with an unclosed inline tag {@see Class */";
230 let span = Span::new(Position::dummy(0), Position::dummy(phpdoc.len()));
231
232 let result = parse_phpdoc_with_span(&interner, phpdoc, span);
233
234 match result {
235 Err(ParseError::UnclosedInlineTag(error_span)) => {
236 let expected_start = phpdoc.find("{@see").unwrap();
237 let expected_span = span.subspan(expected_start, phpdoc.len() - 3);
238 assert_eq!(error_span, expected_span);
239 }
240 _ => {
241 panic!("Expected ParseError::UnclosedInlineTag");
242 }
243 }
244 }
245
246 #[test]
247 fn test_unclosed_inline_code() {
248 let interner = ThreadedInterner::new();
250 let phpdoc = "/** This is a doc block with unclosed inline code `code sample */";
251 let span = Span::new(Position::dummy(0), Position::dummy(phpdoc.len()));
252
253 let result = parse_phpdoc_with_span(&interner, phpdoc, span);
254
255 match result {
256 Err(ParseError::UnclosedInlineCode(error_span)) => {
257 let expected_start = phpdoc.find('`').unwrap();
258 let expected_span = span.subspan(expected_start, phpdoc.len() - 3);
259 assert_eq!(error_span, expected_span);
260 }
261 _ => {
262 panic!("Expected ParseError::UnclosedInlineCode");
263 }
264 }
265 }
266
267 #[test]
268 fn test_unclosed_code_block() {
269 let interner = ThreadedInterner::new();
270 let phpdoc = r#"/**
271 * This is a doc block with unclosed code block
272 * ```
273 * Some code here
274 */"#;
275 let span = Span::new(Position::dummy(0), Position::dummy(phpdoc.len()));
276
277 let result = parse_phpdoc_with_span(&interner, phpdoc, span);
278
279 match result {
280 Err(ParseError::UnclosedCodeBlock(error_span)) => {
281 let code_block_start = phpdoc.find("```").unwrap();
282 let expected_span = span.subspan(code_block_start, 109);
283 assert_eq!(error_span, expected_span);
284 }
285 _ => {
286 panic!("Expected ParseError::UnclosedCodeBlock");
287 }
288 }
289 }
290
291 #[test]
292 fn test_invalid_tag_name() {
293 let interner = ThreadedInterner::new();
295 let phpdoc = "/** @invalid_tag_name Description */";
296 let span = Span::new(Position::dummy(0), Position::dummy(phpdoc.len()));
297
298 let result = parse_phpdoc_with_span(&interner, phpdoc, span);
299
300 match result {
301 Err(ParseError::InvalidTagName(error_span)) => {
302 let tag_start = phpdoc.find("@invalid_tag_name").unwrap();
303 let tag_end = tag_start + "@invalid_tag_name".len();
304 let expected_span = span.subspan(tag_start, tag_end);
305 assert_eq!(error_span, expected_span);
306 }
307 _ => {
308 panic!("Expected ParseError::InvalidTagName");
309 }
310 }
311 }
312
313 #[test]
314 fn test_malformed_code_block() {
315 let interner = ThreadedInterner::new();
316 let phpdoc = r#"/**
317 * ```
318 * Some code here
319 * Incorrect closing
320 */"#;
321 let span = Span::new(Position::dummy(0), Position::dummy(phpdoc.len()));
322
323 let result = parse_phpdoc_with_span(&interner, phpdoc, span);
324
325 match result {
326 Ok(document) => {
327 panic!("Expected the parser to return an error, got {document:#?}");
328 }
329 Err(ParseError::UnclosedCodeBlock(error_span)) => {
330 let code_block_start = phpdoc.find("```").unwrap();
331 let expected_span = span.subspan(code_block_start, 82);
332 assert_eq!(error_span, expected_span);
333 }
334 _ => {
335 panic!("Expected ParseError::UnclosedCodeBlock");
336 }
337 }
338 }
339
340 #[test]
341 fn test_invalid_comment() {
342 let interner = ThreadedInterner::new();
344 let phpdoc = "/* Not a valid doc block */";
345 let span = Span::new(Position::dummy(0), Position::dummy(phpdoc.len()));
346
347 let result = parse_phpdoc_with_span(&interner, phpdoc, span);
348
349 match result {
350 Err(ParseError::InvalidComment(error_span)) => {
351 assert_eq!(error_span, span);
352 }
353 _ => {
354 panic!("Expected ParseError::InvalidComment");
355 }
356 }
357 }
358
359 #[test]
360 fn test_inconsistent_indentation() {
361 let interner = ThreadedInterner::new();
363 let phpdoc = r#"/**
364 * This is a doc block
365 * With inconsistent indentation
366 */"#;
367 let span = Span::new(Position::dummy(0), Position::dummy(phpdoc.len()));
368
369 let result = parse_phpdoc_with_span(&interner, phpdoc, span);
370
371 match result {
372 Err(ParseError::InconsistentIndentation(error_span, expected, found)) => {
373 assert_eq!(expected, 4); assert_eq!(found, 6); let inconsistent_line = " * With inconsistent indentation";
378 let line_start = phpdoc.find(inconsistent_line).unwrap();
379 let indent_length = inconsistent_line.chars().take_while(|c| c.is_whitespace()).count();
380 let expected_span = span.subspan(line_start, line_start + indent_length);
381 assert_eq!(error_span, expected_span);
382 }
383 _ => {
384 panic!("Expected ParseError::InconsistentIndentation");
385 }
386 }
387 }
388
389 #[test]
390 fn test_missing_asterisk() {
391 let interner = ThreadedInterner::new();
393 let phpdoc = r#"/**
394 This line is missing an asterisk
395 * This line is fine
396 */"#;
397 let span = Span::new(Position::dummy(0), Position::dummy(phpdoc.len()));
398
399 let result = parse_phpdoc_with_span(&interner, phpdoc, span);
400
401 match result {
402 Err(ParseError::MissingAsterisk(error_span)) => {
403 let problematic_line = " This line is missing an asterisk";
405 let line_start = phpdoc.find(problematic_line).unwrap();
406 let indent_length = problematic_line.chars().take_while(|c| c.is_whitespace()).count();
407 let expected_span = span.subspan(line_start + indent_length, line_start + indent_length + 1);
408 assert_eq!(error_span, expected_span);
409 }
410 _ => {
411 panic!("Expected ParseError::MissingAsterisk");
412 }
413 }
414 }
415
416 #[test]
417 fn test_missing_whitespace_after_asterisk() {
418 let interner = ThreadedInterner::new();
420 let phpdoc = r#"/**
421 *This line is missing a space after asterisk
422 */"#;
423 let span = Span::new(Position::dummy(0), Position::dummy(phpdoc.len()));
424
425 let result = parse_phpdoc_with_span(&interner, phpdoc, span);
426
427 match result {
428 Err(ParseError::MissingWhitespaceAfterAsterisk(error_span)) => {
429 let problematic_line = "*This line is missing a space after asterisk";
431 let line_start = phpdoc.find(problematic_line).unwrap();
432 let asterisk_pos = line_start;
433 let expected_span = span.subspan(asterisk_pos + 1, asterisk_pos + 2);
434 assert_eq!(error_span, expected_span);
435 }
436 _ => {
437 panic!("Expected ParseError::MissingWhitespaceAfterAsterisk");
438 }
439 }
440 }
441
442 #[test]
443 fn test_missing_whitespace_after_opening_asterisk() {
444 let interner = ThreadedInterner::new();
446 let phpdoc = "/**This is a doc block without space after /** */";
447 let span = Span::new(Position::dummy(0), Position::dummy(phpdoc.len()));
448
449 let result = parse_phpdoc_with_span(&interner, phpdoc, span);
450
451 match result {
452 Err(ParseError::MissingWhitespaceAfterOpeningAsterisk(error_span)) => {
453 let expected_span = span.subspan(3, 4);
455 assert_eq!(error_span, expected_span);
456 }
457 _ => {
458 panic!("Expected ParseError::MissingWhitespaceAfterOpeningAsterisk");
459 }
460 }
461 }
462
463 #[test]
464 fn test_missing_whitespace_before_closing_asterisk() {
465 let interner = ThreadedInterner::new();
467 let phpdoc = "/** This is a doc block without space before */*/";
468 let span = Span::new(Position::dummy(0), Position::dummy(phpdoc.len()));
469
470 let result = parse_phpdoc_with_span(&interner, phpdoc, span);
471
472 match result {
473 Err(ParseError::MissingWhitespaceBeforeClosingAsterisk(error_span)) => {
474 let expected_span = span.subspan(phpdoc.len() - 3, phpdoc.len() - 2);
476 assert_eq!(error_span, expected_span);
477 }
478 _ => {
479 panic!("Expected ParseError::MissingWhitespaceBeforeClosingAsterisk");
480 }
481 }
482 }
483
484 #[test]
485 fn test_utf8_characters() {
486 let interner = ThreadedInterner::new();
487 let phpdoc = r#"/**
488 * هذا نص باللغة العربية.
489 * 这是一段中文。
490 * Here are some mathematical symbols: ∑, ∆, π, θ.
491 *
492 * ```php
493 * // Arabic comment
494 * echo "مرحبا بالعالم";
495 * // Chinese comment
496 * echo "你好,世界";
497 * // Math symbols in code
498 * $sum = $a + $b; // ∑
499 * ```
500 *
501 * @param string $مثال A parameter with an Arabic variable name.
502 * @return int 返回值是整数类型。
503 */"#;
504
505 let span = Span::new(Position::dummy(0), Position::dummy(phpdoc.len()));
506 let document = parse_phpdoc_with_span(&interner, phpdoc, span).expect("Failed to parse PHPDoc");
507
508 assert_eq!(document.elements.len(), 6);
510
511 let Element::Text(text) = &document.elements[0] else {
513 panic!("Expected Element::Text, got {:?}", document.elements[0]);
514 };
515
516 assert_eq!(text.segments.len(), 1);
517
518 let TextSegment::Paragraph { span, content } = &text.segments[0] else {
519 panic!("Expected TextSegment::Paragraph, got {:?}", text.segments[0]);
520 };
521
522 let content_str = interner.lookup(content);
523 assert_eq!(
524 content_str,
525 "هذا نص باللغة العربية.\n这是一段中文。\nHere are some mathematical symbols: ∑, ∆, π, θ."
526 );
527
528 assert_eq!(
529 &phpdoc[span.start.offset..span.end.offset],
530 "هذا نص باللغة العربية.\n * 这是一段中文。\n * Here are some mathematical symbols: ∑, ",
531 );
532
533 let Element::Line(_) = &document.elements[1] else {
535 panic!("Expected Element::Line, got {:?}", document.elements[3]);
536 };
537
538 let Element::Code(code) = &document.elements[2] else {
540 panic!("Expected Element::Code, got {:?}", document.elements[2]);
541 };
542
543 let content_str = interner.lookup(&code.content);
544 let expected_code = "// Arabic comment\necho \"مرحبا بالعالم\";\n// Chinese comment\necho \"你好,世界\";\n// Math symbols in code\n$sum = $a + $b; // ∑";
545 assert_eq!(content_str, expected_code);
546 assert_eq!(
547 &phpdoc[code.span.start.offset..code.span.end.offset],
548 "```php\n * // Arabic comment\n * echo \"مرحبا بالعالم\";\n * // Chinese comment\n * echo \"你好,世界\";\n * // Math symbols in code\n * $sum = $a + $b; // ∑\n * ```"
549 );
550
551 let Element::Line(_) = &document.elements[3] else {
553 panic!("Expected Element::Line, got {:?}", document.elements[3]);
554 };
555
556 let Element::Tag(tag) = &document.elements[4] else {
558 panic!("Expected Element::Tag, got {:?}", document.elements[4]);
559 };
560
561 let name = interner.lookup(&tag.name);
562 let description = interner.lookup(&tag.description);
563 assert_eq!(name, "param");
564 assert_eq!(tag.kind, TagKind::Param);
565 assert_eq!(description, "string $مثال A parameter with an Arabic variable name.");
566 assert_eq!(
567 &phpdoc[tag.span.start.offset..tag.span.end.offset],
568 "@param string $مثال A parameter with an Arabic variable name."
569 );
570
571 let Element::Tag(tag) = &document.elements[5] else {
573 panic!("Expected Element::Tag, got {:?}", document.elements[5]);
574 };
575
576 let name = interner.lookup(&tag.name);
577 let description = interner.lookup(&tag.description);
578 assert_eq!(name, "return");
579 assert_eq!(tag.kind, TagKind::Return);
580 assert_eq!(description, "int 返回值是整数类型。");
581 assert_eq!(&phpdoc[tag.span.start.offset..tag.span.end.offset], "@return int 返回值是整数类型。");
582 }
583
584 #[test]
585 fn test_annotation_parsing() {
586 let interner = ThreadedInterner::new();
587 let phpdoc = r#"/**
588 * @Event("Symfony\Component\Workflow\Event\CompletedEvent")
589 * @AnotherAnnotation({
590 * "key": "value",
591 * "list": [1, 2, 3]
592 * })
593 * @SimpleAnnotation
594 */"#;
595 let span = Span::new(Position::dummy(0), Position::dummy(phpdoc.len()));
596 let document = parse_phpdoc_with_span(&interner, phpdoc, span).expect("Failed to parse PHPDoc");
597
598 assert_eq!(document.elements.len(), 3);
600
601 let Element::Annotation(annotation) = &document.elements[0] else {
603 panic!("Expected Element::Annotation, got {:?}", document.elements[0]);
604 };
605
606 let name = interner.lookup(&annotation.name);
607 assert_eq!(name, "Event");
608 let arguments = interner.lookup(&annotation.arguments.unwrap());
609 assert_eq!(arguments, "(\"Symfony\\Component\\Workflow\\Event\\CompletedEvent\")");
610
611 let Element::Annotation(annotation) = &document.elements[1] else {
613 panic!("Expected Element::Annotation, got {:?}", document.elements[1]);
614 };
615
616 let name = interner.lookup(&annotation.name);
617 assert_eq!(name, "AnotherAnnotation");
618 let arguments = interner.lookup(&annotation.arguments.unwrap());
619 let expected_arguments = "({\n \"key\": \"value\",\n \"list\": [1, 2, 3]\n})";
620 assert_eq!(arguments, expected_arguments);
621
622 let Element::Annotation(annotation) = &document.elements[2] else {
624 panic!("Expected Element::Annotation, got {:?}", document.elements[2]);
625 };
626
627 let name = interner.lookup(&annotation.name);
628 assert_eq!(name, "SimpleAnnotation");
629 assert!(annotation.arguments.is_none());
630 }
631}