mago_docblock/
lib.rs

1use mago_ast::Trivia;
2use mago_ast::TriviaKind;
3use mago_interner::ThreadedInterner;
4use mago_span::Span;
5
6use crate::document::Document;
7use crate::error::ParseError;
8
9mod internal;
10
11pub mod document;
12pub mod error;
13
14#[inline]
15pub fn parse_trivia(interner: &ThreadedInterner, trivia: &Trivia) -> Result<Document, ParseError> {
16    if TriviaKind::DocBlockComment != trivia.kind {
17        return Err(ParseError::InvalidTrivia(trivia.span));
18    }
19
20    parse_phpdoc_with_span(interner, interner.lookup(&trivia.value), trivia.span)
21}
22
23#[inline]
24pub fn parse_phpdoc_with_span(interner: &ThreadedInterner, content: &str, span: Span) -> Result<Document, ParseError> {
25    let tokens = internal::lexer::tokenize(content, span)?;
26
27    internal::parser::parse_document(tokens.as_slice(), interner)
28}
29
30#[cfg(test)]
31mod tests {
32    use super::*;
33
34    use mago_interner::ThreadedInterner;
35    use mago_span::Position;
36    use mago_span::Span;
37
38    use crate::document::*;
39
40    #[test]
41    fn test_parse_all_elements() {
42        let interner = ThreadedInterner::new();
43        let phpdoc = r#"/**
44            * This is a simple description.
45            *
46            * This text contains an inline code `echo "Hello, World!";`.
47            *
48            * This text contains an inline tag {@see \Some\Class}.
49            *
50            * ```php
51            * echo "Hello, World!";
52            * ```
53            *
54            *     $foo = "bar";
55            *     echo "Hello, World!";
56            *
57            * @param string $foo
58            * @param array{
59            *   bar: string,
60            *   baz: int
61            * } $bar
62            * @return void
63            */"#;
64
65        let span = Span::new(Position::dummy(0), Position::dummy(phpdoc.len()));
66        let document = parse_phpdoc_with_span(&interner, phpdoc, span).expect("Failed to parse PHPDoc");
67        assert_eq!(document.elements.len(), 12);
68
69        let Element::Text(text) = &document.elements[0] else {
70            panic!("Expected Element::Text, got {:?}", document.elements[0]);
71        };
72
73        assert_eq!(text.segments.len(), 1);
74
75        let TextSegment::Paragraph { span, content } = &text.segments[0] else {
76            panic!("Expected TextSegment::Paragraph, got {:?}", text.segments[0]);
77        };
78
79        let content = interner.lookup(content);
80        assert_eq!(content, "This is a simple description.");
81        assert_eq!(&phpdoc[span.start.offset..span.end.offset], "This is a simple description.");
82
83        let Element::Line(_) = &document.elements[1] else {
84            panic!("Expected Element::Line, got {:?}", document.elements[1]);
85        };
86
87        let Element::Text(text) = &document.elements[2] else {
88            panic!("Expected Element::Text, got {:?}", document.elements[2]);
89        };
90
91        assert_eq!(text.segments.len(), 3);
92
93        let TextSegment::Paragraph { span, content } = &text.segments[0] else {
94            panic!("Expected TextSegment::Paragraph, got {:?}", text.segments[0]);
95        };
96
97        let content = interner.lookup(content);
98        assert_eq!(content, "This text contains an inline code ");
99        assert_eq!(&phpdoc[span.start.offset..span.end.offset], "This text contains an inline code ");
100
101        let TextSegment::InlineCode(code) = &text.segments[1] else {
102            panic!("Expected TextSegment::InlineCode, got {:?}", text.segments[1]);
103        };
104
105        let content = interner.lookup(&code.content);
106        assert_eq!(content, "echo \"Hello, World!\";");
107        assert_eq!(&phpdoc[code.span.start.offset..code.span.end.offset], "`echo \"Hello, World!\";`");
108
109        let TextSegment::Paragraph { span, content } = &text.segments[2] else {
110            panic!("Expected TextSegment::Paragraph, got {:?}", text.segments[2]);
111        };
112
113        let content = interner.lookup(content);
114        assert_eq!(content, ".");
115        assert_eq!(&phpdoc[span.start.offset..span.end.offset], ".");
116
117        let Element::Line(_) = &document.elements[3] else {
118            panic!("Expected Element::Line, got {:?}", document.elements[3]);
119        };
120
121        let Element::Text(text) = &document.elements[4] else {
122            panic!("Expected Element::Text, got {:?}", document.elements[4]);
123        };
124
125        assert_eq!(text.segments.len(), 3);
126
127        let TextSegment::Paragraph { span, content } = &text.segments[0] else {
128            panic!("Expected TextSegment::Paragraph, got {:?}", text.segments[0]);
129        };
130
131        let content = interner.lookup(content);
132        assert_eq!(content, "This text contains an inline tag ");
133        assert_eq!(&phpdoc[span.start.offset..span.end.offset], "This text contains an inline tag ");
134
135        let TextSegment::InlineTag(tag) = &text.segments[1] else {
136            panic!("Expected TextSegment::InlineTag, got {:?}", text.segments[1]);
137        };
138
139        let name = interner.lookup(&tag.name);
140        let description = interner.lookup(&tag.description);
141        assert_eq!(name, "see");
142        assert_eq!(description, "\\Some\\Class");
143        assert_eq!(tag.kind, TagKind::See);
144        assert_eq!(&phpdoc[tag.span.start.offset..tag.span.end.offset], "{@see \\Some\\Class}");
145
146        let TextSegment::Paragraph { span, content } = &text.segments[2] else {
147            panic!("Expected TextSegment::Paragraph, got {:?}", text.segments[2]);
148        };
149
150        let content = interner.lookup(content);
151        assert_eq!(content, ".");
152        assert_eq!(&phpdoc[span.start.offset..span.end.offset], ".");
153
154        let Element::Line(_) = &document.elements[5] else {
155            panic!("Expected Element::Line, got {:?}", document.elements[5]);
156        };
157
158        let Element::Code(code) = &document.elements[6] else {
159            panic!("Expected Element::CodeBlock, got {:?}", document.elements[6]);
160        };
161
162        let content = interner.lookup(&code.content);
163        let directives = code.directives.iter().map(|d| interner.lookup(d)).collect::<Vec<_>>();
164        assert_eq!(directives, &["php"]);
165        assert_eq!(content, "echo \"Hello, World!\";");
166        assert_eq!(
167            &phpdoc[code.span.start.offset..code.span.end.offset],
168            "```php\n            * echo \"Hello, World!\";\n            * ```"
169        );
170
171        let Element::Line(_) = &document.elements[7] else {
172            panic!("Expected Element::Line, got {:?}", document.elements[7]);
173        };
174
175        let Element::Code(code) = &document.elements[8] else {
176            panic!("Expected Element::CodeBlock, got {:?}", document.elements[8]);
177        };
178
179        let content = interner.lookup(&code.content);
180        assert!(code.directives.is_empty());
181        assert_eq!(content, "$foo = \"bar\";\necho \"Hello, World!\";\n");
182        assert_eq!(
183            &phpdoc[code.span.start.offset..code.span.end.offset],
184            "    $foo = \"bar\";\n            *     echo \"Hello, World!\";\n"
185        );
186
187        let Element::Tag(tag) = &document.elements[9] else {
188            panic!("Expected Element::Tag, got {:?}", document.elements[9]);
189        };
190
191        let name = interner.lookup(&tag.name);
192        let description = interner.lookup(&tag.description);
193        assert_eq!(name, "param");
194        assert_eq!(tag.kind, TagKind::Param);
195        assert_eq!(description, "string $foo");
196        assert_eq!(&phpdoc[tag.span.start.offset..tag.span.end.offset], "@param string $foo");
197
198        let Element::Tag(tag) = &document.elements[10] else {
199            panic!("Expected Element::Tag, got {:?}", document.elements[10]);
200        };
201
202        let name = interner.lookup(&tag.name);
203        let description = interner.lookup(&tag.description);
204        assert_eq!(name, "param");
205        assert_eq!(tag.kind, TagKind::Param);
206        assert_eq!(description, "array{\n  bar: string,\n  baz: int\n} $bar");
207        assert_eq!(
208            &phpdoc[tag.span.start.offset..tag.span.end.offset],
209            "@param array{\n            *   bar: string,\n            *   baz: int\n            * } $bar"
210        );
211
212        let Element::Tag(tag) = &document.elements[11] else {
213            panic!("Expected Element::Tag, got {:?}", document.elements[11]);
214        };
215
216        let name = interner.lookup(&tag.name);
217        let description = interner.lookup(&tag.description);
218        assert_eq!(name, "return");
219        assert_eq!(tag.kind, TagKind::Return);
220        assert_eq!(description, "void");
221        assert_eq!(&phpdoc[tag.span.start.offset..tag.span.end.offset], "@return void");
222    }
223
224    #[test]
225    fn test_unclosed_inline_tag() {
226        // Test case for ParseError::UnclosedInlineTag
227        let interner = ThreadedInterner::new();
228        let phpdoc = "/** This is a doc block with an unclosed inline tag {@see Class */";
229        let span = Span::new(Position::dummy(0), Position::dummy(phpdoc.len()));
230
231        let result = parse_phpdoc_with_span(&interner, phpdoc, span);
232
233        match result {
234            Err(ParseError::UnclosedInlineTag(error_span)) => {
235                let expected_start = phpdoc.find("{@see").unwrap();
236                let expected_span = span.subspan(expected_start, phpdoc.len() - 3);
237                assert_eq!(error_span, expected_span);
238            }
239            _ => {
240                panic!("Expected ParseError::UnclosedInlineTag");
241            }
242        }
243    }
244
245    #[test]
246    fn test_unclosed_inline_code() {
247        // Test case for ParseError::UnclosedInlineCode
248        let interner = ThreadedInterner::new();
249        let phpdoc = "/** This is a doc block with unclosed inline code `code sample */";
250        let span = Span::new(Position::dummy(0), Position::dummy(phpdoc.len()));
251
252        let result = parse_phpdoc_with_span(&interner, phpdoc, span);
253
254        match result {
255            Err(ParseError::UnclosedInlineCode(error_span)) => {
256                let expected_start = phpdoc.find('`').unwrap();
257                let expected_span = span.subspan(expected_start, phpdoc.len() - 3);
258                assert_eq!(error_span, expected_span);
259            }
260            _ => {
261                panic!("Expected ParseError::UnclosedInlineCode");
262            }
263        }
264    }
265
266    #[test]
267    fn test_unclosed_code_block() {
268        let interner = ThreadedInterner::new();
269        let phpdoc = r#"/**
270            * This is a doc block with unclosed code block
271            * ```
272            * Some code here
273            */"#;
274        let span = Span::new(Position::dummy(0), Position::dummy(phpdoc.len()));
275
276        let result = parse_phpdoc_with_span(&interner, phpdoc, span);
277
278        match result {
279            Err(ParseError::UnclosedCodeBlock(error_span)) => {
280                let code_block_start = phpdoc.find("```").unwrap();
281                let expected_span = span.subspan(code_block_start, 109);
282                assert_eq!(error_span, expected_span);
283            }
284            _ => {
285                panic!("Expected ParseError::UnclosedCodeBlock");
286            }
287        }
288    }
289
290    #[test]
291    fn test_invalid_tag_name() {
292        // Test case for ParseError::InvalidTagName
293        let interner = ThreadedInterner::new();
294        let phpdoc = "/** @invalid_tag_name Description */";
295        let span = Span::new(Position::dummy(0), Position::dummy(phpdoc.len()));
296
297        let result = parse_phpdoc_with_span(&interner, phpdoc, span);
298
299        match result {
300            Err(ParseError::InvalidTagName(error_span)) => {
301                let tag_start = phpdoc.find("@invalid_tag_name").unwrap();
302                let tag_end = tag_start + "@invalid_tag_name".len();
303                let expected_span = span.subspan(tag_start, tag_end);
304                assert_eq!(error_span, expected_span);
305            }
306            _ => {
307                panic!("Expected ParseError::InvalidTagName");
308            }
309        }
310    }
311
312    #[test]
313    fn test_malformed_code_block() {
314        let interner = ThreadedInterner::new();
315        let phpdoc = r#"/**
316            * ```
317            * Some code here
318            * Incorrect closing
319            */"#;
320        let span = Span::new(Position::dummy(0), Position::dummy(phpdoc.len()));
321
322        let result = parse_phpdoc_with_span(&interner, phpdoc, span);
323
324        match result {
325            Ok(document) => {
326                panic!("Expected the parser to return an error, got {:#?}", document);
327            }
328            Err(ParseError::UnclosedCodeBlock(error_span)) => {
329                let code_block_start = phpdoc.find("```").unwrap();
330                let expected_span = span.subspan(code_block_start, 82);
331                assert_eq!(error_span, expected_span);
332            }
333            _ => {
334                panic!("Expected ParseError::UnclosedCodeBlock");
335            }
336        }
337    }
338
339    #[test]
340    fn test_invalid_comment() {
341        // Test case for ParseError::InvalidComment
342        let interner = ThreadedInterner::new();
343        let phpdoc = "/* Not a valid doc block */";
344        let span = Span::new(Position::dummy(0), Position::dummy(phpdoc.len()));
345
346        let result = parse_phpdoc_with_span(&interner, phpdoc, span);
347
348        match result {
349            Err(ParseError::InvalidComment(error_span)) => {
350                assert_eq!(error_span, span);
351            }
352            _ => {
353                panic!("Expected ParseError::InvalidComment");
354            }
355        }
356    }
357
358    #[test]
359    fn test_inconsistent_indentation() {
360        // Test case for ParseError::InconsistentIndentation
361        let interner = ThreadedInterner::new();
362        let phpdoc = r#"/**
363    * This is a doc block
364      * With inconsistent indentation
365    */"#;
366        let span = Span::new(Position::dummy(0), Position::dummy(phpdoc.len()));
367
368        let result = parse_phpdoc_with_span(&interner, phpdoc, span);
369
370        match result {
371            Err(ParseError::InconsistentIndentation(error_span, expected, found)) => {
372                // The expected and found indentation lengths
373                assert_eq!(expected, 4); // Assuming 4 spaces
374                assert_eq!(found, 6); // Assuming 6 spaces
375                                      // The error_span should point to the line with inconsistent indentation
376                let inconsistent_line = "      * With inconsistent indentation";
377                let line_start = phpdoc.find(inconsistent_line).unwrap();
378                let indent_length = inconsistent_line.chars().take_while(|c| c.is_whitespace()).count();
379                let expected_span = span.subspan(line_start, line_start + indent_length);
380                assert_eq!(error_span, expected_span);
381            }
382            _ => {
383                panic!("Expected ParseError::InconsistentIndentation");
384            }
385        }
386    }
387
388    #[test]
389    fn test_missing_asterisk() {
390        // Test case for ParseError::MissingAsterisk
391        let interner = ThreadedInterner::new();
392        let phpdoc = r#"/**
393     This line is missing an asterisk
394     * This line is fine
395     */"#;
396        let span = Span::new(Position::dummy(0), Position::dummy(phpdoc.len()));
397
398        let result = parse_phpdoc_with_span(&interner, phpdoc, span);
399
400        match result {
401            Err(ParseError::MissingAsterisk(error_span)) => {
402                // The error_span should point to where the asterisk is missing
403                let problematic_line = "     This line is missing an asterisk";
404                let line_start = phpdoc.find(problematic_line).unwrap();
405                let indent_length = problematic_line.chars().take_while(|c| c.is_whitespace()).count();
406                let expected_span = span.subspan(line_start + indent_length, line_start + indent_length + 1);
407                assert_eq!(error_span, expected_span);
408            }
409            _ => {
410                panic!("Expected ParseError::MissingAsterisk");
411            }
412        }
413    }
414
415    #[test]
416    fn test_missing_whitespace_after_asterisk() {
417        // Test case for ParseError::MissingWhitespaceAfterAsterisk
418        let interner = ThreadedInterner::new();
419        let phpdoc = r#"/**
420     *This line is missing a space after asterisk
421     */"#;
422        let span = Span::new(Position::dummy(0), Position::dummy(phpdoc.len()));
423
424        let result = parse_phpdoc_with_span(&interner, phpdoc, span);
425
426        match result {
427            Err(ParseError::MissingWhitespaceAfterAsterisk(error_span)) => {
428                // The error_span should point to the character after the asterisk
429                let problematic_line = "*This line is missing a space after asterisk";
430                let line_start = phpdoc.find(problematic_line).unwrap();
431                let asterisk_pos = line_start;
432                let expected_span = span.subspan(asterisk_pos + 1, asterisk_pos + 2);
433                assert_eq!(error_span, expected_span);
434            }
435            _ => {
436                panic!("Expected ParseError::MissingWhitespaceAfterAsterisk");
437            }
438        }
439    }
440
441    #[test]
442    fn test_missing_whitespace_after_opening_asterisk() {
443        // Test case for ParseError::MissingWhitespaceAfterOpeningAsterisk
444        let interner = ThreadedInterner::new();
445        let phpdoc = "/**This is a doc block without space after /** */";
446        let span = Span::new(Position::dummy(0), Position::dummy(phpdoc.len()));
447
448        let result = parse_phpdoc_with_span(&interner, phpdoc, span);
449
450        match result {
451            Err(ParseError::MissingWhitespaceAfterOpeningAsterisk(error_span)) => {
452                // The error_span should point to the position after '/**'
453                let expected_span = span.subspan(3, 4);
454                assert_eq!(error_span, expected_span);
455            }
456            _ => {
457                panic!("Expected ParseError::MissingWhitespaceAfterOpeningAsterisk");
458            }
459        }
460    }
461
462    #[test]
463    fn test_missing_whitespace_before_closing_asterisk() {
464        // Test case for ParseError::MissingWhitespaceBeforeClosingAsterisk
465        let interner = ThreadedInterner::new();
466        let phpdoc = "/** This is a doc block without space before */*/";
467        let span = Span::new(Position::dummy(0), Position::dummy(phpdoc.len()));
468
469        let result = parse_phpdoc_with_span(&interner, phpdoc, span);
470
471        match result {
472            Err(ParseError::MissingWhitespaceBeforeClosingAsterisk(error_span)) => {
473                // The error_span should point to the position before '*/'
474                let expected_span = span.subspan(phpdoc.len() - 3, phpdoc.len() - 2);
475                assert_eq!(error_span, expected_span);
476            }
477            _ => {
478                panic!("Expected ParseError::MissingWhitespaceBeforeClosingAsterisk");
479            }
480        }
481    }
482
483    #[test]
484    fn test_utf8_characters() {
485        let interner = ThreadedInterner::new();
486        let phpdoc = r#"/**
487    * هذا نص باللغة العربية.
488    * 这是一段中文。
489    * Here are some mathematical symbols: ∑, ∆, π, θ.
490    *
491    * ```php
492    * // Arabic comment
493    * echo "مرحبا بالعالم";
494    * // Chinese comment
495    * echo "你好,世界";
496    * // Math symbols in code
497    * $sum = $a + $b; // ∑
498    * ```
499    *
500    * @param string $مثال A parameter with an Arabic variable name.
501    * @return int 返回值是整数类型。
502    */"#;
503
504        let span = Span::new(Position::dummy(0), Position::dummy(phpdoc.len()));
505        let document = parse_phpdoc_with_span(&interner, phpdoc, span).expect("Failed to parse PHPDoc");
506
507        // Verify the number of elements parsed
508        assert_eq!(document.elements.len(), 6);
509
510        // First text element (Arabic text)
511        let Element::Text(text) = &document.elements[0] else {
512            panic!("Expected Element::Text, got {:?}", document.elements[0]);
513        };
514
515        assert_eq!(text.segments.len(), 1);
516
517        let TextSegment::Paragraph { span, content } = &text.segments[0] else {
518            panic!("Expected TextSegment::Paragraph, got {:?}", text.segments[0]);
519        };
520
521        let content_str = interner.lookup(content);
522        assert_eq!(
523            content_str,
524            "هذا نص باللغة العربية.\n这是一段中文。\nHere are some mathematical symbols: ∑, ∆, π, θ."
525        );
526
527        assert_eq!(
528            &phpdoc[span.start.offset..span.end.offset],
529            "هذا نص باللغة العربية.\n    * 这是一段中文。\n    * Here are some mathematical symbols: ∑, ",
530        );
531
532        // Empty line
533        let Element::Line(_) = &document.elements[1] else {
534            panic!("Expected Element::Line, got {:?}", document.elements[3]);
535        };
536
537        // Code block
538        let Element::Code(code) = &document.elements[2] else {
539            panic!("Expected Element::Code, got {:?}", document.elements[2]);
540        };
541
542        let content_str = interner.lookup(&code.content);
543        let expected_code = "// Arabic comment\necho \"مرحبا بالعالم\";\n// Chinese comment\necho \"你好,世界\";\n// Math symbols in code\n$sum = $a + $b; // ∑";
544        assert_eq!(content_str, expected_code);
545        assert_eq!(
546            &phpdoc[code.span.start.offset..code.span.end.offset],
547            "```php\n    * // Arabic comment\n    * echo \"مرحبا بالعالم\";\n    * // Chinese comment\n    * echo \"你好,世界\";\n    * // Math symbols in code\n    * $sum = $a + $b; // ∑\n    * ```"
548        );
549
550        // Empty line
551        let Element::Line(_) = &document.elements[3] else {
552            panic!("Expected Element::Line, got {:?}", document.elements[3]);
553        };
554
555        // @param tag with Arabic variable name
556        let Element::Tag(tag) = &document.elements[4] else {
557            panic!("Expected Element::Tag, got {:?}", document.elements[4]);
558        };
559
560        let name = interner.lookup(&tag.name);
561        let description = interner.lookup(&tag.description);
562        assert_eq!(name, "param");
563        assert_eq!(tag.kind, TagKind::Param);
564        assert_eq!(description, "string $مثال A parameter with an Arabic variable name.");
565        assert_eq!(
566            &phpdoc[tag.span.start.offset..tag.span.end.offset],
567            "@param string $مثال A parameter with an Arabic variable name."
568        );
569
570        // @return tag with Chinese description
571        let Element::Tag(tag) = &document.elements[5] else {
572            panic!("Expected Element::Tag, got {:?}", document.elements[5]);
573        };
574
575        let name = interner.lookup(&tag.name);
576        let description = interner.lookup(&tag.description);
577        assert_eq!(name, "return");
578        assert_eq!(tag.kind, TagKind::Return);
579        assert_eq!(description, "int 返回值是整数类型。");
580        assert_eq!(&phpdoc[tag.span.start.offset..tag.span.end.offset], "@return int 返回值是整数类型。");
581    }
582
583    #[test]
584    fn test_annotation_parsing() {
585        let interner = ThreadedInterner::new();
586        let phpdoc = r#"/**
587         * @Event("Symfony\Component\Workflow\Event\CompletedEvent")
588         * @AnotherAnnotation({
589         *     "key": "value",
590         *     "list": [1, 2, 3]
591         * })
592         * @SimpleAnnotation
593         */"#;
594        let span = Span::new(Position::dummy(0), Position::dummy(phpdoc.len()));
595        let document = parse_phpdoc_with_span(&interner, phpdoc, span).expect("Failed to parse PHPDoc");
596
597        // Verify that the document has the expected number of elements
598        assert_eq!(document.elements.len(), 3);
599
600        // First annotation
601        let Element::Annotation(annotation) = &document.elements[0] else {
602            panic!("Expected Element::Annotation, got {:?}", document.elements[0]);
603        };
604
605        let name = interner.lookup(&annotation.name);
606        assert_eq!(name, "Event");
607        let arguments = interner.lookup(&annotation.arguments.unwrap());
608        assert_eq!(arguments, "(\"Symfony\\Component\\Workflow\\Event\\CompletedEvent\")");
609
610        // Second annotation
611        let Element::Annotation(annotation) = &document.elements[1] else {
612            panic!("Expected Element::Annotation, got {:?}", document.elements[1]);
613        };
614
615        let name = interner.lookup(&annotation.name);
616        assert_eq!(name, "AnotherAnnotation");
617        let arguments = interner.lookup(&annotation.arguments.unwrap());
618        let expected_arguments = "({\n    \"key\": \"value\",\n    \"list\": [1, 2, 3]\n})";
619        assert_eq!(arguments, expected_arguments);
620
621        // Third annotation
622        let Element::Annotation(annotation) = &document.elements[2] else {
623            panic!("Expected Element::Annotation, got {:?}", document.elements[2]);
624        };
625
626        let name = interner.lookup(&annotation.name);
627        assert_eq!(name, "SimpleAnnotation");
628        assert!(annotation.arguments.is_none());
629    }
630}