mago_docblock/
lib.rs

1use mago_interner::ThreadedInterner;
2use mago_span::Span;
3use mago_syntax::ast::Trivia;
4use mago_syntax::ast::TriviaKind;
5
6use crate::document::Document;
7use crate::error::ParseError;
8
9mod internal;
10
11pub mod document;
12pub mod error;
13pub mod tag;
14
15#[inline]
16pub fn parse_trivia(interner: &ThreadedInterner, trivia: &Trivia) -> Result<Document, ParseError> {
17    if TriviaKind::DocBlockComment != trivia.kind {
18        return Err(ParseError::InvalidTrivia(trivia.span));
19    }
20
21    parse_phpdoc_with_span(interner, interner.lookup(&trivia.value), trivia.span)
22}
23
24#[inline]
25pub fn parse_phpdoc_with_span(interner: &ThreadedInterner, content: &str, span: Span) -> Result<Document, ParseError> {
26    let tokens = internal::lexer::tokenize(content, span)?;
27
28    internal::parser::parse_document(span, tokens.as_slice(), interner)
29}
30
31#[cfg(test)]
32mod tests {
33    use super::*;
34
35    use mago_interner::ThreadedInterner;
36    use mago_span::Position;
37    use mago_span::Span;
38
39    use crate::document::*;
40
41    #[test]
42    fn test_parse_all_elements() {
43        let interner = ThreadedInterner::new();
44        let phpdoc = r#"/**
45            * This is a simple description.
46            *
47            * This text contains an inline code `echo "Hello, World!";`.
48            *
49            * This text contains an inline tag {@see \Some\Class}.
50            *
51            * ```php
52            * echo "Hello, World!";
53            * ```
54            *
55            *     $foo = "bar";
56            *     echo "Hello, World!";
57            *
58            * @param string $foo
59            * @param array{
60            *   bar: string,
61            *   baz: int
62            * } $bar
63            * @return void
64            */"#;
65
66        let span = Span::new(Position::dummy(0), Position::dummy(phpdoc.len()));
67        let document = parse_phpdoc_with_span(&interner, phpdoc, span).expect("Failed to parse PHPDoc");
68        assert_eq!(document.elements.len(), 12);
69
70        let Element::Text(text) = &document.elements[0] else {
71            panic!("Expected Element::Text, got {:?}", document.elements[0]);
72        };
73
74        assert_eq!(text.segments.len(), 1);
75
76        let TextSegment::Paragraph { span, content } = &text.segments[0] else {
77            panic!("Expected TextSegment::Paragraph, got {:?}", text.segments[0]);
78        };
79
80        let content = interner.lookup(content);
81        assert_eq!(content, "This is a simple description.");
82        assert_eq!(&phpdoc[span.start.offset..span.end.offset], "This is a simple description.");
83
84        let Element::Line(_) = &document.elements[1] else {
85            panic!("Expected Element::Line, got {:?}", document.elements[1]);
86        };
87
88        let Element::Text(text) = &document.elements[2] else {
89            panic!("Expected Element::Text, got {:?}", document.elements[2]);
90        };
91
92        assert_eq!(text.segments.len(), 3);
93
94        let TextSegment::Paragraph { span, content } = &text.segments[0] else {
95            panic!("Expected TextSegment::Paragraph, got {:?}", text.segments[0]);
96        };
97
98        let content = interner.lookup(content);
99        assert_eq!(content, "This text contains an inline code ");
100        assert_eq!(&phpdoc[span.start.offset..span.end.offset], "This text contains an inline code ");
101
102        let TextSegment::InlineCode(code) = &text.segments[1] else {
103            panic!("Expected TextSegment::InlineCode, got {:?}", text.segments[1]);
104        };
105
106        let content = interner.lookup(&code.content);
107        assert_eq!(content, "echo \"Hello, World!\";");
108        assert_eq!(&phpdoc[code.span.start.offset..code.span.end.offset], "`echo \"Hello, World!\";`");
109
110        let TextSegment::Paragraph { span, content } = &text.segments[2] else {
111            panic!("Expected TextSegment::Paragraph, got {:?}", text.segments[2]);
112        };
113
114        let content = interner.lookup(content);
115        assert_eq!(content, ".");
116        assert_eq!(&phpdoc[span.start.offset..span.end.offset], ".");
117
118        let Element::Line(_) = &document.elements[3] else {
119            panic!("Expected Element::Line, got {:?}", document.elements[3]);
120        };
121
122        let Element::Text(text) = &document.elements[4] else {
123            panic!("Expected Element::Text, got {:?}", document.elements[4]);
124        };
125
126        assert_eq!(text.segments.len(), 3);
127
128        let TextSegment::Paragraph { span, content } = &text.segments[0] else {
129            panic!("Expected TextSegment::Paragraph, got {:?}", text.segments[0]);
130        };
131
132        let content = interner.lookup(content);
133        assert_eq!(content, "This text contains an inline tag ");
134        assert_eq!(&phpdoc[span.start.offset..span.end.offset], "This text contains an inline tag ");
135
136        let TextSegment::InlineTag(tag) = &text.segments[1] else {
137            panic!("Expected TextSegment::InlineTag, got {:?}", text.segments[1]);
138        };
139
140        let name = interner.lookup(&tag.name);
141        let description = interner.lookup(&tag.description);
142        assert_eq!(name, "see");
143        assert_eq!(description, "\\Some\\Class");
144        assert_eq!(tag.kind, TagKind::See);
145        assert_eq!(&phpdoc[tag.span.start.offset..tag.span.end.offset], "{@see \\Some\\Class}");
146
147        let TextSegment::Paragraph { span, content } = &text.segments[2] else {
148            panic!("Expected TextSegment::Paragraph, got {:?}", text.segments[2]);
149        };
150
151        let content = interner.lookup(content);
152        assert_eq!(content, ".");
153        assert_eq!(&phpdoc[span.start.offset..span.end.offset], ".");
154
155        let Element::Line(_) = &document.elements[5] else {
156            panic!("Expected Element::Line, got {:?}", document.elements[5]);
157        };
158
159        let Element::Code(code) = &document.elements[6] else {
160            panic!("Expected Element::CodeBlock, got {:?}", document.elements[6]);
161        };
162
163        let content = interner.lookup(&code.content);
164        let directives = code.directives.iter().map(|d| interner.lookup(d)).collect::<Vec<_>>();
165        assert_eq!(directives, &["php"]);
166        assert_eq!(content, "echo \"Hello, World!\";");
167        assert_eq!(
168            &phpdoc[code.span.start.offset..code.span.end.offset],
169            "```php\n            * echo \"Hello, World!\";\n            * ```"
170        );
171
172        let Element::Line(_) = &document.elements[7] else {
173            panic!("Expected Element::Line, got {:?}", document.elements[7]);
174        };
175
176        let Element::Code(code) = &document.elements[8] else {
177            panic!("Expected Element::CodeBlock, got {:?}", document.elements[8]);
178        };
179
180        let content = interner.lookup(&code.content);
181        assert!(code.directives.is_empty());
182        assert_eq!(content, "$foo = \"bar\";\necho \"Hello, World!\";\n");
183        assert_eq!(
184            &phpdoc[code.span.start.offset..code.span.end.offset],
185            "    $foo = \"bar\";\n            *     echo \"Hello, World!\";\n"
186        );
187
188        let Element::Tag(tag) = &document.elements[9] else {
189            panic!("Expected Element::Tag, got {:?}", document.elements[9]);
190        };
191
192        let name = interner.lookup(&tag.name);
193        let description = interner.lookup(&tag.description);
194        assert_eq!(name, "param");
195        assert_eq!(tag.kind, TagKind::Param);
196        assert_eq!(description, "string $foo");
197        assert_eq!(&phpdoc[tag.span.start.offset..tag.span.end.offset], "@param string $foo");
198
199        let Element::Tag(tag) = &document.elements[10] else {
200            panic!("Expected Element::Tag, got {:?}", document.elements[10]);
201        };
202
203        let name = interner.lookup(&tag.name);
204        let description = interner.lookup(&tag.description);
205        assert_eq!(name, "param");
206        assert_eq!(tag.kind, TagKind::Param);
207        assert_eq!(description, "array{\n  bar: string,\n  baz: int\n} $bar");
208        assert_eq!(
209            &phpdoc[tag.span.start.offset..tag.span.end.offset],
210            "@param array{\n            *   bar: string,\n            *   baz: int\n            * } $bar"
211        );
212
213        let Element::Tag(tag) = &document.elements[11] else {
214            panic!("Expected Element::Tag, got {:?}", document.elements[11]);
215        };
216
217        let name = interner.lookup(&tag.name);
218        let description = interner.lookup(&tag.description);
219        assert_eq!(name, "return");
220        assert_eq!(tag.kind, TagKind::Return);
221        assert_eq!(description, "void");
222        assert_eq!(&phpdoc[tag.span.start.offset..tag.span.end.offset], "@return void");
223    }
224
225    #[test]
226    fn test_unclosed_inline_tag() {
227        // Test case for ParseError::UnclosedInlineTag
228        let interner = ThreadedInterner::new();
229        let phpdoc = "/** This is a doc block with an unclosed inline tag {@see Class */";
230        let span = Span::new(Position::dummy(0), Position::dummy(phpdoc.len()));
231
232        let result = parse_phpdoc_with_span(&interner, phpdoc, span);
233
234        match result {
235            Err(ParseError::UnclosedInlineTag(error_span)) => {
236                let expected_start = phpdoc.find("{@see").unwrap();
237                let expected_span = span.subspan(expected_start, phpdoc.len() - 3);
238                assert_eq!(error_span, expected_span);
239            }
240            _ => {
241                panic!("Expected ParseError::UnclosedInlineTag");
242            }
243        }
244    }
245
246    #[test]
247    fn test_unclosed_inline_code() {
248        // Test case for ParseError::UnclosedInlineCode
249        let interner = ThreadedInterner::new();
250        let phpdoc = "/** This is a doc block with unclosed inline code `code sample */";
251        let span = Span::new(Position::dummy(0), Position::dummy(phpdoc.len()));
252
253        let result = parse_phpdoc_with_span(&interner, phpdoc, span);
254
255        match result {
256            Err(ParseError::UnclosedInlineCode(error_span)) => {
257                let expected_start = phpdoc.find('`').unwrap();
258                let expected_span = span.subspan(expected_start, phpdoc.len() - 3);
259                assert_eq!(error_span, expected_span);
260            }
261            _ => {
262                panic!("Expected ParseError::UnclosedInlineCode");
263            }
264        }
265    }
266
267    #[test]
268    fn test_unclosed_code_block() {
269        let interner = ThreadedInterner::new();
270        let phpdoc = r#"/**
271            * This is a doc block with unclosed code block
272            * ```
273            * Some code here
274            */"#;
275        let span = Span::new(Position::dummy(0), Position::dummy(phpdoc.len()));
276
277        let result = parse_phpdoc_with_span(&interner, phpdoc, span);
278
279        match result {
280            Err(ParseError::UnclosedCodeBlock(error_span)) => {
281                let code_block_start = phpdoc.find("```").unwrap();
282                let expected_span = span.subspan(code_block_start, 109);
283                assert_eq!(error_span, expected_span);
284            }
285            _ => {
286                panic!("Expected ParseError::UnclosedCodeBlock");
287            }
288        }
289    }
290
291    #[test]
292    fn test_invalid_tag_name() {
293        // Test case for ParseError::InvalidTagName
294        let interner = ThreadedInterner::new();
295        let phpdoc = "/** @invalid_tag_name Description */";
296        let span = Span::new(Position::dummy(0), Position::dummy(phpdoc.len()));
297
298        let result = parse_phpdoc_with_span(&interner, phpdoc, span);
299
300        match result {
301            Err(ParseError::InvalidTagName(error_span)) => {
302                let tag_start = phpdoc.find("@invalid_tag_name").unwrap();
303                let tag_end = tag_start + "@invalid_tag_name".len();
304                let expected_span = span.subspan(tag_start, tag_end);
305                assert_eq!(error_span, expected_span);
306            }
307            _ => {
308                panic!("Expected ParseError::InvalidTagName");
309            }
310        }
311    }
312
313    #[test]
314    fn test_malformed_code_block() {
315        let interner = ThreadedInterner::new();
316        let phpdoc = r#"/**
317            * ```
318            * Some code here
319            * Incorrect closing
320            */"#;
321        let span = Span::new(Position::dummy(0), Position::dummy(phpdoc.len()));
322
323        let result = parse_phpdoc_with_span(&interner, phpdoc, span);
324
325        match result {
326            Ok(document) => {
327                panic!("Expected the parser to return an error, got {document:#?}");
328            }
329            Err(ParseError::UnclosedCodeBlock(error_span)) => {
330                let code_block_start = phpdoc.find("```").unwrap();
331                let expected_span = span.subspan(code_block_start, 82);
332                assert_eq!(error_span, expected_span);
333            }
334            _ => {
335                panic!("Expected ParseError::UnclosedCodeBlock");
336            }
337        }
338    }
339
340    #[test]
341    fn test_invalid_comment() {
342        // Test case for ParseError::InvalidComment
343        let interner = ThreadedInterner::new();
344        let phpdoc = "/* Not a valid doc block */";
345        let span = Span::new(Position::dummy(0), Position::dummy(phpdoc.len()));
346
347        let result = parse_phpdoc_with_span(&interner, phpdoc, span);
348
349        match result {
350            Err(ParseError::InvalidComment(error_span)) => {
351                assert_eq!(error_span, span);
352            }
353            _ => {
354                panic!("Expected ParseError::InvalidComment");
355            }
356        }
357    }
358
359    #[test]
360    fn test_inconsistent_indentation() {
361        // Test case for ParseError::InconsistentIndentation
362        let interner = ThreadedInterner::new();
363        let phpdoc = r#"/**
364    * This is a doc block
365      * With inconsistent indentation
366    */"#;
367        let span = Span::new(Position::dummy(0), Position::dummy(phpdoc.len()));
368
369        let result = parse_phpdoc_with_span(&interner, phpdoc, span);
370
371        match result {
372            Err(ParseError::InconsistentIndentation(error_span, expected, found)) => {
373                // The expected and found indentation lengths
374                assert_eq!(expected, 4); // Assuming 4 spaces
375                assert_eq!(found, 6); // Assuming 6 spaces
376                // The error_span should point to the line with inconsistent indentation
377                let inconsistent_line = "      * With inconsistent indentation";
378                let line_start = phpdoc.find(inconsistent_line).unwrap();
379                let indent_length = inconsistent_line.chars().take_while(|c| c.is_whitespace()).count();
380                let expected_span = span.subspan(line_start, line_start + indent_length);
381                assert_eq!(error_span, expected_span);
382            }
383            _ => {
384                panic!("Expected ParseError::InconsistentIndentation");
385            }
386        }
387    }
388
389    #[test]
390    fn test_missing_asterisk() {
391        // Test case for ParseError::MissingAsterisk
392        let interner = ThreadedInterner::new();
393        let phpdoc = r#"/**
394     This line is missing an asterisk
395     * This line is fine
396     */"#;
397        let span = Span::new(Position::dummy(0), Position::dummy(phpdoc.len()));
398
399        let result = parse_phpdoc_with_span(&interner, phpdoc, span);
400
401        match result {
402            Err(ParseError::MissingAsterisk(error_span)) => {
403                // The error_span should point to where the asterisk is missing
404                let problematic_line = "     This line is missing an asterisk";
405                let line_start = phpdoc.find(problematic_line).unwrap();
406                let indent_length = problematic_line.chars().take_while(|c| c.is_whitespace()).count();
407                let expected_span = span.subspan(line_start + indent_length, line_start + indent_length + 1);
408                assert_eq!(error_span, expected_span);
409            }
410            _ => {
411                panic!("Expected ParseError::MissingAsterisk");
412            }
413        }
414    }
415
416    #[test]
417    fn test_missing_whitespace_after_asterisk() {
418        // Test case for ParseError::MissingWhitespaceAfterAsterisk
419        let interner = ThreadedInterner::new();
420        let phpdoc = r#"/**
421     *This line is missing a space after asterisk
422     */"#;
423        let span = Span::new(Position::dummy(0), Position::dummy(phpdoc.len()));
424
425        let result = parse_phpdoc_with_span(&interner, phpdoc, span);
426
427        match result {
428            Err(ParseError::MissingWhitespaceAfterAsterisk(error_span)) => {
429                // The error_span should point to the character after the asterisk
430                let problematic_line = "*This line is missing a space after asterisk";
431                let line_start = phpdoc.find(problematic_line).unwrap();
432                let asterisk_pos = line_start;
433                let expected_span = span.subspan(asterisk_pos + 1, asterisk_pos + 2);
434                assert_eq!(error_span, expected_span);
435            }
436            _ => {
437                panic!("Expected ParseError::MissingWhitespaceAfterAsterisk");
438            }
439        }
440    }
441
442    #[test]
443    fn test_missing_whitespace_after_opening_asterisk() {
444        // Test case for ParseError::MissingWhitespaceAfterOpeningAsterisk
445        let interner = ThreadedInterner::new();
446        let phpdoc = "/**This is a doc block without space after /** */";
447        let span = Span::new(Position::dummy(0), Position::dummy(phpdoc.len()));
448
449        let result = parse_phpdoc_with_span(&interner, phpdoc, span);
450
451        match result {
452            Err(ParseError::MissingWhitespaceAfterOpeningAsterisk(error_span)) => {
453                // The error_span should point to the position after '/**'
454                let expected_span = span.subspan(3, 4);
455                assert_eq!(error_span, expected_span);
456            }
457            _ => {
458                panic!("Expected ParseError::MissingWhitespaceAfterOpeningAsterisk");
459            }
460        }
461    }
462
463    #[test]
464    fn test_missing_whitespace_before_closing_asterisk() {
465        // Test case for ParseError::MissingWhitespaceBeforeClosingAsterisk
466        let interner = ThreadedInterner::new();
467        let phpdoc = "/** This is a doc block without space before */*/";
468        let span = Span::new(Position::dummy(0), Position::dummy(phpdoc.len()));
469
470        let result = parse_phpdoc_with_span(&interner, phpdoc, span);
471
472        match result {
473            Err(ParseError::MissingWhitespaceBeforeClosingAsterisk(error_span)) => {
474                // The error_span should point to the position before '*/'
475                let expected_span = span.subspan(phpdoc.len() - 3, phpdoc.len() - 2);
476                assert_eq!(error_span, expected_span);
477            }
478            _ => {
479                panic!("Expected ParseError::MissingWhitespaceBeforeClosingAsterisk");
480            }
481        }
482    }
483
484    #[test]
485    fn test_utf8_characters() {
486        let interner = ThreadedInterner::new();
487        let phpdoc = r#"/**
488    * هذا نص باللغة العربية.
489    * 这是一段中文。
490    * Here are some mathematical symbols: ∑, ∆, π, θ.
491    *
492    * ```php
493    * // Arabic comment
494    * echo "مرحبا بالعالم";
495    * // Chinese comment
496    * echo "你好,世界";
497    * // Math symbols in code
498    * $sum = $a + $b; // ∑
499    * ```
500    *
501    * @param string $مثال A parameter with an Arabic variable name.
502    * @return int 返回值是整数类型。
503    */"#;
504
505        let span = Span::new(Position::dummy(0), Position::dummy(phpdoc.len()));
506        let document = parse_phpdoc_with_span(&interner, phpdoc, span).expect("Failed to parse PHPDoc");
507
508        // Verify the number of elements parsed
509        assert_eq!(document.elements.len(), 6);
510
511        // First text element (Arabic text)
512        let Element::Text(text) = &document.elements[0] else {
513            panic!("Expected Element::Text, got {:?}", document.elements[0]);
514        };
515
516        assert_eq!(text.segments.len(), 1);
517
518        let TextSegment::Paragraph { span, content } = &text.segments[0] else {
519            panic!("Expected TextSegment::Paragraph, got {:?}", text.segments[0]);
520        };
521
522        let content_str = interner.lookup(content);
523        assert_eq!(
524            content_str,
525            "هذا نص باللغة العربية.\n这是一段中文。\nHere are some mathematical symbols: ∑, ∆, π, θ."
526        );
527
528        assert_eq!(
529            &phpdoc[span.start.offset..span.end.offset],
530            "هذا نص باللغة العربية.\n    * 这是一段中文。\n    * Here are some mathematical symbols: ∑, ",
531        );
532
533        // Empty line
534        let Element::Line(_) = &document.elements[1] else {
535            panic!("Expected Element::Line, got {:?}", document.elements[3]);
536        };
537
538        // Code block
539        let Element::Code(code) = &document.elements[2] else {
540            panic!("Expected Element::Code, got {:?}", document.elements[2]);
541        };
542
543        let content_str = interner.lookup(&code.content);
544        let expected_code = "// Arabic comment\necho \"مرحبا بالعالم\";\n// Chinese comment\necho \"你好,世界\";\n// Math symbols in code\n$sum = $a + $b; // ∑";
545        assert_eq!(content_str, expected_code);
546        assert_eq!(
547            &phpdoc[code.span.start.offset..code.span.end.offset],
548            "```php\n    * // Arabic comment\n    * echo \"مرحبا بالعالم\";\n    * // Chinese comment\n    * echo \"你好,世界\";\n    * // Math symbols in code\n    * $sum = $a + $b; // ∑\n    * ```"
549        );
550
551        // Empty line
552        let Element::Line(_) = &document.elements[3] else {
553            panic!("Expected Element::Line, got {:?}", document.elements[3]);
554        };
555
556        // @param tag with Arabic variable name
557        let Element::Tag(tag) = &document.elements[4] else {
558            panic!("Expected Element::Tag, got {:?}", document.elements[4]);
559        };
560
561        let name = interner.lookup(&tag.name);
562        let description = interner.lookup(&tag.description);
563        assert_eq!(name, "param");
564        assert_eq!(tag.kind, TagKind::Param);
565        assert_eq!(description, "string $مثال A parameter with an Arabic variable name.");
566        assert_eq!(
567            &phpdoc[tag.span.start.offset..tag.span.end.offset],
568            "@param string $مثال A parameter with an Arabic variable name."
569        );
570
571        // @return tag with Chinese description
572        let Element::Tag(tag) = &document.elements[5] else {
573            panic!("Expected Element::Tag, got {:?}", document.elements[5]);
574        };
575
576        let name = interner.lookup(&tag.name);
577        let description = interner.lookup(&tag.description);
578        assert_eq!(name, "return");
579        assert_eq!(tag.kind, TagKind::Return);
580        assert_eq!(description, "int 返回值是整数类型。");
581        assert_eq!(&phpdoc[tag.span.start.offset..tag.span.end.offset], "@return int 返回值是整数类型。");
582    }
583
584    #[test]
585    fn test_annotation_parsing() {
586        let interner = ThreadedInterner::new();
587        let phpdoc = r#"/**
588         * @Event("Symfony\Component\Workflow\Event\CompletedEvent")
589         * @AnotherAnnotation({
590         *     "key": "value",
591         *     "list": [1, 2, 3]
592         * })
593         * @SimpleAnnotation
594         */"#;
595        let span = Span::new(Position::dummy(0), Position::dummy(phpdoc.len()));
596        let document = parse_phpdoc_with_span(&interner, phpdoc, span).expect("Failed to parse PHPDoc");
597
598        // Verify that the document has the expected number of elements
599        assert_eq!(document.elements.len(), 3);
600
601        // First annotation
602        let Element::Annotation(annotation) = &document.elements[0] else {
603            panic!("Expected Element::Annotation, got {:?}", document.elements[0]);
604        };
605
606        let name = interner.lookup(&annotation.name);
607        assert_eq!(name, "Event");
608        let arguments = interner.lookup(&annotation.arguments.unwrap());
609        assert_eq!(arguments, "(\"Symfony\\Component\\Workflow\\Event\\CompletedEvent\")");
610
611        // Second annotation
612        let Element::Annotation(annotation) = &document.elements[1] else {
613            panic!("Expected Element::Annotation, got {:?}", document.elements[1]);
614        };
615
616        let name = interner.lookup(&annotation.name);
617        assert_eq!(name, "AnotherAnnotation");
618        let arguments = interner.lookup(&annotation.arguments.unwrap());
619        let expected_arguments = "({\n    \"key\": \"value\",\n    \"list\": [1, 2, 3]\n})";
620        assert_eq!(arguments, expected_arguments);
621
622        // Third annotation
623        let Element::Annotation(annotation) = &document.elements[2] else {
624            panic!("Expected Element::Annotation, got {:?}", document.elements[2]);
625        };
626
627        let name = interner.lookup(&annotation.name);
628        assert_eq!(name, "SimpleAnnotation");
629        assert!(annotation.arguments.is_none());
630    }
631}