Skip to main content

mago_docblock/
lib.rs

1use bumpalo::Bump;
2
3use mago_span::Span;
4use mago_syntax::ast::Trivia;
5use mago_syntax::ast::TriviaKind;
6
7use crate::document::Document;
8use crate::error::ParseError;
9
10mod internal;
11
12pub mod document;
13pub mod error;
14pub mod tag;
15
16/// Parses a docblock from a trivia token.
17///
18/// # Errors
19///
20/// Returns a [`ParseError`] if the trivia is not a docblock comment or parsing fails.
21#[inline]
22pub fn parse_trivia<'arena>(arena: &'arena Bump, trivia: &Trivia<'arena>) -> Result<Document<'arena>, ParseError> {
23    if TriviaKind::DocBlockComment != trivia.kind {
24        return Err(ParseError::InvalidTrivia(trivia.span));
25    }
26
27    parse_phpdoc_with_span(arena, trivia.value, trivia.span)
28}
29
30/// Parses a `PHPDoc` comment string with an associated span.
31///
32/// # Errors
33///
34/// Returns a [`ParseError`] if tokenization or parsing fails.
35#[inline]
36pub fn parse_phpdoc_with_span<'arena>(
37    arena: &'arena Bump,
38    content: &'arena str,
39    span: Span,
40) -> Result<Document<'arena>, ParseError> {
41    let tokens = internal::lexer::tokenize(content, span)?;
42
43    internal::parser::parse_document(span, tokens.as_slice(), arena)
44}
45
46#[cfg(test)]
47#[allow(clippy::unwrap_used, clippy::expect_used)]
48mod tests {
49    use super::*;
50
51    use mago_database::file::FileId;
52    use mago_span::Position;
53    use mago_span::Span;
54
55    use crate::document::*;
56
57    #[test]
58    fn test_parse_all_elements() {
59        let arena = Bump::new();
60        let phpdoc = r#"/**
61            * This is a simple description.
62            *
63            * This text contains an inline code `echo "Hello, World!";`.
64            *
65            * This text contains an inline tag {@see \Some\Class}.
66            *
67            * ```php
68            * echo "Hello, World!";
69            * ```
70            *
71            *     $foo = "bar";
72            *     echo "Hello, World!";
73            *
74            * @param string $foo
75            * @param array{
76            *   bar: string,
77            *   baz: int
78            * } $bar
79            * @return void
80            */"#;
81
82        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
83        let document = parse_phpdoc_with_span(&arena, phpdoc, span).expect("Failed to parse PHPDoc");
84        assert_eq!(document.elements.len(), 12);
85
86        let Element::Text(text) = &document.elements[0] else {
87            panic!("Expected Element::Text, got {:?}", document.elements[0]);
88        };
89
90        assert_eq!(text.segments.len(), 1);
91
92        let TextSegment::Paragraph { span, content } = text.segments[0] else {
93            panic!("Expected TextSegment::Paragraph, got {:?}", text.segments[0]);
94        };
95
96        assert_eq!(content, "This is a simple description.");
97        assert_eq!(&phpdoc[span.start.offset as usize..span.end.offset as usize], "This is a simple description.");
98
99        let Element::Line(_) = &document.elements[1] else {
100            panic!("Expected Element::Line, got {:?}", document.elements[1]);
101        };
102
103        let Element::Text(text) = &document.elements[2] else {
104            panic!("Expected Element::Text, got {:?}", document.elements[2]);
105        };
106
107        assert_eq!(text.segments.len(), 3);
108
109        let TextSegment::Paragraph { content, .. } = text.segments[0] else {
110            panic!("Expected TextSegment::Paragraph, got {:?}", text.segments[0]);
111        };
112
113        assert_eq!(content, "This text contains an inline code ");
114
115        let TextSegment::InlineCode(code) = &text.segments[1] else {
116            panic!("Expected TextSegment::InlineCode, got {:?}", text.segments[1]);
117        };
118
119        let content = code.content;
120        assert_eq!(content, "echo \"Hello, World!\";");
121        assert_eq!(
122            &phpdoc[code.span.start.offset as usize..code.span.end.offset as usize],
123            "`echo \"Hello, World!\";`"
124        );
125
126        let TextSegment::Paragraph { content, .. } = text.segments[2] else {
127            panic!("Expected TextSegment::Paragraph, got {:?}", text.segments[2]);
128        };
129
130        assert_eq!(content, ".");
131
132        let Element::Line(_) = &document.elements[3] else {
133            panic!("Expected Element::Line, got {:?}", document.elements[3]);
134        };
135
136        let Element::Text(text) = &document.elements[4] else {
137            panic!("Expected Element::Text, got {:?}", document.elements[4]);
138        };
139
140        assert_eq!(text.segments.len(), 3);
141
142        let TextSegment::Paragraph { content, .. } = text.segments[0] else {
143            panic!("Expected TextSegment::Paragraph, got {:?}", text.segments[0]);
144        };
145
146        assert_eq!(content, "This text contains an inline tag ");
147
148        let TextSegment::InlineTag(tag) = &text.segments[1] else {
149            panic!("Expected TextSegment::InlineTag, got {:?}", text.segments[1]);
150        };
151
152        let name = tag.name;
153        let description = tag.description;
154        assert_eq!(name, "see");
155        assert_eq!(description, "\\Some\\Class");
156        assert_eq!(tag.kind, TagKind::See);
157        assert_eq!(&phpdoc[tag.span.start.offset as usize..tag.span.end.offset as usize], "{@see \\Some\\Class}");
158
159        let TextSegment::Paragraph { content, .. } = text.segments[2] else {
160            panic!("Expected TextSegment::Paragraph, got {:?}", text.segments[2]);
161        };
162
163        assert_eq!(content, ".");
164
165        let Element::Line(_) = &document.elements[5] else {
166            panic!("Expected Element::Line, got {:?}", document.elements[5]);
167        };
168
169        let Element::Code(code) = &document.elements[6] else {
170            panic!("Expected Element::CodeBlock, got {:?}", document.elements[6]);
171        };
172
173        let content = code.content;
174        assert_eq!(code.directives, &["php"]);
175        assert_eq!(content, "echo \"Hello, World!\";");
176        assert_eq!(
177            &phpdoc[code.span.start.offset as usize..code.span.end.offset as usize],
178            "```php\n            * echo \"Hello, World!\";\n            * ```"
179        );
180
181        let Element::Line(_) = &document.elements[7] else {
182            panic!("Expected Element::Line, got {:?}", document.elements[7]);
183        };
184
185        let Element::Code(code) = &document.elements[8] else {
186            panic!("Expected Element::CodeBlock, got {:?}", document.elements[8]);
187        };
188
189        let content = code.content;
190        assert!(code.directives.is_empty());
191        assert_eq!(content, "$foo = \"bar\";\necho \"Hello, World!\";\n");
192        assert_eq!(
193            &phpdoc[code.span.start.offset as usize..code.span.end.offset as usize],
194            "    $foo = \"bar\";\n            *     echo \"Hello, World!\";\n"
195        );
196
197        let Element::Tag(tag) = &document.elements[9] else {
198            panic!("Expected Element::Tag, got {:?}", document.elements[9]);
199        };
200
201        let name = tag.name;
202        let description = tag.description;
203        assert_eq!(name, "param");
204        assert_eq!(tag.kind, TagKind::Param);
205        assert_eq!(description, "string $foo");
206        assert_eq!(&phpdoc[tag.span.start.offset as usize..tag.span.end.offset as usize], "@param string $foo");
207
208        let Element::Tag(tag) = &document.elements[10] else {
209            panic!("Expected Element::Tag, got {:?}", document.elements[10]);
210        };
211
212        let name = tag.name;
213        let description = tag.description;
214        assert_eq!(name, "param");
215        assert_eq!(tag.kind, TagKind::Param);
216        assert_eq!(description, "array{\n  bar: string,\n  baz: int\n} $bar");
217        assert_eq!(
218            &phpdoc[tag.span.start.offset as usize..tag.span.end.offset as usize],
219            "@param array{\n            *   bar: string,\n            *   baz: int\n            * } $bar"
220        );
221
222        let Element::Tag(tag) = &document.elements[11] else {
223            panic!("Expected Element::Tag, got {:?}", document.elements[11]);
224        };
225
226        let name = tag.name;
227        let description = tag.description;
228        assert_eq!(name, "return");
229        assert_eq!(tag.kind, TagKind::Return);
230        assert_eq!(description, "void");
231        assert_eq!(&phpdoc[tag.span.start.offset as usize..tag.span.end.offset as usize], "@return void");
232    }
233
234    #[test]
235    fn test_unclosed_inline_tag() {
236        // Test case for ParseError::UnclosedInlineTag
237        let arena = Bump::new();
238        let phpdoc = "/** This is a doc block with an unclosed inline tag {@see Class */";
239        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
240
241        let result = parse_phpdoc_with_span(&arena, phpdoc, span);
242
243        match result {
244            Err(ParseError::UnclosedInlineTag(error_span)) => {
245                let expected_start = phpdoc.find("{@see").unwrap();
246                let expected_span = span.subspan(expected_start as u32, phpdoc.len() as u32 - 3);
247                assert_eq!(error_span, expected_span);
248            }
249            _ => {
250                panic!("Expected ParseError::UnclosedInlineTag");
251            }
252        }
253    }
254
255    #[test]
256    fn test_unclosed_inline_code() {
257        // Test case for ParseError::UnclosedInlineCode
258        let arena = Bump::new();
259        let phpdoc = "/** This is a doc block with unclosed inline code `code sample */";
260        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
261
262        let result = parse_phpdoc_with_span(&arena, phpdoc, span);
263
264        match result {
265            Err(ParseError::UnclosedInlineCode(error_span)) => {
266                let expected_start = phpdoc.find('`').unwrap();
267                let expected_span = span.subspan(expected_start as u32, phpdoc.len() as u32 - 3);
268                assert_eq!(error_span, expected_span);
269            }
270            _ => {
271                panic!("Expected ParseError::UnclosedInlineCode");
272            }
273        }
274    }
275
276    #[test]
277    fn test_unclosed_code_block() {
278        let arena = Bump::new();
279        let phpdoc = "/**
280            * This is a doc block with unclosed code block
281            * ```
282            * Some code here
283            */";
284        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
285
286        let result = parse_phpdoc_with_span(&arena, phpdoc, span);
287
288        match result {
289            Err(ParseError::UnclosedCodeBlock(error_span)) => {
290                let code_block_start = phpdoc.find("```").unwrap();
291                let expected_span = span.subspan(code_block_start as u32, 109);
292                assert_eq!(error_span, expected_span);
293            }
294            _ => {
295                panic!("Expected ParseError::UnclosedCodeBlock");
296            }
297        }
298    }
299
300    #[test]
301    fn test_invalid_tag_name() {
302        // Test case for ParseError::InvalidTagName — use a character not valid in identifiers
303        let arena = Bump::new();
304        let phpdoc = "/** @invalid!tag Description */";
305        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
306
307        let result = parse_phpdoc_with_span(&arena, phpdoc, span);
308
309        assert!(
310            matches!(result, Err(ParseError::InvalidTagName(_))),
311            "Expected ParseError::InvalidTagName, got {result:?}"
312        );
313    }
314
315    #[test]
316    fn test_underscore_tag_name_is_valid() {
317        let arena = Bump::new();
318        let phpdoc = "/** @some_tag Description */";
319        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
320
321        let document = parse_phpdoc_with_span(&arena, phpdoc, span).expect("Failed to parse PHPDoc");
322        let Element::Tag(tag) = &document.elements[0] else {
323            panic!("Expected Element::Tag");
324        };
325        assert_eq!(tag.name, "some_tag");
326    }
327
328    #[test]
329    fn test_malformed_code_block() {
330        let arena = Bump::new();
331        let phpdoc = "/**
332            * ```
333            * Some code here
334            * Incorrect closing
335            */";
336        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
337
338        let result = parse_phpdoc_with_span(&arena, phpdoc, span);
339
340        match result {
341            Ok(document) => {
342                panic!("Expected the parser to return an error, got {document:#?}");
343            }
344            Err(ParseError::UnclosedCodeBlock(error_span)) => {
345                let code_block_start = phpdoc.find("```").unwrap();
346                let expected_span = span.subspan(code_block_start as u32, 82);
347                assert_eq!(error_span, expected_span);
348            }
349            _ => {
350                panic!("Expected ParseError::UnclosedCodeBlock");
351            }
352        }
353    }
354
355    #[test]
356    fn test_invalid_comment() {
357        // Test case for ParseError::InvalidComment
358        let arena = Bump::new();
359        let phpdoc = "/* Not a valid doc block */";
360        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
361
362        let result = parse_phpdoc_with_span(&arena, phpdoc, span);
363
364        match result {
365            Err(ParseError::InvalidComment(error_span)) => {
366                assert_eq!(error_span, span);
367            }
368            _ => {
369                panic!("Expected ParseError::InvalidComment");
370            }
371        }
372    }
373
374    #[test]
375    fn test_inconsistent_indentation() {
376        // Test case for ParseError::InconsistentIndentation
377        let arena = Bump::new();
378        let phpdoc = "/**
379    * This is a doc block
380      * With inconsistent indentation
381    */";
382        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
383
384        let result = parse_phpdoc_with_span(&arena, phpdoc, span);
385
386        match result {
387            Ok(document) => {
388                assert_eq!(document.elements.len(), 1);
389                let Element::Text(text) = &document.elements[0] else {
390                    panic!("Expected Element::Text, got {:?}", document.elements[0]);
391                };
392
393                assert_eq!(text.segments.len(), 1);
394                let TextSegment::Paragraph { span, content } = &text.segments[0] else {
395                    panic!("Expected TextSegment::Paragraph, got {:?}", text.segments[0]);
396                };
397
398                assert_eq!(*content, "This is a doc block\nWith inconsistent indentation");
399                assert_eq!(
400                    &phpdoc[span.start.offset as usize..span.end.offset as usize],
401                    "This is a doc block\n      * With inconsistent indentation"
402                );
403            }
404            _ => {
405                panic!("Expected ParseError::InconsistentIndentation");
406            }
407        }
408    }
409
410    #[test]
411    fn test_missing_asterisk() {
412        let arena = Bump::new();
413        let phpdoc = "/**
414     This line is missing an asterisk
415     * This line is fine
416     */";
417        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
418
419        let result = parse_phpdoc_with_span(&arena, phpdoc, span);
420
421        match result {
422            Ok(document) => {
423                assert_eq!(document.elements.len(), 1);
424                let Element::Text(text) = &document.elements[0] else {
425                    panic!("Expected Element::Text, got {:?}", document.elements[0]);
426                };
427
428                assert_eq!(text.segments.len(), 1);
429
430                let TextSegment::Paragraph { span, content } = &text.segments[0] else {
431                    panic!("Expected TextSegment::Paragraph, got {:?}", text.segments[0]);
432                };
433
434                assert_eq!(*content, "This line is missing an asterisk\nThis line is fine");
435                assert_eq!(
436                    &phpdoc[span.start.offset as usize..span.end.offset as usize],
437                    "This line is missing an asterisk\n     * This line is fine"
438                );
439            }
440            _ => {
441                panic!("Expected ParseError::MissingAsterisk");
442            }
443        }
444    }
445
446    #[test]
447    fn test_missing_whitespace_after_asterisk() {
448        let arena = Bump::new();
449        let phpdoc = "/**
450     *This line is missing a space after asterisk
451     */";
452        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
453
454        let result = parse_phpdoc_with_span(&arena, phpdoc, span);
455
456        match result {
457            Ok(document) => {
458                assert_eq!(document.elements.len(), 1);
459                let Element::Text(text) = &document.elements[0] else {
460                    panic!("Expected Element::Text, got {:?}", document.elements[0]);
461                };
462
463                assert_eq!(text.segments.len(), 1);
464                let TextSegment::Paragraph { span, content } = &text.segments[0] else {
465                    panic!("Expected TextSegment::Paragraph, got {:?}", text.segments[0]);
466                };
467
468                assert_eq!(*content, "This line is missing a space after asterisk");
469                assert_eq!(
470                    &phpdoc[span.start.offset as usize..span.end.offset as usize],
471                    "This line is missing a space after asterisk"
472                );
473            }
474            _ => {
475                panic!("Expected ParseError::MissingWhitespaceAfterAsterisk");
476            }
477        }
478    }
479
480    #[test]
481    fn test_missing_whitespace_after_opening_asterisk() {
482        let arena = Bump::new();
483        let phpdoc = "/**This is a doc block without space after /** */";
484        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
485
486        let result = parse_phpdoc_with_span(&arena, phpdoc, span);
487
488        match result {
489            Ok(document) => {
490                assert_eq!(document.elements.len(), 1);
491                let Element::Text(text) = &document.elements[0] else {
492                    panic!("Expected Element::Text, got {:?}", document.elements[0]);
493                };
494
495                assert_eq!(text.segments.len(), 1);
496                let TextSegment::Paragraph { span, content } = &text.segments[0] else {
497                    panic!("Expected TextSegment::Paragraph, got {:?}", text.segments[0]);
498                };
499
500                assert_eq!(*content, "This is a doc block without space after /**");
501                assert_eq!(
502                    &phpdoc[span.start.offset as usize..span.end.offset as usize],
503                    "This is a doc block without space after /**"
504                );
505            }
506            _ => {
507                panic!("Expected ParseError::MissingWhitespaceAfterOpeningAsterisk");
508            }
509        }
510    }
511
512    #[test]
513    fn test_missing_whitespace_before_closing_asterisk() {
514        let arena = Bump::new();
515        let phpdoc = "/** This is a doc block without space before */*/";
516        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
517
518        let result = parse_phpdoc_with_span(&arena, phpdoc, span);
519
520        match result {
521            Ok(document) => {
522                assert_eq!(document.elements.len(), 1);
523                let Element::Text(text) = &document.elements[0] else {
524                    panic!("Expected Element::Text, got {:?}", document.elements[0]);
525                };
526
527                assert_eq!(text.segments.len(), 1);
528                let TextSegment::Paragraph { span, content } = &text.segments[0] else {
529                    panic!("Expected TextSegment::Paragraph, got {:?}", text.segments[0]);
530                };
531
532                assert_eq!(*content, "This is a doc block without space before */");
533                assert_eq!(
534                    &phpdoc[span.start.offset as usize..span.end.offset as usize],
535                    "This is a doc block without space before */"
536                );
537            }
538            _ => {
539                panic!("Expected ParseError::MissingWhitespaceBeforeClosingAsterisk");
540            }
541        }
542    }
543
544    #[test]
545    fn test_utf8_characters() {
546        let arena = Bump::new();
547        let phpdoc = r#"/**
548    * هذا نص باللغة العربية.
549    * 这是一段中文。
550    * Here are some mathematical symbols: ∑, ∆, π, θ.
551    *
552    * ```php
553    * // Arabic comment
554    * echo "مرحبا بالعالم";
555    * // Chinese comment
556    * echo "你好,世界";
557    * // Math symbols in code
558    * $sum = $a + $b; // ∑
559    * ```
560    *
561    * @param string $مثال A parameter with an Arabic variable name.
562    * @return int 返回值是整数类型。
563    */"#;
564
565        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
566        let document = parse_phpdoc_with_span(&arena, phpdoc, span).expect("Failed to parse PHPDoc");
567
568        // Verify the number of elements parsed
569        assert_eq!(document.elements.len(), 6);
570
571        // First text element (Arabic text)
572        let Element::Text(text) = &document.elements[0] else {
573            panic!("Expected Element::Text, got {:?}", document.elements[0]);
574        };
575
576        assert_eq!(text.segments.len(), 1);
577
578        let TextSegment::Paragraph { span, content } = &text.segments[0] else {
579            panic!("Expected TextSegment::Paragraph, got {:?}", text.segments[0]);
580        };
581
582        assert_eq!(*content, "هذا نص باللغة العربية.\n这是一段中文。\nHere are some mathematical symbols: ∑, ∆, π, θ.");
583
584        assert_eq!(
585            &phpdoc[span.start.offset as usize..span.end.offset as usize],
586            "هذا نص باللغة العربية.\n    * 这是一段中文。\n    * Here are some mathematical symbols: ∑, ∆, π, θ."
587        );
588
589        // Empty line
590        let Element::Line(_) = &document.elements[1] else {
591            panic!("Expected Element::Line, got {:?}", document.elements[3]);
592        };
593
594        // Code block
595        let Element::Code(code) = &document.elements[2] else {
596            panic!("Expected Element::Code, got {:?}", document.elements[2]);
597        };
598
599        let content_str = code.content;
600        let expected_code = "// Arabic comment\necho \"مرحبا بالعالم\";\n// Chinese comment\necho \"你好,世界\";\n// Math symbols in code\n$sum = $a + $b; // ∑";
601        assert_eq!(content_str, expected_code);
602        assert_eq!(
603            &phpdoc[code.span.start.offset as usize..code.span.end.offset as usize],
604            "```php\n    * // Arabic comment\n    * echo \"مرحبا بالعالم\";\n    * // Chinese comment\n    * echo \"你好,世界\";\n    * // Math symbols in code\n    * $sum = $a + $b; // ∑\n    * ```"
605        );
606
607        // Empty line
608        let Element::Line(_) = &document.elements[3] else {
609            panic!("Expected Element::Line, got {:?}", document.elements[3]);
610        };
611
612        // @param tag with Arabic variable name
613        let Element::Tag(tag) = &document.elements[4] else {
614            panic!("Expected Element::Tag, got {:?}", document.elements[4]);
615        };
616
617        let name = tag.name;
618        let description = tag.description;
619        assert_eq!(name, "param");
620        assert_eq!(tag.kind, TagKind::Param);
621        assert_eq!(description, "string $مثال A parameter with an Arabic variable name.");
622        assert_eq!(
623            &phpdoc[tag.span.start.offset as usize..tag.span.end.offset as usize],
624            "@param string $مثال A parameter with an Arabic variable name."
625        );
626
627        // @return tag with Chinese description
628        let Element::Tag(tag) = &document.elements[5] else {
629            panic!("Expected Element::Tag, got {:?}", document.elements[5]);
630        };
631
632        let name = tag.name;
633        let description = tag.description;
634        assert_eq!(name, "return");
635        assert_eq!(tag.kind, TagKind::Return);
636        assert_eq!(description, "int 返回值是整数类型。");
637        assert_eq!(
638            &phpdoc[tag.span.start.offset as usize..tag.span.end.offset as usize],
639            "@return int 返回值是整数类型。"
640        );
641    }
642
643    #[test]
644    fn test_annotation_parsing() {
645        let arena = Bump::new();
646        let phpdoc = r#"/**
647         * @Event("Symfony\Component\Workflow\Event\CompletedEvent")
648         * @AnotherAnnotation({
649         *     "key": "value",
650         *     "list": [1, 2, 3]
651         * })
652         * @SimpleAnnotation
653         */"#;
654        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
655        let document = parse_phpdoc_with_span(&arena, phpdoc, span).expect("Failed to parse PHPDoc");
656
657        let Element::Tag(tag) = &document.elements[0] else {
658            panic!("Expected Element::Tag, got {:?}", document.elements[0]);
659        };
660
661        assert_eq!(tag.name, "Event");
662        assert_eq!(tag.metadata.unwrap(), "(\"Symfony\\Component\\Workflow\\Event\\CompletedEvent\")");
663
664        let Element::Tag(tag) = &document.elements[1] else {
665            panic!("Expected Element::Tag, got {:?}", document.elements[1]);
666        };
667        assert_eq!(tag.name, "AnotherAnnotation");
668
669        let last_idx = document.elements.len() - 1;
670        let Element::Tag(tag) = &document.elements[last_idx] else {
671            panic!("Expected Element::Tag, got {:?}", document.elements[last_idx]);
672        };
673        assert_eq!(tag.name, "SimpleAnnotation");
674        assert!(tag.metadata.is_none());
675    }
676
677    #[test]
678    fn test_long_description_with_missing_asterisk() {
679        let arena = Bump::new();
680        let phpdoc = "/** @var string[] this is a really long description
681            that spans multiple lines, and demonstrates how the parser handles
682            docblocks with multiple descriptions, and missing astricks*/";
683        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
684        let document = parse_phpdoc_with_span(&arena, phpdoc, span).expect("Failed to parse PHPDoc");
685
686        assert_eq!(document.elements.len(), 1);
687        let Element::Tag(tag) = &document.elements[0] else {
688            panic!("Expected Element::Tag, got {:?}", document.elements[0]);
689        };
690
691        let name = tag.name;
692        let description = tag.description;
693        assert_eq!(name, "var");
694        assert_eq!(tag.kind, TagKind::Var);
695        assert_eq!(
696            description,
697            "string[] this is a really long description\nthat spans multiple lines, and demonstrates how the parser handles\ndocblocks with multiple descriptions, and missing astricks"
698        );
699        assert_eq!(
700            &phpdoc[tag.span.start.offset as usize..tag.span.end.offset as usize],
701            "@var string[] this is a really long description\n            that spans multiple lines, and demonstrates how the parser handles\n            docblocks with multiple descriptions, and missing astricks"
702        );
703    }
704
705    #[test]
706    fn test_code_indent_using_non_ascii_chars() {
707        let arena = Bump::new();
708        let phpdoc = "/**
709        *    └─ comment 2
710        *       └─ comment 4
711        *    └─ comment 3
712        */";
713
714        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
715        let document = parse_phpdoc_with_span(&arena, phpdoc, span).expect("Failed to parse PHPDoc");
716
717        assert_eq!(document.elements.len(), 1);
718
719        let Element::Code(code) = &document.elements[0] else {
720            panic!("Expected Element::Code, got {:?}", document.elements[0]);
721        };
722
723        let content_str = code.content;
724        // After fix: correctly strips indent_len characters (not bytes) from each line
725        assert_eq!(content_str, "└─ comment 2\n\u{a0}\u{a0} └─ comment 4\n└─ comment 3");
726        assert_eq!(
727            &phpdoc[code.span.start.offset as usize..code.span.end.offset as usize],
728            " \u{a0} └─ comment 2\n        *    \u{a0}\u{a0} └─ comment 4\n        *  \u{a0} └─ comment 3"
729        );
730    }
731
732    #[test]
733    fn test_issue_456() {
734        let arena = Bump::new();
735        let phpdoc = "/**
736             * \u{3000}(イベント日数をもとに計算)\u{3000}
737             * @return\u{3000}int
738             * @throws\u{3000}Exception
739             */";
740
741        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
742        let document = parse_phpdoc_with_span(&arena, phpdoc, span).expect("Failed to parse PHPDoc");
743
744        assert_eq!(document.elements.len(), 3);
745
746        let Element::Text(text) = &document.elements[0] else {
747            panic!("Expected Element::Text, got {:?}", document.elements[0]);
748        };
749
750        assert_eq!(text.segments.len(), 1);
751        let TextSegment::Paragraph { span, content } = &text.segments[0] else {
752            panic!("Expected TextSegment::Paragraph, got {:?}", text.segments[0]);
753        };
754
755        assert_eq!(*content, " (イベント日数をもとに計算)");
756        assert_eq!(&phpdoc[span.start.offset as usize..span.end.offset as usize], "\u{3000}(イベント日数をもとに計算)");
757
758        let Element::Tag(tag) = &document.elements[1] else {
759            panic!("Expected Element::Tag, got {:?}", document.elements[1]);
760        };
761
762        let name = tag.name;
763        let description = tag.description;
764        assert_eq!(name, "return");
765        assert_eq!(tag.kind, TagKind::Return);
766        assert_eq!(description, "int");
767        assert_eq!(&phpdoc[tag.span.start.offset as usize..tag.span.end.offset as usize], "@return\u{3000}int");
768
769        let Element::Tag(tag) = &document.elements[2] else {
770            panic!("Expected Element::Tag, got {:?}", document.elements[2]);
771        };
772
773        let name = tag.name;
774        let description = tag.description;
775        assert_eq!(name, "throws");
776        assert_eq!(tag.kind, TagKind::Throws);
777        assert_eq!(description, "Exception");
778        assert_eq!(&phpdoc[tag.span.start.offset as usize..tag.span.end.offset as usize], "@throws\u{3000}Exception");
779    }
780
781    #[test]
782    fn test_issue_808() {
783        let arena = Bump::new();
784
785        let phpdoc = "/** @param\u{3000}string $foo 中文描述 */";
786        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
787        let document = parse_phpdoc_with_span(&arena, phpdoc, span).expect("Failed to parse PHPDoc");
788
789        assert_eq!(document.elements.len(), 1);
790        let Element::Tag(tag) = &document.elements[0] else {
791            panic!("Expected Element::Tag, got {:?}", document.elements[0]);
792        };
793        assert_eq!(tag.name, "param");
794        assert_eq!(tag.description, "string $foo 中文描述");
795
796        let phpdoc2 = "/** @return\u{3000}int 返回🎉值 */";
797        let span2 = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc2.len() as u32));
798        let document2 = parse_phpdoc_with_span(&arena, phpdoc2, span2).expect("Failed to parse PHPDoc");
799
800        assert_eq!(document2.elements.len(), 1);
801        let Element::Tag(tag2) = &document2.elements[0] else {
802            panic!("Expected Element::Tag, got {:?}", document2.elements[0]);
803        };
804        assert_eq!(tag2.name, "return");
805        assert_eq!(tag2.description, "int 返回🎉值");
806
807        let phpdoc3 = "/** @see\u{3000}中文类::方法() 说明 */";
808        let span3 = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc3.len() as u32));
809        let document3 = parse_phpdoc_with_span(&arena, phpdoc3, span3).expect("Failed to parse PHPDoc");
810
811        assert_eq!(document3.elements.len(), 1);
812        let Element::Tag(tag3) = &document3.elements[0] else {
813            panic!("Expected Element::Tag, got {:?}", document3.elements[0]);
814        };
815        assert_eq!(tag3.name, "see");
816        assert_eq!(tag3.description, "中文类::方法() 说明");
817    }
818
819    #[test]
820    fn test_indented_code_with_fullwidth_space_in_indent() {
821        // Test case for multi-byte whitespace in indented code (Issue #967)
822        // parse_indented_code is only called when line starts with ASCII space/tab
823        // The bug occurs when indent contains full-width space after ASCII spaces
824        //
825        // After lexer processing, content becomes "  \u{3000}code"
826        // is_indented_line returns true (starts with ASCII space)
827        // indent_len = 3 (2 ASCII spaces + 1 full-width space char)
828        // But byte offset should be 2 + 3 = 5
829        let arena = Bump::new();
830        // Format: " * " (asterisk + space) + "  " (2 ASCII spaces) + "\u{3000}" (full-width) + "code"
831        let phpdoc = "/**\n *   \u{3000}code\n */";
832        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
833
834        let result = parse_phpdoc_with_span(&arena, phpdoc, span);
835        assert!(result.is_ok(), "Parsing should succeed without panic");
836
837        let document = result.unwrap();
838        assert_eq!(document.elements.len(), 1);
839        let Element::Code(code) = &document.elements[0] else {
840            panic!("Expected Element::Code, got {:?}", document.elements[0]);
841        };
842        assert_eq!(code.content, "code");
843    }
844
845    #[test]
846    fn test_indented_code_with_mixed_multibyte_whitespace() {
847        // Multiple lines with mixed ASCII and full-width whitespace
848        let arena = Bump::new();
849        let phpdoc = "/**\n *  \u{3000}first line\n *  \u{3000}second line\n */";
850        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
851
852        let result = parse_phpdoc_with_span(&arena, phpdoc, span);
853        assert!(result.is_ok(), "Parsing should succeed without panic");
854
855        let document = result.unwrap();
856        assert_eq!(document.elements.len(), 1);
857        let Element::Code(code) = &document.elements[0] else {
858            panic!("Expected Element::Code, got {:?}", document.elements[0]);
859        };
860        assert_eq!(code.content, "first line\nsecond line");
861    }
862
863    #[test]
864    fn test_indented_code_with_tab_and_fullwidth_space() {
865        // Tab + full-width space: is_indented_line checks for '\t' as well
866        let arena = Bump::new();
867        // After "* " there is a tab followed by full-width space
868        let phpdoc = "/**\n * \t\u{3000}code\n */";
869        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
870
871        let result = parse_phpdoc_with_span(&arena, phpdoc, span);
872        assert!(result.is_ok(), "Parsing should succeed without panic");
873
874        let document = result.unwrap();
875        assert_eq!(document.elements.len(), 1);
876        let Element::Code(code) = &document.elements[0] else {
877            panic!("Expected Element::Code, got {:?}", document.elements[0]);
878        };
879        assert_eq!(code.content, "code");
880    }
881
882    #[test]
883    fn test_issue_967_original_pattern() {
884        // Original Issue #967 reproduction case
885        // Error: byte index 3 is not a char boundary; it is inside '\u{3000}' (bytes 1..4) of `   メールクリックがない`
886        // After lexer processing: " " + "\u{3000}" + " " + Japanese text
887        // This triggers parse_indented_code because line starts with ASCII space
888        let arena = Bump::new();
889        // Format: " * " + " " (1 ASCII space) + "\u{3000}" (full-width) + " " (1 ASCII space) + text
890        let phpdoc = "/**\n *  \u{3000} メールクリックがない\n */";
891        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
892
893        let result = parse_phpdoc_with_span(&arena, phpdoc, span);
894        assert!(result.is_ok(), "Parsing should succeed without panic");
895
896        let document = result.unwrap();
897        assert_eq!(document.elements.len(), 1);
898        let Element::Code(code) = &document.elements[0] else {
899            panic!("Expected Element::Code, got {:?}", document.elements[0]);
900        };
901        assert_eq!(code.content, "メールクリックがない");
902    }
903
904    #[test]
905    fn test_multiline_inline_tag() {
906        let arena = Bump::new();
907        let phpdoc = "/**
908            * This method gets a count of the Foo.
909            * {@internal Developers should note that it silently
910            *            adds one extra Foo.}
911            *
912            * @return int
913            */";
914
915        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
916        let document = parse_phpdoc_with_span(&arena, phpdoc, span).expect("Failed to parse PHPDoc");
917
918        let Element::Text(text) = &document.elements[0] else {
919            panic!("Expected Element::Text, got {:?}", document.elements[0]);
920        };
921
922        assert!(text.segments.len() >= 2, "Expected at least 2 segments, got {:?}", text.segments);
923
924        let has_inline_tag =
925            text.segments.iter().any(|seg| matches!(seg, TextSegment::InlineTag(tag) if tag.name == "internal"));
926
927        assert!(has_inline_tag, "Expected an InlineTag with name 'internal', got segments: {:?}", text.segments);
928    }
929
930    #[test]
931    fn test_multiline_inline_tag_with_nested() {
932        let arena = Bump::new();
933        let phpdoc = "/**
934            * {@internal Developers should note that it silently
935            *            adds one extra Foo (see {@link http://example.com}).}
936            */";
937
938        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
939        let document = parse_phpdoc_with_span(&arena, phpdoc, span).expect("Failed to parse PHPDoc");
940
941        let Element::Text(text) = &document.elements[0] else {
942            panic!("Expected Element::Text, got {:?}", document.elements[0]);
943        };
944
945        let has_internal_tag =
946            text.segments.iter().any(|seg| matches!(seg, TextSegment::InlineTag(tag) if tag.name == "internal"));
947
948        assert!(has_internal_tag, "Expected an InlineTag with name 'internal', got segments: {:?}", text.segments);
949    }
950
951    #[test]
952    fn test_single_line_inline_tag_still_works() {
953        let arena = Bump::new();
954        let phpdoc = r#"/**
955            * See {@see \Some\Class} for details.
956            */"#;
957
958        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
959        let document = parse_phpdoc_with_span(&arena, phpdoc, span).expect("Failed to parse PHPDoc");
960
961        let Element::Text(text) = &document.elements[0] else {
962            panic!("Expected Element::Text, got {:?}", document.elements[0]);
963        };
964
965        let has_see_tag =
966            text.segments.iter().any(|seg| matches!(seg, TextSegment::InlineTag(tag) if tag.name == "see"));
967
968        assert!(has_see_tag, "Expected an InlineTag with name 'see', got segments: {:?}", text.segments);
969    }
970
971    #[test]
972    fn test_multiline_inline_tag_chinese() {
973        let arena = Bump::new();
974        let phpdoc = "/**
975            * 获取用户数量的方法。
976            * {@internal 开发者请注意,此方法会静默地
977            *            添加一个额外的用户。}
978            *
979            * @return int
980            */";
981
982        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
983        let document = parse_phpdoc_with_span(&arena, phpdoc, span).expect("Failed to parse Chinese PHPDoc");
984
985        let Element::Text(text) = &document.elements[0] else {
986            panic!("Expected Element::Text, got {:?}", document.elements[0]);
987        };
988
989        let has_internal =
990            text.segments.iter().any(|seg| matches!(seg, TextSegment::InlineTag(tag) if tag.name == "internal"));
991        assert!(has_internal, "Expected InlineTag 'internal' with Chinese content, got: {:?}", text.segments);
992    }
993
994    #[test]
995    fn test_multiline_inline_tag_japanese() {
996        let arena = Bump::new();
997        let phpdoc = r#"/**
998            * ユーザー数を取得するメソッド。
999            * {@see \App\Service\UserCounter このクラスは
1000            *       ユーザーの数を数えます。}
1001            */"#;
1002
1003        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
1004        let document = parse_phpdoc_with_span(&arena, phpdoc, span).expect("Failed to parse Japanese PHPDoc");
1005
1006        let Element::Text(text) = &document.elements[0] else {
1007            panic!("Expected Element::Text, got {:?}", document.elements[0]);
1008        };
1009
1010        let has_see = text.segments.iter().any(|seg| matches!(seg, TextSegment::InlineTag(tag) if tag.name == "see"));
1011        assert!(has_see, "Expected InlineTag 'see' with Japanese content, got: {:?}", text.segments);
1012    }
1013
1014    #[test]
1015    fn test_multiline_inline_tag_arabic() {
1016        let arena = Bump::new();
1017        let phpdoc = "/**
1018            * طريقة للحصول على عدد المستخدمين.
1019            * {@internal يجب على المطورين ملاحظة أن هذه الطريقة
1020            *            تضيف مستخدمًا إضافيًا بصمت.}
1021            *
1022            * @return int
1023            */";
1024
1025        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
1026        let document = parse_phpdoc_with_span(&arena, phpdoc, span).expect("Failed to parse Arabic PHPDoc");
1027
1028        let Element::Text(text) = &document.elements[0] else {
1029            panic!("Expected Element::Text, got {:?}", document.elements[0]);
1030        };
1031
1032        let has_internal =
1033            text.segments.iter().any(|seg| matches!(seg, TextSegment::InlineTag(tag) if tag.name == "internal"));
1034        assert!(has_internal, "Expected InlineTag 'internal' with Arabic content, got: {:?}", text.segments);
1035    }
1036
1037    #[test]
1038    fn test_multiline_inline_tag_mixed_scripts() {
1039        let arena = Bump::new();
1040        let phpdoc = "/**
1041            * Documentation with mixed scripts.
1042            * {@internal 注意: This method は静かに adds один
1043            *            дополнительный элемент 요소를 추가합니다.}
1044            */";
1045
1046        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
1047        let document = parse_phpdoc_with_span(&arena, phpdoc, span).expect("Failed to parse mixed-script PHPDoc");
1048
1049        let Element::Text(text) = &document.elements[0] else {
1050            panic!("Expected Element::Text, got {:?}", document.elements[0]);
1051        };
1052
1053        let has_internal =
1054            text.segments.iter().any(|seg| matches!(seg, TextSegment::InlineTag(tag) if tag.name == "internal"));
1055        assert!(has_internal, "Expected InlineTag 'internal' with mixed-script content, got: {:?}", text.segments);
1056    }
1057}