Skip to main content

mago_docblock/
lib.rs

1use bumpalo::Bump;
2
3use mago_span::Span;
4use mago_syntax::ast::Trivia;
5use mago_syntax::ast::TriviaKind;
6
7use crate::document::Document;
8use crate::error::ParseError;
9
10mod internal;
11
12pub mod document;
13pub mod error;
14pub mod tag;
15
16/// Parses a docblock from a trivia token.
17///
18/// # Errors
19///
20/// Returns a [`ParseError`] if the trivia is not a docblock comment or parsing fails.
21#[inline]
22pub fn parse_trivia<'arena>(arena: &'arena Bump, trivia: &Trivia<'arena>) -> Result<Document<'arena>, ParseError> {
23    if TriviaKind::DocBlockComment != trivia.kind {
24        return Err(ParseError::InvalidTrivia(trivia.span));
25    }
26
27    parse_phpdoc_with_span(arena, trivia.value, trivia.span)
28}
29
30/// Parses a `PHPDoc` comment string with an associated span.
31///
32/// # Errors
33///
34/// Returns a [`ParseError`] if tokenization or parsing fails.
35#[inline]
36pub fn parse_phpdoc_with_span<'arena>(
37    arena: &'arena Bump,
38    content: &'arena str,
39    span: Span,
40) -> Result<Document<'arena>, ParseError> {
41    let tokens = internal::lexer::tokenize(content, span)?;
42
43    internal::parser::parse_document(span, tokens.as_slice(), arena)
44}
45
46#[cfg(test)]
47mod tests {
48    use super::*;
49
50    use mago_database::file::FileId;
51    use mago_span::Position;
52    use mago_span::Span;
53
54    use crate::document::*;
55
56    #[test]
57    fn test_parse_all_elements() {
58        let arena = Bump::new();
59        let phpdoc = r#"/**
60            * This is a simple description.
61            *
62            * This text contains an inline code `echo "Hello, World!";`.
63            *
64            * This text contains an inline tag {@see \Some\Class}.
65            *
66            * ```php
67            * echo "Hello, World!";
68            * ```
69            *
70            *     $foo = "bar";
71            *     echo "Hello, World!";
72            *
73            * @param string $foo
74            * @param array{
75            *   bar: string,
76            *   baz: int
77            * } $bar
78            * @return void
79            */"#;
80
81        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
82        let document = parse_phpdoc_with_span(&arena, phpdoc, span).expect("Failed to parse PHPDoc");
83        assert_eq!(document.elements.len(), 12);
84
85        let Element::Text(text) = &document.elements[0] else {
86            panic!("Expected Element::Text, got {:?}", document.elements[0]);
87        };
88
89        assert_eq!(text.segments.len(), 1);
90
91        let TextSegment::Paragraph { span, content } = text.segments[0] else {
92            panic!("Expected TextSegment::Paragraph, got {:?}", text.segments[0]);
93        };
94
95        assert_eq!(content, "This is a simple description.");
96        assert_eq!(&phpdoc[span.start.offset as usize..span.end.offset as usize], "This is a simple description.");
97
98        let Element::Line(_) = &document.elements[1] else {
99            panic!("Expected Element::Line, got {:?}", document.elements[1]);
100        };
101
102        let Element::Text(text) = &document.elements[2] else {
103            panic!("Expected Element::Text, got {:?}", document.elements[2]);
104        };
105
106        assert_eq!(text.segments.len(), 3);
107
108        let TextSegment::Paragraph { content, .. } = text.segments[0] else {
109            panic!("Expected TextSegment::Paragraph, got {:?}", text.segments[0]);
110        };
111
112        assert_eq!(content, "This text contains an inline code ");
113
114        let TextSegment::InlineCode(code) = &text.segments[1] else {
115            panic!("Expected TextSegment::InlineCode, got {:?}", text.segments[1]);
116        };
117
118        let content = code.content;
119        assert_eq!(content, "echo \"Hello, World!\";");
120        assert_eq!(
121            &phpdoc[code.span.start.offset as usize..code.span.end.offset as usize],
122            "`echo \"Hello, World!\";`"
123        );
124
125        let TextSegment::Paragraph { content, .. } = text.segments[2] else {
126            panic!("Expected TextSegment::Paragraph, got {:?}", text.segments[2]);
127        };
128
129        assert_eq!(content, ".");
130
131        let Element::Line(_) = &document.elements[3] else {
132            panic!("Expected Element::Line, got {:?}", document.elements[3]);
133        };
134
135        let Element::Text(text) = &document.elements[4] else {
136            panic!("Expected Element::Text, got {:?}", document.elements[4]);
137        };
138
139        assert_eq!(text.segments.len(), 3);
140
141        let TextSegment::Paragraph { content, .. } = text.segments[0] else {
142            panic!("Expected TextSegment::Paragraph, got {:?}", text.segments[0]);
143        };
144
145        assert_eq!(content, "This text contains an inline tag ");
146
147        let TextSegment::InlineTag(tag) = &text.segments[1] else {
148            panic!("Expected TextSegment::InlineTag, got {:?}", text.segments[1]);
149        };
150
151        let name = tag.name;
152        let description = tag.description;
153        assert_eq!(name, "see");
154        assert_eq!(description, "\\Some\\Class");
155        assert_eq!(tag.kind, TagKind::See);
156        assert_eq!(&phpdoc[tag.span.start.offset as usize..tag.span.end.offset as usize], "{@see \\Some\\Class}");
157
158        let TextSegment::Paragraph { content, .. } = text.segments[2] else {
159            panic!("Expected TextSegment::Paragraph, got {:?}", text.segments[2]);
160        };
161
162        assert_eq!(content, ".");
163
164        let Element::Line(_) = &document.elements[5] else {
165            panic!("Expected Element::Line, got {:?}", document.elements[5]);
166        };
167
168        let Element::Code(code) = &document.elements[6] else {
169            panic!("Expected Element::CodeBlock, got {:?}", document.elements[6]);
170        };
171
172        let content = code.content;
173        assert_eq!(code.directives, &["php"]);
174        assert_eq!(content, "echo \"Hello, World!\";");
175        assert_eq!(
176            &phpdoc[code.span.start.offset as usize..code.span.end.offset as usize],
177            "```php\n            * echo \"Hello, World!\";\n            * ```"
178        );
179
180        let Element::Line(_) = &document.elements[7] else {
181            panic!("Expected Element::Line, got {:?}", document.elements[7]);
182        };
183
184        let Element::Code(code) = &document.elements[8] else {
185            panic!("Expected Element::CodeBlock, got {:?}", document.elements[8]);
186        };
187
188        let content = code.content;
189        assert!(code.directives.is_empty());
190        assert_eq!(content, "$foo = \"bar\";\necho \"Hello, World!\";\n");
191        assert_eq!(
192            &phpdoc[code.span.start.offset as usize..code.span.end.offset as usize],
193            "    $foo = \"bar\";\n            *     echo \"Hello, World!\";\n"
194        );
195
196        let Element::Tag(tag) = &document.elements[9] else {
197            panic!("Expected Element::Tag, got {:?}", document.elements[9]);
198        };
199
200        let name = tag.name;
201        let description = tag.description;
202        assert_eq!(name, "param");
203        assert_eq!(tag.kind, TagKind::Param);
204        assert_eq!(description, "string $foo");
205        assert_eq!(&phpdoc[tag.span.start.offset as usize..tag.span.end.offset as usize], "@param string $foo");
206
207        let Element::Tag(tag) = &document.elements[10] else {
208            panic!("Expected Element::Tag, got {:?}", document.elements[10]);
209        };
210
211        let name = tag.name;
212        let description = tag.description;
213        assert_eq!(name, "param");
214        assert_eq!(tag.kind, TagKind::Param);
215        assert_eq!(description, "array{\n  bar: string,\n  baz: int\n} $bar");
216        assert_eq!(
217            &phpdoc[tag.span.start.offset as usize..tag.span.end.offset as usize],
218            "@param array{\n            *   bar: string,\n            *   baz: int\n            * } $bar"
219        );
220
221        let Element::Tag(tag) = &document.elements[11] else {
222            panic!("Expected Element::Tag, got {:?}", document.elements[11]);
223        };
224
225        let name = tag.name;
226        let description = tag.description;
227        assert_eq!(name, "return");
228        assert_eq!(tag.kind, TagKind::Return);
229        assert_eq!(description, "void");
230        assert_eq!(&phpdoc[tag.span.start.offset as usize..tag.span.end.offset as usize], "@return void");
231    }
232
233    #[test]
234    fn test_unclosed_inline_tag() {
235        // Test case for ParseError::UnclosedInlineTag
236        let arena = Bump::new();
237        let phpdoc = "/** This is a doc block with an unclosed inline tag {@see Class */";
238        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
239
240        let result = parse_phpdoc_with_span(&arena, phpdoc, span);
241
242        match result {
243            Err(ParseError::UnclosedInlineTag(error_span)) => {
244                let expected_start = phpdoc.find("{@see").unwrap();
245                let expected_span = span.subspan(expected_start as u32, phpdoc.len() as u32 - 3);
246                assert_eq!(error_span, expected_span);
247            }
248            _ => {
249                panic!("Expected ParseError::UnclosedInlineTag");
250            }
251        }
252    }
253
254    #[test]
255    fn test_unclosed_inline_code() {
256        // Test case for ParseError::UnclosedInlineCode
257        let arena = Bump::new();
258        let phpdoc = "/** This is a doc block with unclosed inline code `code sample */";
259        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
260
261        let result = parse_phpdoc_with_span(&arena, phpdoc, span);
262
263        match result {
264            Err(ParseError::UnclosedInlineCode(error_span)) => {
265                let expected_start = phpdoc.find('`').unwrap();
266                let expected_span = span.subspan(expected_start as u32, phpdoc.len() as u32 - 3);
267                assert_eq!(error_span, expected_span);
268            }
269            _ => {
270                panic!("Expected ParseError::UnclosedInlineCode");
271            }
272        }
273    }
274
275    #[test]
276    fn test_unclosed_code_block() {
277        let arena = Bump::new();
278        let phpdoc = r"/**
279            * This is a doc block with unclosed code block
280            * ```
281            * Some code here
282            */";
283        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
284
285        let result = parse_phpdoc_with_span(&arena, phpdoc, span);
286
287        match result {
288            Err(ParseError::UnclosedCodeBlock(error_span)) => {
289                let code_block_start = phpdoc.find("```").unwrap();
290                let expected_span = span.subspan(code_block_start as u32, 109);
291                assert_eq!(error_span, expected_span);
292            }
293            _ => {
294                panic!("Expected ParseError::UnclosedCodeBlock");
295            }
296        }
297    }
298
299    #[test]
300    fn test_invalid_tag_name() {
301        // Test case for ParseError::InvalidTagName — use a character not valid in identifiers
302        let arena = Bump::new();
303        let phpdoc = "/** @invalid!tag Description */";
304        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
305
306        let result = parse_phpdoc_with_span(&arena, phpdoc, span);
307
308        assert!(
309            matches!(result, Err(ParseError::InvalidTagName(_))),
310            "Expected ParseError::InvalidTagName, got {result:?}"
311        );
312    }
313
314    #[test]
315    fn test_underscore_tag_name_is_valid() {
316        let arena = Bump::new();
317        let phpdoc = "/** @some_tag Description */";
318        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
319
320        let document = parse_phpdoc_with_span(&arena, phpdoc, span).expect("Failed to parse PHPDoc");
321        let Element::Tag(tag) = &document.elements[0] else {
322            panic!("Expected Element::Tag");
323        };
324        assert_eq!(tag.name, "some_tag");
325    }
326
327    #[test]
328    fn test_malformed_code_block() {
329        let arena = Bump::new();
330        let phpdoc = r"/**
331            * ```
332            * Some code here
333            * Incorrect closing
334            */";
335        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
336
337        let result = parse_phpdoc_with_span(&arena, phpdoc, span);
338
339        match result {
340            Ok(document) => {
341                panic!("Expected the parser to return an error, got {document:#?}");
342            }
343            Err(ParseError::UnclosedCodeBlock(error_span)) => {
344                let code_block_start = phpdoc.find("```").unwrap();
345                let expected_span = span.subspan(code_block_start as u32, 82);
346                assert_eq!(error_span, expected_span);
347            }
348            _ => {
349                panic!("Expected ParseError::UnclosedCodeBlock");
350            }
351        }
352    }
353
354    #[test]
355    fn test_invalid_comment() {
356        // Test case for ParseError::InvalidComment
357        let arena = Bump::new();
358        let phpdoc = "/* Not a valid doc block */";
359        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
360
361        let result = parse_phpdoc_with_span(&arena, phpdoc, span);
362
363        match result {
364            Err(ParseError::InvalidComment(error_span)) => {
365                assert_eq!(error_span, span);
366            }
367            _ => {
368                panic!("Expected ParseError::InvalidComment");
369            }
370        }
371    }
372
373    #[test]
374    fn test_inconsistent_indentation() {
375        // Test case for ParseError::InconsistentIndentation
376        let arena = Bump::new();
377        let phpdoc = r"/**
378    * This is a doc block
379      * With inconsistent indentation
380    */";
381        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
382
383        let result = parse_phpdoc_with_span(&arena, phpdoc, span);
384
385        match result {
386            Ok(document) => {
387                assert_eq!(document.elements.len(), 1);
388                let Element::Text(text) = &document.elements[0] else {
389                    panic!("Expected Element::Text, got {:?}", document.elements[0]);
390                };
391
392                assert_eq!(text.segments.len(), 1);
393                let TextSegment::Paragraph { span, content } = &text.segments[0] else {
394                    panic!("Expected TextSegment::Paragraph, got {:?}", text.segments[0]);
395                };
396
397                assert_eq!(*content, "This is a doc block\nWith inconsistent indentation");
398                assert_eq!(
399                    &phpdoc[span.start.offset as usize..span.end.offset as usize],
400                    "This is a doc block\n      * With inconsistent indentation"
401                );
402            }
403            _ => {
404                panic!("Expected ParseError::InconsistentIndentation");
405            }
406        }
407    }
408
409    #[test]
410    fn test_missing_asterisk() {
411        let arena = Bump::new();
412        let phpdoc = r"/**
413     This line is missing an asterisk
414     * This line is fine
415     */";
416        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
417
418        let result = parse_phpdoc_with_span(&arena, phpdoc, span);
419
420        match result {
421            Ok(document) => {
422                assert_eq!(document.elements.len(), 1);
423                let Element::Text(text) = &document.elements[0] else {
424                    panic!("Expected Element::Text, got {:?}", document.elements[0]);
425                };
426
427                assert_eq!(text.segments.len(), 1);
428
429                let TextSegment::Paragraph { span, content } = &text.segments[0] else {
430                    panic!("Expected TextSegment::Paragraph, got {:?}", text.segments[0]);
431                };
432
433                assert_eq!(*content, "This line is missing an asterisk\nThis line is fine");
434                assert_eq!(
435                    &phpdoc[span.start.offset as usize..span.end.offset as usize],
436                    "This line is missing an asterisk\n     * This line is fine"
437                );
438            }
439            _ => {
440                panic!("Expected ParseError::MissingAsterisk");
441            }
442        }
443    }
444
445    #[test]
446    fn test_missing_whitespace_after_asterisk() {
447        let arena = Bump::new();
448        let phpdoc = r"/**
449     *This line is missing a space after asterisk
450     */";
451        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
452
453        let result = parse_phpdoc_with_span(&arena, phpdoc, span);
454
455        match result {
456            Ok(document) => {
457                assert_eq!(document.elements.len(), 1);
458                let Element::Text(text) = &document.elements[0] else {
459                    panic!("Expected Element::Text, got {:?}", document.elements[0]);
460                };
461
462                assert_eq!(text.segments.len(), 1);
463                let TextSegment::Paragraph { span, content } = &text.segments[0] else {
464                    panic!("Expected TextSegment::Paragraph, got {:?}", text.segments[0]);
465                };
466
467                assert_eq!(*content, "This line is missing a space after asterisk");
468                assert_eq!(
469                    &phpdoc[span.start.offset as usize..span.end.offset as usize],
470                    "This line is missing a space after asterisk"
471                );
472            }
473            _ => {
474                panic!("Expected ParseError::MissingWhitespaceAfterAsterisk");
475            }
476        }
477    }
478
479    #[test]
480    fn test_missing_whitespace_after_opening_asterisk() {
481        let arena = Bump::new();
482        let phpdoc = "/**This is a doc block without space after /** */";
483        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
484
485        let result = parse_phpdoc_with_span(&arena, phpdoc, span);
486
487        match result {
488            Ok(document) => {
489                assert_eq!(document.elements.len(), 1);
490                let Element::Text(text) = &document.elements[0] else {
491                    panic!("Expected Element::Text, got {:?}", document.elements[0]);
492                };
493
494                assert_eq!(text.segments.len(), 1);
495                let TextSegment::Paragraph { span, content } = &text.segments[0] else {
496                    panic!("Expected TextSegment::Paragraph, got {:?}", text.segments[0]);
497                };
498
499                assert_eq!(*content, "This is a doc block without space after /**");
500                assert_eq!(
501                    &phpdoc[span.start.offset as usize..span.end.offset as usize],
502                    "This is a doc block without space after /**"
503                );
504            }
505            _ => {
506                panic!("Expected ParseError::MissingWhitespaceAfterOpeningAsterisk");
507            }
508        }
509    }
510
511    #[test]
512    fn test_missing_whitespace_before_closing_asterisk() {
513        let arena = Bump::new();
514        let phpdoc = "/** This is a doc block without space before */*/";
515        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
516
517        let result = parse_phpdoc_with_span(&arena, phpdoc, span);
518
519        match result {
520            Ok(document) => {
521                assert_eq!(document.elements.len(), 1);
522                let Element::Text(text) = &document.elements[0] else {
523                    panic!("Expected Element::Text, got {:?}", document.elements[0]);
524                };
525
526                assert_eq!(text.segments.len(), 1);
527                let TextSegment::Paragraph { span, content } = &text.segments[0] else {
528                    panic!("Expected TextSegment::Paragraph, got {:?}", text.segments[0]);
529                };
530
531                assert_eq!(*content, "This is a doc block without space before */");
532                assert_eq!(
533                    &phpdoc[span.start.offset as usize..span.end.offset as usize],
534                    "This is a doc block without space before */"
535                );
536            }
537            _ => {
538                panic!("Expected ParseError::MissingWhitespaceBeforeClosingAsterisk");
539            }
540        }
541    }
542
543    #[test]
544    fn test_utf8_characters() {
545        let arena = Bump::new();
546        let phpdoc = r#"/**
547    * هذا نص باللغة العربية.
548    * 这是一段中文。
549    * Here are some mathematical symbols: ∑, ∆, π, θ.
550    *
551    * ```php
552    * // Arabic comment
553    * echo "مرحبا بالعالم";
554    * // Chinese comment
555    * echo "你好,世界";
556    * // Math symbols in code
557    * $sum = $a + $b; // ∑
558    * ```
559    *
560    * @param string $مثال A parameter with an Arabic variable name.
561    * @return int 返回值是整数类型。
562    */"#;
563
564        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
565        let document = parse_phpdoc_with_span(&arena, phpdoc, span).expect("Failed to parse PHPDoc");
566
567        // Verify the number of elements parsed
568        assert_eq!(document.elements.len(), 6);
569
570        // First text element (Arabic text)
571        let Element::Text(text) = &document.elements[0] else {
572            panic!("Expected Element::Text, got {:?}", document.elements[0]);
573        };
574
575        assert_eq!(text.segments.len(), 1);
576
577        let TextSegment::Paragraph { span, content } = &text.segments[0] else {
578            panic!("Expected TextSegment::Paragraph, got {:?}", text.segments[0]);
579        };
580
581        assert_eq!(*content, "هذا نص باللغة العربية.\n这是一段中文。\nHere are some mathematical symbols: ∑, ∆, π, θ.");
582
583        assert_eq!(
584            &phpdoc[span.start.offset as usize..span.end.offset as usize],
585            "هذا نص باللغة العربية.\n    * 这是一段中文。\n    * Here are some mathematical symbols: ∑, ∆, π, θ."
586        );
587
588        // Empty line
589        let Element::Line(_) = &document.elements[1] else {
590            panic!("Expected Element::Line, got {:?}", document.elements[3]);
591        };
592
593        // Code block
594        let Element::Code(code) = &document.elements[2] else {
595            panic!("Expected Element::Code, got {:?}", document.elements[2]);
596        };
597
598        let content_str = code.content;
599        let expected_code = "// Arabic comment\necho \"مرحبا بالعالم\";\n// Chinese comment\necho \"你好,世界\";\n// Math symbols in code\n$sum = $a + $b; // ∑";
600        assert_eq!(content_str, expected_code);
601        assert_eq!(
602            &phpdoc[code.span.start.offset as usize..code.span.end.offset as usize],
603            "```php\n    * // Arabic comment\n    * echo \"مرحبا بالعالم\";\n    * // Chinese comment\n    * echo \"你好,世界\";\n    * // Math symbols in code\n    * $sum = $a + $b; // ∑\n    * ```"
604        );
605
606        // Empty line
607        let Element::Line(_) = &document.elements[3] else {
608            panic!("Expected Element::Line, got {:?}", document.elements[3]);
609        };
610
611        // @param tag with Arabic variable name
612        let Element::Tag(tag) = &document.elements[4] else {
613            panic!("Expected Element::Tag, got {:?}", document.elements[4]);
614        };
615
616        let name = tag.name;
617        let description = tag.description;
618        assert_eq!(name, "param");
619        assert_eq!(tag.kind, TagKind::Param);
620        assert_eq!(description, "string $مثال A parameter with an Arabic variable name.");
621        assert_eq!(
622            &phpdoc[tag.span.start.offset as usize..tag.span.end.offset as usize],
623            "@param string $مثال A parameter with an Arabic variable name."
624        );
625
626        // @return tag with Chinese description
627        let Element::Tag(tag) = &document.elements[5] else {
628            panic!("Expected Element::Tag, got {:?}", document.elements[5]);
629        };
630
631        let name = tag.name;
632        let description = tag.description;
633        assert_eq!(name, "return");
634        assert_eq!(tag.kind, TagKind::Return);
635        assert_eq!(description, "int 返回值是整数类型。");
636        assert_eq!(
637            &phpdoc[tag.span.start.offset as usize..tag.span.end.offset as usize],
638            "@return int 返回值是整数类型。"
639        );
640    }
641
642    #[test]
643    fn test_annotation_parsing() {
644        let arena = Bump::new();
645        let phpdoc = r#"/**
646         * @Event("Symfony\Component\Workflow\Event\CompletedEvent")
647         * @AnotherAnnotation({
648         *     "key": "value",
649         *     "list": [1, 2, 3]
650         * })
651         * @SimpleAnnotation
652         */"#;
653        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
654        let document = parse_phpdoc_with_span(&arena, phpdoc, span).expect("Failed to parse PHPDoc");
655
656        let Element::Tag(tag) = &document.elements[0] else {
657            panic!("Expected Element::Tag, got {:?}", document.elements[0]);
658        };
659
660        assert_eq!(tag.name, "Event");
661        assert_eq!(tag.metadata.unwrap(), "(\"Symfony\\Component\\Workflow\\Event\\CompletedEvent\")");
662
663        let Element::Tag(tag) = &document.elements[1] else {
664            panic!("Expected Element::Tag, got {:?}", document.elements[1]);
665        };
666        assert_eq!(tag.name, "AnotherAnnotation");
667
668        let last_idx = document.elements.len() - 1;
669        let Element::Tag(tag) = &document.elements[last_idx] else {
670            panic!("Expected Element::Tag, got {:?}", document.elements[last_idx]);
671        };
672        assert_eq!(tag.name, "SimpleAnnotation");
673        assert!(tag.metadata.is_none());
674    }
675
676    #[test]
677    fn test_long_description_with_missing_asterisk() {
678        let arena = Bump::new();
679        let phpdoc = r"/** @var string[] this is a really long description
680            that spans multiple lines, and demonstrates how the parser handles
681            docblocks with multiple descriptions, and missing astricks*/";
682        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
683        let document = parse_phpdoc_with_span(&arena, phpdoc, span).expect("Failed to parse PHPDoc");
684
685        assert_eq!(document.elements.len(), 1);
686        let Element::Tag(tag) = &document.elements[0] else {
687            panic!("Expected Element::Tag, got {:?}", document.elements[0]);
688        };
689
690        let name = tag.name;
691        let description = tag.description;
692        assert_eq!(name, "var");
693        assert_eq!(tag.kind, TagKind::Var);
694        assert_eq!(
695            description,
696            "string[] this is a really long description\nthat spans multiple lines, and demonstrates how the parser handles\ndocblocks with multiple descriptions, and missing astricks"
697        );
698        assert_eq!(
699            &phpdoc[tag.span.start.offset as usize..tag.span.end.offset as usize],
700            "@var string[] this is a really long description\n            that spans multiple lines, and demonstrates how the parser handles\n            docblocks with multiple descriptions, and missing astricks"
701        );
702    }
703
704    #[test]
705    fn test_code_indent_using_non_ascii_chars() {
706        let arena = Bump::new();
707        let phpdoc = r"/**
708        *    └─ comment 2
709        *       └─ comment 4
710        *    └─ comment 3
711        */";
712
713        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
714        let document = parse_phpdoc_with_span(&arena, phpdoc, span).expect("Failed to parse PHPDoc");
715
716        assert_eq!(document.elements.len(), 1);
717
718        let Element::Code(code) = &document.elements[0] else {
719            panic!("Expected Element::Code, got {:?}", document.elements[0]);
720        };
721
722        let content_str = code.content;
723        // After fix: correctly strips indent_len characters (not bytes) from each line
724        assert_eq!(content_str, "└─ comment 2\n\u{a0}\u{a0} └─ comment 4\n└─ comment 3");
725        assert_eq!(
726            &phpdoc[code.span.start.offset as usize..code.span.end.offset as usize],
727            " \u{a0} └─ comment 2\n        *    \u{a0}\u{a0} └─ comment 4\n        *  \u{a0} └─ comment 3"
728        );
729    }
730
731    #[test]
732    fn test_issue_456() {
733        let arena = Bump::new();
734        let phpdoc = "/**
735             * \u{3000}(イベント日数をもとに計算)\u{3000}
736             * @return\u{3000}int
737             * @throws\u{3000}Exception
738             */";
739
740        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
741        let document = parse_phpdoc_with_span(&arena, phpdoc, span).expect("Failed to parse PHPDoc");
742
743        assert_eq!(document.elements.len(), 3);
744
745        let Element::Text(text) = &document.elements[0] else {
746            panic!("Expected Element::Text, got {:?}", document.elements[0]);
747        };
748
749        assert_eq!(text.segments.len(), 1);
750        let TextSegment::Paragraph { span, content } = &text.segments[0] else {
751            panic!("Expected TextSegment::Paragraph, got {:?}", text.segments[0]);
752        };
753
754        assert_eq!(*content, " (イベント日数をもとに計算)");
755        assert_eq!(&phpdoc[span.start.offset as usize..span.end.offset as usize], "\u{3000}(イベント日数をもとに計算)");
756
757        let Element::Tag(tag) = &document.elements[1] else {
758            panic!("Expected Element::Tag, got {:?}", document.elements[1]);
759        };
760
761        let name = tag.name;
762        let description = tag.description;
763        assert_eq!(name, "return");
764        assert_eq!(tag.kind, TagKind::Return);
765        assert_eq!(description, "int");
766        assert_eq!(&phpdoc[tag.span.start.offset as usize..tag.span.end.offset as usize], "@return\u{3000}int");
767
768        let Element::Tag(tag) = &document.elements[2] else {
769            panic!("Expected Element::Tag, got {:?}", document.elements[2]);
770        };
771
772        let name = tag.name;
773        let description = tag.description;
774        assert_eq!(name, "throws");
775        assert_eq!(tag.kind, TagKind::Throws);
776        assert_eq!(description, "Exception");
777        assert_eq!(&phpdoc[tag.span.start.offset as usize..tag.span.end.offset as usize], "@throws\u{3000}Exception");
778    }
779
780    #[test]
781    fn test_issue_808() {
782        let arena = Bump::new();
783
784        let phpdoc = "/** @param\u{3000}string $foo 中文描述 */";
785        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
786        let document = parse_phpdoc_with_span(&arena, phpdoc, span).expect("Failed to parse PHPDoc");
787
788        assert_eq!(document.elements.len(), 1);
789        let Element::Tag(tag) = &document.elements[0] else {
790            panic!("Expected Element::Tag, got {:?}", document.elements[0]);
791        };
792        assert_eq!(tag.name, "param");
793        assert_eq!(tag.description, "string $foo 中文描述");
794
795        let phpdoc2 = "/** @return\u{3000}int 返回🎉值 */";
796        let span2 = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc2.len() as u32));
797        let document2 = parse_phpdoc_with_span(&arena, phpdoc2, span2).expect("Failed to parse PHPDoc");
798
799        assert_eq!(document2.elements.len(), 1);
800        let Element::Tag(tag2) = &document2.elements[0] else {
801            panic!("Expected Element::Tag, got {:?}", document2.elements[0]);
802        };
803        assert_eq!(tag2.name, "return");
804        assert_eq!(tag2.description, "int 返回🎉值");
805
806        let phpdoc3 = "/** @see\u{3000}中文类::方法() 说明 */";
807        let span3 = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc3.len() as u32));
808        let document3 = parse_phpdoc_with_span(&arena, phpdoc3, span3).expect("Failed to parse PHPDoc");
809
810        assert_eq!(document3.elements.len(), 1);
811        let Element::Tag(tag3) = &document3.elements[0] else {
812            panic!("Expected Element::Tag, got {:?}", document3.elements[0]);
813        };
814        assert_eq!(tag3.name, "see");
815        assert_eq!(tag3.description, "中文类::方法() 说明");
816    }
817
818    #[test]
819    fn test_indented_code_with_fullwidth_space_in_indent() {
820        // Test case for multi-byte whitespace in indented code (Issue #967)
821        // parse_indented_code is only called when line starts with ASCII space/tab
822        // The bug occurs when indent contains full-width space after ASCII spaces
823        //
824        // After lexer processing, content becomes "  \u{3000}code"
825        // is_indented_line returns true (starts with ASCII space)
826        // indent_len = 3 (2 ASCII spaces + 1 full-width space char)
827        // But byte offset should be 2 + 3 = 5
828        let arena = Bump::new();
829        // Format: " * " (asterisk + space) + "  " (2 ASCII spaces) + "\u{3000}" (full-width) + "code"
830        let phpdoc = "/**\n *   \u{3000}code\n */";
831        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
832
833        let result = parse_phpdoc_with_span(&arena, phpdoc, span);
834        assert!(result.is_ok(), "Parsing should succeed without panic");
835
836        let document = result.unwrap();
837        assert_eq!(document.elements.len(), 1);
838        let Element::Code(code) = &document.elements[0] else {
839            panic!("Expected Element::Code, got {:?}", document.elements[0]);
840        };
841        assert_eq!(code.content, "code");
842    }
843
844    #[test]
845    fn test_indented_code_with_mixed_multibyte_whitespace() {
846        // Multiple lines with mixed ASCII and full-width whitespace
847        let arena = Bump::new();
848        let phpdoc = "/**\n *  \u{3000}first line\n *  \u{3000}second line\n */";
849        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
850
851        let result = parse_phpdoc_with_span(&arena, phpdoc, span);
852        assert!(result.is_ok(), "Parsing should succeed without panic");
853
854        let document = result.unwrap();
855        assert_eq!(document.elements.len(), 1);
856        let Element::Code(code) = &document.elements[0] else {
857            panic!("Expected Element::Code, got {:?}", document.elements[0]);
858        };
859        assert_eq!(code.content, "first line\nsecond line");
860    }
861
862    #[test]
863    fn test_indented_code_with_tab_and_fullwidth_space() {
864        // Tab + full-width space: is_indented_line checks for '\t' as well
865        let arena = Bump::new();
866        // After "* " there is a tab followed by full-width space
867        let phpdoc = "/**\n * \t\u{3000}code\n */";
868        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
869
870        let result = parse_phpdoc_with_span(&arena, phpdoc, span);
871        assert!(result.is_ok(), "Parsing should succeed without panic");
872
873        let document = result.unwrap();
874        assert_eq!(document.elements.len(), 1);
875        let Element::Code(code) = &document.elements[0] else {
876            panic!("Expected Element::Code, got {:?}", document.elements[0]);
877        };
878        assert_eq!(code.content, "code");
879    }
880
881    #[test]
882    fn test_issue_967_original_pattern() {
883        // Original Issue #967 reproduction case
884        // Error: byte index 3 is not a char boundary; it is inside '\u{3000}' (bytes 1..4) of `   メールクリックがない`
885        // After lexer processing: " " + "\u{3000}" + " " + Japanese text
886        // This triggers parse_indented_code because line starts with ASCII space
887        let arena = Bump::new();
888        // Format: " * " + " " (1 ASCII space) + "\u{3000}" (full-width) + " " (1 ASCII space) + text
889        let phpdoc = "/**\n *  \u{3000} メールクリックがない\n */";
890        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
891
892        let result = parse_phpdoc_with_span(&arena, phpdoc, span);
893        assert!(result.is_ok(), "Parsing should succeed without panic");
894
895        let document = result.unwrap();
896        assert_eq!(document.elements.len(), 1);
897        let Element::Code(code) = &document.elements[0] else {
898            panic!("Expected Element::Code, got {:?}", document.elements[0]);
899        };
900        assert_eq!(code.content, "メールクリックがない");
901    }
902
903    #[test]
904    fn test_multiline_inline_tag() {
905        let arena = Bump::new();
906        let phpdoc = r#"/**
907            * This method gets a count of the Foo.
908            * {@internal Developers should note that it silently
909            *            adds one extra Foo.}
910            *
911            * @return int
912            */"#;
913
914        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
915        let document = parse_phpdoc_with_span(&arena, phpdoc, span).expect("Failed to parse PHPDoc");
916
917        let Element::Text(text) = &document.elements[0] else {
918            panic!("Expected Element::Text, got {:?}", document.elements[0]);
919        };
920
921        assert!(text.segments.len() >= 2, "Expected at least 2 segments, got {:?}", text.segments);
922
923        let has_inline_tag =
924            text.segments.iter().any(|seg| matches!(seg, TextSegment::InlineTag(tag) if tag.name == "internal"));
925
926        assert!(has_inline_tag, "Expected an InlineTag with name 'internal', got segments: {:?}", text.segments);
927    }
928
929    #[test]
930    fn test_multiline_inline_tag_with_nested() {
931        let arena = Bump::new();
932        let phpdoc = r#"/**
933            * {@internal Developers should note that it silently
934            *            adds one extra Foo (see {@link http://example.com}).}
935            */"#;
936
937        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
938        let document = parse_phpdoc_with_span(&arena, phpdoc, span).expect("Failed to parse PHPDoc");
939
940        let Element::Text(text) = &document.elements[0] else {
941            panic!("Expected Element::Text, got {:?}", document.elements[0]);
942        };
943
944        let has_internal_tag =
945            text.segments.iter().any(|seg| matches!(seg, TextSegment::InlineTag(tag) if tag.name == "internal"));
946
947        assert!(has_internal_tag, "Expected an InlineTag with name 'internal', got segments: {:?}", text.segments);
948    }
949
950    #[test]
951    fn test_single_line_inline_tag_still_works() {
952        let arena = Bump::new();
953        let phpdoc = r#"/**
954            * See {@see \Some\Class} for details.
955            */"#;
956
957        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
958        let document = parse_phpdoc_with_span(&arena, phpdoc, span).expect("Failed to parse PHPDoc");
959
960        let Element::Text(text) = &document.elements[0] else {
961            panic!("Expected Element::Text, got {:?}", document.elements[0]);
962        };
963
964        let has_see_tag =
965            text.segments.iter().any(|seg| matches!(seg, TextSegment::InlineTag(tag) if tag.name == "see"));
966
967        assert!(has_see_tag, "Expected an InlineTag with name 'see', got segments: {:?}", text.segments);
968    }
969
970    #[test]
971    fn test_multiline_inline_tag_chinese() {
972        let arena = Bump::new();
973        let phpdoc = r#"/**
974            * 获取用户数量的方法。
975            * {@internal 开发者请注意,此方法会静默地
976            *            添加一个额外的用户。}
977            *
978            * @return int
979            */"#;
980
981        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
982        let document = parse_phpdoc_with_span(&arena, phpdoc, span).expect("Failed to parse Chinese PHPDoc");
983
984        let Element::Text(text) = &document.elements[0] else {
985            panic!("Expected Element::Text, got {:?}", document.elements[0]);
986        };
987
988        let has_internal =
989            text.segments.iter().any(|seg| matches!(seg, TextSegment::InlineTag(tag) if tag.name == "internal"));
990        assert!(has_internal, "Expected InlineTag 'internal' with Chinese content, got: {:?}", text.segments);
991    }
992
993    #[test]
994    fn test_multiline_inline_tag_japanese() {
995        let arena = Bump::new();
996        let phpdoc = r#"/**
997            * ユーザー数を取得するメソッド。
998            * {@see \App\Service\UserCounter このクラスは
999            *       ユーザーの数を数えます。}
1000            */"#;
1001
1002        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
1003        let document = parse_phpdoc_with_span(&arena, phpdoc, span).expect("Failed to parse Japanese PHPDoc");
1004
1005        let Element::Text(text) = &document.elements[0] else {
1006            panic!("Expected Element::Text, got {:?}", document.elements[0]);
1007        };
1008
1009        let has_see = text.segments.iter().any(|seg| matches!(seg, TextSegment::InlineTag(tag) if tag.name == "see"));
1010        assert!(has_see, "Expected InlineTag 'see' with Japanese content, got: {:?}", text.segments);
1011    }
1012
1013    #[test]
1014    fn test_multiline_inline_tag_arabic() {
1015        let arena = Bump::new();
1016        let phpdoc = r#"/**
1017            * طريقة للحصول على عدد المستخدمين.
1018            * {@internal يجب على المطورين ملاحظة أن هذه الطريقة
1019            *            تضيف مستخدمًا إضافيًا بصمت.}
1020            *
1021            * @return int
1022            */"#;
1023
1024        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
1025        let document = parse_phpdoc_with_span(&arena, phpdoc, span).expect("Failed to parse Arabic PHPDoc");
1026
1027        let Element::Text(text) = &document.elements[0] else {
1028            panic!("Expected Element::Text, got {:?}", document.elements[0]);
1029        };
1030
1031        let has_internal =
1032            text.segments.iter().any(|seg| matches!(seg, TextSegment::InlineTag(tag) if tag.name == "internal"));
1033        assert!(has_internal, "Expected InlineTag 'internal' with Arabic content, got: {:?}", text.segments);
1034    }
1035
1036    #[test]
1037    fn test_multiline_inline_tag_mixed_scripts() {
1038        let arena = Bump::new();
1039        let phpdoc = r#"/**
1040            * Documentation with mixed scripts.
1041            * {@internal 注意: This method は静かに adds один
1042            *            дополнительный элемент 요소를 추가합니다.}
1043            */"#;
1044
1045        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
1046        let document = parse_phpdoc_with_span(&arena, phpdoc, span).expect("Failed to parse mixed-script PHPDoc");
1047
1048        let Element::Text(text) = &document.elements[0] else {
1049            panic!("Expected Element::Text, got {:?}", document.elements[0]);
1050        };
1051
1052        let has_internal =
1053            text.segments.iter().any(|seg| matches!(seg, TextSegment::InlineTag(tag) if tag.name == "internal"));
1054        assert!(has_internal, "Expected InlineTag 'internal' with mixed-script content, got: {:?}", text.segments);
1055    }
1056}