Skip to main content

mago_docblock/
lib.rs

1use bumpalo::Bump;
2
3use mago_span::Span;
4use mago_syntax::ast::Trivia;
5use mago_syntax::ast::TriviaKind;
6
7use crate::document::Document;
8use crate::error::ParseError;
9
10mod internal;
11
12pub mod document;
13pub mod error;
14pub mod tag;
15
16/// Parses a docblock from a trivia token.
17///
18/// # Errors
19///
20/// Returns a [`ParseError`] if the trivia is not a docblock comment or parsing fails.
21#[inline]
22pub fn parse_trivia<'arena>(arena: &'arena Bump, trivia: &Trivia<'arena>) -> Result<Document<'arena>, ParseError> {
23    if TriviaKind::DocBlockComment != trivia.kind {
24        return Err(ParseError::InvalidTrivia(trivia.span));
25    }
26
27    parse_phpdoc_with_span(arena, trivia.value, trivia.span)
28}
29
30/// Parses a `PHPDoc` comment string with an associated span.
31///
32/// # Errors
33///
34/// Returns a [`ParseError`] if tokenization or parsing fails.
35#[inline]
36pub fn parse_phpdoc_with_span<'arena>(
37    arena: &'arena Bump,
38    content: &'arena [u8],
39    span: Span,
40) -> Result<Document<'arena>, ParseError> {
41    let tokens = internal::lexer::tokenize(content, span)?;
42
43    internal::parser::parse_document(span, tokens.as_slice(), arena)
44}
45
46#[cfg(test)]
47#[allow(clippy::unwrap_used, clippy::expect_used)]
48mod tests {
49    use super::*;
50
51    use mago_database::file::FileId;
52    use mago_span::HasSpan;
53    use mago_span::Position;
54    use mago_span::Span;
55
56    use crate::document::*;
57
58    #[test]
59    fn test_parse_all_elements() {
60        let arena = Bump::new();
61        let phpdoc = br#"/**
62            * This is a simple description.
63            *
64            * This text contains an inline code `echo "Hello, World!";`.
65            *
66            * This text contains an inline tag {@see \Some\Class}.
67            *
68            * ```php
69            * echo "Hello, World!";
70            * ```
71            *
72            *     $foo = "bar";
73            *     echo "Hello, World!";
74            *
75            * @param string $foo
76            * @param array{
77            *   bar: string,
78            *   baz: int
79            * } $bar
80            * @return void
81            */"#;
82
83        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
84        let document = parse_phpdoc_with_span(&arena, phpdoc, span).expect("Failed to parse PHPDoc");
85        assert_eq!(document.elements.len(), 12);
86
87        let Element::Text(text) = &document.elements[0] else {
88            panic!("Expected Element::Text, got {:?}", document.elements[0]);
89        };
90
91        assert_eq!(text.segments.len(), 1);
92
93        let TextSegment::Paragraph { span, content } = text.segments[0] else {
94            panic!("Expected TextSegment::Paragraph, got {:?}", text.segments[0]);
95        };
96
97        assert_eq!(content, b"This is a simple description." as &[u8]);
98        assert_eq!(&phpdoc[span.start_offset() as usize..span.end_offset() as usize], b"This is a simple description.");
99
100        let Element::Line(_) = &document.elements[1] else {
101            panic!("Expected Element::Line, got {:?}", document.elements[1]);
102        };
103
104        let Element::Text(text) = &document.elements[2] else {
105            panic!("Expected Element::Text, got {:?}", document.elements[2]);
106        };
107
108        assert_eq!(text.segments.len(), 3);
109
110        let TextSegment::Paragraph { content, .. } = text.segments[0] else {
111            panic!("Expected TextSegment::Paragraph, got {:?}", text.segments[0]);
112        };
113
114        assert_eq!(content, b"This text contains an inline code " as &[u8]);
115
116        let TextSegment::InlineCode(code) = &text.segments[1] else {
117            panic!("Expected TextSegment::InlineCode, got {:?}", text.segments[1]);
118        };
119
120        let content = code.content;
121        assert_eq!(content, b"echo \"Hello, World!\";" as &[u8]);
122        assert_eq!(
123            &phpdoc[code.span.start_offset() as usize..code.span.end_offset() as usize],
124            b"`echo \"Hello, World!\";`"
125        );
126
127        let TextSegment::Paragraph { content, .. } = text.segments[2] else {
128            panic!("Expected TextSegment::Paragraph, got {:?}", text.segments[2]);
129        };
130
131        assert_eq!(content, b"." as &[u8]);
132
133        let Element::Line(_) = &document.elements[3] else {
134            panic!("Expected Element::Line, got {:?}", document.elements[3]);
135        };
136
137        let Element::Text(text) = &document.elements[4] else {
138            panic!("Expected Element::Text, got {:?}", document.elements[4]);
139        };
140
141        assert_eq!(text.segments.len(), 3);
142
143        let TextSegment::Paragraph { content, .. } = text.segments[0] else {
144            panic!("Expected TextSegment::Paragraph, got {:?}", text.segments[0]);
145        };
146
147        assert_eq!(content, b"This text contains an inline tag " as &[u8]);
148
149        let TextSegment::InlineTag(tag) = &text.segments[1] else {
150            panic!("Expected TextSegment::InlineTag, got {:?}", text.segments[1]);
151        };
152
153        let name = tag.name;
154        let description = tag.description;
155        assert_eq!(name, b"see" as &[u8]);
156        assert_eq!(description, b"\\Some\\Class" as &[u8]);
157        assert_eq!(tag.kind, TagKind::See);
158        assert_eq!(&phpdoc[tag.span.start_offset() as usize..tag.span.end_offset() as usize], b"{@see \\Some\\Class}");
159
160        let TextSegment::Paragraph { content, .. } = text.segments[2] else {
161            panic!("Expected TextSegment::Paragraph, got {:?}", text.segments[2]);
162        };
163
164        assert_eq!(content, b"." as &[u8]);
165
166        let Element::Line(_) = &document.elements[5] else {
167            panic!("Expected Element::Line, got {:?}", document.elements[5]);
168        };
169
170        let Element::Code(code) = &document.elements[6] else {
171            panic!("Expected Element::CodeBlock, got {:?}", document.elements[6]);
172        };
173
174        let content = code.content;
175        assert_eq!(code.directives.as_slice(), &[b"php" as &[u8]]);
176        assert_eq!(content, b"echo \"Hello, World!\";" as &[u8]);
177        assert_eq!(
178            &phpdoc[code.span.start_offset() as usize..code.span.end_offset() as usize],
179            "```php\n            * echo \"Hello, World!\";\n            * ```".as_bytes()
180        );
181
182        let Element::Line(_) = &document.elements[7] else {
183            panic!("Expected Element::Line, got {:?}", document.elements[7]);
184        };
185
186        let Element::Code(code) = &document.elements[8] else {
187            panic!("Expected Element::CodeBlock, got {:?}", document.elements[8]);
188        };
189
190        let content = code.content;
191        assert!(code.directives.is_empty());
192        assert_eq!(content, b"$foo = \"bar\";\necho \"Hello, World!\";\n" as &[u8]);
193        assert_eq!(
194            &phpdoc[code.span.start_offset() as usize..code.span.end_offset() as usize],
195            "    $foo = \"bar\";\n            *     echo \"Hello, World!\";\n".as_bytes()
196        );
197
198        let Element::Tag(tag) = &document.elements[9] else {
199            panic!("Expected Element::Tag, got {:?}", document.elements[9]);
200        };
201
202        let name = tag.name;
203        let description = tag.description;
204        assert_eq!(name, b"param" as &[u8]);
205        assert_eq!(tag.kind, TagKind::Param);
206        assert_eq!(description, b"string $foo" as &[u8]);
207        assert_eq!(&phpdoc[tag.span.start_offset() as usize..tag.span.end_offset() as usize], b"@param string $foo");
208
209        let Element::Tag(tag) = &document.elements[10] else {
210            panic!("Expected Element::Tag, got {:?}", document.elements[10]);
211        };
212
213        let name = tag.name;
214        let description = tag.description;
215        assert_eq!(name, b"param" as &[u8]);
216        assert_eq!(tag.kind, TagKind::Param);
217        assert_eq!(description, b"array{\n  bar: string,\n  baz: int\n} $bar" as &[u8]);
218        assert_eq!(
219            &phpdoc[tag.span.start_offset() as usize..tag.span.end_offset() as usize],
220            "@param array{\n            *   bar: string,\n            *   baz: int\n            * } $bar".as_bytes()
221        );
222
223        let Element::Tag(tag) = &document.elements[11] else {
224            panic!("Expected Element::Tag, got {:?}", document.elements[11]);
225        };
226
227        let name = tag.name;
228        let description = tag.description;
229        assert_eq!(name, b"return" as &[u8]);
230        assert_eq!(tag.kind, TagKind::Return);
231        assert_eq!(description, b"void" as &[u8]);
232        assert_eq!(&phpdoc[tag.span.start_offset() as usize..tag.span.end_offset() as usize], b"@return void");
233    }
234
235    #[test]
236    fn test_unclosed_inline_tag() {
237        // Test case for ParseError::UnclosedInlineTag
238        let arena = Bump::new();
239        let phpdoc = b"/** This is a doc block with an unclosed inline tag {@see Class */";
240        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
241
242        let result = parse_phpdoc_with_span(&arena, phpdoc, span);
243
244        match result {
245            Err(ParseError::UnclosedInlineTag(error_span)) => {
246                let expected_start = memchr::memmem::find(phpdoc, b"{@see").unwrap();
247                let expected_span = span.subspan(expected_start as u32, phpdoc.len() as u32 - 3);
248                assert_eq!(error_span, expected_span);
249            }
250            _ => {
251                panic!("Expected ParseError::UnclosedInlineTag");
252            }
253        }
254    }
255
256    #[test]
257    fn test_unclosed_inline_code() {
258        // Test case for ParseError::UnclosedInlineCode
259        let arena = Bump::new();
260        let phpdoc = b"/** This is a doc block with unclosed inline code `code sample */";
261        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
262
263        let result = parse_phpdoc_with_span(&arena, phpdoc, span);
264
265        match result {
266            Err(ParseError::UnclosedInlineCode(error_span)) => {
267                let expected_start = memchr::memchr(b'`', phpdoc).unwrap();
268                let expected_span = span.subspan(expected_start as u32, phpdoc.len() as u32 - 3);
269                assert_eq!(error_span, expected_span);
270            }
271            _ => {
272                panic!("Expected ParseError::UnclosedInlineCode");
273            }
274        }
275    }
276
277    #[test]
278    fn test_unclosed_code_block() {
279        let arena = Bump::new();
280        let phpdoc = b"/**
281            * This is a doc block with unclosed code block
282            * ```
283            * Some code here
284            */";
285        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
286
287        let result = parse_phpdoc_with_span(&arena, phpdoc, span);
288
289        match result {
290            Err(ParseError::UnclosedCodeBlock(error_span)) => {
291                let code_block_start = memchr::memmem::find(phpdoc, b"```").unwrap();
292                let expected_span = span.subspan(code_block_start as u32, 109);
293                assert_eq!(error_span, expected_span);
294            }
295            _ => {
296                panic!("Expected ParseError::UnclosedCodeBlock");
297            }
298        }
299    }
300
301    #[test]
302    fn test_invalid_tag_name() {
303        // Test case for ParseError::InvalidTagName — use a character not valid in identifiers
304        let arena = Bump::new();
305        let phpdoc = b"/** @invalid!tag Description */";
306        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
307
308        let result = parse_phpdoc_with_span(&arena, phpdoc, span);
309
310        assert!(
311            matches!(result, Err(ParseError::InvalidTagName(_))),
312            "Expected ParseError::InvalidTagName, got {result:?}"
313        );
314    }
315
316    #[test]
317    fn test_underscore_tag_name_is_valid() {
318        let arena = Bump::new();
319        let phpdoc = b"/** @some_tag Description */";
320        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
321
322        let document = parse_phpdoc_with_span(&arena, phpdoc, span).expect("Failed to parse PHPDoc");
323        let Element::Tag(tag) = &document.elements[0] else {
324            panic!("Expected Element::Tag");
325        };
326        assert_eq!(tag.name, b"some_tag" as &[u8]);
327    }
328
329    #[test]
330    fn test_malformed_code_block() {
331        let arena = Bump::new();
332        let phpdoc = b"/**
333            * ```
334            * Some code here
335            * Incorrect closing
336            */";
337        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
338
339        let result = parse_phpdoc_with_span(&arena, phpdoc, span);
340
341        match result {
342            Ok(document) => {
343                panic!("Expected the parser to return an error, got {document:#?}");
344            }
345            Err(ParseError::UnclosedCodeBlock(error_span)) => {
346                let code_block_start = memchr::memmem::find(phpdoc, b"```").unwrap();
347                let expected_span = span.subspan(code_block_start as u32, 82);
348                assert_eq!(error_span, expected_span);
349            }
350            _ => {
351                panic!("Expected ParseError::UnclosedCodeBlock");
352            }
353        }
354    }
355
356    #[test]
357    fn test_invalid_comment() {
358        // Test case for ParseError::InvalidComment
359        let arena = Bump::new();
360        let phpdoc = b"/* Not a valid doc block */";
361        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
362
363        let result = parse_phpdoc_with_span(&arena, phpdoc, span);
364
365        match result {
366            Err(ParseError::InvalidComment(error_span)) => {
367                assert_eq!(error_span, span);
368            }
369            _ => {
370                panic!("Expected ParseError::InvalidComment");
371            }
372        }
373    }
374
375    #[test]
376    fn test_inconsistent_indentation() {
377        // Test case for ParseError::InconsistentIndentation
378        let arena = Bump::new();
379        let phpdoc = b"/**
380    * This is a doc block
381      * With inconsistent indentation
382    */";
383        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
384
385        let result = parse_phpdoc_with_span(&arena, phpdoc, span);
386
387        match result {
388            Ok(document) => {
389                assert_eq!(document.elements.len(), 1);
390                let Element::Text(text) = &document.elements[0] else {
391                    panic!("Expected Element::Text, got {:?}", document.elements[0]);
392                };
393
394                assert_eq!(text.segments.len(), 1);
395                let TextSegment::Paragraph { span, content } = &text.segments[0] else {
396                    panic!("Expected TextSegment::Paragraph, got {:?}", text.segments[0]);
397                };
398
399                assert_eq!(*content, b"This is a doc block\nWith inconsistent indentation" as &[u8]);
400                assert_eq!(
401                    &phpdoc[span.start_offset() as usize..span.end_offset() as usize],
402                    b"This is a doc block\n      * With inconsistent indentation"
403                );
404            }
405            _ => {
406                panic!("Expected ParseError::InconsistentIndentation");
407            }
408        }
409    }
410
411    #[test]
412    fn test_missing_asterisk() {
413        let arena = Bump::new();
414        let phpdoc = b"/**
415     This line is missing an asterisk
416     * This line is fine
417     */";
418        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
419
420        let result = parse_phpdoc_with_span(&arena, phpdoc, span);
421
422        match result {
423            Ok(document) => {
424                assert_eq!(document.elements.len(), 1);
425                let Element::Text(text) = &document.elements[0] else {
426                    panic!("Expected Element::Text, got {:?}", document.elements[0]);
427                };
428
429                assert_eq!(text.segments.len(), 1);
430
431                let TextSegment::Paragraph { span, content } = &text.segments[0] else {
432                    panic!("Expected TextSegment::Paragraph, got {:?}", text.segments[0]);
433                };
434
435                assert_eq!(*content, b"This line is missing an asterisk\nThis line is fine" as &[u8]);
436                assert_eq!(
437                    &phpdoc[span.start_offset() as usize..span.end_offset() as usize],
438                    b"This line is missing an asterisk\n     * This line is fine"
439                );
440            }
441            _ => {
442                panic!("Expected ParseError::MissingAsterisk");
443            }
444        }
445    }
446
447    #[test]
448    fn test_missing_whitespace_after_asterisk() {
449        let arena = Bump::new();
450        let phpdoc = b"/**
451     *This line is missing a space after asterisk
452     */";
453        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
454
455        let result = parse_phpdoc_with_span(&arena, phpdoc, span);
456
457        match result {
458            Ok(document) => {
459                assert_eq!(document.elements.len(), 1);
460                let Element::Text(text) = &document.elements[0] else {
461                    panic!("Expected Element::Text, got {:?}", document.elements[0]);
462                };
463
464                assert_eq!(text.segments.len(), 1);
465                let TextSegment::Paragraph { span, content } = &text.segments[0] else {
466                    panic!("Expected TextSegment::Paragraph, got {:?}", text.segments[0]);
467                };
468
469                assert_eq!(*content, b"This line is missing a space after asterisk" as &[u8]);
470                assert_eq!(
471                    &phpdoc[span.start_offset() as usize..span.end_offset() as usize],
472                    b"This line is missing a space after asterisk"
473                );
474            }
475            _ => {
476                panic!("Expected ParseError::MissingWhitespaceAfterAsterisk");
477            }
478        }
479    }
480
481    #[test]
482    fn test_missing_whitespace_after_opening_asterisk() {
483        let arena = Bump::new();
484        let phpdoc = b"/**This is a doc block without space after /** */";
485        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
486
487        let result = parse_phpdoc_with_span(&arena, phpdoc, span);
488
489        match result {
490            Ok(document) => {
491                assert_eq!(document.elements.len(), 1);
492                let Element::Text(text) = &document.elements[0] else {
493                    panic!("Expected Element::Text, got {:?}", document.elements[0]);
494                };
495
496                assert_eq!(text.segments.len(), 1);
497                let TextSegment::Paragraph { span, content } = &text.segments[0] else {
498                    panic!("Expected TextSegment::Paragraph, got {:?}", text.segments[0]);
499                };
500
501                assert_eq!(*content, b"This is a doc block without space after /**" as &[u8]);
502                assert_eq!(
503                    &phpdoc[span.start_offset() as usize..span.end_offset() as usize],
504                    b"This is a doc block without space after /**"
505                );
506            }
507            _ => {
508                panic!("Expected ParseError::MissingWhitespaceAfterOpeningAsterisk");
509            }
510        }
511    }
512
513    #[test]
514    fn test_missing_whitespace_before_closing_asterisk() {
515        let arena = Bump::new();
516        let phpdoc = b"/** This is a doc block without space before */*/";
517        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
518
519        let result = parse_phpdoc_with_span(&arena, phpdoc, span);
520
521        match result {
522            Ok(document) => {
523                assert_eq!(document.elements.len(), 1);
524                let Element::Text(text) = &document.elements[0] else {
525                    panic!("Expected Element::Text, got {:?}", document.elements[0]);
526                };
527
528                assert_eq!(text.segments.len(), 1);
529                let TextSegment::Paragraph { span, content } = &text.segments[0] else {
530                    panic!("Expected TextSegment::Paragraph, got {:?}", text.segments[0]);
531                };
532
533                assert_eq!(*content, b"This is a doc block without space before */" as &[u8]);
534                assert_eq!(
535                    &phpdoc[span.start_offset() as usize..span.end_offset() as usize],
536                    b"This is a doc block without space before */"
537                );
538            }
539            _ => {
540                panic!("Expected ParseError::MissingWhitespaceBeforeClosingAsterisk");
541            }
542        }
543    }
544
545    #[test]
546    fn test_utf8_characters() {
547        let arena = Bump::new();
548        let phpdoc = r#"/**
549    * هذا نص باللغة العربية.
550    * 这是一段中文。
551    * Here are some mathematical symbols: ∑, ∆, π, θ.
552    *
553    * ```php
554    * // Arabic comment
555    * echo "مرحبا بالعالم";
556    * // Chinese comment
557    * echo "你好,世界";
558    * // Math symbols in code
559    * $sum = $a + $b; // ∑
560    * ```
561    *
562    * @param string $مثال A parameter with an Arabic variable name.
563    * @return int 返回值是整数类型。
564    */"#
565        .as_bytes();
566
567        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
568        let document = parse_phpdoc_with_span(&arena, phpdoc, span).expect("Failed to parse PHPDoc");
569
570        // Verify the number of elements parsed
571        assert_eq!(document.elements.len(), 6);
572
573        // First text element (Arabic text)
574        let Element::Text(text) = &document.elements[0] else {
575            panic!("Expected Element::Text, got {:?}", document.elements[0]);
576        };
577
578        assert_eq!(text.segments.len(), 1);
579
580        let TextSegment::Paragraph { span, content } = &text.segments[0] else {
581            panic!("Expected TextSegment::Paragraph, got {:?}", text.segments[0]);
582        };
583
584        assert_eq!(
585            *content,
586            "هذا نص باللغة العربية.\n这是一段中文。\nHere are some mathematical symbols: ∑, ∆, π, θ.".as_bytes()
587        );
588
589        assert_eq!(
590            &phpdoc[span.start_offset() as usize..span.end_offset() as usize],
591            "هذا نص باللغة العربية.\n    * 这是一段中文。\n    * Here are some mathematical symbols: ∑, ∆, π, θ."
592                .as_bytes()
593        );
594
595        // Empty line
596        let Element::Line(_) = &document.elements[1] else {
597            panic!("Expected Element::Line, got {:?}", document.elements[3]);
598        };
599
600        // Code block
601        let Element::Code(code) = &document.elements[2] else {
602            panic!("Expected Element::Code, got {:?}", document.elements[2]);
603        };
604
605        let content_str = code.content;
606        let expected_code = "// Arabic comment\necho \"مرحبا بالعالم\";\n// Chinese comment\necho \"你好,世界\";\n// Math symbols in code\n$sum = $a + $b; // ∑".as_bytes();
607        assert_eq!(content_str, expected_code);
608        assert_eq!(
609            &phpdoc[code.span.start_offset() as usize..code.span.end_offset() as usize],
610            "```php\n    * // Arabic comment\n    * echo \"مرحبا بالعالم\";\n    * // Chinese comment\n    * echo \"你好,世界\";\n    * // Math symbols in code\n    * $sum = $a + $b; // ∑\n    * ```".as_bytes()
611        );
612
613        // Empty line
614        let Element::Line(_) = &document.elements[3] else {
615            panic!("Expected Element::Line, got {:?}", document.elements[3]);
616        };
617
618        // @param tag with Arabic variable name
619        let Element::Tag(tag) = &document.elements[4] else {
620            panic!("Expected Element::Tag, got {:?}", document.elements[4]);
621        };
622
623        let name = tag.name;
624        let description = tag.description;
625        assert_eq!(name, b"param" as &[u8]);
626        assert_eq!(tag.kind, TagKind::Param);
627        assert_eq!(description, "string $مثال A parameter with an Arabic variable name.".as_bytes());
628        assert_eq!(
629            &phpdoc[tag.span.start_offset() as usize..tag.span.end_offset() as usize],
630            "@param string $مثال A parameter with an Arabic variable name.".as_bytes()
631        );
632
633        // @return tag with Chinese description
634        let Element::Tag(tag) = &document.elements[5] else {
635            panic!("Expected Element::Tag, got {:?}", document.elements[5]);
636        };
637
638        let name = tag.name;
639        let description = tag.description;
640        assert_eq!(name, b"return" as &[u8]);
641        assert_eq!(tag.kind, TagKind::Return);
642        assert_eq!(description, "int 返回值是整数类型。".as_bytes());
643        assert_eq!(
644            &phpdoc[tag.span.start_offset() as usize..tag.span.end_offset() as usize],
645            "@return int 返回值是整数类型。".as_bytes()
646        );
647    }
648
649    #[test]
650    fn test_annotation_parsing() {
651        let arena = Bump::new();
652        let phpdoc = br#"/**
653         * @Event("Symfony\Component\Workflow\Event\CompletedEvent")
654         * @AnotherAnnotation({
655         *     "key": "value",
656         *     "list": [1, 2, 3]
657         * })
658         * @SimpleAnnotation
659         */"#;
660        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
661        let document = parse_phpdoc_with_span(&arena, phpdoc, span).expect("Failed to parse PHPDoc");
662
663        let Element::Tag(tag) = &document.elements[0] else {
664            panic!("Expected Element::Tag, got {:?}", document.elements[0]);
665        };
666
667        assert_eq!(tag.name, b"Event" as &[u8]);
668        assert_eq!(tag.metadata.unwrap(), b"(\"Symfony\\Component\\Workflow\\Event\\CompletedEvent\")" as &[u8]);
669
670        let Element::Tag(tag) = &document.elements[1] else {
671            panic!("Expected Element::Tag, got {:?}", document.elements[1]);
672        };
673        assert_eq!(tag.name, b"AnotherAnnotation" as &[u8]);
674
675        let last_idx = document.elements.len() - 1;
676        let Element::Tag(tag) = &document.elements[last_idx] else {
677            panic!("Expected Element::Tag, got {:?}", document.elements[last_idx]);
678        };
679        assert_eq!(tag.name, b"SimpleAnnotation" as &[u8]);
680        assert!(tag.metadata.is_none());
681    }
682
683    #[test]
684    fn test_long_description_with_missing_asterisk() {
685        let arena = Bump::new();
686        let phpdoc = b"/** @var string[] this is a really long description
687            that spans multiple lines, and demonstrates how the parser handles
688            docblocks with multiple descriptions, and missing astricks*/";
689        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
690        let document = parse_phpdoc_with_span(&arena, phpdoc, span).expect("Failed to parse PHPDoc");
691
692        assert_eq!(document.elements.len(), 1);
693        let Element::Tag(tag) = &document.elements[0] else {
694            panic!("Expected Element::Tag, got {:?}", document.elements[0]);
695        };
696
697        let name = tag.name;
698        let description = tag.description;
699        assert_eq!(name, b"var" as &[u8]);
700        assert_eq!(tag.kind, TagKind::Var);
701        assert_eq!(
702            description,
703            b"string[] this is a really long description\nthat spans multiple lines, and demonstrates how the parser handles\ndocblocks with multiple descriptions, and missing astricks" as &[u8]
704        );
705        assert_eq!(
706            &phpdoc[tag.span.start_offset() as usize..tag.span.end_offset() as usize],
707            b"@var string[] this is a really long description\n            that spans multiple lines, and demonstrates how the parser handles\n            docblocks with multiple descriptions, and missing astricks"
708        );
709    }
710
711    #[test]
712    fn test_code_indent_using_non_ascii_chars() {
713        let arena = Bump::new();
714        let phpdoc = "/**
715        *    └─ comment 2
716        *       └─ comment 4
717        *    └─ comment 3
718        */"
719        .as_bytes();
720
721        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
722        let document = parse_phpdoc_with_span(&arena, phpdoc, span).expect("Failed to parse PHPDoc");
723
724        assert_eq!(document.elements.len(), 1);
725
726        let Element::Code(code) = &document.elements[0] else {
727            panic!("Expected Element::Code, got {:?}", document.elements[0]);
728        };
729
730        let content_str = code.content;
731        assert_eq!(content_str, "\u{a0} └─ comment 2\n  \u{a0}\u{a0} └─ comment 4\n\u{a0} └─ comment 3".as_bytes());
732        assert_eq!(
733            &phpdoc[code.span.start_offset() as usize..code.span.end_offset() as usize],
734            " \u{a0} └─ comment 2\n        *    \u{a0}\u{a0} └─ comment 4\n        *  \u{a0} └─ comment 3".as_bytes()
735        );
736    }
737
738    #[test]
739    fn test_issue_456() {
740        let arena = Bump::new();
741        let phpdoc = "/**
742             * \u{3000}(イベント日数をもとに計算)\u{3000}
743             * @return\u{3000}int
744             * @throws\u{3000}Exception
745             */"
746        .as_bytes();
747
748        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
749        let document = parse_phpdoc_with_span(&arena, phpdoc, span).expect("Failed to parse PHPDoc");
750
751        assert_eq!(document.elements.len(), 3);
752
753        let Element::Text(text) = &document.elements[0] else {
754            panic!("Expected Element::Text, got {:?}", document.elements[0]);
755        };
756
757        assert_eq!(text.segments.len(), 1);
758        let TextSegment::Paragraph { span, content } = &text.segments[0] else {
759            panic!("Expected TextSegment::Paragraph, got {:?}", text.segments[0]);
760        };
761
762        assert_eq!(*content, "\u{3000}(イベント日数をもとに計算)\u{3000}".as_bytes());
763        assert_eq!(
764            &phpdoc[span.start_offset() as usize..span.end_offset() as usize],
765            "\u{3000}(イベント日数をもとに計算)\u{3000}".as_bytes()
766        );
767
768        let Element::Tag(tag) = &document.elements[1] else {
769            panic!("Expected Element::Tag, got {:?}", document.elements[1]);
770        };
771
772        let name = tag.name;
773        let description = tag.description;
774        assert_eq!(name, "return\u{3000}int".as_bytes());
775        assert_eq!(tag.kind, TagKind::Other);
776        assert_eq!(description, b"" as &[u8]);
777        assert_eq!(
778            &phpdoc[tag.span.start_offset() as usize..tag.span.end_offset() as usize],
779            "@return\u{3000}int".as_bytes()
780        );
781
782        let Element::Tag(tag) = &document.elements[2] else {
783            panic!("Expected Element::Tag, got {:?}", document.elements[2]);
784        };
785
786        let name = tag.name;
787        let description = tag.description;
788        assert_eq!(name, "throws\u{3000}Exception".as_bytes());
789        assert_eq!(tag.kind, TagKind::Other);
790        assert_eq!(description, b"" as &[u8]);
791        assert_eq!(
792            &phpdoc[tag.span.start_offset() as usize..tag.span.end_offset() as usize],
793            "@throws\u{3000}Exception".as_bytes()
794        );
795    }
796
797    #[test]
798    fn test_issue_808() {
799        let arena = Bump::new();
800
801        let phpdoc = "/** @param\u{3000}string $foo 中文描述 */".as_bytes();
802        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
803        let document = parse_phpdoc_with_span(&arena, phpdoc, span).expect("Failed to parse PHPDoc");
804
805        assert_eq!(document.elements.len(), 1);
806        let Element::Tag(tag) = &document.elements[0] else {
807            panic!("Expected Element::Tag, got {:?}", document.elements[0]);
808        };
809        assert_eq!(tag.name, "param\u{3000}string".as_bytes());
810        assert_eq!(tag.description, "$foo 中文描述".as_bytes());
811
812        let phpdoc2 = "/** @return\u{3000}int 返回🎉值 */".as_bytes();
813        let span2 = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc2.len() as u32));
814        let document2 = parse_phpdoc_with_span(&arena, phpdoc2, span2).expect("Failed to parse PHPDoc");
815
816        assert_eq!(document2.elements.len(), 1);
817        let Element::Tag(tag2) = &document2.elements[0] else {
818            panic!("Expected Element::Tag, got {:?}", document2.elements[0]);
819        };
820        assert_eq!(tag2.name, "return\u{3000}int".as_bytes());
821        assert_eq!(tag2.description, "返回🎉值".as_bytes());
822
823        let phpdoc3 = "/** @see\u{3000}中文类::方法() 说明 */".as_bytes();
824        let span3 = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc3.len() as u32));
825        let document3 = parse_phpdoc_with_span(&arena, phpdoc3, span3).expect("Failed to parse PHPDoc");
826
827        assert_eq!(document3.elements.len(), 1);
828        let Element::Tag(tag3) = &document3.elements[0] else {
829            panic!("Expected Element::Tag, got {:?}", document3.elements[0]);
830        };
831        assert_eq!(tag3.name, "see\u{3000}中文类::方法".as_bytes());
832        assert_eq!(tag3.description, "说明".as_bytes());
833    }
834
835    #[test]
836    fn test_indented_code_with_fullwidth_space_in_indent() {
837        // Test case for multi-byte whitespace in indented code (Issue #967)
838        // parse_indented_code is only called when line starts with ASCII space/tab
839        // The bug occurs when indent contains full-width space after ASCII spaces
840        //
841        // After lexer processing, content becomes "  \u{3000}code"
842        // is_indented_line returns true (starts with ASCII space)
843        // indent_len = 3 (2 ASCII spaces + 1 full-width space char)
844        // But byte offset should be 2 + 3 = 5
845        let arena = Bump::new();
846        // Format: " * " (asterisk + space) + "  " (2 ASCII spaces) + "\u{3000}" (full-width) + "code"
847        let phpdoc = "/**\n *   \u{3000}code\n */".as_bytes();
848        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
849
850        let result = parse_phpdoc_with_span(&arena, phpdoc, span);
851        assert!(result.is_ok(), "Parsing should succeed without panic");
852
853        let document = result.unwrap();
854        assert_eq!(document.elements.len(), 1);
855        let Element::Code(code) = &document.elements[0] else {
856            panic!("Expected Element::Code, got {:?}", document.elements[0]);
857        };
858        assert_eq!(code.content, "\u{3000}code".as_bytes());
859    }
860
861    #[test]
862    fn test_indented_code_with_mixed_multibyte_whitespace() {
863        // Multiple lines with mixed ASCII and full-width whitespace
864        let arena = Bump::new();
865        let phpdoc = "/**\n *  \u{3000}first line\n *  \u{3000}second line\n */".as_bytes();
866        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
867
868        let result = parse_phpdoc_with_span(&arena, phpdoc, span);
869        assert!(result.is_ok(), "Parsing should succeed without panic");
870
871        let document = result.unwrap();
872        assert_eq!(document.elements.len(), 1);
873        let Element::Code(code) = &document.elements[0] else {
874            panic!("Expected Element::Code, got {:?}", document.elements[0]);
875        };
876        assert_eq!(code.content, "\u{3000}first line\n\u{3000}second line".as_bytes());
877    }
878
879    #[test]
880    fn test_indented_code_with_tab_and_fullwidth_space() {
881        // Tab + full-width space: is_indented_line checks for '\t' as well
882        let arena = Bump::new();
883        // After "* " there is a tab followed by full-width space
884        let phpdoc = "/**\n * \t\u{3000}code\n */".as_bytes();
885        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
886
887        let result = parse_phpdoc_with_span(&arena, phpdoc, span);
888        assert!(result.is_ok(), "Parsing should succeed without panic");
889
890        let document = result.unwrap();
891        assert_eq!(document.elements.len(), 1);
892        let Element::Code(code) = &document.elements[0] else {
893            panic!("Expected Element::Code, got {:?}", document.elements[0]);
894        };
895        assert_eq!(code.content, "\u{3000}code".as_bytes());
896    }
897
898    #[test]
899    fn test_issue_967_original_pattern() {
900        // Original Issue #967 reproduction case
901        // Error: byte index 3 is not a char boundary; it is inside '\u{3000}' (bytes 1..4) of `   メールクリックがない`
902        // After lexer processing: " " + "\u{3000}" + " " + Japanese text
903        // This triggers parse_indented_code because line starts with ASCII space
904        let arena = Bump::new();
905        // Format: " * " + " " (1 ASCII space) + "\u{3000}" (full-width) + " " (1 ASCII space) + text
906        let phpdoc = "/**\n *  \u{3000} メールクリックがない\n */".as_bytes();
907        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
908
909        let result = parse_phpdoc_with_span(&arena, phpdoc, span);
910        assert!(result.is_ok(), "Parsing should succeed without panic");
911
912        let document = result.unwrap();
913        assert_eq!(document.elements.len(), 1);
914        let Element::Code(code) = &document.elements[0] else {
915            panic!("Expected Element::Code, got {:?}", document.elements[0]);
916        };
917        assert_eq!(code.content, "\u{3000} メールクリックがない".as_bytes());
918    }
919
920    #[test]
921    fn test_multiline_inline_tag() {
922        let arena = Bump::new();
923        let phpdoc = b"/**
924            * This method gets a count of the Foo.
925            * {@internal Developers should note that it silently
926            *            adds one extra Foo.}
927            *
928            * @return int
929            */";
930
931        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
932        let document = parse_phpdoc_with_span(&arena, phpdoc, span).expect("Failed to parse PHPDoc");
933
934        let Element::Text(text) = &document.elements[0] else {
935            panic!("Expected Element::Text, got {:?}", document.elements[0]);
936        };
937
938        assert!(text.segments.len() >= 2, "Expected at least 2 segments, got {:?}", text.segments);
939
940        let has_inline_tag = text
941            .segments
942            .iter()
943            .any(|seg| matches!(seg, TextSegment::InlineTag(tag) if tag.name == b"internal" as &[u8]));
944
945        assert!(has_inline_tag, "Expected an InlineTag with name 'internal', got segments: {:?}", text.segments);
946    }
947
948    #[test]
949    fn test_multiline_inline_tag_with_nested() {
950        let arena = Bump::new();
951        let phpdoc = b"/**
952            * {@internal Developers should note that it silently
953            *            adds one extra Foo (see {@link http://example.com}).}
954            */";
955
956        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
957        let document = parse_phpdoc_with_span(&arena, phpdoc, span).expect("Failed to parse PHPDoc");
958
959        let Element::Text(text) = &document.elements[0] else {
960            panic!("Expected Element::Text, got {:?}", document.elements[0]);
961        };
962
963        let has_internal_tag = text
964            .segments
965            .iter()
966            .any(|seg| matches!(seg, TextSegment::InlineTag(tag) if tag.name == b"internal" as &[u8]));
967
968        assert!(has_internal_tag, "Expected an InlineTag with name 'internal', got segments: {:?}", text.segments);
969    }
970
971    #[test]
972    fn test_single_line_inline_tag_still_works() {
973        let arena = Bump::new();
974        let phpdoc = br#"/**
975            * See {@see \Some\Class} for details.
976            */"#;
977
978        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
979        let document = parse_phpdoc_with_span(&arena, phpdoc, span).expect("Failed to parse PHPDoc");
980
981        let Element::Text(text) = &document.elements[0] else {
982            panic!("Expected Element::Text, got {:?}", document.elements[0]);
983        };
984
985        let has_see_tag =
986            text.segments.iter().any(|seg| matches!(seg, TextSegment::InlineTag(tag) if tag.name == b"see" as &[u8]));
987
988        assert!(has_see_tag, "Expected an InlineTag with name 'see', got segments: {:?}", text.segments);
989    }
990
991    #[test]
992    fn test_multiline_inline_tag_chinese() {
993        let arena = Bump::new();
994        let phpdoc = "/**
995            * 获取用户数量的方法。
996            * {@internal 开发者请注意,此方法会静默地
997            *            添加一个额外的用户。}
998            *
999            * @return int
1000            */"
1001        .as_bytes();
1002
1003        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
1004        let document = parse_phpdoc_with_span(&arena, phpdoc, span).expect("Failed to parse Chinese PHPDoc");
1005
1006        let Element::Text(text) = &document.elements[0] else {
1007            panic!("Expected Element::Text, got {:?}", document.elements[0]);
1008        };
1009
1010        let has_internal = text
1011            .segments
1012            .iter()
1013            .any(|seg| matches!(seg, TextSegment::InlineTag(tag) if tag.name == b"internal" as &[u8]));
1014        assert!(has_internal, "Expected InlineTag 'internal' with Chinese content, got: {:?}", text.segments);
1015    }
1016
1017    #[test]
1018    fn test_multiline_inline_tag_japanese() {
1019        let arena = Bump::new();
1020        let phpdoc = r#"/**
1021            * ユーザー数を取得するメソッド。
1022            * {@see \App\Service\UserCounter このクラスは
1023            *       ユーザーの数を数えます。}
1024            */"#
1025        .as_bytes();
1026
1027        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
1028        let document = parse_phpdoc_with_span(&arena, phpdoc, span).expect("Failed to parse Japanese PHPDoc");
1029
1030        let Element::Text(text) = &document.elements[0] else {
1031            panic!("Expected Element::Text, got {:?}", document.elements[0]);
1032        };
1033
1034        let has_see =
1035            text.segments.iter().any(|seg| matches!(seg, TextSegment::InlineTag(tag) if tag.name == b"see" as &[u8]));
1036        assert!(has_see, "Expected InlineTag 'see' with Japanese content, got: {:?}", text.segments);
1037    }
1038
1039    #[test]
1040    fn test_multiline_inline_tag_arabic() {
1041        let arena = Bump::new();
1042        let phpdoc = "/**
1043            * طريقة للحصول على عدد المستخدمين.
1044            * {@internal يجب على المطورين ملاحظة أن هذه الطريقة
1045            *            تضيف مستخدمًا إضافيًا بصمت.}
1046            *
1047            * @return int
1048            */"
1049        .as_bytes();
1050
1051        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
1052        let document = parse_phpdoc_with_span(&arena, phpdoc, span).expect("Failed to parse Arabic PHPDoc");
1053
1054        let Element::Text(text) = &document.elements[0] else {
1055            panic!("Expected Element::Text, got {:?}", document.elements[0]);
1056        };
1057
1058        let has_internal = text
1059            .segments
1060            .iter()
1061            .any(|seg| matches!(seg, TextSegment::InlineTag(tag) if tag.name == b"internal" as &[u8]));
1062        assert!(has_internal, "Expected InlineTag 'internal' with Arabic content, got: {:?}", text.segments);
1063    }
1064
1065    #[test]
1066    fn test_multiline_inline_tag_mixed_scripts() {
1067        let arena = Bump::new();
1068        let phpdoc = "/**
1069            * Documentation with mixed scripts.
1070            * {@internal 注意: This method は静かに adds один
1071            *            дополнительный элемент 요소를 추가합니다.}
1072            */"
1073        .as_bytes();
1074
1075        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
1076        let document = parse_phpdoc_with_span(&arena, phpdoc, span).expect("Failed to parse mixed-script PHPDoc");
1077
1078        let Element::Text(text) = &document.elements[0] else {
1079            panic!("Expected Element::Text, got {:?}", document.elements[0]);
1080        };
1081
1082        let has_internal = text
1083            .segments
1084            .iter()
1085            .any(|seg| matches!(seg, TextSegment::InlineTag(tag) if tag.name == b"internal" as &[u8]));
1086        assert!(has_internal, "Expected InlineTag 'internal' with mixed-script content, got: {:?}", text.segments);
1087    }
1088}