1use bumpalo::Bump;
2
3use mago_span::Span;
4use mago_syntax::ast::Trivia;
5use mago_syntax::ast::TriviaKind;
6
7use crate::document::Document;
8use crate::error::ParseError;
9
10mod internal;
11
12pub mod document;
13pub mod error;
14pub mod tag;
15
16#[inline]
22pub fn parse_trivia<'arena>(arena: &'arena Bump, trivia: &Trivia<'arena>) -> Result<Document<'arena>, ParseError> {
23 if TriviaKind::DocBlockComment != trivia.kind {
24 return Err(ParseError::InvalidTrivia(trivia.span));
25 }
26
27 parse_phpdoc_with_span(arena, trivia.value, trivia.span)
28}
29
30#[inline]
36pub fn parse_phpdoc_with_span<'arena>(
37 arena: &'arena Bump,
38 content: &'arena [u8],
39 span: Span,
40) -> Result<Document<'arena>, ParseError> {
41 let tokens = internal::lexer::tokenize(content, span)?;
42
43 internal::parser::parse_document(span, tokens.as_slice(), arena)
44}
45
46#[cfg(test)]
47#[allow(clippy::unwrap_used, clippy::expect_used)]
48mod tests {
49 use super::*;
50
51 use mago_database::file::FileId;
52 use mago_span::HasSpan;
53 use mago_span::Position;
54 use mago_span::Span;
55
56 use crate::document::*;
57
#[test]
fn test_parse_all_elements() {
    // Parses one doc block mixing descriptions, inline code, an inline tag,
    // code blocks and tags, then checks each of the 12 resulting elements in
    // order: its variant, its content, and the source bytes its span covers.
    let arena = Bump::new();
    let phpdoc = br#"/**
 * This is a simple description.
 *
 * This text contains an inline code `echo "Hello, World!";`.
 *
 * This text contains an inline tag {@see \Some\Class}.
 *
 * ```php
 * echo "Hello, World!";
 * ```
 *
 * $foo = "bar";
 * echo "Hello, World!";
 *
 * @param string $foo
 * @param array{
 * bar: string,
 * baz: int
 * } $bar
 * @return void
 */"#;

    // The span covers the whole input, starting at offset 0.
    let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
    let document = parse_phpdoc_with_span(&arena, phpdoc, span).expect("Failed to parse PHPDoc");
    assert_eq!(document.elements.len(), 12);

    // Element 0: plain description made of a single paragraph segment.
    let Element::Text(text) = &document.elements[0] else {
        panic!("Expected Element::Text, got {:?}", document.elements[0]);
    };

    assert_eq!(text.segments.len(), 1);

    let TextSegment::Paragraph { span, content } = text.segments[0] else {
        panic!("Expected TextSegment::Paragraph, got {:?}", text.segments[0]);
    };

    assert_eq!(content, b"This is a simple description." as &[u8]);
    // Slicing the input by the segment span must give back the same text.
    assert_eq!(&phpdoc[span.start_offset() as usize..span.end_offset() as usize], b"This is a simple description.");

    // Element 1: a line element between the two descriptions.
    let Element::Line(_) = &document.elements[1] else {
        panic!("Expected Element::Line, got {:?}", document.elements[1]);
    };

    // Element 2: text split into paragraph / inline code / paragraph.
    let Element::Text(text) = &document.elements[2] else {
        panic!("Expected Element::Text, got {:?}", document.elements[2]);
    };

    assert_eq!(text.segments.len(), 3);

    let TextSegment::Paragraph { content, .. } = text.segments[0] else {
        panic!("Expected TextSegment::Paragraph, got {:?}", text.segments[0]);
    };

    assert_eq!(content, b"This text contains an inline code " as &[u8]);

    let TextSegment::InlineCode(code) = &text.segments[1] else {
        panic!("Expected TextSegment::InlineCode, got {:?}", text.segments[1]);
    };

    // The content excludes the backticks, while the span includes them.
    let content = code.content;
    assert_eq!(content, b"echo \"Hello, World!\";" as &[u8]);
    assert_eq!(
        &phpdoc[code.span.start_offset() as usize..code.span.end_offset() as usize],
        b"`echo \"Hello, World!\";`"
    );

    let TextSegment::Paragraph { content, .. } = text.segments[2] else {
        panic!("Expected TextSegment::Paragraph, got {:?}", text.segments[2]);
    };

    assert_eq!(content, b"." as &[u8]);

    // Element 3: line element.
    let Element::Line(_) = &document.elements[3] else {
        panic!("Expected Element::Line, got {:?}", document.elements[3]);
    };

    // Element 4: text split into paragraph / inline tag / paragraph.
    let Element::Text(text) = &document.elements[4] else {
        panic!("Expected Element::Text, got {:?}", document.elements[4]);
    };

    assert_eq!(text.segments.len(), 3);

    let TextSegment::Paragraph { content, .. } = text.segments[0] else {
        panic!("Expected TextSegment::Paragraph, got {:?}", text.segments[0]);
    };

    assert_eq!(content, b"This text contains an inline tag " as &[u8]);

    let TextSegment::InlineTag(tag) = &text.segments[1] else {
        panic!("Expected TextSegment::InlineTag, got {:?}", text.segments[1]);
    };

    // Name and description exclude the `{@` / `}` delimiters; the span includes them.
    let name = tag.name;
    let description = tag.description;
    assert_eq!(name, b"see" as &[u8]);
    assert_eq!(description, b"\\Some\\Class" as &[u8]);
    assert_eq!(tag.kind, TagKind::See);
    assert_eq!(&phpdoc[tag.span.start_offset() as usize..tag.span.end_offset() as usize], b"{@see \\Some\\Class}");

    let TextSegment::Paragraph { content, .. } = text.segments[2] else {
        panic!("Expected TextSegment::Paragraph, got {:?}", text.segments[2]);
    };

    assert_eq!(content, b"." as &[u8]);

    // Element 5: line element.
    let Element::Line(_) = &document.elements[5] else {
        panic!("Expected Element::Line, got {:?}", document.elements[5]);
    };

    // Element 6: fenced code block carrying the `php` directive.
    let Element::Code(code) = &document.elements[6] else {
        panic!("Expected Element::CodeBlock, got {:?}", document.elements[6]);
    };

    let content = code.content;
    assert_eq!(code.directives.as_slice(), &[b"php" as &[u8]]);
    assert_eq!(content, b"echo \"Hello, World!\";" as &[u8]);
    // The span runs from the opening fence to the closing fence.
    assert_eq!(
        &phpdoc[code.span.start_offset() as usize..code.span.end_offset() as usize],
        "```php\n * echo \"Hello, World!\";\n * ```".as_bytes()
    );

    // Element 7: line element.
    let Element::Line(_) = &document.elements[7] else {
        panic!("Expected Element::Line, got {:?}", document.elements[7]);
    };

    // Element 8: code element without directives (no fence in the input).
    let Element::Code(code) = &document.elements[8] else {
        panic!("Expected Element::CodeBlock, got {:?}", document.elements[8]);
    };

    let content = code.content;
    assert!(code.directives.is_empty());
    assert_eq!(content, b"$foo = \"bar\";\necho \"Hello, World!\";\n" as &[u8]);
    assert_eq!(
        &phpdoc[code.span.start_offset() as usize..code.span.end_offset() as usize],
        " $foo = \"bar\";\n * echo \"Hello, World!\";\n".as_bytes()
    );

    // Element 9: single-line `@param` tag.
    let Element::Tag(tag) = &document.elements[9] else {
        panic!("Expected Element::Tag, got {:?}", document.elements[9]);
    };

    let name = tag.name;
    let description = tag.description;
    assert_eq!(name, b"param" as &[u8]);
    assert_eq!(tag.kind, TagKind::Param);
    assert_eq!(description, b"string $foo" as &[u8]);
    assert_eq!(&phpdoc[tag.span.start_offset() as usize..tag.span.end_offset() as usize], b"@param string $foo");

    // Element 10: `@param` tag whose description spans several lines; the
    // description has the ` * ` line prefixes removed, the span keeps them.
    let Element::Tag(tag) = &document.elements[10] else {
        panic!("Expected Element::Tag, got {:?}", document.elements[10]);
    };

    let name = tag.name;
    let description = tag.description;
    assert_eq!(name, b"param" as &[u8]);
    assert_eq!(tag.kind, TagKind::Param);
    assert_eq!(description, b"array{\n bar: string,\n baz: int\n} $bar" as &[u8]);
    assert_eq!(
        &phpdoc[tag.span.start_offset() as usize..tag.span.end_offset() as usize],
        "@param array{\n * bar: string,\n * baz: int\n * } $bar".as_bytes()
    );

    // Element 11: `@return` tag.
    let Element::Tag(tag) = &document.elements[11] else {
        panic!("Expected Element::Tag, got {:?}", document.elements[11]);
    };

    let name = tag.name;
    let description = tag.description;
    assert_eq!(name, b"return" as &[u8]);
    assert_eq!(tag.kind, TagKind::Return);
    assert_eq!(description, b"void" as &[u8]);
    assert_eq!(&phpdoc[tag.span.start_offset() as usize..tag.span.end_offset() as usize], b"@return void");
}
234
#[test]
fn test_unclosed_inline_tag() {
    // An inline `{@see ...` tag that is never closed must be rejected.
    let arena = Bump::new();
    let phpdoc = b"/** This is a doc block with an unclosed inline tag {@see Class */";
    let doc_end = Position::new(phpdoc.len() as u32);
    let span = Span::new(FileId::zero(), Position::new(0), doc_end);

    let Err(ParseError::UnclosedInlineTag(error_span)) = parse_phpdoc_with_span(&arena, phpdoc, span) else {
        panic!("Expected ParseError::UnclosedInlineTag");
    };

    // The reported span starts at `{@see` and ends 3 bytes before the end of the input.
    let tag_offset = memchr::memmem::find(phpdoc, b"{@see").unwrap();
    assert_eq!(error_span, span.subspan(tag_offset as u32, phpdoc.len() as u32 - 3));
}
255
#[test]
fn test_unclosed_inline_code() {
    // A backtick that opens inline code without a closing backtick must be rejected.
    let arena = Bump::new();
    let phpdoc = b"/** This is a doc block with unclosed inline code `code sample */";
    let doc_end = Position::new(phpdoc.len() as u32);
    let span = Span::new(FileId::zero(), Position::new(0), doc_end);

    let Err(ParseError::UnclosedInlineCode(error_span)) = parse_phpdoc_with_span(&arena, phpdoc, span) else {
        panic!("Expected ParseError::UnclosedInlineCode");
    };

    // The reported span starts at the backtick and ends 3 bytes before the end of the input.
    let backtick_offset = memchr::memchr(b'`', phpdoc).unwrap();
    assert_eq!(error_span, span.subspan(backtick_offset as u32, phpdoc.len() as u32 - 3));
}
276
#[test]
fn test_unclosed_code_block() {
    // A code fence that is opened but never closed must be rejected.
    let arena = Bump::new();
    let phpdoc = b"/**
 * This is a doc block with unclosed code block
 * ```
 * Some code here
 */";
    let doc_end = Position::new(phpdoc.len() as u32);
    let span = Span::new(FileId::zero(), Position::new(0), doc_end);

    let Err(ParseError::UnclosedCodeBlock(error_span)) = parse_phpdoc_with_span(&arena, phpdoc, span) else {
        panic!("Expected ParseError::UnclosedCodeBlock");
    };

    // The reported span starts at the opening fence; the end offset is a fixed expectation.
    let fence_offset = memchr::memmem::find(phpdoc, b"```").unwrap();
    assert_eq!(error_span, span.subspan(fence_offset as u32, 109));
}
300
#[test]
fn test_invalid_tag_name() {
    // A tag name containing `!` must be reported as an invalid tag name.
    let arena = Bump::new();
    let phpdoc = b"/** @invalid!tag Description */";
    let doc_end = Position::new(phpdoc.len() as u32);
    let span = Span::new(FileId::zero(), Position::new(0), doc_end);

    let result = parse_phpdoc_with_span(&arena, phpdoc, span);
    let is_invalid_tag_name = matches!(result, Err(ParseError::InvalidTagName(_)));

    assert!(is_invalid_tag_name, "Expected ParseError::InvalidTagName, got {result:?}");
}
315
#[test]
fn test_underscore_tag_name_is_valid() {
    // Underscores are accepted inside a tag name.
    let arena = Bump::new();
    let phpdoc = b"/** @some_tag Description */";
    let doc_end = Position::new(phpdoc.len() as u32);
    let span = Span::new(FileId::zero(), Position::new(0), doc_end);

    let document = parse_phpdoc_with_span(&arena, phpdoc, span).expect("Failed to parse PHPDoc");

    match &document.elements[0] {
        Element::Tag(tag) => assert_eq!(tag.name, b"some_tag" as &[u8]),
        _ => panic!("Expected Element::Tag"),
    }
}
328
#[test]
fn test_malformed_code_block() {
    // A fence followed by text but no closing fence must yield `UnclosedCodeBlock`.
    let arena = Bump::new();
    let phpdoc = b"/**
 * ```
 * Some code here
 * Incorrect closing
 */";
    let doc_end = Position::new(phpdoc.len() as u32);
    let span = Span::new(FileId::zero(), Position::new(0), doc_end);

    let error_span = match parse_phpdoc_with_span(&arena, phpdoc, span) {
        Err(ParseError::UnclosedCodeBlock(error_span)) => error_span,
        Ok(document) => panic!("Expected the parser to return an error, got {document:#?}"),
        Err(_) => panic!("Expected ParseError::UnclosedCodeBlock"),
    };

    // The reported span starts at the opening fence; the end offset is a fixed expectation.
    let fence_offset = memchr::memmem::find(phpdoc, b"```").unwrap();
    assert_eq!(error_span, span.subspan(fence_offset as u32, 82));
}
355
#[test]
fn test_invalid_comment() {
    // A regular `/* ... */` comment is not a doc block; the error must carry the full input span.
    let arena = Bump::new();
    let phpdoc = b"/* Not a valid doc block */";
    let doc_end = Position::new(phpdoc.len() as u32);
    let span = Span::new(FileId::zero(), Position::new(0), doc_end);

    let Err(ParseError::InvalidComment(error_span)) = parse_phpdoc_with_span(&arena, phpdoc, span) else {
        panic!("Expected ParseError::InvalidComment");
    };

    assert_eq!(error_span, span);
}
374
#[test]
fn test_inconsistent_indentation() {
    // Lines with differing indentation are tolerated and merged into one paragraph.
    let arena = Bump::new();
    let phpdoc = b"/**
 * This is a doc block
 * With inconsistent indentation
 */";
    let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));

    // Parsing is expected to succeed. On failure, `expect` prints the actual
    // error rather than claiming that a `ParseError` was the expected outcome.
    let document = parse_phpdoc_with_span(&arena, phpdoc, span)
        .expect("Expected a doc block with inconsistent indentation to parse successfully");

    assert_eq!(document.elements.len(), 1);
    let Element::Text(text) = &document.elements[0] else {
        panic!("Expected Element::Text, got {:?}", document.elements[0]);
    };

    assert_eq!(text.segments.len(), 1);
    let TextSegment::Paragraph { span, content } = &text.segments[0] else {
        panic!("Expected TextSegment::Paragraph, got {:?}", text.segments[0]);
    };

    // The content has the line prefix removed; the span covers the raw source bytes.
    assert_eq!(*content, b"This is a doc block\nWith inconsistent indentation" as &[u8]);
    assert_eq!(
        &phpdoc[span.start_offset() as usize..span.end_offset() as usize],
        b"This is a doc block\n * With inconsistent indentation"
    );
}
410
#[test]
fn test_missing_asterisk() {
    // A line without the leading asterisk is tolerated and joined with the next line.
    let arena = Bump::new();
    let phpdoc = b"/**
 This line is missing an asterisk
 * This line is fine
 */";
    let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));

    // Parsing is expected to succeed. On failure, `expect` prints the actual
    // error rather than claiming that a `ParseError` was the expected outcome.
    let document = parse_phpdoc_with_span(&arena, phpdoc, span)
        .expect("Expected a doc block with a missing asterisk to parse successfully");

    assert_eq!(document.elements.len(), 1);
    let Element::Text(text) = &document.elements[0] else {
        panic!("Expected Element::Text, got {:?}", document.elements[0]);
    };

    assert_eq!(text.segments.len(), 1);

    let TextSegment::Paragraph { span, content } = &text.segments[0] else {
        panic!("Expected TextSegment::Paragraph, got {:?}", text.segments[0]);
    };

    // The content has the line prefix removed; the span covers the raw source bytes.
    assert_eq!(*content, b"This line is missing an asterisk\nThis line is fine" as &[u8]);
    assert_eq!(
        &phpdoc[span.start_offset() as usize..span.end_offset() as usize],
        b"This line is missing an asterisk\n * This line is fine"
    );
}
446
#[test]
fn test_missing_whitespace_after_asterisk() {
    // Text that directly follows the leading asterisk (no space) is still parsed as a paragraph.
    let arena = Bump::new();
    let phpdoc = b"/**
 *This line is missing a space after asterisk
 */";
    let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));

    // Parsing is expected to succeed. On failure, `expect` prints the actual
    // error rather than claiming that a `ParseError` was the expected outcome.
    let document = parse_phpdoc_with_span(&arena, phpdoc, span)
        .expect("Expected a doc block without whitespace after the asterisk to parse successfully");

    assert_eq!(document.elements.len(), 1);
    let Element::Text(text) = &document.elements[0] else {
        panic!("Expected Element::Text, got {:?}", document.elements[0]);
    };

    assert_eq!(text.segments.len(), 1);
    let TextSegment::Paragraph { span, content } = &text.segments[0] else {
        panic!("Expected TextSegment::Paragraph, got {:?}", text.segments[0]);
    };

    assert_eq!(*content, b"This line is missing a space after asterisk" as &[u8]);
    assert_eq!(
        &phpdoc[span.start_offset() as usize..span.end_offset() as usize],
        b"This line is missing a space after asterisk"
    );
}
480
#[test]
fn test_missing_whitespace_after_opening_asterisk() {
    // Text that directly follows the opening `/**` is still parsed as a paragraph.
    let arena = Bump::new();
    let phpdoc = b"/**This is a doc block without space after /** */";
    let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));

    // Parsing is expected to succeed. On failure, `expect` prints the actual
    // error rather than claiming that a `ParseError` was the expected outcome.
    let document = parse_phpdoc_with_span(&arena, phpdoc, span)
        .expect("Expected a doc block without whitespace after the opening `/**` to parse successfully");

    assert_eq!(document.elements.len(), 1);
    let Element::Text(text) = &document.elements[0] else {
        panic!("Expected Element::Text, got {:?}", document.elements[0]);
    };

    assert_eq!(text.segments.len(), 1);
    let TextSegment::Paragraph { span, content } = &text.segments[0] else {
        panic!("Expected TextSegment::Paragraph, got {:?}", text.segments[0]);
    };

    assert_eq!(*content, b"This is a doc block without space after /**" as &[u8]);
    assert_eq!(
        &phpdoc[span.start_offset() as usize..span.end_offset() as usize],
        b"This is a doc block without space after /**"
    );
}
512
#[test]
fn test_missing_whitespace_before_closing_asterisk() {
    // Text that runs right up to the closing `*/` is still parsed as a paragraph.
    let arena = Bump::new();
    let phpdoc = b"/** This is a doc block without space before */*/";
    let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));

    // Parsing is expected to succeed. On failure, `expect` prints the actual
    // error rather than claiming that a `ParseError` was the expected outcome.
    let document = parse_phpdoc_with_span(&arena, phpdoc, span)
        .expect("Expected a doc block without whitespace before the closing `*/` to parse successfully");

    assert_eq!(document.elements.len(), 1);
    let Element::Text(text) = &document.elements[0] else {
        panic!("Expected Element::Text, got {:?}", document.elements[0]);
    };

    assert_eq!(text.segments.len(), 1);
    let TextSegment::Paragraph { span, content } = &text.segments[0] else {
        panic!("Expected TextSegment::Paragraph, got {:?}", text.segments[0]);
    };

    assert_eq!(*content, b"This is a doc block without space before */" as &[u8]);
    assert_eq!(
        &phpdoc[span.start_offset() as usize..span.end_offset() as usize],
        b"This is a doc block without space before */"
    );
}
544
#[test]
fn test_utf8_characters() {
    // Exercises multi-byte UTF-8 text (Arabic, Chinese, math symbols) in
    // descriptions, code blocks and tag descriptions, checking both the
    // extracted content and the byte spans into the original input.
    let arena = Bump::new();
    let phpdoc = r#"/**
 * هذا نص باللغة العربية.
 * 这是一段中文。
 * Here are some mathematical symbols: ∑, ∆, π, θ.
 *
 * ```php
 * // Arabic comment
 * echo "مرحبا بالعالم";
 * // Chinese comment
 * echo "你好,世界";
 * // Math symbols in code
 * $sum = $a + $b; // ∑
 * ```
 *
 * @param string $مثال A parameter with an Arabic variable name.
 * @return int 返回值是整数类型。
 */"#
    .as_bytes();

    let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
    let document = parse_phpdoc_with_span(&arena, phpdoc, span).expect("Failed to parse PHPDoc");

    assert_eq!(document.elements.len(), 6);

    // Element 0: the three description lines merged into one paragraph.
    let Element::Text(text) = &document.elements[0] else {
        panic!("Expected Element::Text, got {:?}", document.elements[0]);
    };

    assert_eq!(text.segments.len(), 1);

    let TextSegment::Paragraph { span, content } = &text.segments[0] else {
        panic!("Expected TextSegment::Paragraph, got {:?}", text.segments[0]);
    };

    assert_eq!(
        *content,
        "هذا نص باللغة العربية.\n这是一段中文。\nHere are some mathematical symbols: ∑, ∆, π, θ.".as_bytes()
    );

    assert_eq!(
        &phpdoc[span.start_offset() as usize..span.end_offset() as usize],
        "هذا نص باللغة العربية.\n * 这是一段中文。\n * Here are some mathematical symbols: ∑, ∆, π, θ."
            .as_bytes()
    );

    // Element 1: line element. The diagnostic prints the element actually
    // being checked (index 1), not an unrelated one.
    let Element::Line(_) = &document.elements[1] else {
        panic!("Expected Element::Line, got {:?}", document.elements[1]);
    };

    // Element 2: fenced code block containing multi-byte characters.
    let Element::Code(code) = &document.elements[2] else {
        panic!("Expected Element::Code, got {:?}", document.elements[2]);
    };

    let content_str = code.content;
    let expected_code = "// Arabic comment\necho \"مرحبا بالعالم\";\n// Chinese comment\necho \"你好,世界\";\n// Math symbols in code\n$sum = $a + $b; // ∑".as_bytes();
    assert_eq!(content_str, expected_code);
    assert_eq!(
        &phpdoc[code.span.start_offset() as usize..code.span.end_offset() as usize],
        "```php\n * // Arabic comment\n * echo \"مرحبا بالعالم\";\n * // Chinese comment\n * echo \"你好,世界\";\n * // Math symbols in code\n * $sum = $a + $b; // ∑\n * ```".as_bytes()
    );

    // Element 3: line element.
    let Element::Line(_) = &document.elements[3] else {
        panic!("Expected Element::Line, got {:?}", document.elements[3]);
    };

    // Element 4: `@param` tag with an Arabic variable name.
    let Element::Tag(tag) = &document.elements[4] else {
        panic!("Expected Element::Tag, got {:?}", document.elements[4]);
    };

    let name = tag.name;
    let description = tag.description;
    assert_eq!(name, b"param" as &[u8]);
    assert_eq!(tag.kind, TagKind::Param);
    assert_eq!(description, "string $مثال A parameter with an Arabic variable name.".as_bytes());
    assert_eq!(
        &phpdoc[tag.span.start_offset() as usize..tag.span.end_offset() as usize],
        "@param string $مثال A parameter with an Arabic variable name.".as_bytes()
    );

    // Element 5: `@return` tag with a Chinese description.
    let Element::Tag(tag) = &document.elements[5] else {
        panic!("Expected Element::Tag, got {:?}", document.elements[5]);
    };

    let name = tag.name;
    let description = tag.description;
    assert_eq!(name, b"return" as &[u8]);
    assert_eq!(tag.kind, TagKind::Return);
    assert_eq!(description, "int 返回值是整数类型。".as_bytes());
    assert_eq!(
        &phpdoc[tag.span.start_offset() as usize..tag.span.end_offset() as usize],
        "@return int 返回值是整数类型。".as_bytes()
    );
}
648
#[test]
fn test_annotation_parsing() {
    // Annotation-style tags: one with inline metadata, one with multi-line
    // metadata, and one bare annotation.
    let arena = Bump::new();
    let phpdoc = br#"/**
 * @Event("Symfony\Component\Workflow\Event\CompletedEvent")
 * @AnotherAnnotation({
 * "key": "value",
 * "list": [1, 2, 3]
 * })
 * @SimpleAnnotation
 */"#;
    let doc_end = Position::new(phpdoc.len() as u32);
    let span = Span::new(FileId::zero(), Position::new(0), doc_end);
    let document = parse_phpdoc_with_span(&arena, phpdoc, span).expect("Failed to parse PHPDoc");

    // First element: `@Event` with its parenthesized metadata.
    match &document.elements[0] {
        Element::Tag(tag) => {
            assert_eq!(tag.name, b"Event" as &[u8]);
            assert_eq!(
                tag.metadata.unwrap(),
                b"(\"Symfony\\Component\\Workflow\\Event\\CompletedEvent\")" as &[u8]
            );
        }
        other => panic!("Expected Element::Tag, got {:?}", other),
    }

    // Second element: only the name is checked.
    match &document.elements[1] {
        Element::Tag(tag) => assert_eq!(tag.name, b"AnotherAnnotation" as &[u8]),
        other => panic!("Expected Element::Tag, got {:?}", other),
    }

    // Last element: a bare annotation without metadata.
    let final_index = document.elements.len() - 1;
    match &document.elements[final_index] {
        Element::Tag(tag) => {
            assert_eq!(tag.name, b"SimpleAnnotation" as &[u8]);
            assert!(tag.metadata.is_none());
        }
        other => panic!("Expected Element::Tag, got {:?}", other),
    }
}
682
#[test]
fn test_long_description_with_missing_asterisk() {
    // A `@var` tag whose description continues over lines that have no leading asterisk.
    let arena = Bump::new();
    let phpdoc = b"/** @var string[] this is a really long description
 that spans multiple lines, and demonstrates how the parser handles
 docblocks with multiple descriptions, and missing astricks*/";
    let doc_end = Position::new(phpdoc.len() as u32);
    let span = Span::new(FileId::zero(), Position::new(0), doc_end);
    let document = parse_phpdoc_with_span(&arena, phpdoc, span).expect("Failed to parse PHPDoc");

    assert_eq!(document.elements.len(), 1);

    match &document.elements[0] {
        Element::Tag(tag) => {
            assert_eq!(tag.name, b"var" as &[u8]);
            assert_eq!(tag.kind, TagKind::Var);
            assert_eq!(
                tag.description,
                b"string[] this is a really long description\nthat spans multiple lines, and demonstrates how the parser handles\ndocblocks with multiple descriptions, and missing astricks" as &[u8]
            );

            // The span covers the raw bytes, including the continuation lines' leading whitespace.
            let covered = &phpdoc[tag.span.start_offset() as usize..tag.span.end_offset() as usize];
            assert_eq!(
                covered,
                b"@var string[] this is a really long description\n that spans multiple lines, and demonstrates how the parser handles\n docblocks with multiple descriptions, and missing astricks"
            );
        }
        other => panic!("Expected Element::Tag, got {:?}", other),
    }
}
710
#[test]
fn test_code_indent_using_non_ascii_chars() {
    // Checks that a block of lines is parsed as a single code element and that
    // its content and span match the expected byte strings, which contain
    // U+00A0 characters.
    // NOTE(review): the expected values contain `\u{a0}`, so the input literal
    // presumably contains matching non-ASCII whitespace - confirm the literal's
    // whitespace is intact.
    let arena = Bump::new();
    let phpdoc = "/**
 * └─ comment 2
 * └─ comment 4
 * └─ comment 3
 */"
    .as_bytes();

    let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
    let document = parse_phpdoc_with_span(&arena, phpdoc, span).expect("Failed to parse PHPDoc");

    assert_eq!(document.elements.len(), 1);

    let Element::Code(code) = &document.elements[0] else {
        panic!("Expected Element::Code, got {:?}", document.elements[0]);
    };

    // The content is compared against the lines without their ` * ` prefixes;
    // the span is compared against the raw source bytes.
    let content_str = code.content;
    assert_eq!(content_str, "\u{a0} └─ comment 2\n \u{a0}\u{a0} └─ comment 4\n\u{a0} └─ comment 3".as_bytes());
    assert_eq!(
        &phpdoc[code.span.start_offset() as usize..code.span.end_offset() as usize],
        " \u{a0} └─ comment 2\n * \u{a0}\u{a0} └─ comment 4\n * \u{a0} └─ comment 3".as_bytes()
    );
}
737
#[test]
fn test_issue_456() {
    // U+3000 (ideographic space) inside descriptions and directly after tag
    // names: the test pins that it is kept as part of the text / tag name.
    let arena = Bump::new();
    let phpdoc = "/**
 * \u{3000}(イベント日数をもとに計算)\u{3000}
 * @return\u{3000}int
 * @throws\u{3000}Exception
 */"
    .as_bytes();

    let doc_end = Position::new(phpdoc.len() as u32);
    let span = Span::new(FileId::zero(), Position::new(0), doc_end);
    let document = parse_phpdoc_with_span(&arena, phpdoc, span).expect("Failed to parse PHPDoc");

    assert_eq!(document.elements.len(), 3);

    // Element 0: description paragraph, U+3000 preserved on both sides.
    let Element::Text(text) = &document.elements[0] else {
        panic!("Expected Element::Text, got {:?}", document.elements[0]);
    };

    assert_eq!(text.segments.len(), 1);
    match &text.segments[0] {
        TextSegment::Paragraph { span, content } => {
            assert_eq!(*content, "\u{3000}(イベント日数をもとに計算)\u{3000}".as_bytes());
            assert_eq!(
                &phpdoc[span.start_offset() as usize..span.end_offset() as usize],
                "\u{3000}(イベント日数をもとに計算)\u{3000}".as_bytes()
            );
        }
        other => panic!("Expected TextSegment::Paragraph, got {:?}", other),
    }

    // Element 1: the whole `return<U+3000>int` is the tag name, with an empty description.
    match &document.elements[1] {
        Element::Tag(tag) => {
            assert_eq!(tag.name, "return\u{3000}int".as_bytes());
            assert_eq!(tag.kind, TagKind::Other);
            assert_eq!(tag.description, b"" as &[u8]);
            assert_eq!(
                &phpdoc[tag.span.start_offset() as usize..tag.span.end_offset() as usize],
                "@return\u{3000}int".as_bytes()
            );
        }
        other => panic!("Expected Element::Tag, got {:?}", other),
    }

    // Element 2: same expectation for `throws<U+3000>Exception`.
    match &document.elements[2] {
        Element::Tag(tag) => {
            assert_eq!(tag.name, "throws\u{3000}Exception".as_bytes());
            assert_eq!(tag.kind, TagKind::Other);
            assert_eq!(tag.description, b"" as &[u8]);
            assert_eq!(
                &phpdoc[tag.span.start_offset() as usize..tag.span.end_offset() as usize],
                "@throws\u{3000}Exception".as_bytes()
            );
        }
        other => panic!("Expected Element::Tag, got {:?}", other),
    }
}
796
#[test]
fn test_issue_808() {
    let arena = Bump::new();

    // Each case: (doc block source, expected tag name, expected tag description).
    // All three place U+3000 right after the tag keyword and use multi-byte text.
    let cases = [
        ("/** @param\u{3000}string $foo 中文描述 */", "param\u{3000}string", "$foo 中文描述"),
        ("/** @return\u{3000}int 返回🎉值 */", "return\u{3000}int", "返回🎉值"),
        ("/** @see\u{3000}中文类::方法() 说明 */", "see\u{3000}中文类::方法", "说明"),
    ];

    for (source, expected_name, expected_description) in cases {
        let phpdoc = source.as_bytes();
        let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
        let document = parse_phpdoc_with_span(&arena, phpdoc, span).expect("Failed to parse PHPDoc");

        assert_eq!(document.elements.len(), 1);
        let Element::Tag(tag) = &document.elements[0] else {
            panic!("Expected Element::Tag, got {:?}", document.elements[0]);
        };
        assert_eq!(tag.name, expected_name.as_bytes());
        assert_eq!(tag.description, expected_description.as_bytes());
    }
}
834
#[test]
fn test_indented_code_with_fullwidth_space_in_indent() {
    // A line whose leading whitespace contains U+3000 must parse without
    // panicking and produce a single code element that keeps the U+3000.
    let arena = Bump::new();
    let phpdoc = "/**\n * \u{3000}code\n */".as_bytes();
    let doc_end = Position::new(phpdoc.len() as u32);
    let span = Span::new(FileId::zero(), Position::new(0), doc_end);

    let outcome = parse_phpdoc_with_span(&arena, phpdoc, span);
    assert!(outcome.is_ok(), "Parsing should succeed without panic");
    let document = outcome.unwrap();

    assert_eq!(document.elements.len(), 1);
    match &document.elements[0] {
        Element::Code(code) => assert_eq!(code.content, "\u{3000}code".as_bytes()),
        other => panic!("Expected Element::Code, got {:?}", other),
    }
}
860
#[test]
fn test_indented_code_with_mixed_multibyte_whitespace() {
    // Two consecutive lines starting with U+3000 must parse without panicking
    // and end up in one code element, joined by a newline.
    let arena = Bump::new();
    let phpdoc = "/**\n * \u{3000}first line\n * \u{3000}second line\n */".as_bytes();
    let doc_end = Position::new(phpdoc.len() as u32);
    let span = Span::new(FileId::zero(), Position::new(0), doc_end);

    let outcome = parse_phpdoc_with_span(&arena, phpdoc, span);
    assert!(outcome.is_ok(), "Parsing should succeed without panic");
    let document = outcome.unwrap();

    assert_eq!(document.elements.len(), 1);
    match &document.elements[0] {
        Element::Code(code) => {
            assert_eq!(code.content, "\u{3000}first line\n\u{3000}second line".as_bytes());
        }
        other => panic!("Expected Element::Code, got {:?}", other),
    }
}
878
#[test]
fn test_indented_code_with_tab_and_fullwidth_space() {
    // A tab followed by U+3000 in the indentation must parse without
    // panicking; the expected content starts at the U+3000 (the tab is gone).
    let arena = Bump::new();
    let phpdoc = "/**\n * \t\u{3000}code\n */".as_bytes();
    let doc_end = Position::new(phpdoc.len() as u32);
    let span = Span::new(FileId::zero(), Position::new(0), doc_end);

    let outcome = parse_phpdoc_with_span(&arena, phpdoc, span);
    assert!(outcome.is_ok(), "Parsing should succeed without panic");
    let document = outcome.unwrap();

    assert_eq!(document.elements.len(), 1);
    match &document.elements[0] {
        Element::Code(code) => assert_eq!(code.content, "\u{3000}code".as_bytes()),
        other => panic!("Expected Element::Code, got {:?}", other),
    }
}
897
#[test]
fn test_issue_967_original_pattern() {
    // Regression input: U+3000 followed by an ASCII space and Japanese text
    // must parse without panicking into a single code element.
    let arena = Bump::new();
    let phpdoc = "/**\n * \u{3000} メールクリックがない\n */".as_bytes();
    let doc_end = Position::new(phpdoc.len() as u32);
    let span = Span::new(FileId::zero(), Position::new(0), doc_end);

    let outcome = parse_phpdoc_with_span(&arena, phpdoc, span);
    assert!(outcome.is_ok(), "Parsing should succeed without panic");
    let document = outcome.unwrap();

    assert_eq!(document.elements.len(), 1);
    match &document.elements[0] {
        Element::Code(code) => assert_eq!(code.content, "\u{3000} メールクリックがない".as_bytes()),
        other => panic!("Expected Element::Code, got {:?}", other),
    }
}
919
#[test]
fn test_multiline_inline_tag() {
    // An `{@internal ...}` inline tag whose body wraps onto a second line must
    // still be recognized as an inline tag inside the first text element.
    let arena = Bump::new();
    let phpdoc = b"/**
 * This method gets a count of the Foo.
 * {@internal Developers should note that it silently
 * adds one extra Foo.}
 *
 * @return int
 */";

    let doc_end = Position::new(phpdoc.len() as u32);
    let span = Span::new(FileId::zero(), Position::new(0), doc_end);
    let document = parse_phpdoc_with_span(&arena, phpdoc, span).expect("Failed to parse PHPDoc");

    let Element::Text(text) = &document.elements[0] else {
        panic!("Expected Element::Text, got {:?}", document.elements[0]);
    };

    assert!(text.segments.len() >= 2, "Expected at least 2 segments, got {:?}", text.segments);

    let has_inline_tag = text.segments.iter().any(|segment| match segment {
        TextSegment::InlineTag(tag) => tag.name == b"internal" as &[u8],
        _ => false,
    });

    assert!(has_inline_tag, "Expected an InlineTag with name 'internal', got segments: {:?}", text.segments);
}
947
#[test]
fn test_multiline_inline_tag_with_nested() {
    // A multi-line `{@internal ...}` tag that itself contains a nested
    // `{@link ...}` must still surface as an `internal` inline tag.
    let arena = Bump::new();
    let phpdoc = b"/**
 * {@internal Developers should note that it silently
 * adds one extra Foo (see {@link http://example.com}).}
 */";

    let doc_end = Position::new(phpdoc.len() as u32);
    let span = Span::new(FileId::zero(), Position::new(0), doc_end);
    let document = parse_phpdoc_with_span(&arena, phpdoc, span).expect("Failed to parse PHPDoc");

    let Element::Text(text) = &document.elements[0] else {
        panic!("Expected Element::Text, got {:?}", document.elements[0]);
    };

    let has_internal_tag = text.segments.iter().any(|segment| match segment {
        TextSegment::InlineTag(tag) => tag.name == b"internal" as &[u8],
        _ => false,
    });

    assert!(has_internal_tag, "Expected an InlineTag with name 'internal', got segments: {:?}", text.segments);
}
970
971 #[test]
972 fn test_single_line_inline_tag_still_works() {
973 let arena = Bump::new();
974 let phpdoc = br#"/**
975 * See {@see \Some\Class} for details.
976 */"#;
977
978 let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
979 let document = parse_phpdoc_with_span(&arena, phpdoc, span).expect("Failed to parse PHPDoc");
980
981 let Element::Text(text) = &document.elements[0] else {
982 panic!("Expected Element::Text, got {:?}", document.elements[0]);
983 };
984
985 let has_see_tag =
986 text.segments.iter().any(|seg| matches!(seg, TextSegment::InlineTag(tag) if tag.name == b"see" as &[u8]));
987
988 assert!(has_see_tag, "Expected an InlineTag with name 'see', got segments: {:?}", text.segments);
989 }
990
991 #[test]
992 fn test_multiline_inline_tag_chinese() {
993 let arena = Bump::new();
994 let phpdoc = "/**
995 * 获取用户数量的方法。
996 * {@internal 开发者请注意,此方法会静默地
997 * 添加一个额外的用户。}
998 *
999 * @return int
1000 */"
1001 .as_bytes();
1002
1003 let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
1004 let document = parse_phpdoc_with_span(&arena, phpdoc, span).expect("Failed to parse Chinese PHPDoc");
1005
1006 let Element::Text(text) = &document.elements[0] else {
1007 panic!("Expected Element::Text, got {:?}", document.elements[0]);
1008 };
1009
1010 let has_internal = text
1011 .segments
1012 .iter()
1013 .any(|seg| matches!(seg, TextSegment::InlineTag(tag) if tag.name == b"internal" as &[u8]));
1014 assert!(has_internal, "Expected InlineTag 'internal' with Chinese content, got: {:?}", text.segments);
1015 }
1016
1017 #[test]
1018 fn test_multiline_inline_tag_japanese() {
1019 let arena = Bump::new();
1020 let phpdoc = r#"/**
1021 * ユーザー数を取得するメソッド。
1022 * {@see \App\Service\UserCounter このクラスは
1023 * ユーザーの数を数えます。}
1024 */"#
1025 .as_bytes();
1026
1027 let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
1028 let document = parse_phpdoc_with_span(&arena, phpdoc, span).expect("Failed to parse Japanese PHPDoc");
1029
1030 let Element::Text(text) = &document.elements[0] else {
1031 panic!("Expected Element::Text, got {:?}", document.elements[0]);
1032 };
1033
1034 let has_see =
1035 text.segments.iter().any(|seg| matches!(seg, TextSegment::InlineTag(tag) if tag.name == b"see" as &[u8]));
1036 assert!(has_see, "Expected InlineTag 'see' with Japanese content, got: {:?}", text.segments);
1037 }
1038
1039 #[test]
1040 fn test_multiline_inline_tag_arabic() {
1041 let arena = Bump::new();
1042 let phpdoc = "/**
1043 * طريقة للحصول على عدد المستخدمين.
1044 * {@internal يجب على المطورين ملاحظة أن هذه الطريقة
1045 * تضيف مستخدمًا إضافيًا بصمت.}
1046 *
1047 * @return int
1048 */"
1049 .as_bytes();
1050
1051 let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
1052 let document = parse_phpdoc_with_span(&arena, phpdoc, span).expect("Failed to parse Arabic PHPDoc");
1053
1054 let Element::Text(text) = &document.elements[0] else {
1055 panic!("Expected Element::Text, got {:?}", document.elements[0]);
1056 };
1057
1058 let has_internal = text
1059 .segments
1060 .iter()
1061 .any(|seg| matches!(seg, TextSegment::InlineTag(tag) if tag.name == b"internal" as &[u8]));
1062 assert!(has_internal, "Expected InlineTag 'internal' with Arabic content, got: {:?}", text.segments);
1063 }
1064
1065 #[test]
1066 fn test_multiline_inline_tag_mixed_scripts() {
1067 let arena = Bump::new();
1068 let phpdoc = "/**
1069 * Documentation with mixed scripts.
1070 * {@internal 注意: This method は静かに adds один
1071 * дополнительный элемент 요소를 추가합니다.}
1072 */"
1073 .as_bytes();
1074
1075 let span = Span::new(FileId::zero(), Position::new(0), Position::new(phpdoc.len() as u32));
1076 let document = parse_phpdoc_with_span(&arena, phpdoc, span).expect("Failed to parse mixed-script PHPDoc");
1077
1078 let Element::Text(text) = &document.elements[0] else {
1079 panic!("Expected Element::Text, got {:?}", document.elements[0]);
1080 };
1081
1082 let has_internal = text
1083 .segments
1084 .iter()
1085 .any(|seg| matches!(seg, TextSegment::InlineTag(tag) if tag.name == b"internal" as &[u8]));
1086 assert!(has_internal, "Expected InlineTag 'internal' with mixed-script content, got: {:?}", text.segments);
1087 }
1088}