1use super::code_spans::try_parse_code_span;
13use super::core::parse_inline_text;
14use super::inline_html::try_parse_inline_html;
15use crate::options::ParserOptions;
16use crate::syntax::SyntaxKind;
17use rowan::GreenNodeBuilder;
18
19use crate::parser::utils::attributes::try_parse_trailing_attributes;
21
22#[derive(Clone, Copy)]
40pub struct LinkScanContext {
41 pub skip_raw_html: bool,
42 pub skip_autolinks: bool,
43 pub disallow_inner_links: bool,
44 pub dialect: crate::options::Dialect,
48}
49
50impl Default for LinkScanContext {
51 fn default() -> Self {
52 Self {
53 skip_raw_html: false,
54 skip_autolinks: false,
55 disallow_inner_links: false,
56 dialect: crate::options::Dialect::Pandoc,
57 }
58 }
59}
60
61impl LinkScanContext {
62 pub fn from_options(config: &ParserOptions) -> Self {
63 let is_commonmark = config.dialect == crate::options::Dialect::CommonMark;
64 Self {
65 skip_raw_html: config.extensions.raw_html,
66 skip_autolinks: config.extensions.autolinks && is_commonmark,
67 disallow_inner_links: is_commonmark,
68 dialect: config.dialect,
69 }
70 }
71}
72
73fn find_link_close_bracket(text: &str, start: usize, ctx: LinkScanContext) -> Option<usize> {
84 let bytes = text.as_bytes();
85 let mut bracket_depth = 0;
86 let mut escape_next = false;
87 let mut i = start;
88
89 while i < bytes.len() {
90 let b = bytes[i];
91
92 if escape_next {
93 escape_next = false;
94 i += step(text, i);
95 continue;
96 }
97
98 match b {
99 b'\\' => {
100 escape_next = true;
101 i += 1;
102 }
103 b'`' => {
104 if let Some((len, _, _, _)) = try_parse_code_span(&text[i..]) {
105 i += len;
106 } else {
107 i += 1;
108 }
109 }
110 b'<' => {
111 if ctx.skip_autolinks
116 && let Some((len, _)) = try_parse_autolink(&text[i..], true)
117 {
118 i += len;
119 } else if ctx.skip_raw_html
120 && let Some(len) = try_parse_inline_html(&text[i..], ctx.dialect)
121 {
122 i += len;
123 } else {
124 i += 1;
125 }
126 }
127 b'[' => {
128 bracket_depth += 1;
129 i += 1;
130 }
131 b']' => {
132 if bracket_depth == 0 {
133 return Some(i);
134 }
135 bracket_depth -= 1;
136 i += 1;
137 }
138 _ => i += step(text, i),
139 }
140 }
141 None
142}
143
144fn find_dest_close_paren(remaining: &str) -> Option<usize> {
150 let bytes = remaining.as_bytes();
151 let mut paren_depth = 0;
152 let mut escape_next = false;
153 let mut in_quotes = false;
154 let mut in_angle = false;
155 let mut i = 0;
156
157 while i < bytes.len() {
158 let b = bytes[i];
159
160 if escape_next {
161 escape_next = false;
162 i += step(remaining, i);
163 continue;
164 }
165
166 match b {
167 b'\\' => {
168 escape_next = true;
169 i += 1;
170 }
171 b'<' if !in_quotes && !in_angle => {
172 in_angle = true;
173 i += 1;
174 }
175 b'>' if in_angle => {
176 in_angle = false;
177 i += 1;
178 }
179 b'"' if !in_angle => {
180 in_quotes = !in_quotes;
181 i += 1;
182 }
183 b'(' if !in_quotes && !in_angle => {
184 paren_depth += 1;
185 i += 1;
186 }
187 b')' if !in_quotes && !in_angle => {
188 if paren_depth == 0 {
189 return Some(i);
190 }
191 paren_depth -= 1;
192 i += 1;
193 }
194 _ => i += step(remaining, i),
195 }
196 }
197 None
198}
199
/// Returns the UTF-8 byte length of the character that starts at byte offset
/// `i` of `s`, or `1` when `i` is at the end of the string.
///
/// Slicing panics if `i` is not on a character boundary.
fn step(s: &str, i: usize) -> usize {
    match s[i..].chars().next() {
        Some(ch) => ch.len_utf8(),
        None => 1,
    }
}
206
207fn link_text_contains_inner_link(text: &str, ctx: LinkScanContext, strict_dest: bool) -> bool {
224 let bytes = text.as_bytes();
225 let mut i = 0;
226 let mut escape_next = false;
227 while i < bytes.len() {
228 let b = bytes[i];
229 if escape_next {
230 escape_next = false;
231 i += step(text, i);
232 continue;
233 }
234 match b {
235 b'\\' => {
236 escape_next = true;
237 i += 1;
238 }
239 b'`' => {
240 if let Some((len, _, _, _)) = try_parse_code_span(&text[i..]) {
241 i += len;
242 } else {
243 i += 1;
244 }
245 }
246 b'<' => {
247 if ctx.skip_autolinks
248 && let Some((len, _)) = try_parse_autolink(&text[i..], true)
249 {
250 i += len;
251 } else if ctx.skip_raw_html
252 && let Some(len) = try_parse_inline_html(&text[i..], ctx.dialect)
253 {
254 i += len;
255 } else {
256 i += 1;
257 }
258 }
259 b'!' if i + 1 < bytes.len() && bytes[i + 1] == b'[' => {
260 if let Some((len, alt, _, _)) = try_parse_inline_image(&text[i..], ctx) {
261 if link_text_contains_inner_link(alt, ctx, strict_dest) {
262 return true;
263 }
264 i += len;
265 } else {
266 i += 2;
267 }
268 }
269 b'[' => {
270 if try_parse_inline_link(&text[i..], strict_dest, ctx).is_some() {
271 return true;
272 }
273 i += 1;
274 }
275 _ => i += step(text, i),
276 }
277 }
278 false
279}
280
281pub fn try_parse_inline_image(
290 text: &str,
291 ctx: LinkScanContext,
292) -> Option<(usize, &str, &str, Option<&str>)> {
293 if !text.starts_with("![") {
294 return None;
295 }
296
297 let close_bracket = find_link_close_bracket(text, 2, ctx)?;
299 let alt_text = &text[2..close_bracket];
300
301 let after_bracket = close_bracket + 1;
303 if text.len() <= after_bracket || !text[after_bracket..].starts_with('(') {
304 return None;
305 }
306
307 let dest_start = after_bracket + 1;
309 let remaining = &text[dest_start..];
310
311 let close_paren = find_dest_close_paren(remaining)?;
312 let dest_content = &remaining[..close_paren];
313
314 let after_paren = dest_start + close_paren + 1;
316 let after_close = &text[after_paren..];
317
318 if after_close.starts_with('{') {
320 if let Some(close_brace_pos) = after_close.find('}') {
322 let attr_text = &after_close[..=close_brace_pos];
323 if let Some((_attrs, _)) = try_parse_trailing_attributes(attr_text) {
325 let total_len = after_paren + close_brace_pos + 1;
326 let raw_attrs = attr_text;
328 return Some((total_len, alt_text, dest_content, Some(raw_attrs)));
329 }
330 }
331 }
332
333 let total_len = after_paren;
335 Some((total_len, alt_text, dest_content, None))
336}
337
338pub fn emit_inline_image(
341 builder: &mut GreenNodeBuilder,
342 _text: &str,
343 alt_text: &str,
344 dest: &str,
345 raw_attributes: Option<&str>,
346 config: &ParserOptions,
347) {
348 builder.start_node(SyntaxKind::IMAGE_LINK.into());
349
350 builder.start_node(SyntaxKind::IMAGE_LINK_START.into());
352 builder.token(SyntaxKind::IMAGE_LINK_START.into(), "![");
353 builder.finish_node();
354
355 builder.start_node(SyntaxKind::IMAGE_ALT.into());
357 parse_inline_text(builder, alt_text, config, false);
360 builder.finish_node();
361
362 builder.token(SyntaxKind::IMAGE_ALT_END.into(), "]");
364
365 builder.token(SyntaxKind::IMAGE_DEST_START.into(), "(");
367
368 builder.start_node(SyntaxKind::LINK_DEST.into());
370 builder.token(SyntaxKind::TEXT.into(), dest);
371 builder.finish_node();
372
373 builder.token(SyntaxKind::IMAGE_DEST_END.into(), ")");
375
376 if let Some(raw_attrs) = raw_attributes {
378 builder.start_node(SyntaxKind::ATTRIBUTE.into());
379 builder.token(SyntaxKind::ATTRIBUTE.into(), raw_attrs);
380 builder.finish_node();
381 }
382
383 builder.finish_node();
384}
385
386pub fn try_parse_autolink(text: &str, is_commonmark: bool) -> Option<(usize, &str)> {
396 if !text.starts_with('<') {
397 return None;
398 }
399
400 let close_pos = text[1..].find('>')?;
401 let content = &text[1..1 + close_pos];
402
403 if content.is_empty() {
404 return None;
405 }
406 if content.contains(|c: char| c.is_whitespace()) {
407 return None;
408 }
409
410 if is_commonmark {
411 if !is_valid_uri_autolink(content) && !is_valid_email_autolink(content) {
412 return None;
413 }
414 } else if !content.contains(':') && !content.contains('@') {
415 return None;
416 }
417
418 Some((close_pos + 2, content))
419}
420
/// Checks whether `s` has the shape of a strict URI autolink body.
///
/// Required shape: a scheme of 2 to 32 bytes that starts with an ASCII letter
/// and continues with ASCII letters, digits, `+`, `-` or `.`; then a `:`;
/// then any number of bytes, none of which is below `0x20`, equal to `0x7f`,
/// `<` or `>`.
fn is_valid_uri_autolink(s: &str) -> bool {
    let bytes = s.as_bytes();

    match bytes.first() {
        Some(first) if first.is_ascii_alphabetic() => {}
        _ => return false,
    }

    let scheme_len = 1 + bytes[1..]
        .iter()
        .take_while(|&&b| b.is_ascii_alphanumeric() || matches!(b, b'+' | b'-' | b'.'))
        .count();

    if scheme_len < 2 || scheme_len > 32 {
        return false;
    }
    if bytes.get(scheme_len) != Some(&b':') {
        return false;
    }

    bytes[scheme_len + 1..]
        .iter()
        .all(|&b| !(b < 0x20 || b == 0x7f || b == b'<' || b == b'>'))
}
451
452fn is_valid_email_autolink(s: &str) -> bool {
456 let Some(at) = s.find('@') else {
457 return false;
458 };
459 let local = &s[..at];
460 let domain = &s[at + 1..];
461 if local.is_empty() || !local.bytes().all(is_email_local_byte) {
462 return false;
463 }
464 if domain.is_empty() {
465 return false;
466 }
467 domain.split('.').all(is_valid_email_label)
468}
469
/// Returns `true` when `b` may appear in the local part (before `@`) of an
/// email autolink: an ASCII letter, an ASCII digit, or one of
/// ``.!#$%&'*+/=?^_`{|}~-``.
fn is_email_local_byte(b: u8) -> bool {
    const PUNCTUATION: &[u8] = b".!#$%&'*+/=?^_`{|}~-";
    b.is_ascii_alphanumeric() || PUNCTUATION.contains(&b)
}
498
/// Checks one `.`-separated label of an email autolink domain.
///
/// A label is valid when it is 1 to 63 bytes long, begins and ends with an
/// ASCII letter or digit, and contains only ASCII letters, digits and `-`.
///
/// The whole label is checked in one pass rather than by slicing off the
/// first and last byte: slicing `1..len - 1` is out of order for a
/// single-byte label and would panic on input such as `a@b.c`.
fn is_valid_email_label(label: &str) -> bool {
    let bytes = label.as_bytes();

    let (first, last) = match (bytes.first(), bytes.last()) {
        (Some(first), Some(last)) => (first, last),
        // Empty label (e.g. from `a@b..c` or a trailing dot).
        _ => return false,
    };

    bytes.len() <= 63
        && first.is_ascii_alphanumeric()
        && last.is_ascii_alphanumeric()
        && bytes
            .iter()
            .all(|b| b.is_ascii_alphanumeric() || *b == b'-')
}
514
515pub fn emit_autolink(builder: &mut GreenNodeBuilder, _text: &str, url: &str) {
517 builder.start_node(SyntaxKind::AUTO_LINK.into());
518
519 builder.start_node(SyntaxKind::AUTO_LINK_MARKER.into());
521 builder.token(SyntaxKind::AUTO_LINK_MARKER.into(), "<");
522 builder.finish_node();
523
524 builder.token(SyntaxKind::TEXT.into(), url);
526
527 builder.start_node(SyntaxKind::AUTO_LINK_MARKER.into());
529 builder.token(SyntaxKind::AUTO_LINK_MARKER.into(), ">");
530 builder.finish_node();
531
532 builder.finish_node();
533}
534
/// Tries to recognise a bare URI (`scheme:rest`) at the start of `text`.
///
/// The scheme must start with an ASCII letter and consist of ASCII letters,
/// digits, `+`, `-` and `.` up to the first `:`. The part after the colon
/// runs until ASCII whitespace or one of `<`, `>`, `` ` ``, `"`, `'`.
/// Trailing `.`, `,`, `;`, `:`, `)`, `]` and `}` are then trimmed off.
///
/// Returns `(len, uri)` where `uri` is the first `len` bytes of `text`.
/// Returns `None` when nothing is left after the colon, or when the trimmed
/// URI would end in a backslash.
pub fn try_parse_bare_uri(text: &str) -> Option<(usize, &str)> {
    if !text.chars().next()?.is_ascii_alphabetic() {
        return None;
    }

    // The first character outside the scheme alphabet has to be the colon.
    let colon = text.find(|c: char| !(c.is_ascii_alphanumeric() || matches!(c, '+' | '-' | '.')))?;
    if !text[colon..].starts_with(':') {
        return None;
    }

    let body_start = colon + 1;
    let body = &text[body_start..];

    let raw_len = body
        .bytes()
        .position(|b| b.is_ascii_whitespace() || matches!(b, b'<' | b'>' | b'`' | b'"' | b'\''))
        .unwrap_or(body.len());
    if raw_len == 0 {
        return None;
    }

    // Sentence punctuation right after a URI is not part of it.
    let kept = body[..raw_len]
        .trim_end_matches(|c: char| matches!(c, '.' | ',' | ';' | ':' | ')' | ']' | '}'));
    if kept.is_empty() || kept.ends_with('\\') {
        return None;
    }

    let total = body_start + kept.len();
    Some((total, &text[..total]))
}
596
597pub fn try_parse_inline_link(
609 text: &str,
610 strict_dest: bool,
611 ctx: LinkScanContext,
612) -> Option<(usize, &str, &str, Option<&str>)> {
613 if !text.starts_with('[') {
614 return None;
615 }
616
617 let close_bracket = find_link_close_bracket(text, 1, ctx)?;
619 let link_text = &text[1..close_bracket];
620
621 let after_bracket = close_bracket + 1;
623 if text.len() <= after_bracket || !text[after_bracket..].starts_with('(') {
624 return None;
625 }
626
627 let dest_start = after_bracket + 1;
629 let remaining = &text[dest_start..];
630
631 let close_paren = find_dest_close_paren(remaining)?;
632 let dest_content = &remaining[..close_paren];
633
634 if strict_dest && !dest_and_title_ok_commonmark(dest_content) {
635 return None;
636 }
637
638 if ctx.disallow_inner_links && link_text_contains_inner_link(link_text, ctx, strict_dest) {
641 return None;
642 }
643
644 let after_paren = dest_start + close_paren + 1;
646 let after_close = &text[after_paren..];
647
648 if after_close.starts_with('{') {
650 if let Some(close_brace_pos) = after_close.find('}') {
652 let attr_text = &after_close[..=close_brace_pos];
653 if let Some((_attrs, _)) = try_parse_trailing_attributes(attr_text) {
655 let total_len = after_paren + close_brace_pos + 1;
656 let raw_attrs = attr_text;
658 return Some((total_len, link_text, dest_content, Some(raw_attrs)));
659 }
660 }
661 }
662
663 let total_len = after_paren;
665 Some((total_len, link_text, dest_content, None))
666}
667
668fn dest_and_title_ok_commonmark(content: &str) -> bool {
677 let trimmed = trim_start_link_ws(content);
678 if trimmed.is_empty() {
679 return true;
680 }
681
682 let after_dest = if let Some(rest) = trimmed.strip_prefix('<') {
683 let mut escape = false;
684 let mut end_byte = None;
685 for (i, c) in rest.char_indices() {
686 if escape {
687 escape = false;
688 continue;
689 }
690 match c {
691 '\\' => escape = true,
692 '\n' | '<' => return false,
693 '>' => {
694 end_byte = Some(i);
695 break;
696 }
697 _ => {}
698 }
699 }
700 match end_byte {
701 Some(e) => &rest[e + 1..],
702 None => return false,
703 }
704 } else {
705 let mut escape = false;
706 let mut depth: i32 = 0;
707 let mut end = trimmed.len();
708 for (i, c) in trimmed.char_indices() {
709 if escape {
710 escape = false;
711 continue;
712 }
713 match c {
714 '\\' => escape = true,
715 ' ' | '\t' | '\n' => {
716 end = i;
717 break;
718 }
719 _ if c.is_ascii_control() => return false,
720 '(' => depth += 1,
721 ')' => {
722 if depth == 0 {
723 end = i;
724 break;
725 }
726 depth -= 1;
727 }
728 _ => {}
729 }
730 }
731 if depth != 0 {
732 return false;
733 }
734 if end == 0 {
735 return false;
737 }
738 &trimmed[end..]
739 };
740
741 let after_dest = trim_start_link_ws(after_dest);
742 if after_dest.is_empty() {
743 return true;
744 }
745
746 let bytes = after_dest.as_bytes();
747 let close = match bytes[0] {
748 b'"' => b'"',
749 b'\'' => b'\'',
750 b'(' => b')',
751 _ => return false,
752 };
753 let opens_paren = bytes[0] == b'(';
754 let mut escape = false;
755 let mut title_close_pos = None;
756 for (i, &b) in after_dest.as_bytes().iter().enumerate().skip(1) {
757 if escape {
758 escape = false;
759 continue;
760 }
761 if b == b'\\' {
762 escape = true;
763 continue;
764 }
765 if opens_paren && b == b'(' {
766 return false;
767 }
768 if b == close {
769 title_close_pos = Some(i);
770 break;
771 }
772 }
773 let close_idx = match title_close_pos {
774 Some(p) => p,
775 None => return false,
776 };
777
778 let after_title = &after_dest[close_idx + 1..];
779 is_link_ws_only(after_title)
780}
781
/// Strips leading link whitespace — space, tab and newline only — from `s`.
///
/// Other whitespace (carriage return, non-breaking space, ...) is left
/// untouched, which is why `str::trim_start` is not used here.
#[inline]
fn trim_start_link_ws(s: &str) -> &str {
    // Safe standard-library trimming; no `unsafe` re-validation of UTF-8 is
    // needed because only whole ASCII characters are removed.
    s.trim_start_matches(|c: char| matches!(c, ' ' | '\t' | '\n'))
}
801
/// Returns `true` when `s` is empty or made up solely of spaces, tabs and
/// newlines.
#[inline]
fn is_link_ws_only(s: &str) -> bool {
    s.bytes().all(|b| matches!(b, b' ' | b'\t' | b'\n'))
}
808
809pub fn emit_inline_link(
812 builder: &mut GreenNodeBuilder,
813 _text: &str,
814 link_text: &str,
815 dest: &str,
816 raw_attributes: Option<&str>,
817 config: &ParserOptions,
818) {
819 builder.start_node(SyntaxKind::LINK.into());
820
821 builder.start_node(SyntaxKind::LINK_START.into());
823 builder.token(SyntaxKind::LINK_START.into(), "[");
824 builder.finish_node();
825
826 builder.start_node(SyntaxKind::LINK_TEXT.into());
832 parse_inline_text(builder, link_text, config, true);
833 builder.finish_node();
834
835 builder.token(SyntaxKind::LINK_TEXT_END.into(), "]");
837
838 builder.token(SyntaxKind::LINK_DEST_START.into(), "(");
840
841 builder.start_node(SyntaxKind::LINK_DEST.into());
843 builder.token(SyntaxKind::TEXT.into(), dest);
844 builder.finish_node();
845
846 builder.token(SyntaxKind::LINK_DEST_END.into(), ")");
848
849 if let Some(raw_attrs) = raw_attributes {
851 builder.start_node(SyntaxKind::ATTRIBUTE.into());
852 builder.token(SyntaxKind::ATTRIBUTE.into(), raw_attrs);
853 builder.finish_node();
854 }
855
856 builder.finish_node();
857}
858
859pub fn emit_bare_uri_link(builder: &mut GreenNodeBuilder, uri: &str, _config: &ParserOptions) {
860 builder.start_node(SyntaxKind::LINK.into());
861
862 builder.start_node(SyntaxKind::LINK_START.into());
863 builder.token(SyntaxKind::LINK_START.into(), "[");
864 builder.finish_node();
865
866 builder.start_node(SyntaxKind::LINK_TEXT.into());
867 builder.token(SyntaxKind::TEXT.into(), uri);
868 builder.finish_node();
869
870 builder.token(SyntaxKind::LINK_TEXT_END.into(), "]");
871 builder.token(SyntaxKind::LINK_DEST_START.into(), "(");
872
873 builder.start_node(SyntaxKind::LINK_DEST.into());
874 builder.token(SyntaxKind::TEXT.into(), uri);
875 builder.finish_node();
876
877 builder.token(SyntaxKind::LINK_DEST_END.into(), ")");
878
879 builder.finish_node();
880}
881
882pub fn try_parse_reference_link(
892 text: &str,
893 allow_shortcut: bool,
894 inline_link_attempted: bool,
895 ctx: LinkScanContext,
896) -> Option<(usize, &str, String, bool)> {
897 if !text.starts_with('[') {
898 return None;
899 }
900
901 if text.len() > 1 {
903 let bytes = text.as_bytes();
904 if bytes[1] == b'@' {
905 return None;
906 }
907 if bytes[1] == b'-' && text.len() > 2 && bytes[2] == b'@' {
908 return None;
909 }
910 }
911
912 let close_bracket = find_link_close_bracket(text, 1, ctx)?;
918 let link_text = &text[1..close_bracket];
919
920 if ctx.disallow_inner_links
925 && link_text_contains_inner_link(link_text, ctx, ctx.disallow_inner_links)
926 {
927 return None;
928 }
929
930 let after_bracket = close_bracket + 1;
932
933 if after_bracket < text.len() && text[after_bracket..].starts_with('{') {
936 return None;
937 }
938
939 if after_bracket < text.len()
950 && text[after_bracket..].starts_with('(')
951 && (!allow_shortcut || !inline_link_attempted)
952 {
953 return None;
954 }
955
956 if after_bracket < text.len() && text[after_bracket..].starts_with('[') {
958 let label_start = after_bracket + 1;
960 let mut label_end = None;
961
962 for (i, ch) in text[label_start..].char_indices() {
963 if ch == ']' {
964 label_end = Some(i + label_start);
965 break;
966 }
967 if ch == '\n' {
969 return None;
970 }
971 }
972
973 let label_end = label_end?;
974 let label = &text[label_start..label_end];
975
976 let total_len = label_end + 1;
978
979 if label.is_empty() {
981 return Some((total_len, link_text, String::new(), false));
982 }
983
984 Some((total_len, link_text, label.to_string(), false))
986 } else if allow_shortcut {
987 if link_text.is_empty() {
990 return None;
991 }
992 Some((after_bracket, link_text, link_text.to_string(), true))
993 } else {
994 None
996 }
997}
998
999pub fn emit_reference_link(
1002 builder: &mut GreenNodeBuilder,
1003 link_text: &str,
1004 label: &str,
1005 is_shortcut: bool,
1006 config: &ParserOptions,
1007) {
1008 builder.start_node(SyntaxKind::LINK.into());
1009
1010 builder.start_node(SyntaxKind::LINK_START.into());
1012 builder.token(SyntaxKind::LINK_START.into(), "[");
1013 builder.finish_node();
1014
1015 builder.start_node(SyntaxKind::LINK_TEXT.into());
1020 parse_inline_text(builder, link_text, config, true);
1021 builder.finish_node();
1022
1023 builder.token(SyntaxKind::TEXT.into(), "]");
1025
1026 if !is_shortcut {
1027 builder.token(SyntaxKind::TEXT.into(), "[");
1029 builder.start_node(SyntaxKind::LINK_REF.into());
1030 if !label.is_empty() {
1033 builder.token(SyntaxKind::TEXT.into(), label);
1034 }
1035 builder.finish_node();
1036 builder.token(SyntaxKind::TEXT.into(), "]");
1037 }
1038 builder.finish_node();
1041}
1042
/// Tries to parse a reference image at the start of `text`: `![alt][label]`,
/// `![alt][]`, or — when `allow_shortcut` is set — the shortcut form
/// `![alt]`.
///
/// Returns `(consumed_len, alt_text, label, is_shortcut)`. For `![alt][]`
/// and for the shortcut form the label is a copy of the alt text.
///
/// Brackets inside the alt text are balanced and a backslash makes the next
/// byte inert. An explicit label must be closed by `]` before any line
/// break. The shortcut form is refused when a `(` follows the alt text,
/// since that is the inline-image syntax.
///
/// A shortcut image that ends exactly at the end of `text` is accepted, just
/// like one that is followed by other text.
pub fn try_parse_reference_image(
    text: &str,
    allow_shortcut: bool,
) -> Option<(usize, &str, String, bool)> {
    let bytes = text.as_bytes();
    if bytes.len() < 4 || bytes[0] != b'!' || bytes[1] != b'[' {
        return None;
    }

    let alt_start = 2;
    let mut pos = alt_start;
    let mut bracket_depth = 1;

    // Walk to the `]` that balances the opening `![`.
    while pos < bytes.len() && bracket_depth > 0 {
        match bytes[pos] {
            b'[' => bracket_depth += 1,
            b']' => bracket_depth -= 1,
            // Skip the escaped byte so an escaped bracket is not counted.
            b'\\' if pos + 1 < bytes.len() => pos += 1,
            _ => {}
        }
        pos += 1;
    }

    if bracket_depth > 0 {
        return None;
    }

    // `pos` now sits just past the closing `]`.
    let alt_text = &text[alt_start..pos - 1];

    if bytes.get(pos) == Some(&b'[') {
        let label_start = pos + 1;
        let label_len = bytes[label_start..]
            .iter()
            .position(|&b| matches!(b, b']' | b'\n' | b'\r'))?;
        let label_end = label_start + label_len;
        if bytes[label_end] != b']' {
            // A line break came before the closing bracket.
            return None;
        }

        let label_text = &text[label_start..label_end];
        let label = if label_text.is_empty() {
            alt_text.to_string()
        } else {
            label_text.to_string()
        };

        return Some((label_end + 1, alt_text, label, false));
    }

    // `![alt](...)` is an inline image, never a shortcut reference.
    if !allow_shortcut || bytes.get(pos) == Some(&b'(') {
        return None;
    }

    Some((pos, alt_text, alt_text.to_string(), true))
}
1124
1125pub fn emit_reference_image(
1127 builder: &mut GreenNodeBuilder,
1128 alt_text: &str,
1129 label: &str,
1130 is_shortcut: bool,
1131 config: &ParserOptions,
1132) {
1133 builder.start_node(SyntaxKind::IMAGE_LINK.into());
1134
1135 builder.start_node(SyntaxKind::IMAGE_LINK_START.into());
1137 builder.token(SyntaxKind::IMAGE_LINK_START.into(), "![");
1138 builder.finish_node();
1139
1140 builder.start_node(SyntaxKind::IMAGE_ALT.into());
1142 parse_inline_text(builder, alt_text, config, false);
1143 builder.finish_node();
1144
1145 builder.token(SyntaxKind::TEXT.into(), "]");
1147
1148 if !is_shortcut {
1149 builder.token(SyntaxKind::TEXT.into(), "[");
1151 builder.start_node(SyntaxKind::LINK_REF.into());
1152 if label != alt_text {
1154 builder.token(SyntaxKind::TEXT.into(), label);
1155 }
1156 builder.finish_node();
1157 builder.token(SyntaxKind::TEXT.into(), "]");
1158 }
1159 builder.finish_node();
1162}
1163
1164pub fn emit_unresolved_reference(
1175 builder: &mut GreenNodeBuilder,
1176 is_image: bool,
1177 text_content: &str,
1178 label_suffix: Option<&str>,
1179 config: &ParserOptions,
1180) {
1181 builder.start_node(SyntaxKind::UNRESOLVED_REFERENCE.into());
1182
1183 if is_image {
1184 builder.start_node(SyntaxKind::IMAGE_LINK_START.into());
1185 builder.token(SyntaxKind::IMAGE_LINK_START.into(), "![");
1186 builder.finish_node();
1187 builder.start_node(SyntaxKind::IMAGE_ALT.into());
1188 parse_inline_text(builder, text_content, config, false);
1189 builder.finish_node();
1190 } else {
1191 builder.start_node(SyntaxKind::LINK_START.into());
1192 builder.token(SyntaxKind::LINK_START.into(), "[");
1193 builder.finish_node();
1194 builder.start_node(SyntaxKind::LINK_TEXT.into());
1195 parse_inline_text(builder, text_content, config, true);
1196 builder.finish_node();
1197 }
1198
1199 builder.token(SyntaxKind::TEXT.into(), "]");
1200
1201 if let Some(suffix) = label_suffix {
1202 debug_assert!(suffix.starts_with('[') && suffix.ends_with(']'));
1206 builder.token(SyntaxKind::TEXT.into(), "[");
1207 let label = &suffix[1..suffix.len() - 1];
1208 builder.start_node(SyntaxKind::LINK_REF.into());
1209 if !label.is_empty() {
1210 builder.token(SyntaxKind::TEXT.into(), label);
1211 }
1212 builder.finish_node();
1213 builder.token(SyntaxKind::TEXT.into(), "]");
1214 }
1215
1216 builder.finish_node();
1217}
1218
1219#[cfg(test)]
1220mod tests {
1221 use super::*;
1222
1223 #[test]
1224 fn test_parse_autolink_url() {
1225 let input = "<https://example.com>";
1226 assert_eq!(
1227 try_parse_autolink(input, false),
1228 Some((21, "https://example.com"))
1229 );
1230 assert_eq!(
1231 try_parse_autolink(input, true),
1232 Some((21, "https://example.com"))
1233 );
1234 }
1235
1236 #[test]
1237 fn test_parse_autolink_email() {
1238 let input = "<user@example.com>";
1239 assert_eq!(
1240 try_parse_autolink(input, false),
1241 Some((18, "user@example.com"))
1242 );
1243 assert_eq!(
1244 try_parse_autolink(input, true),
1245 Some((18, "user@example.com"))
1246 );
1247 }
1248
1249 #[test]
1250 fn test_parse_autolink_no_close() {
1251 let input = "<https://example.com";
1252 assert_eq!(try_parse_autolink(input, false), None);
1253 assert_eq!(try_parse_autolink(input, true), None);
1254 }
1255
1256 #[test]
1257 fn test_parse_autolink_with_space() {
1258 let input = "<https://example.com >";
1259 assert_eq!(try_parse_autolink(input, false), None);
1260 assert_eq!(try_parse_autolink(input, true), None);
1261 }
1262
1263 #[test]
1264 fn test_parse_autolink_not_url_or_email() {
1265 let input = "<notaurl>";
1266 assert_eq!(try_parse_autolink(input, false), None);
1267 assert_eq!(try_parse_autolink(input, true), None);
1268 }
1269
1270 #[test]
1271 fn test_parse_autolink_commonmark_strict_scheme() {
1272 let input = "<m:abc>";
1275 assert_eq!(try_parse_autolink(input, true), None);
1276 assert_eq!(try_parse_autolink(input, false), Some((7, "m:abc")));
1277 }
1278
1279 #[test]
1280 fn test_parse_autolink_commonmark_email_disallows_backslash() {
1281 let input = "<foo\\+@bar.example.com>";
1282 assert_eq!(try_parse_autolink(input, true), None);
1283 assert_eq!(
1284 try_parse_autolink(input, false),
1285 Some((23, "foo\\+@bar.example.com"))
1286 );
1287 }
1288
1289 #[test]
1290 fn test_parse_inline_link_simple() {
1291 let input = "[text](url)";
1292 let result = try_parse_inline_link(input, false, LinkScanContext::default());
1293 assert_eq!(result, Some((11, "text", "url", None)));
1294 }
1295
1296 #[test]
1297 fn test_parse_inline_link_with_title() {
1298 let input = r#"[text](url "title")"#;
1299 let result = try_parse_inline_link(input, false, LinkScanContext::default());
1300 assert_eq!(result, Some((19, "text", r#"url "title""#, None)));
1301 }
1302
1303 #[test]
1304 fn test_parse_inline_link_with_nested_brackets() {
1305 let input = "[outer [inner] text](url)";
1306 let result = try_parse_inline_link(input, false, LinkScanContext::default());
1307 assert_eq!(result, Some((25, "outer [inner] text", "url", None)));
1308 }
1309
1310 #[test]
1311 fn test_parse_inline_link_no_space_between_brackets_and_parens() {
1312 let input = "[text] (url)";
1313 let result = try_parse_inline_link(input, false, LinkScanContext::default());
1314 assert_eq!(result, None);
1315 }
1316
1317 #[test]
1318 fn test_parse_inline_link_no_closing_bracket() {
1319 let input = "[text(url)";
1320 let result = try_parse_inline_link(input, false, LinkScanContext::default());
1321 assert_eq!(result, None);
1322 }
1323
1324 #[test]
1325 fn test_parse_inline_link_no_closing_paren() {
1326 let input = "[text](url";
1327 let result = try_parse_inline_link(input, false, LinkScanContext::default());
1328 assert_eq!(result, None);
1329 }
1330
1331 #[test]
1332 fn test_parse_inline_link_escaped_bracket() {
1333 let input = r"[text\]more](url)";
1334 let result = try_parse_inline_link(input, false, LinkScanContext::default());
1335 assert_eq!(result, Some((17, r"text\]more", "url", None)));
1336 }
1337
1338 #[test]
1339 fn test_parse_inline_link_parens_in_url() {
1340 let input = "[text](url(with)parens)";
1341 let result = try_parse_inline_link(input, false, LinkScanContext::default());
1342 assert_eq!(result, Some((23, "text", "url(with)parens", None)));
1343 }
1344
1345 #[test]
1346 fn test_parse_inline_image_simple() {
1347 let input = "";
1348 let result = try_parse_inline_image(input, LinkScanContext::default());
1349 assert_eq!(result, Some((17, "alt", "image.jpg", None)));
1350 }
1351
1352 #[test]
1353 fn test_parse_inline_image_with_title() {
1354 let input = r#""#;
1355 let result = try_parse_inline_image(input, LinkScanContext::default());
1356 assert_eq!(result, Some((27, "alt", r#"image.jpg "A title""#, None)));
1357 }
1358
1359 #[test]
1360 fn test_parse_inline_image_with_nested_brackets() {
1361 let input = "![outer [inner] alt](image.jpg)";
1362 let result = try_parse_inline_image(input, LinkScanContext::default());
1363 assert_eq!(result, Some((31, "outer [inner] alt", "image.jpg", None)));
1364 }
1365
1366 #[test]
1367 fn test_parse_bare_uri_rejects_dangling_backslash_after_trim() {
1368 let input = r"a:\]";
1369 let result = try_parse_bare_uri(input);
1370 assert_eq!(result, None);
1371 }
1372
1373 #[test]
1374 fn test_parse_inline_image_no_space_between_brackets_and_parens() {
1375 let input = "![alt] (image.jpg)";
1376 let result = try_parse_inline_image(input, LinkScanContext::default());
1377 assert_eq!(result, None);
1378 }
1379
1380 #[test]
1381 fn test_parse_inline_image_no_closing_bracket() {
1382 let input = "![alt(image.jpg)";
1383 let result = try_parse_inline_image(input, LinkScanContext::default());
1384 assert_eq!(result, None);
1385 }
1386
1387 #[test]
1388 fn test_parse_inline_image_no_closing_paren() {
1389 let input = ");
1391 assert_eq!(result, None);
1392 }
1393
1394 #[test]
1395 fn test_parse_inline_image_with_simple_class() {
1396 let input = "{.large}";
1397 let result = try_parse_inline_image(input, LinkScanContext::default());
1398 let (len, alt, dest, attrs) = result.unwrap();
1399 assert_eq!(len, 23);
1400 assert_eq!(alt, "alt");
1401 assert_eq!(dest, "img.png");
1402 assert!(attrs.is_some());
1403 let attrs = attrs.unwrap();
1404 assert_eq!(attrs, "{.large}");
1405 }
1406
1407 #[test]
1408 fn test_parse_inline_image_with_id() {
1409 let input = "{#fig-1}";
1410 let result = try_parse_inline_image(input, LinkScanContext::default());
1411 let (len, alt, dest, attrs) = result.unwrap();
1412 assert_eq!(len, 29);
1413 assert_eq!(alt, "Figure 1");
1414 assert_eq!(dest, "fig1.png");
1415 assert!(attrs.is_some());
1416 let attrs = attrs.unwrap();
1417 assert_eq!(attrs, "{#fig-1}");
1418 }
1419
1420 #[test]
1421 fn test_parse_inline_image_with_full_attributes() {
1422 let input = "{#fig .large width=\"80%\"}";
1423 let result = try_parse_inline_image(input, LinkScanContext::default());
1424 let (len, alt, dest, attrs) = result.unwrap();
1425 assert_eq!(len, 40);
1426 assert_eq!(alt, "alt");
1427 assert_eq!(dest, "img.png");
1428 assert!(attrs.is_some());
1429 let attrs = attrs.unwrap();
1430 assert_eq!(attrs, "{#fig .large width=\"80%\"}");
1431 }
1432
1433 #[test]
1434 fn test_parse_inline_image_attributes_must_be_adjacent() {
1435 let input = " {.large}";
1437 let result = try_parse_inline_image(input, LinkScanContext::default());
1438 assert_eq!(result, Some((15, "alt", "img.png", None)));
1439 }
1440
1441 #[test]
1443 fn test_parse_inline_link_with_id() {
1444 let input = "[text](url){#link-1}";
1445 let result = try_parse_inline_link(input, false, LinkScanContext::default());
1446 let (len, text, dest, attrs) = result.unwrap();
1447 assert_eq!(len, 20);
1448 assert_eq!(text, "text");
1449 assert_eq!(dest, "url");
1450 assert!(attrs.is_some());
1451 let attrs = attrs.unwrap();
1452 assert_eq!(attrs, "{#link-1}");
1453 }
1454
1455 #[test]
1456 fn test_parse_inline_link_with_full_attributes() {
1457 let input = "[text](url){#link .external target=\"_blank\"}";
1458 let result = try_parse_inline_link(input, false, LinkScanContext::default());
1459 let (len, text, dest, attrs) = result.unwrap();
1460 assert_eq!(len, 44);
1461 assert_eq!(text, "text");
1462 assert_eq!(dest, "url");
1463 assert!(attrs.is_some());
1464 let attrs = attrs.unwrap();
1465 assert_eq!(attrs, "{#link .external target=\"_blank\"}");
1466 }
1467
1468 #[test]
1469 fn test_parse_inline_link_attributes_must_be_adjacent() {
1470 let input = "[text](url) {.class}";
1472 let result = try_parse_inline_link(input, false, LinkScanContext::default());
1473 assert_eq!(result, Some((11, "text", "url", None)));
1474 }
1475
1476 #[test]
1477 fn test_parse_inline_link_with_title_and_attributes() {
1478 let input = r#"[text](url "title"){.external}"#;
1479 let result = try_parse_inline_link(input, false, LinkScanContext::default());
1480 let (len, text, dest, attrs) = result.unwrap();
1481 assert_eq!(len, 30);
1482 assert_eq!(text, "text");
1483 assert_eq!(dest, r#"url "title""#);
1484 assert!(attrs.is_some());
1485 let attrs = attrs.unwrap();
1486 assert_eq!(attrs, "{.external}");
1487 }
1488
1489 #[test]
1491 fn test_parse_reference_link_explicit() {
1492 let input = "[link text][label]";
1493 let result = try_parse_reference_link(input, false, true, LinkScanContext::default());
1494 assert_eq!(result, Some((18, "link text", "label".to_string(), false)));
1495 }
1496
1497 #[test]
1498 fn test_parse_reference_link_implicit() {
1499 let input = "[link text][]";
1500 let result = try_parse_reference_link(input, false, true, LinkScanContext::default());
1501 assert_eq!(result, Some((13, "link text", String::new(), false)));
1502 }
1503
1504 #[test]
1505 fn test_parse_reference_link_explicit_same_label_as_text() {
1506 let input = "[stack][stack]";
1507 let result = try_parse_reference_link(input, false, true, LinkScanContext::default());
1508 assert_eq!(result, Some((14, "stack", "stack".to_string(), false)));
1509 }
1510
1511 #[test]
1512 fn test_parse_reference_link_shortcut() {
1513 let input = "[link text] rest";
1514 let result = try_parse_reference_link(input, true, true, LinkScanContext::default());
1515 assert_eq!(
1516 result,
1517 Some((11, "link text", "link text".to_string(), true))
1518 );
1519 }
1520
1521 #[test]
1522 fn test_parse_reference_link_shortcut_rejects_empty_label() {
1523 let input = "[] rest";
1524 let result = try_parse_reference_link(input, true, true, LinkScanContext::default());
1525 assert_eq!(result, None);
1526 }
1527
1528 #[test]
1529 fn test_parse_reference_link_shortcut_disabled() {
1530 let input = "[link text] rest";
1531 let result = try_parse_reference_link(input, false, true, LinkScanContext::default());
1532 assert_eq!(result, None);
1533 }
1534
1535 #[test]
1536 fn test_parse_reference_link_not_inline_link() {
1537 let input = "[text](url)";
1540 let result = try_parse_reference_link(input, false, true, LinkScanContext::default());
1541 assert_eq!(result, None);
1542 }
1543
1544 #[test]
1545 fn test_parse_reference_link_shortcut_falls_through_inline_link() {
1546 let input = "[text](url)";
1551 let result = try_parse_reference_link(input, true, true, LinkScanContext::default());
1552 assert_eq!(result, Some((6, "text", "text".to_string(), true)));
1553 }
1554
1555 #[test]
1556 fn test_parse_reference_link_with_nested_brackets() {
1557 let input = "[outer [inner] text][ref]";
1558 let result = try_parse_reference_link(input, false, true, LinkScanContext::default());
1559 assert_eq!(
1560 result,
1561 Some((25, "outer [inner] text", "ref".to_string(), false))
1562 );
1563 }
1564
1565 #[test]
1566 fn test_parse_reference_link_label_no_newline() {
1567 let input = "[text][label\nmore]";
1568 let result = try_parse_reference_link(input, false, true, LinkScanContext::default());
1569 assert_eq!(result, None);
1570 }
1571
1572 #[test]
1574 fn test_parse_reference_image_explicit() {
1575 let input = "![alt text][label]";
1576 let result = try_parse_reference_image(input, false);
1577 assert_eq!(result, Some((18, "alt text", "label".to_string(), false)));
1578 }
1579
1580 #[test]
1581 fn test_parse_reference_image_implicit() {
1582 let input = "![alt text][]";
1583 let result = try_parse_reference_image(input, false);
1584 assert_eq!(
1585 result,
1586 Some((13, "alt text", "alt text".to_string(), false))
1587 );
1588 }
1589
1590 #[test]
1591 fn test_parse_reference_image_shortcut() {
1592 let input = "![alt text] rest";
1593 let result = try_parse_reference_image(input, true);
1594 assert_eq!(result, Some((11, "alt text", "alt text".to_string(), true)));
1595 }
1596
1597 #[test]
1598 fn test_parse_reference_image_shortcut_disabled() {
1599 let input = "![alt text] rest";
1600 let result = try_parse_reference_image(input, false);
1601 assert_eq!(result, None);
1602 }
1603
1604 #[test]
1605 fn test_parse_reference_image_not_inline() {
1606 let input = "";
1608 let result = try_parse_reference_image(input, true);
1609 assert_eq!(result, None);
1610 }
1611
1612 #[test]
1613 fn test_parse_reference_image_with_nested_brackets() {
1614 let input = "![alt [nested] text][ref]";
1615 let result = try_parse_reference_image(input, false);
1616 assert_eq!(
1617 result,
1618 Some((25, "alt [nested] text", "ref".to_string(), false))
1619 );
1620 }
1621
1622 #[test]
1623 fn test_reference_link_label_with_crlf() {
1624 let input = "[foo\r\nbar]";
1626 let result = try_parse_reference_link(input, false, true, LinkScanContext::default());
1627
1628 assert_eq!(
1630 result, None,
1631 "Should not parse reference link with CRLF in label"
1632 );
1633 }
1634
1635 #[test]
1636 fn test_reference_link_label_with_lf() {
1637 let input = "[foo\nbar]";
1639 let result = try_parse_reference_link(input, false, true, LinkScanContext::default());
1640
1641 assert_eq!(
1643 result, None,
1644 "Should not parse reference link with LF in label"
1645 );
1646 }
1647
1648 #[test]
1650 fn test_parse_inline_link_multiline_text() {
1651 let input = "[text on\nline two](url)";
1653 let result = try_parse_inline_link(input, false, LinkScanContext::default());
1654 assert_eq!(
1655 result,
1656 Some((23, "text on\nline two", "url", None)),
1657 "Link text should allow newlines"
1658 );
1659 }
1660
1661 #[test]
1662 fn test_parse_inline_link_multiline_with_formatting() {
1663 let input =
1665 "[A network graph. Different edges\nwith probability](../images/networkfig.png)";
1666 let result = try_parse_inline_link(input, false, LinkScanContext::default());
1667 assert!(result.is_some(), "Link text with newlines should parse");
1668 let (len, text, _dest, _attrs) = result.unwrap();
1669 assert!(text.contains('\n'), "Link text should preserve newline");
1670 assert_eq!(len, input.len());
1671 }
1672
1673 #[test]
1674 fn test_parse_inline_image_multiline_alt() {
1675 let input = "";
1677 let result = try_parse_inline_image(input, LinkScanContext::default());
1678 assert_eq!(
1679 result,
1680 Some((27, "alt on\nline two", "img.png", None)),
1681 "Image alt text should allow newlines"
1682 );
1683 }
1684
1685 #[test]
1686 fn test_parse_inline_image_multiline_with_attributes() {
1687 let input = "{width=70%}";
1689 let result = try_parse_inline_image(input, LinkScanContext::default());
1690 assert!(
1691 result.is_some(),
1692 "Image alt with newlines and attributes should parse"
1693 );
1694 let (len, alt, dest, attrs) = result.unwrap();
1695 assert!(alt.contains('\n'), "Alt text should preserve newline");
1696 assert_eq!(dest, "../images/fig.png");
1697 assert_eq!(attrs, Some("{width=70%}"));
1698 assert_eq!(len, input.len());
1699 }
1700
1701 #[test]
1702 fn test_parse_inline_link_with_attributes_after_newline() {
1703 let input = "[A network graph.](../images/networkfig.png){width=70%}\nA word\n";
1706 let result = try_parse_inline_link(input, false, LinkScanContext::default());
1707 assert!(
1708 result.is_some(),
1709 "Link with attributes should parse even with following text"
1710 );
1711 let (len, text, dest, attrs) = result.unwrap();
1712 assert_eq!(text, "A network graph.");
1713 assert_eq!(dest, "../images/networkfig.png");
1714 assert_eq!(attrs, Some("{width=70%}"), "Attributes should be captured");
1715 assert_eq!(
1716 len, 55,
1717 "Length should include attributes (up to closing brace)"
1718 );
1719 }
1720}