1use super::code_spans::try_parse_code_span;
13use super::core::parse_inline_text;
14use super::inline_html::try_parse_inline_html;
15use crate::options::ParserOptions;
16use crate::syntax::SyntaxKind;
17use rowan::GreenNodeBuilder;
18
19use crate::parser::utils::attributes::try_parse_trailing_attributes;
21
22#[derive(Clone, Copy)]
40pub struct LinkScanContext {
41 pub skip_raw_html: bool,
42 pub skip_autolinks: bool,
43 pub disallow_inner_links: bool,
44 pub dialect: crate::options::Dialect,
48}
49
50impl Default for LinkScanContext {
51 fn default() -> Self {
52 Self {
53 skip_raw_html: false,
54 skip_autolinks: false,
55 disallow_inner_links: false,
56 dialect: crate::options::Dialect::Pandoc,
57 }
58 }
59}
60
61impl LinkScanContext {
62 pub fn from_options(config: &ParserOptions) -> Self {
63 let is_commonmark = config.dialect == crate::options::Dialect::CommonMark;
64 Self {
65 skip_raw_html: config.extensions.raw_html,
66 skip_autolinks: config.extensions.autolinks && is_commonmark,
67 disallow_inner_links: is_commonmark,
68 dialect: config.dialect,
69 }
70 }
71}
72
73fn find_link_close_bracket(text: &str, start: usize, ctx: LinkScanContext) -> Option<usize> {
84 let bytes = text.as_bytes();
85 let mut bracket_depth = 0;
86 let mut escape_next = false;
87 let mut i = start;
88
89 while i < bytes.len() {
90 let b = bytes[i];
91
92 if escape_next {
93 escape_next = false;
94 i += step(text, i);
95 continue;
96 }
97
98 match b {
99 b'\\' => {
100 escape_next = true;
101 i += 1;
102 }
103 b'`' => {
104 if let Some((len, _, _, _)) = try_parse_code_span(&text[i..]) {
105 i += len;
106 } else {
107 i += 1;
108 }
109 }
110 b'<' => {
111 if ctx.skip_autolinks
116 && let Some((len, _)) = try_parse_autolink(&text[i..], true)
117 {
118 i += len;
119 } else if ctx.skip_raw_html
120 && let Some(len) = try_parse_inline_html(&text[i..], ctx.dialect)
121 {
122 i += len;
123 } else {
124 i += 1;
125 }
126 }
127 b'[' => {
128 bracket_depth += 1;
129 i += 1;
130 }
131 b']' => {
132 if bracket_depth == 0 {
133 return Some(i);
134 }
135 bracket_depth -= 1;
136 i += 1;
137 }
138 _ => i += step(text, i),
139 }
140 }
141 None
142}
143
144fn find_dest_close_paren(remaining: &str) -> Option<usize> {
150 let bytes = remaining.as_bytes();
151 let mut paren_depth = 0;
152 let mut escape_next = false;
153 let mut in_quotes = false;
154 let mut in_angle = false;
155 let mut i = 0;
156
157 while i < bytes.len() {
158 let b = bytes[i];
159
160 if escape_next {
161 escape_next = false;
162 i += step(remaining, i);
163 continue;
164 }
165
166 match b {
167 b'\\' => {
168 escape_next = true;
169 i += 1;
170 }
171 b'<' if !in_quotes && !in_angle => {
172 in_angle = true;
173 i += 1;
174 }
175 b'>' if in_angle => {
176 in_angle = false;
177 i += 1;
178 }
179 b'"' if !in_angle => {
180 in_quotes = !in_quotes;
181 i += 1;
182 }
183 b'(' if !in_quotes && !in_angle => {
184 paren_depth += 1;
185 i += 1;
186 }
187 b')' if !in_quotes && !in_angle => {
188 if paren_depth == 0 {
189 return Some(i);
190 }
191 paren_depth -= 1;
192 i += 1;
193 }
194 _ => i += step(remaining, i),
195 }
196 }
197 None
198}
199
/// Returns the UTF-8 length of the character starting at byte offset `i` of
/// `s`, or `1` when `i` is at the end of the string.
///
/// Slicing panics if `i` is past the end or not on a character boundary.
fn step(s: &str, i: usize) -> usize {
    match s[i..].chars().next() {
        Some(ch) => ch.len_utf8(),
        None => 1,
    }
}
206
207fn link_text_contains_inner_link(text: &str, ctx: LinkScanContext, strict_dest: bool) -> bool {
224 let bytes = text.as_bytes();
225 let mut i = 0;
226 let mut escape_next = false;
227 while i < bytes.len() {
228 let b = bytes[i];
229 if escape_next {
230 escape_next = false;
231 i += step(text, i);
232 continue;
233 }
234 match b {
235 b'\\' => {
236 escape_next = true;
237 i += 1;
238 }
239 b'`' => {
240 if let Some((len, _, _, _)) = try_parse_code_span(&text[i..]) {
241 i += len;
242 } else {
243 i += 1;
244 }
245 }
246 b'<' => {
247 if ctx.skip_autolinks
248 && let Some((len, _)) = try_parse_autolink(&text[i..], true)
249 {
250 i += len;
251 } else if ctx.skip_raw_html
252 && let Some(len) = try_parse_inline_html(&text[i..], ctx.dialect)
253 {
254 i += len;
255 } else {
256 i += 1;
257 }
258 }
259 b'!' if i + 1 < bytes.len() && bytes[i + 1] == b'[' => {
260 if let Some((len, alt, _, _)) = try_parse_inline_image(&text[i..], ctx) {
261 if link_text_contains_inner_link(alt, ctx, strict_dest) {
262 return true;
263 }
264 i += len;
265 } else {
266 i += 2;
267 }
268 }
269 b'[' => {
270 if try_parse_inline_link(&text[i..], strict_dest, ctx).is_some() {
271 return true;
272 }
273 i += 1;
274 }
275 _ => i += step(text, i),
276 }
277 }
278 false
279}
280
281pub fn try_parse_inline_image(
290 text: &str,
291 ctx: LinkScanContext,
292) -> Option<(usize, &str, &str, Option<&str>)> {
293 if !text.starts_with("![") {
294 return None;
295 }
296
297 let close_bracket = find_link_close_bracket(text, 2, ctx)?;
299 let alt_text = &text[2..close_bracket];
300
301 let after_bracket = close_bracket + 1;
303 if text.len() <= after_bracket || !text[after_bracket..].starts_with('(') {
304 return None;
305 }
306
307 let dest_start = after_bracket + 1;
309 let remaining = &text[dest_start..];
310
311 let close_paren = find_dest_close_paren(remaining)?;
312 let dest_content = &remaining[..close_paren];
313
314 let after_paren = dest_start + close_paren + 1;
316 let after_close = &text[after_paren..];
317
318 if after_close.starts_with('{') {
320 if let Some(close_brace_pos) = after_close.find('}') {
322 let attr_text = &after_close[..=close_brace_pos];
323 if let Some((_attrs, _)) = try_parse_trailing_attributes(attr_text) {
325 let total_len = after_paren + close_brace_pos + 1;
326 let raw_attrs = attr_text;
328 return Some((total_len, alt_text, dest_content, Some(raw_attrs)));
329 }
330 }
331 }
332
333 let total_len = after_paren;
335 Some((total_len, alt_text, dest_content, None))
336}
337
338pub fn emit_inline_image(
341 builder: &mut GreenNodeBuilder,
342 _text: &str,
343 alt_text: &str,
344 dest: &str,
345 raw_attributes: Option<&str>,
346 config: &ParserOptions,
347 suppress_footnote_refs: bool,
348) {
349 builder.start_node(SyntaxKind::IMAGE_LINK.into());
350
351 builder.start_node(SyntaxKind::IMAGE_LINK_START.into());
353 builder.token(SyntaxKind::IMAGE_LINK_START.into(), "![");
354 builder.finish_node();
355
356 builder.start_node(SyntaxKind::IMAGE_ALT.into());
358 parse_inline_text(builder, alt_text, config, false, suppress_footnote_refs);
361 builder.finish_node();
362
363 builder.token(SyntaxKind::IMAGE_ALT_END.into(), "]");
365
366 builder.token(SyntaxKind::IMAGE_DEST_START.into(), "(");
368
369 builder.start_node(SyntaxKind::LINK_DEST.into());
371 builder.token(SyntaxKind::TEXT.into(), dest);
372 builder.finish_node();
373
374 builder.token(SyntaxKind::IMAGE_DEST_END.into(), ")");
376
377 if let Some(raw_attrs) = raw_attributes {
379 builder.start_node(SyntaxKind::ATTRIBUTE.into());
380 builder.token(SyntaxKind::ATTRIBUTE.into(), raw_attrs);
381 builder.finish_node();
382 }
383
384 builder.finish_node();
385}
386
387pub fn try_parse_autolink(text: &str, is_commonmark: bool) -> Option<(usize, &str)> {
397 if !text.starts_with('<') {
398 return None;
399 }
400
401 let close_pos = text[1..].find('>')?;
402 let content = &text[1..1 + close_pos];
403
404 if content.is_empty() {
405 return None;
406 }
407 if content.contains(|c: char| c.is_whitespace()) {
408 return None;
409 }
410
411 if is_commonmark {
412 if !is_valid_uri_autolink(content) && !is_valid_email_autolink(content) {
413 return None;
414 }
415 } else if !content.contains(':') && !content.contains('@') {
416 return None;
417 }
418
419 Some((close_pos + 2, content))
420}
421
/// Checks `s` against the CommonMark "absolute URI" shape used by autolinks.
///
/// The scheme must start with an ASCII letter, continue with ASCII letters,
/// digits, `+`, `-` or `.`, and be 2 to 32 characters long. It must be
/// followed by `:`. The remainder may be empty but must not contain ASCII
/// control characters, a space, `<` or `>`.
///
/// The space is rejected here too, so the helper matches the grammar on its
/// own rather than relying on callers to filter whitespace beforehand.
fn is_valid_uri_autolink(s: &str) -> bool {
    let bytes = s.as_bytes();
    if !bytes.first().is_some_and(u8::is_ascii_alphabetic) {
        return false;
    }

    // One leading letter plus every following scheme character.
    let scheme_len = 1 + bytes[1..]
        .iter()
        .take_while(|b| b.is_ascii_alphanumeric() || matches!(**b, b'+' | b'-' | b'.'))
        .count();
    if !(2..=32).contains(&scheme_len) {
        return false;
    }
    if bytes.get(scheme_len) != Some(&b':') {
        return false;
    }

    bytes[scheme_len + 1..]
        .iter()
        .all(|&b| b > 0x20 && b != 0x7f && b != b'<' && b != b'>')
}
452
453fn is_valid_email_autolink(s: &str) -> bool {
457 let Some(at) = s.find('@') else {
458 return false;
459 };
460 let local = &s[..at];
461 let domain = &s[at + 1..];
462 if local.is_empty() || !local.bytes().all(is_email_local_byte) {
463 return false;
464 }
465 if domain.is_empty() {
466 return false;
467 }
468 domain.split('.').all(is_valid_email_label)
469}
470
/// Returns `true` for bytes permitted in the local part of an e-mail
/// autolink: ASCII letters, ASCII digits, and the punctuation
/// ``.!#$%&'*+/=?^_`{|}~-``.
fn is_email_local_byte(b: u8) -> bool {
    const PUNCTUATION: &[u8] = b".!#$%&'*+/=?^_`{|}~-";
    b.is_ascii_alphanumeric() || PUNCTUATION.contains(&b)
}
499
/// Validates one `.`-separated label of an e-mail autolink domain.
///
/// A label is 1 to 63 bytes long, starts and ends with an ASCII letter or
/// digit, and otherwise contains only ASCII letters, digits and `-`.
///
/// A one-byte label such as `a` is valid. The interior check therefore runs
/// over the whole label instead of slicing `1..len - 1`, which would be the
/// inverted range `1..0` for a single byte and panic.
fn is_valid_email_label(label: &str) -> bool {
    let bytes = label.as_bytes();
    let (Some(first), Some(last)) = (bytes.first(), bytes.last()) else {
        return false;
    };

    bytes.len() <= 63
        && first.is_ascii_alphanumeric()
        && last.is_ascii_alphanumeric()
        // First and last are already alphanumeric, so re-checking them
        // against the wider interior set is harmless.
        && bytes.iter().all(|b| b.is_ascii_alphanumeric() || *b == b'-')
}
515
516pub fn emit_autolink(builder: &mut GreenNodeBuilder, _text: &str, url: &str) {
518 builder.start_node(SyntaxKind::AUTO_LINK.into());
519
520 builder.start_node(SyntaxKind::AUTO_LINK_MARKER.into());
522 builder.token(SyntaxKind::AUTO_LINK_MARKER.into(), "<");
523 builder.finish_node();
524
525 builder.token(SyntaxKind::TEXT.into(), url);
527
528 builder.start_node(SyntaxKind::AUTO_LINK_MARKER.into());
530 builder.token(SyntaxKind::AUTO_LINK_MARKER.into(), ">");
531 builder.finish_node();
532
533 builder.finish_node();
534}
535
/// Tries to recognise a bare URI (`scheme:rest`) at the start of `text`.
///
/// Returns `(len, uri)` where `uri == &text[..len]`.
///
/// * The scheme starts with an ASCII letter and continues with ASCII
///   letters, digits, `+`, `-` or `.` up to the first `:`.
/// * The body runs until ASCII whitespace or one of `<`, `>`, `` ` ``, `"`,
///   `'`, and must be non-empty.
/// * Trailing `.`, `,`, `;`, `:`, `)`, `]`, `}` are trimmed from the body,
///   which must stay non-empty.
/// * A result that would end in a backslash is rejected.
pub fn try_parse_bare_uri(text: &str) -> Option<(usize, &str)> {
    if !text.chars().next()?.is_ascii_alphabetic() {
        return None;
    }

    // The first character outside the scheme alphabet has to be the colon.
    let colon = text.find(|c: char| !(c.is_ascii_alphanumeric() || matches!(c, '+' | '-' | '.')))?;
    if !text[colon..].starts_with(':') {
        return None;
    }

    let body_start = colon + 1;
    let body_len = text.as_bytes()[body_start..]
        .iter()
        .take_while(|&&b| {
            !b.is_ascii_whitespace() && !matches!(b, b'<' | b'>' | b'`' | b'"' | b'\'')
        })
        .count();
    if body_len == 0 {
        return None;
    }

    // The scan stops on an ASCII byte or at the end, so this slice is valid.
    let body = &text[body_start..body_start + body_len];
    let body = body.trim_end_matches(['.', ',', ';', ':', ')', ']', '}']);
    if body.is_empty() || body.ends_with('\\') {
        return None;
    }

    let uri_len = body_start + body.len();
    Some((uri_len, &text[..uri_len]))
}
597
598pub fn try_parse_inline_link(
610 text: &str,
611 strict_dest: bool,
612 ctx: LinkScanContext,
613) -> Option<(usize, &str, &str, Option<&str>)> {
614 if !text.starts_with('[') {
615 return None;
616 }
617
618 let close_bracket = find_link_close_bracket(text, 1, ctx)?;
620 let link_text = &text[1..close_bracket];
621
622 let after_bracket = close_bracket + 1;
624 if text.len() <= after_bracket || !text[after_bracket..].starts_with('(') {
625 return None;
626 }
627
628 let dest_start = after_bracket + 1;
630 let remaining = &text[dest_start..];
631
632 let close_paren = find_dest_close_paren(remaining)?;
633 let dest_content = &remaining[..close_paren];
634
635 if strict_dest && !dest_and_title_ok_commonmark(dest_content) {
636 return None;
637 }
638
639 if ctx.disallow_inner_links && link_text_contains_inner_link(link_text, ctx, strict_dest) {
642 return None;
643 }
644
645 let after_paren = dest_start + close_paren + 1;
647 let after_close = &text[after_paren..];
648
649 if after_close.starts_with('{') {
651 if let Some(close_brace_pos) = after_close.find('}') {
653 let attr_text = &after_close[..=close_brace_pos];
654 if let Some((_attrs, _)) = try_parse_trailing_attributes(attr_text) {
656 let total_len = after_paren + close_brace_pos + 1;
657 let raw_attrs = attr_text;
659 return Some((total_len, link_text, dest_content, Some(raw_attrs)));
660 }
661 }
662 }
663
664 let total_len = after_paren;
666 Some((total_len, link_text, dest_content, None))
667}
668
669fn dest_and_title_ok_commonmark(content: &str) -> bool {
678 let trimmed = trim_start_link_ws(content);
679 if trimmed.is_empty() {
680 return true;
681 }
682
683 let after_dest = if let Some(rest) = trimmed.strip_prefix('<') {
684 let mut escape = false;
685 let mut end_byte = None;
686 for (i, c) in rest.char_indices() {
687 if escape {
688 escape = false;
689 continue;
690 }
691 match c {
692 '\\' => escape = true,
693 '\n' | '<' => return false,
694 '>' => {
695 end_byte = Some(i);
696 break;
697 }
698 _ => {}
699 }
700 }
701 match end_byte {
702 Some(e) => &rest[e + 1..],
703 None => return false,
704 }
705 } else {
706 let mut escape = false;
707 let mut depth: i32 = 0;
708 let mut end = trimmed.len();
709 for (i, c) in trimmed.char_indices() {
710 if escape {
711 escape = false;
712 continue;
713 }
714 match c {
715 '\\' => escape = true,
716 ' ' | '\t' | '\n' => {
717 end = i;
718 break;
719 }
720 _ if c.is_ascii_control() => return false,
721 '(' => depth += 1,
722 ')' => {
723 if depth == 0 {
724 end = i;
725 break;
726 }
727 depth -= 1;
728 }
729 _ => {}
730 }
731 }
732 if depth != 0 {
733 return false;
734 }
735 if end == 0 {
736 return false;
738 }
739 &trimmed[end..]
740 };
741
742 let after_dest = trim_start_link_ws(after_dest);
743 if after_dest.is_empty() {
744 return true;
745 }
746
747 let bytes = after_dest.as_bytes();
748 let close = match bytes[0] {
749 b'"' => b'"',
750 b'\'' => b'\'',
751 b'(' => b')',
752 _ => return false,
753 };
754 let opens_paren = bytes[0] == b'(';
755 let mut escape = false;
756 let mut title_close_pos = None;
757 for (i, &b) in after_dest.as_bytes().iter().enumerate().skip(1) {
758 if escape {
759 escape = false;
760 continue;
761 }
762 if b == b'\\' {
763 escape = true;
764 continue;
765 }
766 if opens_paren && b == b'(' {
767 return false;
768 }
769 if b == close {
770 title_close_pos = Some(i);
771 break;
772 }
773 }
774 let close_idx = match title_close_pos {
775 Some(p) => p,
776 None => return false,
777 };
778
779 let after_title = &after_dest[close_idx + 1..];
780 is_link_ws_only(after_title)
781}
782
/// Strips leading spaces, tabs and newlines from `s`.
///
/// Only these three characters are removed; other whitespace such as a
/// carriage return or a non-breaking space is left in place.
#[inline]
fn trim_start_link_ws(s: &str) -> &str {
    // The safe standard-library trim replaces the former manual byte scan
    // and its `from_utf8_unchecked` call.
    s.trim_start_matches([' ', '\t', '\n'])
}
802
/// Returns `true` when `s` is empty or consists solely of spaces, tabs and
/// newlines.
#[inline]
fn is_link_ws_only(s: &str) -> bool {
    s.bytes().all(|byte| matches!(byte, b' ' | b'\t' | b'\n'))
}
809
810pub fn emit_inline_link(
813 builder: &mut GreenNodeBuilder,
814 _text: &str,
815 link_text: &str,
816 dest: &str,
817 raw_attributes: Option<&str>,
818 config: &ParserOptions,
819 suppress_footnote_refs: bool,
820) {
821 builder.start_node(SyntaxKind::LINK.into());
822
823 builder.start_node(SyntaxKind::LINK_START.into());
825 builder.token(SyntaxKind::LINK_START.into(), "[");
826 builder.finish_node();
827
828 builder.start_node(SyntaxKind::LINK_TEXT.into());
834 parse_inline_text(builder, link_text, config, true, suppress_footnote_refs);
835 builder.finish_node();
836
837 builder.token(SyntaxKind::LINK_TEXT_END.into(), "]");
839
840 builder.token(SyntaxKind::LINK_DEST_START.into(), "(");
842
843 builder.start_node(SyntaxKind::LINK_DEST.into());
845 builder.token(SyntaxKind::TEXT.into(), dest);
846 builder.finish_node();
847
848 builder.token(SyntaxKind::LINK_DEST_END.into(), ")");
850
851 if let Some(raw_attrs) = raw_attributes {
853 builder.start_node(SyntaxKind::ATTRIBUTE.into());
854 builder.token(SyntaxKind::ATTRIBUTE.into(), raw_attrs);
855 builder.finish_node();
856 }
857
858 builder.finish_node();
859}
860
861pub fn emit_bare_uri_link(builder: &mut GreenNodeBuilder, uri: &str, _config: &ParserOptions) {
862 builder.start_node(SyntaxKind::LINK.into());
863
864 builder.start_node(SyntaxKind::LINK_START.into());
865 builder.token(SyntaxKind::LINK_START.into(), "[");
866 builder.finish_node();
867
868 builder.start_node(SyntaxKind::LINK_TEXT.into());
869 builder.token(SyntaxKind::TEXT.into(), uri);
870 builder.finish_node();
871
872 builder.token(SyntaxKind::LINK_TEXT_END.into(), "]");
873 builder.token(SyntaxKind::LINK_DEST_START.into(), "(");
874
875 builder.start_node(SyntaxKind::LINK_DEST.into());
876 builder.token(SyntaxKind::TEXT.into(), uri);
877 builder.finish_node();
878
879 builder.token(SyntaxKind::LINK_DEST_END.into(), ")");
880
881 builder.finish_node();
882}
883
884pub fn try_parse_reference_link(
894 text: &str,
895 allow_shortcut: bool,
896 inline_link_attempted: bool,
897 ctx: LinkScanContext,
898) -> Option<(usize, &str, String, bool)> {
899 if !text.starts_with('[') {
900 return None;
901 }
902
903 if text.len() > 1 {
905 let bytes = text.as_bytes();
906 if bytes[1] == b'@' {
907 return None;
908 }
909 if bytes[1] == b'-' && text.len() > 2 && bytes[2] == b'@' {
910 return None;
911 }
912 }
913
914 let close_bracket = find_link_close_bracket(text, 1, ctx)?;
920 let link_text = &text[1..close_bracket];
921
922 if ctx.disallow_inner_links
927 && link_text_contains_inner_link(link_text, ctx, ctx.disallow_inner_links)
928 {
929 return None;
930 }
931
932 let after_bracket = close_bracket + 1;
934
935 if after_bracket < text.len() && text[after_bracket..].starts_with('{') {
938 return None;
939 }
940
941 if after_bracket < text.len()
952 && text[after_bracket..].starts_with('(')
953 && (!allow_shortcut || !inline_link_attempted)
954 {
955 return None;
956 }
957
958 if after_bracket < text.len() && text[after_bracket..].starts_with('[') {
960 let label_start = after_bracket + 1;
962 let mut label_end = None;
963
964 for (i, ch) in text[label_start..].char_indices() {
965 if ch == ']' {
966 label_end = Some(i + label_start);
967 break;
968 }
969 if ch == '\n' {
971 return None;
972 }
973 }
974
975 let label_end = label_end?;
976 let label = &text[label_start..label_end];
977
978 let total_len = label_end + 1;
980
981 if label.is_empty() {
983 return Some((total_len, link_text, String::new(), false));
984 }
985
986 Some((total_len, link_text, label.to_string(), false))
988 } else if allow_shortcut {
989 if link_text.is_empty() {
992 return None;
993 }
994 Some((after_bracket, link_text, link_text.to_string(), true))
995 } else {
996 None
998 }
999}
1000
1001pub fn emit_reference_link(
1004 builder: &mut GreenNodeBuilder,
1005 link_text: &str,
1006 label: &str,
1007 is_shortcut: bool,
1008 config: &ParserOptions,
1009 suppress_footnote_refs: bool,
1010) {
1011 builder.start_node(SyntaxKind::LINK.into());
1012
1013 builder.start_node(SyntaxKind::LINK_START.into());
1015 builder.token(SyntaxKind::LINK_START.into(), "[");
1016 builder.finish_node();
1017
1018 builder.start_node(SyntaxKind::LINK_TEXT.into());
1023 parse_inline_text(builder, link_text, config, true, suppress_footnote_refs);
1024 builder.finish_node();
1025
1026 builder.token(SyntaxKind::TEXT.into(), "]");
1028
1029 if !is_shortcut {
1030 builder.token(SyntaxKind::TEXT.into(), "[");
1032 builder.start_node(SyntaxKind::LINK_REF.into());
1033 if !label.is_empty() {
1036 builder.token(SyntaxKind::TEXT.into(), label);
1037 }
1038 builder.finish_node();
1039 builder.token(SyntaxKind::TEXT.into(), "]");
1040 }
1041 builder.finish_node();
1044}
1045
/// Tries to parse a reference image at the start of `text`:
/// `![alt][label]`, `![alt][]`, or (with `allow_shortcut`) `![alt]`.
///
/// Returns `(total_len, alt_text, label, is_shortcut)`. For `![alt][]` and
/// for a shortcut, the label is a copy of the alt text.
///
/// Returns `None` when:
/// * `text` is shorter than four bytes or does not start with `![`;
/// * the alt brackets are unclosed (nested brackets and backslash escapes
///   are honoured);
/// * nothing follows the closing `]` of the alt text;
/// * the label bracket is unclosed or contains `\n` or `\r`;
/// * no label follows and shortcuts are disabled, or `(` follows.
pub fn try_parse_reference_image(
    text: &str,
    allow_shortcut: bool,
) -> Option<(usize, &str, String, bool)> {
    const ALT_START: usize = 2;

    let bytes = text.as_bytes();
    if bytes.len() < 4 || !text.starts_with("![") {
        return None;
    }

    // Walk to just past the `]` that balances the opening `![`.
    let mut depth = 1;
    let mut cursor = ALT_START;
    while depth > 0 {
        // Running out of input with brackets still open means no image.
        let byte = *bytes.get(cursor)?;
        match byte {
            b'[' => depth += 1,
            b']' => depth -= 1,
            // Skip the byte after a backslash so an escaped bracket is inert.
            b'\\' if cursor + 1 < bytes.len() => cursor += 1,
            _ => {}
        }
        cursor += 1;
    }
    let alt_text = &text[ALT_START..cursor - 1];

    // Something must follow the alt text, even for the shortcut form.
    let next = *bytes.get(cursor)?;

    if next == b'[' {
        let label_start = cursor + 1;
        let label_len = bytes[label_start..]
            .iter()
            .take_while(|&&b| !matches!(b, b']' | b'\n' | b'\r'))
            .count();
        let label_end = label_start + label_len;
        if bytes.get(label_end) != Some(&b']') {
            return None;
        }

        let label_text = &text[label_start..label_end];
        // An empty label falls back to the alt text.
        let label = if label_text.is_empty() {
            alt_text
        } else {
            label_text
        };
        return Some((label_end + 1, alt_text, label.to_string(), false));
    }

    if allow_shortcut && next != b'(' {
        return Some((cursor, alt_text, alt_text.to_string(), true));
    }

    None
}
1127
1128pub fn emit_reference_image(
1130 builder: &mut GreenNodeBuilder,
1131 alt_text: &str,
1132 label: &str,
1133 is_shortcut: bool,
1134 config: &ParserOptions,
1135 suppress_footnote_refs: bool,
1136) {
1137 builder.start_node(SyntaxKind::IMAGE_LINK.into());
1138
1139 builder.start_node(SyntaxKind::IMAGE_LINK_START.into());
1141 builder.token(SyntaxKind::IMAGE_LINK_START.into(), "![");
1142 builder.finish_node();
1143
1144 builder.start_node(SyntaxKind::IMAGE_ALT.into());
1146 parse_inline_text(builder, alt_text, config, false, suppress_footnote_refs);
1147 builder.finish_node();
1148
1149 builder.token(SyntaxKind::TEXT.into(), "]");
1151
1152 if !is_shortcut {
1153 builder.token(SyntaxKind::TEXT.into(), "[");
1155 builder.start_node(SyntaxKind::LINK_REF.into());
1156 if label != alt_text {
1158 builder.token(SyntaxKind::TEXT.into(), label);
1159 }
1160 builder.finish_node();
1161 builder.token(SyntaxKind::TEXT.into(), "]");
1162 }
1163 builder.finish_node();
1166}
1167
1168pub fn emit_unresolved_reference(
1179 builder: &mut GreenNodeBuilder,
1180 is_image: bool,
1181 text_content: &str,
1182 label_suffix: Option<&str>,
1183 config: &ParserOptions,
1184 suppress_footnote_refs: bool,
1185) {
1186 builder.start_node(SyntaxKind::UNRESOLVED_REFERENCE.into());
1187
1188 if is_image {
1189 builder.start_node(SyntaxKind::IMAGE_LINK_START.into());
1190 builder.token(SyntaxKind::IMAGE_LINK_START.into(), "![");
1191 builder.finish_node();
1192 builder.start_node(SyntaxKind::IMAGE_ALT.into());
1193 parse_inline_text(builder, text_content, config, false, suppress_footnote_refs);
1194 builder.finish_node();
1195 } else {
1196 builder.start_node(SyntaxKind::LINK_START.into());
1197 builder.token(SyntaxKind::LINK_START.into(), "[");
1198 builder.finish_node();
1199 builder.start_node(SyntaxKind::LINK_TEXT.into());
1200 parse_inline_text(builder, text_content, config, true, suppress_footnote_refs);
1201 builder.finish_node();
1202 }
1203
1204 builder.token(SyntaxKind::TEXT.into(), "]");
1205
1206 if let Some(suffix) = label_suffix {
1207 debug_assert!(suffix.starts_with('[') && suffix.ends_with(']'));
1211 builder.token(SyntaxKind::TEXT.into(), "[");
1212 let label = &suffix[1..suffix.len() - 1];
1213 builder.start_node(SyntaxKind::LINK_REF.into());
1214 if !label.is_empty() {
1215 builder.token(SyntaxKind::TEXT.into(), label);
1216 }
1217 builder.finish_node();
1218 builder.token(SyntaxKind::TEXT.into(), "]");
1219 }
1220
1221 builder.finish_node();
1222}
1223
1224#[cfg(test)]
1225mod tests {
1226 use super::*;
1227
1228 #[test]
1229 fn test_parse_autolink_url() {
1230 let input = "<https://example.com>";
1231 assert_eq!(
1232 try_parse_autolink(input, false),
1233 Some((21, "https://example.com"))
1234 );
1235 assert_eq!(
1236 try_parse_autolink(input, true),
1237 Some((21, "https://example.com"))
1238 );
1239 }
1240
1241 #[test]
1242 fn test_parse_autolink_email() {
1243 let input = "<user@example.com>";
1244 assert_eq!(
1245 try_parse_autolink(input, false),
1246 Some((18, "user@example.com"))
1247 );
1248 assert_eq!(
1249 try_parse_autolink(input, true),
1250 Some((18, "user@example.com"))
1251 );
1252 }
1253
1254 #[test]
1255 fn test_parse_autolink_no_close() {
1256 let input = "<https://example.com";
1257 assert_eq!(try_parse_autolink(input, false), None);
1258 assert_eq!(try_parse_autolink(input, true), None);
1259 }
1260
1261 #[test]
1262 fn test_parse_autolink_with_space() {
1263 let input = "<https://example.com >";
1264 assert_eq!(try_parse_autolink(input, false), None);
1265 assert_eq!(try_parse_autolink(input, true), None);
1266 }
1267
1268 #[test]
1269 fn test_parse_autolink_not_url_or_email() {
1270 let input = "<notaurl>";
1271 assert_eq!(try_parse_autolink(input, false), None);
1272 assert_eq!(try_parse_autolink(input, true), None);
1273 }
1274
1275 #[test]
1276 fn test_parse_autolink_commonmark_strict_scheme() {
1277 let input = "<m:abc>";
1280 assert_eq!(try_parse_autolink(input, true), None);
1281 assert_eq!(try_parse_autolink(input, false), Some((7, "m:abc")));
1282 }
1283
1284 #[test]
1285 fn test_parse_autolink_commonmark_email_disallows_backslash() {
1286 let input = "<foo\\+@bar.example.com>";
1287 assert_eq!(try_parse_autolink(input, true), None);
1288 assert_eq!(
1289 try_parse_autolink(input, false),
1290 Some((23, "foo\\+@bar.example.com"))
1291 );
1292 }
1293
1294 #[test]
1295 fn test_parse_inline_link_simple() {
1296 let input = "[text](url)";
1297 let result = try_parse_inline_link(input, false, LinkScanContext::default());
1298 assert_eq!(result, Some((11, "text", "url", None)));
1299 }
1300
1301 #[test]
1302 fn test_parse_inline_link_with_title() {
1303 let input = r#"[text](url "title")"#;
1304 let result = try_parse_inline_link(input, false, LinkScanContext::default());
1305 assert_eq!(result, Some((19, "text", r#"url "title""#, None)));
1306 }
1307
1308 #[test]
1309 fn test_parse_inline_link_with_nested_brackets() {
1310 let input = "[outer [inner] text](url)";
1311 let result = try_parse_inline_link(input, false, LinkScanContext::default());
1312 assert_eq!(result, Some((25, "outer [inner] text", "url", None)));
1313 }
1314
1315 #[test]
1316 fn test_parse_inline_link_no_space_between_brackets_and_parens() {
1317 let input = "[text] (url)";
1318 let result = try_parse_inline_link(input, false, LinkScanContext::default());
1319 assert_eq!(result, None);
1320 }
1321
1322 #[test]
1323 fn test_parse_inline_link_no_closing_bracket() {
1324 let input = "[text(url)";
1325 let result = try_parse_inline_link(input, false, LinkScanContext::default());
1326 assert_eq!(result, None);
1327 }
1328
1329 #[test]
1330 fn test_parse_inline_link_no_closing_paren() {
1331 let input = "[text](url";
1332 let result = try_parse_inline_link(input, false, LinkScanContext::default());
1333 assert_eq!(result, None);
1334 }
1335
1336 #[test]
1337 fn test_parse_inline_link_escaped_bracket() {
1338 let input = r"[text\]more](url)";
1339 let result = try_parse_inline_link(input, false, LinkScanContext::default());
1340 assert_eq!(result, Some((17, r"text\]more", "url", None)));
1341 }
1342
1343 #[test]
1344 fn test_parse_inline_link_parens_in_url() {
1345 let input = "[text](url(with)parens)";
1346 let result = try_parse_inline_link(input, false, LinkScanContext::default());
1347 assert_eq!(result, Some((23, "text", "url(with)parens", None)));
1348 }
1349
1350 #[test]
1351 fn test_parse_inline_image_simple() {
1352 let input = "";
1353 let result = try_parse_inline_image(input, LinkScanContext::default());
1354 assert_eq!(result, Some((17, "alt", "image.jpg", None)));
1355 }
1356
1357 #[test]
1358 fn test_parse_inline_image_with_title() {
1359 let input = r#""#;
1360 let result = try_parse_inline_image(input, LinkScanContext::default());
1361 assert_eq!(result, Some((27, "alt", r#"image.jpg "A title""#, None)));
1362 }
1363
1364 #[test]
1365 fn test_parse_inline_image_with_nested_brackets() {
1366 let input = "![outer [inner] alt](image.jpg)";
1367 let result = try_parse_inline_image(input, LinkScanContext::default());
1368 assert_eq!(result, Some((31, "outer [inner] alt", "image.jpg", None)));
1369 }
1370
1371 #[test]
1372 fn test_parse_bare_uri_rejects_dangling_backslash_after_trim() {
1373 let input = r"a:\]";
1374 let result = try_parse_bare_uri(input);
1375 assert_eq!(result, None);
1376 }
1377
1378 #[test]
1379 fn test_parse_inline_image_no_space_between_brackets_and_parens() {
1380 let input = "![alt] (image.jpg)";
1381 let result = try_parse_inline_image(input, LinkScanContext::default());
1382 assert_eq!(result, None);
1383 }
1384
1385 #[test]
1386 fn test_parse_inline_image_no_closing_bracket() {
1387 let input = "![alt(image.jpg)";
1388 let result = try_parse_inline_image(input, LinkScanContext::default());
1389 assert_eq!(result, None);
1390 }
1391
1392 #[test]
1393 fn test_parse_inline_image_no_closing_paren() {
1394 let input = ");
1396 assert_eq!(result, None);
1397 }
1398
1399 #[test]
1400 fn test_parse_inline_image_with_simple_class() {
1401 let input = "{.large}";
1402 let result = try_parse_inline_image(input, LinkScanContext::default());
1403 let (len, alt, dest, attrs) = result.unwrap();
1404 assert_eq!(len, 23);
1405 assert_eq!(alt, "alt");
1406 assert_eq!(dest, "img.png");
1407 assert!(attrs.is_some());
1408 let attrs = attrs.unwrap();
1409 assert_eq!(attrs, "{.large}");
1410 }
1411
1412 #[test]
1413 fn test_parse_inline_image_with_id() {
1414 let input = "{#fig-1}";
1415 let result = try_parse_inline_image(input, LinkScanContext::default());
1416 let (len, alt, dest, attrs) = result.unwrap();
1417 assert_eq!(len, 29);
1418 assert_eq!(alt, "Figure 1");
1419 assert_eq!(dest, "fig1.png");
1420 assert!(attrs.is_some());
1421 let attrs = attrs.unwrap();
1422 assert_eq!(attrs, "{#fig-1}");
1423 }
1424
1425 #[test]
1426 fn test_parse_inline_image_with_full_attributes() {
1427 let input = "{#fig .large width=\"80%\"}";
1428 let result = try_parse_inline_image(input, LinkScanContext::default());
1429 let (len, alt, dest, attrs) = result.unwrap();
1430 assert_eq!(len, 40);
1431 assert_eq!(alt, "alt");
1432 assert_eq!(dest, "img.png");
1433 assert!(attrs.is_some());
1434 let attrs = attrs.unwrap();
1435 assert_eq!(attrs, "{#fig .large width=\"80%\"}");
1436 }
1437
1438 #[test]
1439 fn test_parse_inline_image_attributes_must_be_adjacent() {
1440 let input = " {.large}";
1442 let result = try_parse_inline_image(input, LinkScanContext::default());
1443 assert_eq!(result, Some((15, "alt", "img.png", None)));
1444 }
1445
1446 #[test]
1448 fn test_parse_inline_link_with_id() {
1449 let input = "[text](url){#link-1}";
1450 let result = try_parse_inline_link(input, false, LinkScanContext::default());
1451 let (len, text, dest, attrs) = result.unwrap();
1452 assert_eq!(len, 20);
1453 assert_eq!(text, "text");
1454 assert_eq!(dest, "url");
1455 assert!(attrs.is_some());
1456 let attrs = attrs.unwrap();
1457 assert_eq!(attrs, "{#link-1}");
1458 }
1459
1460 #[test]
1461 fn test_parse_inline_link_with_full_attributes() {
1462 let input = "[text](url){#link .external target=\"_blank\"}";
1463 let result = try_parse_inline_link(input, false, LinkScanContext::default());
1464 let (len, text, dest, attrs) = result.unwrap();
1465 assert_eq!(len, 44);
1466 assert_eq!(text, "text");
1467 assert_eq!(dest, "url");
1468 assert!(attrs.is_some());
1469 let attrs = attrs.unwrap();
1470 assert_eq!(attrs, "{#link .external target=\"_blank\"}");
1471 }
1472
// Whitespace between `)` and `{` means the brace block is not attached to the
// link: only `[text](url)` (11 bytes) is consumed.
#[test]
fn test_parse_inline_link_attributes_must_be_adjacent() {
    let source = "[text](url) {.class}";
    let (consumed, link_text, target, attributes) =
        try_parse_inline_link(source, false, LinkScanContext::default()).unwrap();
    assert_eq!(consumed, 11);
    assert_eq!(link_text, "text");
    assert_eq!(target, "url");
    assert!(attributes.is_none());
}
1480
// The destination slice keeps the quoted title as written, and the attribute
// block after `)` is still picked up.
#[test]
fn test_parse_inline_link_with_title_and_attributes() {
    let source = r#"[text](url "title"){.external}"#;
    let parsed = try_parse_inline_link(source, false, LinkScanContext::default());
    assert_eq!(
        parsed,
        Some((30, "text", r#"url "title""#, Some("{.external}")))
    );
}
1493
// `[text][label]` yields the link text, the explicit label, and a `false`
// shortcut flag; the whole 18-byte input is consumed.
#[test]
fn test_parse_reference_link_explicit() {
    let source = "[link text][label]";
    let (consumed, link_text, label, is_shortcut) =
        try_parse_reference_link(source, false, true, LinkScanContext::default()).unwrap();
    assert_eq!(consumed, 18);
    assert_eq!(link_text, "link text");
    assert_eq!(label, "label");
    assert!(!is_shortcut);
}
1501
// `[text][]` is accepted, and the returned label is the empty string.
#[test]
fn test_parse_reference_link_implicit() {
    let source = "[link text][]";
    let (consumed, link_text, label, is_shortcut) =
        try_parse_reference_link(source, false, true, LinkScanContext::default()).unwrap();
    assert_eq!(consumed, 13);
    assert_eq!(link_text, "link text");
    assert!(label.is_empty());
    assert!(!is_shortcut);
}
1508
// An explicit label that happens to equal the link text is still reported as
// an explicit (non-shortcut) reference.
#[test]
fn test_parse_reference_link_explicit_same_label_as_text() {
    let source = "[stack][stack]";
    let (consumed, link_text, label, is_shortcut) =
        try_parse_reference_link(source, false, true, LinkScanContext::default()).unwrap();
    assert_eq!(consumed, 14);
    assert_eq!(link_text, "stack");
    assert_eq!(label, "stack");
    assert!(!is_shortcut);
}
1515
// With the second argument set to `true`, a lone `[text]` is matched: the
// label equals the link text, the shortcut flag is set, and only the
// bracketed part (11 bytes) is consumed.
#[test]
fn test_parse_reference_link_shortcut() {
    let source = "[link text] rest";
    let (consumed, link_text, label, is_shortcut) =
        try_parse_reference_link(source, true, true, LinkScanContext::default()).unwrap();
    assert_eq!(consumed, 11);
    assert_eq!(link_text, "link text");
    assert_eq!(label, "link text");
    assert!(is_shortcut);
}
1525
// Empty brackets are not a shortcut reference, even when shortcuts are enabled.
#[test]
fn test_parse_reference_link_shortcut_rejects_empty_label() {
    let source = "[] rest";
    let outcome = try_parse_reference_link(source, true, true, LinkScanContext::default());
    assert!(outcome.is_none());
}
1532
// With the second argument set to `false`, a lone `[text]` is not matched.
#[test]
fn test_parse_reference_link_shortcut_disabled() {
    let source = "[link text] rest";
    let outcome = try_parse_reference_link(source, false, true, LinkScanContext::default());
    assert!(outcome.is_none());
}
1539
// An inline link (`[text](url)`) is not reported as a reference link when the
// second argument is `false`.
#[test]
fn test_parse_reference_link_not_inline_link() {
    let source = "[text](url)";
    let outcome = try_parse_reference_link(source, false, true, LinkScanContext::default());
    assert!(outcome.is_none());
}
1548
// With the second argument set to `true`, the reference parser matches the
// leading `[text]` of an inline link as a shortcut reference: 6 bytes are
// consumed and `(url)` is left over.
#[test]
fn test_parse_reference_link_shortcut_falls_through_inline_link() {
    let source = "[text](url)";
    let (consumed, link_text, label, is_shortcut) =
        try_parse_reference_link(source, true, true, LinkScanContext::default()).unwrap();
    assert_eq!(consumed, 6);
    assert_eq!(link_text, "text");
    assert_eq!(label, "text");
    assert!(is_shortcut);
}
1559
// Balanced brackets inside the link text are kept as part of the text; the
// label is taken from the second bracket pair.
#[test]
fn test_parse_reference_link_with_nested_brackets() {
    let source = "[outer [inner] text][ref]";
    let (consumed, link_text, label, is_shortcut) =
        try_parse_reference_link(source, false, true, LinkScanContext::default()).unwrap();
    assert_eq!(consumed, 25);
    assert_eq!(link_text, "outer [inner] text");
    assert_eq!(label, "ref");
    assert!(!is_shortcut);
}
1569
// A reference whose label part contains a newline is not parsed.
#[test]
fn test_parse_reference_link_label_no_newline() {
    let source = "[text][label\nmore]";
    let outcome = try_parse_reference_link(source, false, true, LinkScanContext::default());
    assert!(outcome.is_none());
}
1576
// `![alt][label]` yields the alt text, the explicit label, and a `false`
// shortcut flag; all 18 bytes are consumed.
#[test]
fn test_parse_reference_image_explicit() {
    let source = "![alt text][label]";
    let (consumed, alt, label, is_shortcut) = try_parse_reference_image(source, false).unwrap();
    assert_eq!(consumed, 18);
    assert_eq!(alt, "alt text");
    assert_eq!(label, "label");
    assert!(!is_shortcut);
}
1584
// `![alt][]` is accepted; the returned label is the alt text itself and the
// shortcut flag stays `false`.
#[test]
fn test_parse_reference_image_implicit() {
    let source = "![alt text][]";
    let (consumed, alt, label, is_shortcut) = try_parse_reference_image(source, false).unwrap();
    assert_eq!(consumed, 13);
    assert_eq!(alt, "alt text");
    assert_eq!(label, "alt text");
    assert!(!is_shortcut);
}
1594
// With shortcuts allowed, a lone `![alt]` is matched: the label equals the
// alt text and only the bracketed part (11 bytes) is consumed.
#[test]
fn test_parse_reference_image_shortcut() {
    let source = "![alt text] rest";
    let (consumed, alt, label, is_shortcut) = try_parse_reference_image(source, true).unwrap();
    assert_eq!(consumed, 11);
    assert_eq!(alt, "alt text");
    assert_eq!(label, "alt text");
    assert!(is_shortcut);
}
1601
// With the second argument set to `false`, a lone `![alt]` is not matched.
#[test]
fn test_parse_reference_image_shortcut_disabled() {
    let source = "![alt text] rest";
    let outcome = try_parse_reference_image(source, false);
    assert!(outcome.is_none());
}
1608
// An inline image must not be reported as a reference image, even with
// shortcut references allowed.
#[test]
fn test_parse_reference_image_not_inline() {
    // The input has to be an actual inline image; an empty string would make
    // this test pass without exercising the inline-image check at all.
    let input = "![alt](url)";
    let result = try_parse_reference_image(input, true);
    assert_eq!(result, None);
}
1616
// Balanced brackets inside the alt text are kept as part of the alt text; the
// label is taken from the second bracket pair.
#[test]
fn test_parse_reference_image_with_nested_brackets() {
    let source = "![alt [nested] text][ref]";
    let (consumed, alt, label, is_shortcut) = try_parse_reference_image(source, false).unwrap();
    assert_eq!(consumed, 25);
    assert_eq!(alt, "alt [nested] text");
    assert_eq!(label, "ref");
    assert!(!is_shortcut);
}
1626
// A bracketed span containing CRLF must not be parsed as a reference link.
// NOTE(review): the second argument is `false` here, and
// `test_parse_reference_link_shortcut_disabled` shows a lone `[..]` is rejected
// in that mode anyway - confirm this test really exercises the newline check.
#[test]
fn test_reference_link_label_with_crlf() {
    let source = "[foo\r\nbar]";
    let outcome = try_parse_reference_link(source, false, true, LinkScanContext::default());

    assert!(
        outcome.is_none(),
        "Should not parse reference link with CRLF in label"
    );
}
1639
// A bracketed span containing LF must not be parsed as a reference link.
// NOTE(review): the second argument is `false` here, and
// `test_parse_reference_link_shortcut_disabled` shows a lone `[..]` is rejected
// in that mode anyway - confirm this test really exercises the newline check.
#[test]
fn test_reference_link_label_with_lf() {
    let source = "[foo\nbar]";
    let outcome = try_parse_reference_link(source, false, true, LinkScanContext::default());

    assert!(
        outcome.is_none(),
        "Should not parse reference link with LF in label"
    );
}
1652
// Link text may span a line break; the newline is preserved in the returned
// text and the whole 23-byte input is consumed.
#[test]
fn test_parse_inline_link_multiline_text() {
    let source = "[text on\nline two](url)";
    let expected = Some((23, "text on\nline two", "url", None));
    let parsed = try_parse_inline_link(source, false, LinkScanContext::default());
    assert_eq!(parsed, expected, "Link text should allow newlines");
}
1665
// A longer link whose text wraps across two lines still parses as a single
// link covering the entire input.
#[test]
fn test_parse_inline_link_multiline_with_formatting() {
    let source =
        "[A network graph. Different edges\nwith probability](../images/networkfig.png)";
    let parsed = try_parse_inline_link(source, false, LinkScanContext::default());
    assert!(parsed.is_some(), "Link text with newlines should parse");

    let (consumed, link_text, _target, _attributes) = parsed.unwrap();
    assert!(link_text.contains('\n'), "Link text should preserve newline");
    assert_eq!(consumed, source.len());
}
1677
// Image alt text may span a line break; the newline is preserved in the
// returned alt text.
#[test]
fn test_parse_inline_image_multiline_alt() {
    // 27 bytes in total: `![` (2) + alt (15, including the newline) + `](` (2)
    // + `img.png` (7) + `)` (1). An empty input could never produce this result.
    let input = "![alt on\nline two](img.png)";
    let result = try_parse_inline_image(input, LinkScanContext::default());
    assert_eq!(
        result,
        Some((27, "alt on\nline two", "img.png", None)),
        "Image alt text should allow newlines"
    );
}
1689
// An image whose alt text wraps across lines and that is directly followed by
// an attribute block parses as one image covering the entire input.
#[test]
fn test_parse_inline_image_multiline_with_attributes() {
    // The input needs the image itself in front of the attribute block. The
    // assertions below only require a newline in the alt text and the
    // `../images/fig.png` destination; the alt wording is otherwise arbitrary.
    let input = "![A network graph. Different edges\nwith probability](../images/fig.png){width=70%}";
    let result = try_parse_inline_image(input, LinkScanContext::default());
    assert!(
        result.is_some(),
        "Image alt with newlines and attributes should parse"
    );
    let (len, alt, dest, attrs) = result.unwrap();
    assert!(alt.contains('\n'), "Alt text should preserve newline");
    assert_eq!(dest, "../images/fig.png");
    assert_eq!(attrs, Some("{width=70%}"));
    assert_eq!(len, input.len());
}
1705
// Text after the attribute block (here a newline and another line) does not
// stop the attributes from being captured, and is not counted in the consumed
// length.
#[test]
fn test_parse_inline_link_with_attributes_after_newline() {
    let source = "[A network graph.](../images/networkfig.png){width=70%}\nA word\n";
    let parsed = try_parse_inline_link(source, false, LinkScanContext::default());
    assert!(
        parsed.is_some(),
        "Link with attributes should parse even with following text"
    );

    let (consumed, link_text, target, attributes) = parsed.unwrap();
    assert_eq!(link_text, "A network graph.");
    assert_eq!(target, "../images/networkfig.png");
    assert_eq!(
        attributes,
        Some("{width=70%}"),
        "Attributes should be captured"
    );
    // 55 = 18 (`[A network graph.]`) + 26 (`(../images/networkfig.png)`) + 11 (`{width=70%}`)
    assert_eq!(
        consumed, 55,
        "Length should include attributes (up to closing brace)"
    );
}
1725}