1use super::code_spans::try_parse_code_span;
13use super::core::parse_inline_text;
14use super::inline_html::try_parse_inline_html;
15use crate::options::ParserOptions;
16use crate::syntax::SyntaxKind;
17use rowan::GreenNodeBuilder;
18
19use crate::parser::utils::attributes::{emit_attribute_node, try_parse_trailing_attributes};
21
22#[derive(Clone, Copy)]
40pub struct LinkScanContext {
41 pub skip_raw_html: bool,
42 pub skip_autolinks: bool,
43 pub disallow_inner_links: bool,
44 pub dialect: crate::options::Dialect,
48}
49
50impl Default for LinkScanContext {
51 fn default() -> Self {
52 Self {
53 skip_raw_html: false,
54 skip_autolinks: false,
55 disallow_inner_links: false,
56 dialect: crate::options::Dialect::Pandoc,
57 }
58 }
59}
60
61impl LinkScanContext {
62 pub fn from_options(config: &ParserOptions) -> Self {
63 let is_commonmark = config.dialect == crate::options::Dialect::CommonMark;
64 Self {
65 skip_raw_html: config.extensions.raw_html,
66 skip_autolinks: config.extensions.autolinks && is_commonmark,
67 disallow_inner_links: is_commonmark,
68 dialect: config.dialect,
69 }
70 }
71}
72
73fn find_link_close_bracket(text: &str, start: usize, ctx: LinkScanContext) -> Option<usize> {
84 let bytes = text.as_bytes();
85 let mut bracket_depth = 0;
86 let mut escape_next = false;
87 let mut i = start;
88
89 while i < bytes.len() {
90 let b = bytes[i];
91
92 if escape_next {
93 escape_next = false;
94 i += step(text, i);
95 continue;
96 }
97
98 match b {
99 b'\\' => {
100 escape_next = true;
101 i += 1;
102 }
103 b'`' => {
104 if let Some((len, _, _, _)) = try_parse_code_span(&text[i..]) {
105 i += len;
106 } else {
107 i += 1;
108 }
109 }
110 b'<' => {
111 if ctx.skip_autolinks
116 && let Some((len, _)) = try_parse_autolink(&text[i..], true)
117 {
118 i += len;
119 } else if ctx.skip_raw_html
120 && let Some(len) = try_parse_inline_html(&text[i..], ctx.dialect)
121 {
122 i += len;
123 } else {
124 i += 1;
125 }
126 }
127 b'[' => {
128 bracket_depth += 1;
129 i += 1;
130 }
131 b']' => {
132 if bracket_depth == 0 {
133 return Some(i);
134 }
135 bracket_depth -= 1;
136 i += 1;
137 }
138 _ => i += step(text, i),
139 }
140 }
141 None
142}
143
144fn find_dest_close_paren(remaining: &str) -> Option<usize> {
150 let bytes = remaining.as_bytes();
151 let mut paren_depth = 0;
152 let mut escape_next = false;
153 let mut in_quotes = false;
154 let mut in_angle = false;
155 let mut i = 0;
156
157 while i < bytes.len() {
158 let b = bytes[i];
159
160 if escape_next {
161 escape_next = false;
162 i += step(remaining, i);
163 continue;
164 }
165
166 match b {
167 b'\\' => {
168 escape_next = true;
169 i += 1;
170 }
171 b'<' if !in_quotes && !in_angle => {
172 in_angle = true;
173 i += 1;
174 }
175 b'>' if in_angle => {
176 in_angle = false;
177 i += 1;
178 }
179 b'"' if !in_angle => {
180 in_quotes = !in_quotes;
181 i += 1;
182 }
183 b'(' if !in_quotes && !in_angle => {
184 paren_depth += 1;
185 i += 1;
186 }
187 b')' if !in_quotes && !in_angle => {
188 if paren_depth == 0 {
189 return Some(i);
190 }
191 paren_depth -= 1;
192 i += 1;
193 }
194 _ => i += step(remaining, i),
195 }
196 }
197 None
198}
199
/// Byte length of the character starting at byte offset `i` of `s`, or `1`
/// when `i` is the end of the string.
///
/// Slicing panics if `i` is not on a character boundary.
fn step(s: &str, i: usize) -> usize {
    match s[i..].chars().next() {
        Some(ch) => ch.len_utf8(),
        None => 1,
    }
}
206
207fn link_text_contains_inner_link(text: &str, ctx: LinkScanContext, strict_dest: bool) -> bool {
224 let bytes = text.as_bytes();
225 let mut i = 0;
226 let mut escape_next = false;
227 while i < bytes.len() {
228 let b = bytes[i];
229 if escape_next {
230 escape_next = false;
231 i += step(text, i);
232 continue;
233 }
234 match b {
235 b'\\' => {
236 escape_next = true;
237 i += 1;
238 }
239 b'`' => {
240 if let Some((len, _, _, _)) = try_parse_code_span(&text[i..]) {
241 i += len;
242 } else {
243 i += 1;
244 }
245 }
246 b'<' => {
247 if ctx.skip_autolinks
248 && let Some((len, _)) = try_parse_autolink(&text[i..], true)
249 {
250 i += len;
251 } else if ctx.skip_raw_html
252 && let Some(len) = try_parse_inline_html(&text[i..], ctx.dialect)
253 {
254 i += len;
255 } else {
256 i += 1;
257 }
258 }
259 b'!' if i + 1 < bytes.len() && bytes[i + 1] == b'[' => {
260 if let Some((len, alt, _, _)) = try_parse_inline_image(&text[i..], ctx) {
261 if link_text_contains_inner_link(alt, ctx, strict_dest) {
262 return true;
263 }
264 i += len;
265 } else {
266 i += 2;
267 }
268 }
269 b'[' => {
270 if try_parse_inline_link(&text[i..], strict_dest, ctx).is_some() {
271 return true;
272 }
273 i += 1;
274 }
275 _ => i += step(text, i),
276 }
277 }
278 false
279}
280
281pub fn try_parse_inline_image(
290 text: &str,
291 ctx: LinkScanContext,
292) -> Option<(usize, &str, &str, Option<&str>)> {
293 if !text.starts_with("![") {
294 return None;
295 }
296
297 let close_bracket = find_link_close_bracket(text, 2, ctx)?;
299 let alt_text = &text[2..close_bracket];
300
301 let after_bracket = close_bracket + 1;
303 if text.len() <= after_bracket || !text[after_bracket..].starts_with('(') {
304 return None;
305 }
306
307 let dest_start = after_bracket + 1;
309 let remaining = &text[dest_start..];
310
311 let close_paren = find_dest_close_paren(remaining)?;
312 let dest_content = &remaining[..close_paren];
313
314 let after_paren = dest_start + close_paren + 1;
316 let after_close = &text[after_paren..];
317
318 if after_close.starts_with('{') {
320 if let Some(close_brace_pos) = after_close.find('}') {
322 let attr_text = &after_close[..=close_brace_pos];
323 if let Some((_attrs, _)) = try_parse_trailing_attributes(attr_text) {
325 let total_len = after_paren + close_brace_pos + 1;
326 let raw_attrs = attr_text;
328 return Some((total_len, alt_text, dest_content, Some(raw_attrs)));
329 }
330 }
331 }
332
333 let total_len = after_paren;
335 Some((total_len, alt_text, dest_content, None))
336}
337
338pub fn emit_inline_image(
341 builder: &mut GreenNodeBuilder,
342 _text: &str,
343 alt_text: &str,
344 dest: &str,
345 raw_attributes: Option<&str>,
346 config: &ParserOptions,
347 suppress_footnote_refs: bool,
348) {
349 builder.start_node(SyntaxKind::IMAGE_LINK.into());
350
351 builder.start_node(SyntaxKind::IMAGE_LINK_START.into());
353 builder.token(SyntaxKind::IMAGE_LINK_START.into(), "![");
354 builder.finish_node();
355
356 builder.start_node(SyntaxKind::IMAGE_ALT.into());
358 parse_inline_text(builder, alt_text, config, false, suppress_footnote_refs);
361 builder.finish_node();
362
363 builder.token(SyntaxKind::IMAGE_ALT_END.into(), "]");
365
366 builder.token(SyntaxKind::IMAGE_DEST_START.into(), "(");
368
369 builder.start_node(SyntaxKind::LINK_DEST.into());
371 builder.token(SyntaxKind::TEXT.into(), dest);
372 builder.finish_node();
373
374 builder.token(SyntaxKind::IMAGE_DEST_END.into(), ")");
376
377 if let Some(raw_attrs) = raw_attributes {
379 emit_attribute_node(builder, raw_attrs);
380 }
381
382 builder.finish_node();
383}
384
385pub fn try_parse_autolink(text: &str, is_commonmark: bool) -> Option<(usize, &str)> {
395 if !text.starts_with('<') {
396 return None;
397 }
398
399 let close_pos = text[1..].find('>')?;
400 let content = &text[1..1 + close_pos];
401
402 if content.is_empty() {
403 return None;
404 }
405 if content.contains(|c: char| c.is_whitespace()) {
406 return None;
407 }
408
409 if is_commonmark {
410 if !is_valid_uri_autolink(content) && !is_valid_email_autolink(content) {
411 return None;
412 }
413 } else if !content.contains(':') && !content.contains('@') {
414 return None;
415 }
416
417 Some((close_pos + 2, content))
418}
419
/// Checks whether `s` has the shape of a URI autolink.
///
/// That shape is a scheme of 2 to 32 bytes (an ASCII letter followed by ASCII
/// letters, digits, `+`, `-` or `.`), then `:`, then any bytes other than
/// those below 0x20, 0x7f, `<` and `>`.
fn is_valid_uri_autolink(s: &str) -> bool {
    let bytes = s.as_bytes();
    match bytes.first() {
        Some(first) if first.is_ascii_alphabetic() => {}
        _ => return false,
    }

    let scheme_len = 1 + bytes[1..]
        .iter()
        .take_while(|b| b.is_ascii_alphanumeric() || matches!(**b, b'+' | b'-' | b'.'))
        .count();
    if scheme_len < 2 || scheme_len > 32 {
        return false;
    }
    if bytes.get(scheme_len) != Some(&b':') {
        return false;
    }

    bytes[scheme_len + 1..]
        .iter()
        .all(|&b| b >= 0x20 && b != 0x7f && b != b'<' && b != b'>')
}
450
451fn is_valid_email_autolink(s: &str) -> bool {
455 let Some(at) = s.find('@') else {
456 return false;
457 };
458 let local = &s[..at];
459 let domain = &s[at + 1..];
460 if local.is_empty() || !local.bytes().all(is_email_local_byte) {
461 return false;
462 }
463 if domain.is_empty() {
464 return false;
465 }
466 domain.split('.').all(is_valid_email_label)
467}
468
/// Returns `true` for bytes allowed in the local part of an email autolink:
/// ASCII letters and digits plus the punctuation
/// ``. ! # $ % & ' * + / = ? ^ _ ` { | } ~ -``.
fn is_email_local_byte(b: u8) -> bool {
    const PUNCTUATION: &[u8] = b".!#$%&'*+/=?^_`{|}~-";
    b.is_ascii_alphanumeric() || PUNCTUATION.contains(&b)
}
497
/// Checks one dot-separated label of an email autolink's domain.
///
/// A label is valid when it is 1 to 63 bytes long, starts and ends with an
/// ASCII letter or digit, and otherwise holds only ASCII letters, digits and
/// `-`.
///
/// A one-byte label such as the `a` in `user@a.com` is handled without
/// slicing an "interior" range, which would be the invalid range `1..0`.
fn is_valid_email_label(label: &str) -> bool {
    let bytes = label.as_bytes();
    let (Some(first), Some(last)) = (bytes.first(), bytes.last()) else {
        // Empty label.
        return false;
    };

    bytes.len() <= 63
        && first.is_ascii_alphanumeric()
        && last.is_ascii_alphanumeric()
        // First and last are already alphanumeric, so checking every byte is
        // the same as checking only the interior ones.
        && bytes.iter().all(|b| b.is_ascii_alphanumeric() || *b == b'-')
}
513
514pub fn emit_autolink(builder: &mut GreenNodeBuilder, _text: &str, url: &str) {
516 builder.start_node(SyntaxKind::AUTO_LINK.into());
517
518 builder.start_node(SyntaxKind::AUTO_LINK_MARKER.into());
520 builder.token(SyntaxKind::AUTO_LINK_MARKER.into(), "<");
521 builder.finish_node();
522
523 builder.token(SyntaxKind::TEXT.into(), url);
525
526 builder.start_node(SyntaxKind::AUTO_LINK_MARKER.into());
528 builder.token(SyntaxKind::AUTO_LINK_MARKER.into(), ">");
529 builder.finish_node();
530
531 builder.finish_node();
532}
533
534include!(concat!(env!("OUT_DIR"), "/uri_schemes.rs"));
540
541fn is_known_bare_uri_scheme(scheme: &str) -> bool {
544 let lower = scheme.to_ascii_lowercase();
545 BARE_URI_SCHEMES.binary_search(&lower.as_str()).is_ok()
546}
547
548pub fn try_parse_bare_uri(text: &str) -> Option<(usize, &str)> {
549 let mut chars = text.char_indices();
550 let (_, first) = chars.next()?;
551 if !first.is_ascii_alphabetic() {
552 return None;
553 }
554
555 let mut scheme_end = None;
556 for (idx, ch) in text.char_indices() {
557 if ch == ':' {
558 scheme_end = Some(idx);
559 break;
560 }
561 if !ch.is_ascii_alphanumeric() && ch != '+' && ch != '-' && ch != '.' {
562 return None;
563 }
564 }
565 let scheme_end = scheme_end?;
566 if scheme_end == 0 {
567 return None;
568 }
569
570 if !is_known_bare_uri_scheme(&text[..scheme_end]) {
571 return None;
572 }
573
574 let mut end = scheme_end + 1;
575 let bytes = text.as_bytes();
576 while end < text.len() {
577 let b = bytes[end];
578 if b.is_ascii_whitespace() {
579 break;
580 }
581 if matches!(b, b'<' | b'>' | b'`' | b'"' | b'\'') {
582 break;
583 }
584 end += 1;
585 }
586
587 if end == scheme_end + 1 {
588 return None;
589 }
590
591 let mut trimmed = end;
592 while trimmed > scheme_end + 1 {
593 let ch = text[..trimmed].chars().last().unwrap();
594 if matches!(ch, '.' | ',' | ';' | ':' | ')' | ']' | '}') {
595 trimmed -= ch.len_utf8();
596 } else {
597 break;
598 }
599 }
600
601 if trimmed <= scheme_end + 1 {
602 return None;
603 }
604
605 if text[..trimmed].ends_with('\\') {
608 return None;
609 }
610
611 Some((trimmed, &text[..trimmed]))
612}
613
614pub fn try_parse_inline_link(
626 text: &str,
627 strict_dest: bool,
628 ctx: LinkScanContext,
629) -> Option<(usize, &str, &str, Option<&str>)> {
630 if !text.starts_with('[') {
631 return None;
632 }
633
634 let close_bracket = find_link_close_bracket(text, 1, ctx)?;
636 let link_text = &text[1..close_bracket];
637
638 let after_bracket = close_bracket + 1;
640 if text.len() <= after_bracket || !text[after_bracket..].starts_with('(') {
641 return None;
642 }
643
644 let dest_start = after_bracket + 1;
646 let remaining = &text[dest_start..];
647
648 let close_paren = find_dest_close_paren(remaining)?;
649 let dest_content = &remaining[..close_paren];
650
651 if strict_dest && !dest_and_title_ok_commonmark(dest_content) {
652 return None;
653 }
654
655 if ctx.disallow_inner_links && link_text_contains_inner_link(link_text, ctx, strict_dest) {
658 return None;
659 }
660
661 let after_paren = dest_start + close_paren + 1;
663 let after_close = &text[after_paren..];
664
665 if after_close.starts_with('{') {
667 if let Some(close_brace_pos) = after_close.find('}') {
669 let attr_text = &after_close[..=close_brace_pos];
670 if let Some((_attrs, _)) = try_parse_trailing_attributes(attr_text) {
672 let total_len = after_paren + close_brace_pos + 1;
673 let raw_attrs = attr_text;
675 return Some((total_len, link_text, dest_content, Some(raw_attrs)));
676 }
677 }
678 }
679
680 let total_len = after_paren;
682 Some((total_len, link_text, dest_content, None))
683}
684
685fn dest_and_title_ok_commonmark(content: &str) -> bool {
694 let trimmed = trim_start_link_ws(content);
695 if trimmed.is_empty() {
696 return true;
697 }
698
699 let after_dest = if let Some(rest) = trimmed.strip_prefix('<') {
700 let mut escape = false;
701 let mut end_byte = None;
702 for (i, c) in rest.char_indices() {
703 if escape {
704 escape = false;
705 continue;
706 }
707 match c {
708 '\\' => escape = true,
709 '\n' | '<' => return false,
710 '>' => {
711 end_byte = Some(i);
712 break;
713 }
714 _ => {}
715 }
716 }
717 match end_byte {
718 Some(e) => &rest[e + 1..],
719 None => return false,
720 }
721 } else {
722 let mut escape = false;
723 let mut depth: i32 = 0;
724 let mut end = trimmed.len();
725 for (i, c) in trimmed.char_indices() {
726 if escape {
727 escape = false;
728 continue;
729 }
730 match c {
731 '\\' => escape = true,
732 ' ' | '\t' | '\n' => {
733 end = i;
734 break;
735 }
736 _ if c.is_ascii_control() => return false,
737 '(' => depth += 1,
738 ')' => {
739 if depth == 0 {
740 end = i;
741 break;
742 }
743 depth -= 1;
744 }
745 _ => {}
746 }
747 }
748 if depth != 0 {
749 return false;
750 }
751 if end == 0 {
752 return false;
754 }
755 &trimmed[end..]
756 };
757
758 let after_dest = trim_start_link_ws(after_dest);
759 if after_dest.is_empty() {
760 return true;
761 }
762
763 let bytes = after_dest.as_bytes();
764 let close = match bytes[0] {
765 b'"' => b'"',
766 b'\'' => b'\'',
767 b'(' => b')',
768 _ => return false,
769 };
770 let opens_paren = bytes[0] == b'(';
771 let mut escape = false;
772 let mut title_close_pos = None;
773 for (i, &b) in after_dest.as_bytes().iter().enumerate().skip(1) {
774 if escape {
775 escape = false;
776 continue;
777 }
778 if b == b'\\' {
779 escape = true;
780 continue;
781 }
782 if opens_paren && b == b'(' {
783 return false;
784 }
785 if b == close {
786 title_close_pos = Some(i);
787 break;
788 }
789 }
790 let close_idx = match title_close_pos {
791 Some(p) => p,
792 None => return false,
793 };
794
795 let after_title = &after_dest[close_idx + 1..];
796 is_link_ws_only(after_title)
797}
798
/// Strips leading spaces, tabs and newlines from `s`.
///
/// Only these three characters are removed; other whitespace such as `\r` or
/// non-breaking space is kept. This uses the safe `str` API, so no `unsafe`
/// UTF-8 reconstruction is needed.
#[inline]
fn trim_start_link_ws(s: &str) -> &str {
    s.trim_start_matches(|c: char| matches!(c, ' ' | '\t' | '\n'))
}
818
/// Returns `true` when `s` is empty or consists solely of spaces, tabs and
/// newlines.
#[inline]
fn is_link_ws_only(s: &str) -> bool {
    s.bytes().all(|byte| matches!(byte, b' ' | b'\t' | b'\n'))
}
825
826pub fn emit_inline_link(
829 builder: &mut GreenNodeBuilder,
830 _text: &str,
831 link_text: &str,
832 dest: &str,
833 raw_attributes: Option<&str>,
834 config: &ParserOptions,
835 suppress_footnote_refs: bool,
836) {
837 builder.start_node(SyntaxKind::LINK.into());
838
839 builder.start_node(SyntaxKind::LINK_START.into());
841 builder.token(SyntaxKind::LINK_START.into(), "[");
842 builder.finish_node();
843
844 builder.start_node(SyntaxKind::LINK_TEXT.into());
850 parse_inline_text(builder, link_text, config, true, suppress_footnote_refs);
851 builder.finish_node();
852
853 builder.token(SyntaxKind::LINK_TEXT_END.into(), "]");
855
856 builder.token(SyntaxKind::LINK_DEST_START.into(), "(");
858
859 builder.start_node(SyntaxKind::LINK_DEST.into());
861 builder.token(SyntaxKind::TEXT.into(), dest);
862 builder.finish_node();
863
864 builder.token(SyntaxKind::LINK_DEST_END.into(), ")");
866
867 if let Some(raw_attrs) = raw_attributes {
869 emit_attribute_node(builder, raw_attrs);
870 }
871
872 builder.finish_node();
873}
874
875pub fn emit_bare_uri_link(builder: &mut GreenNodeBuilder, uri: &str, _config: &ParserOptions) {
876 builder.start_node(SyntaxKind::LINK.into());
877
878 builder.start_node(SyntaxKind::LINK_START.into());
879 builder.token(SyntaxKind::LINK_START.into(), "[");
880 builder.finish_node();
881
882 builder.start_node(SyntaxKind::LINK_TEXT.into());
883 builder.token(SyntaxKind::TEXT.into(), uri);
884 builder.finish_node();
885
886 builder.token(SyntaxKind::LINK_TEXT_END.into(), "]");
887 builder.token(SyntaxKind::LINK_DEST_START.into(), "(");
888
889 builder.start_node(SyntaxKind::LINK_DEST.into());
890 builder.token(SyntaxKind::TEXT.into(), uri);
891 builder.finish_node();
892
893 builder.token(SyntaxKind::LINK_DEST_END.into(), ")");
894
895 builder.finish_node();
896}
897
898pub fn try_parse_reference_link(
908 text: &str,
909 allow_shortcut: bool,
910 inline_link_attempted: bool,
911 ctx: LinkScanContext,
912) -> Option<(usize, &str, String, bool)> {
913 if !text.starts_with('[') {
914 return None;
915 }
916
917 if text.len() > 1 {
919 let bytes = text.as_bytes();
920 if bytes[1] == b'@' {
921 return None;
922 }
923 if bytes[1] == b'-' && text.len() > 2 && bytes[2] == b'@' {
924 return None;
925 }
926 }
927
928 let close_bracket = find_link_close_bracket(text, 1, ctx)?;
934 let link_text = &text[1..close_bracket];
935
936 if ctx.disallow_inner_links
941 && link_text_contains_inner_link(link_text, ctx, ctx.disallow_inner_links)
942 {
943 return None;
944 }
945
946 let after_bracket = close_bracket + 1;
948
949 if after_bracket < text.len() && text[after_bracket..].starts_with('{') {
952 return None;
953 }
954
955 if after_bracket < text.len()
966 && text[after_bracket..].starts_with('(')
967 && (!allow_shortcut || !inline_link_attempted)
968 {
969 return None;
970 }
971
972 if after_bracket < text.len() && text[after_bracket..].starts_with('[') {
974 let label_start = after_bracket + 1;
976 let mut label_end = None;
977
978 for (i, ch) in text[label_start..].char_indices() {
979 if ch == ']' {
980 label_end = Some(i + label_start);
981 break;
982 }
983 if ch == '\n' {
985 return None;
986 }
987 }
988
989 let label_end = label_end?;
990 let label = &text[label_start..label_end];
991
992 let total_len = label_end + 1;
994
995 if label.is_empty() {
997 return Some((total_len, link_text, String::new(), false));
998 }
999
1000 Some((total_len, link_text, label.to_string(), false))
1002 } else if allow_shortcut {
1003 if link_text.is_empty() {
1006 return None;
1007 }
1008 Some((after_bracket, link_text, link_text.to_string(), true))
1009 } else {
1010 None
1012 }
1013}
1014
1015pub fn emit_reference_link(
1018 builder: &mut GreenNodeBuilder,
1019 link_text: &str,
1020 label: &str,
1021 is_shortcut: bool,
1022 config: &ParserOptions,
1023 suppress_footnote_refs: bool,
1024) {
1025 builder.start_node(SyntaxKind::LINK.into());
1026
1027 builder.start_node(SyntaxKind::LINK_START.into());
1029 builder.token(SyntaxKind::LINK_START.into(), "[");
1030 builder.finish_node();
1031
1032 builder.start_node(SyntaxKind::LINK_TEXT.into());
1037 parse_inline_text(builder, link_text, config, true, suppress_footnote_refs);
1038 builder.finish_node();
1039
1040 builder.token(SyntaxKind::TEXT.into(), "]");
1042
1043 if !is_shortcut {
1044 builder.token(SyntaxKind::TEXT.into(), "[");
1046 builder.start_node(SyntaxKind::LINK_REF.into());
1047 if !label.is_empty() {
1050 builder.token(SyntaxKind::TEXT.into(), label);
1051 }
1052 builder.finish_node();
1053 builder.token(SyntaxKind::TEXT.into(), "]");
1054 }
1055 builder.finish_node();
1058}
1059
/// Tries to parse a reference image at the start of `text`: `![alt][label]`,
/// `![alt][]`, or the shortcut form `![alt]` when `allow_shortcut` is set.
///
/// Nested brackets in the alt text are balanced, and a backslash hides the
/// byte after it. A label may not contain `\n` or `\r`. The shortcut form is
/// rejected when `]` is directly followed by `(`, since that is an inline
/// image.
///
/// Returns `(consumed_len, alt_text, label, is_shortcut)`. For `![alt][]` and
/// for the shortcut form the label equals the alt text.
///
/// A shortcut image that ends the input (`![alt]` with nothing after it) is
/// accepted when `allow_shortcut` is true. It used to be rejected by an early
/// end-of-input check before the shortcut branch was reached.
pub fn try_parse_reference_image(
    text: &str,
    allow_shortcut: bool,
) -> Option<(usize, &str, String, bool)> {
    let bytes = text.as_bytes();
    if bytes.len() < 4 || bytes[0] != b'!' || bytes[1] != b'[' {
        return None;
    }

    let alt_start = 2;
    let mut pos = alt_start;
    let mut bracket_depth = 1;

    while pos < bytes.len() && bracket_depth > 0 {
        match bytes[pos] {
            b'[' => bracket_depth += 1,
            b']' => bracket_depth -= 1,
            // Skip the escaped byte so `\]` / `\[` do not affect the depth.
            b'\\' if pos + 1 < bytes.len() => pos += 1,
            _ => {}
        }
        pos += 1;
    }

    if bracket_depth > 0 {
        // Alt text never closed.
        return None;
    }

    // `pos` is one past the closing `]`.
    let alt_text = &text[alt_start..pos - 1];

    // Explicit or collapsed reference: `![alt][label]` / `![alt][]`.
    if bytes.get(pos) == Some(&b'[') {
        let label_start = pos + 1;
        let label_len = bytes[label_start..]
            .iter()
            .take_while(|&&b| b != b']' && b != b'\n' && b != b'\r')
            .count();
        let label_end = label_start + label_len;

        if bytes.get(label_end) != Some(&b']') {
            return None;
        }

        let label_text = &text[label_start..label_end];
        let label = if label_text.is_empty() {
            alt_text.to_string()
        } else {
            label_text.to_string()
        };

        return Some((label_end + 1, alt_text, label, false));
    }

    // Shortcut reference: `![alt]`, including when it ends the input.
    if allow_shortcut {
        if bytes.get(pos) == Some(&b'(') {
            // Inline image syntax, not a reference.
            return None;
        }
        return Some((pos, alt_text, alt_text.to_string(), true));
    }

    None
}
1141
1142pub fn emit_reference_image(
1144 builder: &mut GreenNodeBuilder,
1145 alt_text: &str,
1146 label: &str,
1147 is_shortcut: bool,
1148 config: &ParserOptions,
1149 suppress_footnote_refs: bool,
1150) {
1151 builder.start_node(SyntaxKind::IMAGE_LINK.into());
1152
1153 builder.start_node(SyntaxKind::IMAGE_LINK_START.into());
1155 builder.token(SyntaxKind::IMAGE_LINK_START.into(), "![");
1156 builder.finish_node();
1157
1158 builder.start_node(SyntaxKind::IMAGE_ALT.into());
1160 parse_inline_text(builder, alt_text, config, false, suppress_footnote_refs);
1161 builder.finish_node();
1162
1163 builder.token(SyntaxKind::TEXT.into(), "]");
1165
1166 if !is_shortcut {
1167 builder.token(SyntaxKind::TEXT.into(), "[");
1169 builder.start_node(SyntaxKind::LINK_REF.into());
1170 if label != alt_text {
1172 builder.token(SyntaxKind::TEXT.into(), label);
1173 }
1174 builder.finish_node();
1175 builder.token(SyntaxKind::TEXT.into(), "]");
1176 }
1177 builder.finish_node();
1180}
1181
1182pub fn emit_unresolved_reference(
1193 builder: &mut GreenNodeBuilder,
1194 is_image: bool,
1195 text_content: &str,
1196 label_suffix: Option<&str>,
1197 config: &ParserOptions,
1198 suppress_footnote_refs: bool,
1199) {
1200 builder.start_node(SyntaxKind::UNRESOLVED_REFERENCE.into());
1201
1202 if is_image {
1203 builder.start_node(SyntaxKind::IMAGE_LINK_START.into());
1204 builder.token(SyntaxKind::IMAGE_LINK_START.into(), "![");
1205 builder.finish_node();
1206 builder.start_node(SyntaxKind::IMAGE_ALT.into());
1207 parse_inline_text(builder, text_content, config, false, suppress_footnote_refs);
1208 builder.finish_node();
1209 } else {
1210 builder.start_node(SyntaxKind::LINK_START.into());
1211 builder.token(SyntaxKind::LINK_START.into(), "[");
1212 builder.finish_node();
1213 builder.start_node(SyntaxKind::LINK_TEXT.into());
1214 parse_inline_text(builder, text_content, config, true, suppress_footnote_refs);
1215 builder.finish_node();
1216 }
1217
1218 builder.token(SyntaxKind::TEXT.into(), "]");
1219
1220 if let Some(suffix) = label_suffix {
1221 debug_assert!(suffix.starts_with('[') && suffix.ends_with(']'));
1225 builder.token(SyntaxKind::TEXT.into(), "[");
1226 let label = &suffix[1..suffix.len() - 1];
1227 builder.start_node(SyntaxKind::LINK_REF.into());
1228 if !label.is_empty() {
1229 builder.token(SyntaxKind::TEXT.into(), label);
1230 }
1231 builder.finish_node();
1232 builder.token(SyntaxKind::TEXT.into(), "]");
1233 }
1234
1235 builder.finish_node();
1236}
1237
1238#[cfg(test)]
1239mod tests {
1240 use super::*;
1241
1242 #[test]
1243 fn test_parse_autolink_url() {
1244 let input = "<https://example.com>";
1245 assert_eq!(
1246 try_parse_autolink(input, false),
1247 Some((21, "https://example.com"))
1248 );
1249 assert_eq!(
1250 try_parse_autolink(input, true),
1251 Some((21, "https://example.com"))
1252 );
1253 }
1254
1255 #[test]
1256 fn test_parse_autolink_email() {
1257 let input = "<user@example.com>";
1258 assert_eq!(
1259 try_parse_autolink(input, false),
1260 Some((18, "user@example.com"))
1261 );
1262 assert_eq!(
1263 try_parse_autolink(input, true),
1264 Some((18, "user@example.com"))
1265 );
1266 }
1267
1268 #[test]
1269 fn test_parse_autolink_no_close() {
1270 let input = "<https://example.com";
1271 assert_eq!(try_parse_autolink(input, false), None);
1272 assert_eq!(try_parse_autolink(input, true), None);
1273 }
1274
1275 #[test]
1276 fn test_parse_autolink_with_space() {
1277 let input = "<https://example.com >";
1278 assert_eq!(try_parse_autolink(input, false), None);
1279 assert_eq!(try_parse_autolink(input, true), None);
1280 }
1281
1282 #[test]
1283 fn test_parse_autolink_not_url_or_email() {
1284 let input = "<notaurl>";
1285 assert_eq!(try_parse_autolink(input, false), None);
1286 assert_eq!(try_parse_autolink(input, true), None);
1287 }
1288
1289 #[test]
1290 fn test_parse_autolink_commonmark_strict_scheme() {
1291 let input = "<m:abc>";
1294 assert_eq!(try_parse_autolink(input, true), None);
1295 assert_eq!(try_parse_autolink(input, false), Some((7, "m:abc")));
1296 }
1297
1298 #[test]
1299 fn test_parse_autolink_commonmark_email_disallows_backslash() {
1300 let input = "<foo\\+@bar.example.com>";
1301 assert_eq!(try_parse_autolink(input, true), None);
1302 assert_eq!(
1303 try_parse_autolink(input, false),
1304 Some((23, "foo\\+@bar.example.com"))
1305 );
1306 }
1307
1308 #[test]
1309 fn test_parse_inline_link_simple() {
1310 let input = "[text](url)";
1311 let result = try_parse_inline_link(input, false, LinkScanContext::default());
1312 assert_eq!(result, Some((11, "text", "url", None)));
1313 }
1314
1315 #[test]
1316 fn test_parse_inline_link_with_title() {
1317 let input = r#"[text](url "title")"#;
1318 let result = try_parse_inline_link(input, false, LinkScanContext::default());
1319 assert_eq!(result, Some((19, "text", r#"url "title""#, None)));
1320 }
1321
1322 #[test]
1323 fn test_parse_inline_link_with_nested_brackets() {
1324 let input = "[outer [inner] text](url)";
1325 let result = try_parse_inline_link(input, false, LinkScanContext::default());
1326 assert_eq!(result, Some((25, "outer [inner] text", "url", None)));
1327 }
1328
1329 #[test]
1330 fn test_parse_inline_link_no_space_between_brackets_and_parens() {
1331 let input = "[text] (url)";
1332 let result = try_parse_inline_link(input, false, LinkScanContext::default());
1333 assert_eq!(result, None);
1334 }
1335
1336 #[test]
1337 fn test_parse_inline_link_no_closing_bracket() {
1338 let input = "[text(url)";
1339 let result = try_parse_inline_link(input, false, LinkScanContext::default());
1340 assert_eq!(result, None);
1341 }
1342
1343 #[test]
1344 fn test_parse_inline_link_no_closing_paren() {
1345 let input = "[text](url";
1346 let result = try_parse_inline_link(input, false, LinkScanContext::default());
1347 assert_eq!(result, None);
1348 }
1349
1350 #[test]
1351 fn test_parse_inline_link_escaped_bracket() {
1352 let input = r"[text\]more](url)";
1353 let result = try_parse_inline_link(input, false, LinkScanContext::default());
1354 assert_eq!(result, Some((17, r"text\]more", "url", None)));
1355 }
1356
1357 #[test]
1358 fn test_parse_inline_link_parens_in_url() {
1359 let input = "[text](url(with)parens)";
1360 let result = try_parse_inline_link(input, false, LinkScanContext::default());
1361 assert_eq!(result, Some((23, "text", "url(with)parens", None)));
1362 }
1363
1364 #[test]
1365 fn test_parse_inline_image_simple() {
1366 let input = "";
1367 let result = try_parse_inline_image(input, LinkScanContext::default());
1368 assert_eq!(result, Some((17, "alt", "image.jpg", None)));
1369 }
1370
1371 #[test]
1372 fn test_parse_inline_image_with_title() {
1373 let input = r#""#;
1374 let result = try_parse_inline_image(input, LinkScanContext::default());
1375 assert_eq!(result, Some((27, "alt", r#"image.jpg "A title""#, None)));
1376 }
1377
1378 #[test]
1379 fn test_parse_inline_image_with_nested_brackets() {
1380 let input = "![outer [inner] alt](image.jpg)";
1381 let result = try_parse_inline_image(input, LinkScanContext::default());
1382 assert_eq!(result, Some((31, "outer [inner] alt", "image.jpg", None)));
1383 }
1384
1385 #[test]
1386 fn test_parse_bare_uri_rejects_dangling_backslash_after_trim() {
1387 let input = r"a:\]";
1388 let result = try_parse_bare_uri(input);
1389 assert_eq!(result, None);
1390 }
1391
1392 #[test]
1393 fn test_parse_bare_uri_rejects_unknown_scheme() {
1394 assert_eq!(try_parse_bare_uri("Note:**"), None);
1395 assert_eq!(try_parse_bare_uri("Note:foo"), None);
1396 assert_eq!(try_parse_bare_uri("foo:bar"), None);
1397 }
1398
1399 #[test]
1400 fn test_parse_bare_uri_accepts_known_schemes() {
1401 assert_eq!(
1402 try_parse_bare_uri("http://example.com"),
1403 Some((18, "http://example.com"))
1404 );
1405 assert_eq!(
1406 try_parse_bare_uri("HTTPS://EXAMPLE.COM"),
1407 Some((19, "HTTPS://EXAMPLE.COM"))
1408 );
1409 assert_eq!(
1410 try_parse_bare_uri("mailto:a@b.com"),
1411 Some((14, "mailto:a@b.com"))
1412 );
1413 assert_eq!(try_parse_bare_uri("doi:10.1/x"), Some((10, "doi:10.1/x")));
1414 }
1415
1416 #[test]
1417 fn bare_uri_scheme_table_is_well_formed() {
1418 assert!(
1419 BARE_URI_SCHEMES.len() > 300,
1420 "only {} schemes",
1421 BARE_URI_SCHEMES.len()
1422 );
1423 assert!(BARE_URI_SCHEMES.windows(2).all(|w| w[0] < w[1]));
1424 for known in ["http", "https", "mailto", "ftp", "mongodb", "shttp"] {
1425 assert!(is_known_bare_uri_scheme(known), "missing scheme {known}");
1426 }
1427 for extra in ["doi", "gemini", "isbn", "pmid"] {
1428 assert!(is_known_bare_uri_scheme(extra), "missing scheme {extra}");
1429 }
1430 assert!(!is_known_bare_uri_scheme("note"));
1431 }
1432
1433 #[test]
1434 fn test_parse_inline_image_no_space_between_brackets_and_parens() {
1435 let input = "![alt] (image.jpg)";
1436 let result = try_parse_inline_image(input, LinkScanContext::default());
1437 assert_eq!(result, None);
1438 }
1439
1440 #[test]
1441 fn test_parse_inline_image_no_closing_bracket() {
1442 let input = "![alt(image.jpg)";
1443 let result = try_parse_inline_image(input, LinkScanContext::default());
1444 assert_eq!(result, None);
1445 }
1446
1447 #[test]
1448 fn test_parse_inline_image_no_closing_paren() {
1449 let input = ");
1451 assert_eq!(result, None);
1452 }
1453
1454 #[test]
1455 fn test_parse_inline_image_with_simple_class() {
1456 let input = "{.large}";
1457 let result = try_parse_inline_image(input, LinkScanContext::default());
1458 let (len, alt, dest, attrs) = result.unwrap();
1459 assert_eq!(len, 23);
1460 assert_eq!(alt, "alt");
1461 assert_eq!(dest, "img.png");
1462 assert!(attrs.is_some());
1463 let attrs = attrs.unwrap();
1464 assert_eq!(attrs, "{.large}");
1465 }
1466
1467 #[test]
1468 fn test_parse_inline_image_with_id() {
1469 let input = "{#fig-1}";
1470 let result = try_parse_inline_image(input, LinkScanContext::default());
1471 let (len, alt, dest, attrs) = result.unwrap();
1472 assert_eq!(len, 29);
1473 assert_eq!(alt, "Figure 1");
1474 assert_eq!(dest, "fig1.png");
1475 assert!(attrs.is_some());
1476 let attrs = attrs.unwrap();
1477 assert_eq!(attrs, "{#fig-1}");
1478 }
1479
1480 #[test]
1481 fn test_parse_inline_image_with_full_attributes() {
1482 let input = "{#fig .large width=\"80%\"}";
1483 let result = try_parse_inline_image(input, LinkScanContext::default());
1484 let (len, alt, dest, attrs) = result.unwrap();
1485 assert_eq!(len, 40);
1486 assert_eq!(alt, "alt");
1487 assert_eq!(dest, "img.png");
1488 assert!(attrs.is_some());
1489 let attrs = attrs.unwrap();
1490 assert_eq!(attrs, "{#fig .large width=\"80%\"}");
1491 }
1492
1493 #[test]
1494 fn test_parse_inline_image_attributes_must_be_adjacent() {
1495 let input = " {.large}";
1497 let result = try_parse_inline_image(input, LinkScanContext::default());
1498 assert_eq!(result, Some((15, "alt", "img.png", None)));
1499 }
1500
1501 #[test]
1503 fn test_parse_inline_link_with_id() {
1504 let input = "[text](url){#link-1}";
1505 let result = try_parse_inline_link(input, false, LinkScanContext::default());
1506 let (len, text, dest, attrs) = result.unwrap();
1507 assert_eq!(len, 20);
1508 assert_eq!(text, "text");
1509 assert_eq!(dest, "url");
1510 assert!(attrs.is_some());
1511 let attrs = attrs.unwrap();
1512 assert_eq!(attrs, "{#link-1}");
1513 }
1514
1515 #[test]
1516 fn test_parse_inline_link_with_full_attributes() {
1517 let input = "[text](url){#link .external target=\"_blank\"}";
1518 let result = try_parse_inline_link(input, false, LinkScanContext::default());
1519 let (len, text, dest, attrs) = result.unwrap();
1520 assert_eq!(len, 44);
1521 assert_eq!(text, "text");
1522 assert_eq!(dest, "url");
1523 assert!(attrs.is_some());
1524 let attrs = attrs.unwrap();
1525 assert_eq!(attrs, "{#link .external target=\"_blank\"}");
1526 }
1527
1528 #[test]
1529 fn test_parse_inline_link_attributes_must_be_adjacent() {
1530 let input = "[text](url) {.class}";
1532 let result = try_parse_inline_link(input, false, LinkScanContext::default());
1533 assert_eq!(result, Some((11, "text", "url", None)));
1534 }
1535
1536 #[test]
1537 fn test_parse_inline_link_with_title_and_attributes() {
1538 let input = r#"[text](url "title"){.external}"#;
1539 let result = try_parse_inline_link(input, false, LinkScanContext::default());
1540 let (len, text, dest, attrs) = result.unwrap();
1541 assert_eq!(len, 30);
1542 assert_eq!(text, "text");
1543 assert_eq!(dest, r#"url "title""#);
1544 assert!(attrs.is_some());
1545 let attrs = attrs.unwrap();
1546 assert_eq!(attrs, "{.external}");
1547 }
1548
1549 #[test]
1551 fn test_parse_reference_link_explicit() {
1552 let input = "[link text][label]";
1553 let result = try_parse_reference_link(input, false, true, LinkScanContext::default());
1554 assert_eq!(result, Some((18, "link text", "label".to_string(), false)));
1555 }
1556
1557 #[test]
1558 fn test_parse_reference_link_implicit() {
1559 let input = "[link text][]";
1560 let result = try_parse_reference_link(input, false, true, LinkScanContext::default());
1561 assert_eq!(result, Some((13, "link text", String::new(), false)));
1562 }
1563
1564 #[test]
1565 fn test_parse_reference_link_explicit_same_label_as_text() {
1566 let input = "[stack][stack]";
1567 let result = try_parse_reference_link(input, false, true, LinkScanContext::default());
1568 assert_eq!(result, Some((14, "stack", "stack".to_string(), false)));
1569 }
1570
1571 #[test]
1572 fn test_parse_reference_link_shortcut() {
1573 let input = "[link text] rest";
1574 let result = try_parse_reference_link(input, true, true, LinkScanContext::default());
1575 assert_eq!(
1576 result,
1577 Some((11, "link text", "link text".to_string(), true))
1578 );
1579 }
1580
1581 #[test]
1582 fn test_parse_reference_link_shortcut_rejects_empty_label() {
1583 let input = "[] rest";
1584 let result = try_parse_reference_link(input, true, true, LinkScanContext::default());
1585 assert_eq!(result, None);
1586 }
1587
1588 #[test]
1589 fn test_parse_reference_link_shortcut_disabled() {
1590 let input = "[link text] rest";
1591 let result = try_parse_reference_link(input, false, true, LinkScanContext::default());
1592 assert_eq!(result, None);
1593 }
1594
1595 #[test]
1596 fn test_parse_reference_link_not_inline_link() {
1597 let input = "[text](url)";
1600 let result = try_parse_reference_link(input, false, true, LinkScanContext::default());
1601 assert_eq!(result, None);
1602 }
1603
1604 #[test]
1605 fn test_parse_reference_link_shortcut_falls_through_inline_link() {
1606 let input = "[text](url)";
1611 let result = try_parse_reference_link(input, true, true, LinkScanContext::default());
1612 assert_eq!(result, Some((6, "text", "text".to_string(), true)));
1613 }
1614
1615 #[test]
1616 fn test_parse_reference_link_with_nested_brackets() {
1617 let input = "[outer [inner] text][ref]";
1618 let result = try_parse_reference_link(input, false, true, LinkScanContext::default());
1619 assert_eq!(
1620 result,
1621 Some((25, "outer [inner] text", "ref".to_string(), false))
1622 );
1623 }
1624
1625 #[test]
1626 fn test_parse_reference_link_label_no_newline() {
1627 let input = "[text][label\nmore]";
1628 let result = try_parse_reference_link(input, false, true, LinkScanContext::default());
1629 assert_eq!(result, None);
1630 }
1631
1632 #[test]
1634 fn test_parse_reference_image_explicit() {
1635 let input = "![alt text][label]";
1636 let result = try_parse_reference_image(input, false);
1637 assert_eq!(result, Some((18, "alt text", "label".to_string(), false)));
1638 }
1639
1640 #[test]
1641 fn test_parse_reference_image_implicit() {
1642 let input = "![alt text][]";
1643 let result = try_parse_reference_image(input, false);
1644 assert_eq!(
1645 result,
1646 Some((13, "alt text", "alt text".to_string(), false))
1647 );
1648 }
1649
1650 #[test]
1651 fn test_parse_reference_image_shortcut() {
1652 let input = "![alt text] rest";
1653 let result = try_parse_reference_image(input, true);
1654 assert_eq!(result, Some((11, "alt text", "alt text".to_string(), true)));
1655 }
1656
1657 #[test]
1658 fn test_parse_reference_image_shortcut_disabled() {
1659 let input = "![alt text] rest";
1660 let result = try_parse_reference_image(input, false);
1661 assert_eq!(result, None);
1662 }
1663
1664 #[test]
1665 fn test_parse_reference_image_not_inline() {
1666 let input = "";
1668 let result = try_parse_reference_image(input, true);
1669 assert_eq!(result, None);
1670 }
1671
1672 #[test]
1673 fn test_parse_reference_image_with_nested_brackets() {
1674 let input = "![alt [nested] text][ref]";
1675 let result = try_parse_reference_image(input, false);
1676 assert_eq!(
1677 result,
1678 Some((25, "alt [nested] text", "ref".to_string(), false))
1679 );
1680 }
1681
1682 #[test]
1683 fn test_reference_link_label_with_crlf() {
1684 let input = "[foo\r\nbar]";
1686 let result = try_parse_reference_link(input, false, true, LinkScanContext::default());
1687
1688 assert_eq!(
1690 result, None,
1691 "Should not parse reference link with CRLF in label"
1692 );
1693 }
1694
1695 #[test]
1696 fn test_reference_link_label_with_lf() {
1697 let input = "[foo\nbar]";
1699 let result = try_parse_reference_link(input, false, true, LinkScanContext::default());
1700
1701 assert_eq!(
1703 result, None,
1704 "Should not parse reference link with LF in label"
1705 );
1706 }
1707
1708 #[test]
1710 fn test_parse_inline_link_multiline_text() {
1711 let input = "[text on\nline two](url)";
1713 let result = try_parse_inline_link(input, false, LinkScanContext::default());
1714 assert_eq!(
1715 result,
1716 Some((23, "text on\nline two", "url", None)),
1717 "Link text should allow newlines"
1718 );
1719 }
1720
1721 #[test]
1722 fn test_parse_inline_link_multiline_with_formatting() {
1723 let input =
1725 "[A network graph. Different edges\nwith probability](../images/networkfig.png)";
1726 let result = try_parse_inline_link(input, false, LinkScanContext::default());
1727 assert!(result.is_some(), "Link text with newlines should parse");
1728 let (len, text, _dest, _attrs) = result.unwrap();
1729 assert!(text.contains('\n'), "Link text should preserve newline");
1730 assert_eq!(len, input.len());
1731 }
1732
1733 #[test]
1734 fn test_parse_inline_image_multiline_alt() {
1735 let input = "";
1737 let result = try_parse_inline_image(input, LinkScanContext::default());
1738 assert_eq!(
1739 result,
1740 Some((27, "alt on\nline two", "img.png", None)),
1741 "Image alt text should allow newlines"
1742 );
1743 }
1744
1745 #[test]
1746 fn test_parse_inline_image_multiline_with_attributes() {
1747 let input = "{width=70%}";
1749 let result = try_parse_inline_image(input, LinkScanContext::default());
1750 assert!(
1751 result.is_some(),
1752 "Image alt with newlines and attributes should parse"
1753 );
1754 let (len, alt, dest, attrs) = result.unwrap();
1755 assert!(alt.contains('\n'), "Alt text should preserve newline");
1756 assert_eq!(dest, "../images/fig.png");
1757 assert_eq!(attrs, Some("{width=70%}"));
1758 assert_eq!(len, input.len());
1759 }
1760
1761 #[test]
1762 fn test_parse_inline_link_with_attributes_after_newline() {
1763 let input = "[A network graph.](../images/networkfig.png){width=70%}\nA word\n";
1766 let result = try_parse_inline_link(input, false, LinkScanContext::default());
1767 assert!(
1768 result.is_some(),
1769 "Link with attributes should parse even with following text"
1770 );
1771 let (len, text, dest, attrs) = result.unwrap();
1772 assert_eq!(text, "A network graph.");
1773 assert_eq!(dest, "../images/networkfig.png");
1774 assert_eq!(attrs, Some("{width=70%}"), "Attributes should be captured");
1775 assert_eq!(
1776 len, 55,
1777 "Length should include attributes (up to closing brace)"
1778 );
1779 }
1780}