Skip to main content

panache_parser/parser/inlines/
links.rs

1//! Parsing for links, images, and automatic links.
2//!
3//! Implements:
4//! - Automatic links: `<http://example.com>` and `<user@example.com>`
5//! - Inline links: `[text](url)` and `[text](url "title")`
6//! - Link attributes: `[text](url){#id .class key=value}`
7//! - Inline images: `![alt](url)` and `![alt](url "title")`
8//! - Image attributes: `![alt](url){#id .class key=value}`
9//! - Reference links: `[text][ref]`, `[text][]`, `[text]`
10//! - Reference images: `![alt][ref]`, `![alt][]`, `![alt]`
11
12use super::code_spans::try_parse_code_span;
13use super::core::parse_inline_text;
14use super::inline_html::try_parse_inline_html;
15use crate::options::ParserOptions;
16use crate::syntax::SyntaxKind;
17use rowan::GreenNodeBuilder;
18
19// Import attribute parsing
20use crate::parser::utils::attributes::try_parse_trailing_attributes;
21
/// Flags that control which inline spans the link-bracket scanner treats as
/// opaque (so a `]` inside them does not terminate the link/image text).
///
/// - `skip_raw_html` is universal across dialects: pandoc-markdown and
///   CommonMark both refuse to close link text inside a raw HTML span (e.g.
///   `[foo <bar attr="](baz)">`), per CommonMark spec example #524 / #536.
/// - `skip_autolinks` is **CommonMark-only**. Pandoc-markdown does *not*
///   treat `<scheme://...>` as opaque inside link text, so the same input
///   produces a different parse under each dialect (CommonMark spec example
///   #526 / #538). Always derive this from
///   `extensions.autolinks && dialect == Dialect::CommonMark`.
/// - `disallow_inner_links` is a **CommonMark-only** structural rule (§6.4):
///   "Links may not contain other links, at any level of nesting." When the
///   candidate link/image text contains a valid inline link or image, the
///   outer match is rejected so the inner-most definition is used instead
///   (spec examples #518–#520, #532). Pandoc-markdown allows nested links,
///   so the flag is `false` there.
///
/// The `Default` value has every flag cleared.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct LinkScanContext {
    /// Skip over raw inline HTML spans while looking for the closing `]`.
    pub skip_raw_html: bool,
    /// Skip over `<...>` autolink spans while looking for the closing `]`.
    pub skip_autolinks: bool,
    /// Reject an outer link whose text contains a valid inner inline link.
    pub disallow_inner_links: bool,
}
45
46impl LinkScanContext {
47    pub fn from_options(config: &ParserOptions) -> Self {
48        let is_commonmark = config.dialect == crate::options::Dialect::CommonMark;
49        Self {
50            skip_raw_html: config.extensions.raw_html,
51            skip_autolinks: config.extensions.autolinks && is_commonmark,
52            disallow_inner_links: is_commonmark,
53        }
54    }
55}
56
57/// Find the closing `]` of a link/image text span, starting from `start`.
58///
59/// Walks `text[start..]` tracking nested brackets and backslash escapes. When
60/// a backtick run starting a valid code span is encountered, the entire span
61/// (including any trailing attribute block) is skipped — per CommonMark §6
62/// precedence, code spans bind tighter than links/images, so a `]` *inside*
63/// a code span cannot terminate the link's text. The same opacity applies to
64/// raw HTML and (CommonMark-only) autolink spans gated through `ctx`.
65/// Returns the byte offset of the closing `]` within `text`, or `None` if no
66/// unmatched `]` is reached.
67fn find_link_close_bracket(text: &str, start: usize, ctx: LinkScanContext) -> Option<usize> {
68    let bytes = text.as_bytes();
69    let mut bracket_depth = 0;
70    let mut escape_next = false;
71    let mut i = start;
72
73    while i < bytes.len() {
74        let b = bytes[i];
75
76        if escape_next {
77            escape_next = false;
78            i += step(text, i);
79            continue;
80        }
81
82        match b {
83            b'\\' => {
84                escape_next = true;
85                i += 1;
86            }
87            b'`' => {
88                if let Some((len, _, _, _)) = try_parse_code_span(&text[i..]) {
89                    i += len;
90                } else {
91                    i += 1;
92                }
93            }
94            b'<' => {
95                // Order matters: autolinks are the more specific `<...>`
96                // shape (URI/email between angle brackets), so try that
97                // before falling through to general inline raw HTML which
98                // would also match `<bar attr="...">`-style tags.
99                if ctx.skip_autolinks
100                    && let Some((len, _)) = try_parse_autolink(&text[i..], true)
101                {
102                    i += len;
103                } else if ctx.skip_raw_html
104                    && let Some(len) = try_parse_inline_html(&text[i..])
105                {
106                    i += len;
107                } else {
108                    i += 1;
109                }
110            }
111            b'[' => {
112                bracket_depth += 1;
113                i += 1;
114            }
115            b']' => {
116                if bracket_depth == 0 {
117                    return Some(i);
118                }
119                bracket_depth -= 1;
120                i += 1;
121            }
122            _ => i += step(text, i),
123        }
124    }
125    None
126}
127
128/// Find the closing `)` of a link/image destination, given the text *after*
129/// the opening `(`. Tracks paren nesting, quoted titles, and angle-bracketed
130/// destinations (`<...>` may legitimately contain unbalanced parens — see
131/// spec example #499). Returns the byte offset of the closing `)` within the
132/// passed slice, or `None` if not found.
133fn find_dest_close_paren(remaining: &str) -> Option<usize> {
134    let bytes = remaining.as_bytes();
135    let mut paren_depth = 0;
136    let mut escape_next = false;
137    let mut in_quotes = false;
138    let mut in_angle = false;
139    let mut i = 0;
140
141    while i < bytes.len() {
142        let b = bytes[i];
143
144        if escape_next {
145            escape_next = false;
146            i += step(remaining, i);
147            continue;
148        }
149
150        match b {
151            b'\\' => {
152                escape_next = true;
153                i += 1;
154            }
155            b'<' if !in_quotes && !in_angle => {
156                in_angle = true;
157                i += 1;
158            }
159            b'>' if in_angle => {
160                in_angle = false;
161                i += 1;
162            }
163            b'"' if !in_angle => {
164                in_quotes = !in_quotes;
165                i += 1;
166            }
167            b'(' if !in_quotes && !in_angle => {
168                paren_depth += 1;
169                i += 1;
170            }
171            b')' if !in_quotes && !in_angle => {
172                if paren_depth == 0 {
173                    return Some(i);
174                }
175                paren_depth -= 1;
176                i += 1;
177            }
178            _ => i += step(remaining, i),
179        }
180    }
181    None
182}
183
/// Number of bytes occupied by the UTF-8 character that begins at byte
/// index `i` of `s`.
///
/// Lets byte-indexed scanning loops advance one whole character at a time
/// so they never stop in the middle of a multi-byte sequence. Yields `1`
/// when `i` is exactly the end of `s`. `i` must lie on a character boundary
/// (slicing panics otherwise).
fn step(s: &str, i: usize) -> usize {
    match s[i..].chars().next() {
        Some(ch) => ch.len_utf8(),
        None => 1,
    }
}
190
191/// CommonMark §6.4: "Links may not contain other links, at any level of
192/// nesting. If multiple otherwise valid link definitions appear nested inside
193/// each other, the inner-most definition is used." This helper scans a
194/// candidate link text for any `[` that starts a valid inline link; when
195/// found, the outer link must be rejected so the inner-most wins (spec
196/// examples #518–#519, #532).
197///
198/// Images themselves do not count as inner links — a link can contain an
199/// image (#517, #531). A link *inside* an image's alt text, however, still
200/// deactivates outer link openers per CommonMark's bracket-scanner rules, so
201/// the helper recurses into image alt text looking for inner links.
202///
203/// Reference-link nesting (#533, #569, #571) requires resolving labels
204/// against the document's reference-definition map, which the parser does
205/// not have at this point — those cases remain unhandled and need a later
206/// stack-based pass.
207fn link_text_contains_inner_link(text: &str, ctx: LinkScanContext, strict_dest: bool) -> bool {
208    let bytes = text.as_bytes();
209    let mut i = 0;
210    let mut escape_next = false;
211    while i < bytes.len() {
212        let b = bytes[i];
213        if escape_next {
214            escape_next = false;
215            i += step(text, i);
216            continue;
217        }
218        match b {
219            b'\\' => {
220                escape_next = true;
221                i += 1;
222            }
223            b'`' => {
224                if let Some((len, _, _, _)) = try_parse_code_span(&text[i..]) {
225                    i += len;
226                } else {
227                    i += 1;
228                }
229            }
230            b'<' => {
231                if ctx.skip_autolinks
232                    && let Some((len, _)) = try_parse_autolink(&text[i..], true)
233                {
234                    i += len;
235                } else if ctx.skip_raw_html
236                    && let Some(len) = try_parse_inline_html(&text[i..])
237                {
238                    i += len;
239                } else {
240                    i += 1;
241                }
242            }
243            b'!' if i + 1 < bytes.len() && bytes[i + 1] == b'[' => {
244                if let Some((len, alt, _, _)) = try_parse_inline_image(&text[i..], ctx) {
245                    if link_text_contains_inner_link(alt, ctx, strict_dest) {
246                        return true;
247                    }
248                    i += len;
249                } else {
250                    i += 2;
251                }
252            }
253            b'[' => {
254                if try_parse_inline_link(&text[i..], strict_dest, ctx).is_some() {
255                    return true;
256                }
257                i += 1;
258            }
259            _ => i += step(text, i),
260        }
261    }
262    false
263}
264
265/// Try to parse an inline image starting at the current position.
266///
267/// Inline images have the form `![alt](url)` or `![alt](url "title")`.
268/// Can also have trailing attributes: `![alt](url){#id .class}`.
269/// Returns Some((length, alt_text, dest_content, raw_attributes)) if a valid image is found.
270///
271/// `ctx` controls bracket-scanner opacity for raw HTML / autolink spans;
272/// see `LinkScanContext`.
273pub fn try_parse_inline_image(
274    text: &str,
275    ctx: LinkScanContext,
276) -> Option<(usize, &str, &str, Option<&str>)> {
277    if !text.starts_with("![") {
278        return None;
279    }
280
281    // Find the closing ]
282    let close_bracket = find_link_close_bracket(text, 2, ctx)?;
283    let alt_text = &text[2..close_bracket];
284
285    // Check for immediate ( after ]
286    let after_bracket = close_bracket + 1;
287    if text.len() <= after_bracket || !text[after_bracket..].starts_with('(') {
288        return None;
289    }
290
291    // Find closing ) for destination (reuse same logic as links)
292    let dest_start = after_bracket + 1;
293    let remaining = &text[dest_start..];
294
295    let close_paren = find_dest_close_paren(remaining)?;
296    let dest_content = &remaining[..close_paren];
297
298    // Check for trailing attributes {#id .class key=value}
299    let after_paren = dest_start + close_paren + 1;
300    let after_close = &text[after_paren..];
301
302    // Attributes must start immediately after closing paren (no whitespace/newlines)
303    if after_close.starts_with('{') {
304        // Find the closing brace
305        if let Some(close_brace_pos) = after_close.find('}') {
306            let attr_text = &after_close[..=close_brace_pos];
307            // Try to parse as attributes to validate
308            if let Some((_attrs, _)) = try_parse_trailing_attributes(attr_text) {
309                let total_len = after_paren + close_brace_pos + 1;
310                // Return raw attribute string for lossless parsing
311                let raw_attrs = attr_text;
312                return Some((total_len, alt_text, dest_content, Some(raw_attrs)));
313            }
314        }
315    }
316
317    // No attributes, just return the image
318    let total_len = after_paren;
319    Some((total_len, alt_text, dest_content, None))
320}
321
/// Emit an inline image node to the builder.
/// Note: alt_text may contain inline elements and should be parsed recursively.
///
/// The emitted `IMAGE_LINK` node contains, in order: the `![` opener, the
/// recursively parsed alt text, `]`, `(`, the destination as a single text
/// token, `)`, and — when `raw_attributes` is `Some` — the attribute block
/// verbatim.
///
/// - `_text`: unused by this function.
/// - `alt_text`: text between `![` and `]`.
/// - `dest`: text between `(` and `)`, emitted unmodified.
/// - `raw_attributes`: raw trailing attribute text, if any.
/// - `config`: forwarded to `parse_inline_text` for the alt text.
pub fn emit_inline_image(
    builder: &mut GreenNodeBuilder,
    _text: &str,
    alt_text: &str,
    dest: &str,
    raw_attributes: Option<&str>,
    config: &ParserOptions,
) {
    builder.start_node(SyntaxKind::IMAGE_LINK.into());

    // Opening ![
    builder.start_node(SyntaxKind::IMAGE_LINK_START.into());
    builder.token(SyntaxKind::IMAGE_LINK_START.into(), "![");
    builder.finish_node();

    // Alt text (recursively parse inline elements)
    builder.start_node(SyntaxKind::IMAGE_ALT.into());
    // Use the standalone parse_inline_text function for recursive parsing
    // Note: nested contexts don't resolve references
    // NOTE(review): the exact meaning of the trailing `false` flag is defined
    // by `parse_inline_text`, which is not visible here — confirm there.
    parse_inline_text(builder, alt_text, config, false);
    builder.finish_node();

    // Closing ]
    builder.token(SyntaxKind::IMAGE_ALT_END.into(), "]");

    // Opening (
    builder.token(SyntaxKind::IMAGE_DEST_START.into(), "(");

    // Destination
    builder.start_node(SyntaxKind::LINK_DEST.into());
    builder.token(SyntaxKind::TEXT.into(), dest);
    builder.finish_node();

    // Closing )
    builder.token(SyntaxKind::IMAGE_DEST_END.into(), ")");

    // Emit raw attributes if present (preserve original formatting)
    if let Some(raw_attrs) = raw_attributes {
        builder.start_node(SyntaxKind::ATTRIBUTE.into());
        builder.token(SyntaxKind::ATTRIBUTE.into(), raw_attrs);
        builder.finish_node();
    }

    builder.finish_node();
}
369
370/// Try to parse an automatic link starting at the current position.
371///
372/// Automatic links have the form `<url>` (URI autolink) or `<email>`
373/// (email autolink) per CommonMark §6.4. Under `Dialect::CommonMark` the
374/// scheme/email grammar is enforced strictly (e.g. scheme must be 2-32
375/// ASCII chars; email local parts cannot contain backslashes). Pandoc
376/// markdown is laxer — it accepts Unicode in email addresses, for
377/// example — so non-CommonMark callers fall back to the heuristic
378/// "contains `:` or `@`" check that the parser used historically.
379pub fn try_parse_autolink(text: &str, is_commonmark: bool) -> Option<(usize, &str)> {
380    if !text.starts_with('<') {
381        return None;
382    }
383
384    let close_pos = text[1..].find('>')?;
385    let content = &text[1..1 + close_pos];
386
387    if content.is_empty() {
388        return None;
389    }
390    if content.contains(|c: char| c.is_whitespace()) {
391        return None;
392    }
393
394    if is_commonmark {
395        if !is_valid_uri_autolink(content) && !is_valid_email_autolink(content) {
396            return None;
397        }
398    } else if !content.contains(':') && !content.contains('@') {
399        return None;
400    }
401
402    Some((close_pos + 2, content))
403}
404
/// CommonMark §6.4 URI autolink:
/// scheme = 2-32 chars, ASCII letter then `[a-zA-Z0-9+.-]`, followed by `:`,
/// followed by URI body (any char except control, space, `<`, `>`).
///
/// `s` is the text between the angle brackets. The body may be empty. Bytes
/// at or above 0x80 (non-ASCII text) are accepted in the body; only ASCII
/// control bytes, the space, `<` and `>` are rejected.
fn is_valid_uri_autolink(s: &str) -> bool {
    let bytes = s.as_bytes();
    if !bytes.first().is_some_and(u8::is_ascii_alphabetic) {
        return false;
    }

    // Scheme length: the leading letter plus every following scheme byte.
    let scheme_len = 1 + bytes[1..]
        .iter()
        .take_while(|&&b| b.is_ascii_alphanumeric() || matches!(b, b'+' | b'-' | b'.'))
        .count();
    if !(2..=32).contains(&scheme_len) {
        return false;
    }
    if bytes.get(scheme_len) != Some(&b':') {
        return false;
    }

    // `b > 0x20` rejects the ASCII control range *and* the space itself, so
    // the function matches its documented grammar even when a caller has not
    // pre-filtered whitespace.
    bytes[scheme_len + 1..]
        .iter()
        .all(|&b| b > 0x20 && b != 0x7f && b != b'<' && b != b'>')
}
435
436/// CommonMark §6.4 email autolink, matching the HTML5 non-normative regex:
437/// `^[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?
438///  (?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$`.
439fn is_valid_email_autolink(s: &str) -> bool {
440    let Some(at) = s.find('@') else {
441        return false;
442    };
443    let local = &s[..at];
444    let domain = &s[at + 1..];
445    if local.is_empty() || !local.bytes().all(is_email_local_byte) {
446        return false;
447    }
448    if domain.is_empty() {
449        return false;
450    }
451    domain.split('.').all(is_valid_email_label)
452}
453
/// Whether `b` may appear in the local part (before `@`) of an email
/// autolink: an ASCII letter or digit, or one of
/// ``. ! # $ % & ' * + / = ? ^ _ ` { | } ~ -``.
fn is_email_local_byte(b: u8) -> bool {
    const PUNCTUATION: &[u8] = b".!#$%&'*+/=?^_`{|}~-";
    b.is_ascii_alphanumeric() || PUNCTUATION.contains(&b)
}
482
/// Whether `label` is a valid domain label for an email autolink: 1 to 63
/// bytes, starting and ending with an ASCII letter or digit, with only ASCII
/// letters, digits and `-` in between.
///
/// A single-character label such as the `b` in `a@b.c` is valid. It is
/// handled without slicing an "interior" range, which would be the inverted
/// range `1..0` for a one-byte label and panic.
fn is_valid_email_label(label: &str) -> bool {
    let bytes = label.as_bytes();
    if bytes.len() > 63 {
        return false;
    }
    // An empty label has neither a first nor a last byte.
    let (Some(first), Some(last)) = (bytes.first(), bytes.last()) else {
        return false;
    };
    // The ends must be alphanumeric; checking every byte against the
    // interior rule afterwards is harmless for the ends and avoids slicing.
    first.is_ascii_alphanumeric()
        && last.is_ascii_alphanumeric()
        && bytes
            .iter()
            .all(|b| b.is_ascii_alphanumeric() || *b == b'-')
}
498
/// Emit an automatic link node to the builder.
///
/// The emitted `AUTO_LINK` node contains a `<` marker node, the `url` as a
/// single text token, and a `>` marker node, in that order.
///
/// - `_text`: unused by this function.
/// - `url`: the content between the angle brackets, emitted unmodified.
pub fn emit_autolink(builder: &mut GreenNodeBuilder, _text: &str, url: &str) {
    builder.start_node(SyntaxKind::AUTO_LINK.into());

    // Opening <
    builder.start_node(SyntaxKind::AUTO_LINK_MARKER.into());
    builder.token(SyntaxKind::AUTO_LINK_MARKER.into(), "<");
    builder.finish_node();

    // URL content
    builder.token(SyntaxKind::TEXT.into(), url);

    // Closing >
    builder.start_node(SyntaxKind::AUTO_LINK_MARKER.into());
    builder.token(SyntaxKind::AUTO_LINK_MARKER.into(), ">");
    builder.finish_node();

    builder.finish_node();
}
518
/// Attempt to parse a bare (unbracketed) URI at the very start of `text`.
///
/// The URI must begin with a scheme — an ASCII letter followed by ASCII
/// letters, digits, `+`, `-` or `.` — terminated by `:`. The body then runs
/// up to the first ASCII whitespace byte or one of `<`, `>`, `` ` ``, `"`,
/// `'`. Trailing `.`, `,`, `;`, `:`, `)`, `]` and `}` are trimmed from the
/// body and left for the surrounding text.
///
/// Returns `Some((length, uri))` with `uri == &text[..length]`, or `None`
/// when there is no scheme, the body is empty (before or after trimming), or
/// trimming leaves a trailing backslash.
pub fn try_parse_bare_uri(text: &str) -> Option<(usize, &str)> {
    if !text.chars().next()?.is_ascii_alphabetic() {
        return None;
    }

    // The first non-scheme character has to be the `:` separator.
    let colon = text.find(|c: char| !(c.is_ascii_alphanumeric() || matches!(c, '+' | '-' | '.')))?;
    if !text[colon..].starts_with(':') {
        return None;
    }

    // The body extends to the first terminating byte. All terminators are
    // ASCII, so the cut always falls on a character boundary.
    let body_start = colon + 1;
    let body_len = text.as_bytes()[body_start..]
        .iter()
        .take_while(|&&b| !(b.is_ascii_whitespace() || matches!(b, b'<' | b'>' | b'`' | b'"' | b'\'')))
        .count();
    if body_len == 0 {
        return None;
    }

    // Trailing punctuation belongs to the surrounding prose, not the URI.
    // Only the body is trimmed; the scheme and its `:` are never touched.
    let body = text[body_start..body_start + body_len]
        .trim_end_matches(['.', ',', ';', ':', ')', ']', '}']);
    if body.is_empty() {
        return None;
    }

    // If trimming terminal punctuation leaves a dangling backslash, the match
    // came from escaped punctuation (e.g., `a:\]`) and should stay literal.
    if body.ends_with('\\') {
        return None;
    }

    let uri_len = body_start + body.len();
    Some((uri_len, &text[..uri_len]))
}
580
581/// Try to parse an inline link starting at the current position.
582///
583/// Inline links have the form `[text](url)` or `[text](url "title")`.
584/// Can also have trailing attributes: `[text](url){#id .class}`.
585/// Returns Some((length, text_content, dest_content, raw_attributes)) if a valid link is found.
586///
587/// `strict_dest` enables CommonMark §6.4 destination-and-title validation:
588/// the bare destination form may not contain spaces or ASCII control
589/// characters and must have balanced parentheses; if a title follows it
590/// must be properly delimited; only whitespace is allowed before/after.
591/// Pandoc-markdown is more permissive, so leave this off for that dialect.
592pub fn try_parse_inline_link(
593    text: &str,
594    strict_dest: bool,
595    ctx: LinkScanContext,
596) -> Option<(usize, &str, &str, Option<&str>)> {
597    if !text.starts_with('[') {
598        return None;
599    }
600
601    // Find the closing ]
602    let close_bracket = find_link_close_bracket(text, 1, ctx)?;
603    let link_text = &text[1..close_bracket];
604
605    // Check for immediate ( after ]
606    let after_bracket = close_bracket + 1;
607    if text.len() <= after_bracket || !text[after_bracket..].starts_with('(') {
608        return None;
609    }
610
611    // Find closing ) for destination
612    let dest_start = after_bracket + 1;
613    let remaining = &text[dest_start..];
614
615    let close_paren = find_dest_close_paren(remaining)?;
616    let dest_content = &remaining[..close_paren];
617
618    if strict_dest && !dest_and_title_ok_commonmark(dest_content) {
619        return None;
620    }
621
622    // CommonMark §6.4: outer link is rejected when its text contains a valid
623    // inner inline link or image, so the inner-most definition wins.
624    if ctx.disallow_inner_links && link_text_contains_inner_link(link_text, ctx, strict_dest) {
625        return None;
626    }
627
628    // Check for trailing attributes {#id .class key=value}
629    let after_paren = dest_start + close_paren + 1;
630    let after_close = &text[after_paren..];
631
632    // Attributes must start immediately after closing paren (no whitespace/newlines)
633    if after_close.starts_with('{') {
634        // Find the closing brace
635        if let Some(close_brace_pos) = after_close.find('}') {
636            let attr_text = &after_close[..=close_brace_pos];
637            // Try to parse as attributes to validate
638            if let Some((_attrs, _)) = try_parse_trailing_attributes(attr_text) {
639                let total_len = after_paren + close_brace_pos + 1;
640                // Return raw attribute string for lossless parsing
641                let raw_attrs = attr_text;
642                return Some((total_len, link_text, dest_content, Some(raw_attrs)));
643            }
644        }
645    }
646
647    // No attributes, just return the link
648    let total_len = after_paren;
649    Some((total_len, link_text, dest_content, None))
650}
651
652/// CommonMark §6.4 destination + optional title validation. The text passed
653/// in is whatever the parser captured between `(` and `)`. A valid form is:
654/// `[ws] destination [ws title [ws]]` where:
655/// - bare destination has no spaces, tabs, ASCII control chars, and balanced
656///   parentheses (escaped parens permitted);
657/// - bracketed destination is `<...>` with no newlines and no unescaped `<>`;
658/// - the optional title is delimited by `"..."`, `'...'`, or `(...)`;
659/// - any text outside that structure invalidates the link.
660fn dest_and_title_ok_commonmark(content: &str) -> bool {
661    let trimmed = trim_start_link_ws(content);
662    if trimmed.is_empty() {
663        return true;
664    }
665
666    let after_dest = if let Some(rest) = trimmed.strip_prefix('<') {
667        let mut escape = false;
668        let mut end_byte = None;
669        for (i, c) in rest.char_indices() {
670            if escape {
671                escape = false;
672                continue;
673            }
674            match c {
675                '\\' => escape = true,
676                '\n' | '<' => return false,
677                '>' => {
678                    end_byte = Some(i);
679                    break;
680                }
681                _ => {}
682            }
683        }
684        match end_byte {
685            Some(e) => &rest[e + 1..],
686            None => return false,
687        }
688    } else {
689        let mut escape = false;
690        let mut depth: i32 = 0;
691        let mut end = trimmed.len();
692        for (i, c) in trimmed.char_indices() {
693            if escape {
694                escape = false;
695                continue;
696            }
697            match c {
698                '\\' => escape = true,
699                ' ' | '\t' | '\n' => {
700                    end = i;
701                    break;
702                }
703                _ if c.is_ascii_control() => return false,
704                '(' => depth += 1,
705                ')' => {
706                    if depth == 0 {
707                        end = i;
708                        break;
709                    }
710                    depth -= 1;
711                }
712                _ => {}
713            }
714        }
715        if depth != 0 {
716            return false;
717        }
718        if end == 0 {
719            // bare destination must be nonempty if the field is non-blank
720            return false;
721        }
722        &trimmed[end..]
723    };
724
725    let after_dest = trim_start_link_ws(after_dest);
726    if after_dest.is_empty() {
727        return true;
728    }
729
730    let bytes = after_dest.as_bytes();
731    let close = match bytes[0] {
732        b'"' => b'"',
733        b'\'' => b'\'',
734        b'(' => b')',
735        _ => return false,
736    };
737    let opens_paren = bytes[0] == b'(';
738    let mut escape = false;
739    let mut title_close_pos = None;
740    for (i, &b) in after_dest.as_bytes().iter().enumerate().skip(1) {
741        if escape {
742            escape = false;
743            continue;
744        }
745        if b == b'\\' {
746            escape = true;
747            continue;
748        }
749        if opens_paren && b == b'(' {
750            return false;
751        }
752        if b == close {
753            title_close_pos = Some(i);
754            break;
755        }
756    }
757    let close_idx = match title_close_pos {
758        Some(p) => p,
759        None => return false,
760    };
761
762    let after_title = &after_dest[close_idx + 1..];
763    is_link_ws_only(after_title)
764}
765
/// Strip leading ASCII space/tab/newline bytes. Byte-level equivalent of
/// `s.trim_start_matches([' ', '\t', '\n'])`; called for every
/// CommonMark inline-link destination/title scan, so the slice-pattern
/// MultiCharEqSearcher overhead matters.
///
/// Returns the suffix of `s` that starts at the first byte which is not a
/// space, tab or newline (the empty string if there is none).
#[inline]
fn trim_start_link_ws(s: &str) -> &str {
    let skipped = s
        .bytes()
        .take_while(|b| matches!(b, b' ' | b'\t' | b'\n'))
        .count();
    // Only single-byte ASCII characters were skipped, so `skipped` always
    // lies on a character boundary and ordinary slicing cannot panic; no
    // `unsafe` is needed to keep the scan byte-level.
    &s[skipped..]
}
785
/// Whether `s` consists solely of space, tab and newline bytes. The empty
/// string qualifies.
#[inline]
fn is_link_ws_only(s: &str) -> bool {
    s.bytes().all(|byte| matches!(byte, b' ' | b'\t' | b'\n'))
}
792
/// Emit an inline link node to the builder.
/// Note: link_text may contain inline elements and should be parsed recursively.
///
/// The emitted `LINK` node contains, in order: the `[` opener, the
/// recursively parsed link text, `]`, `(`, the destination as a single text
/// token, `)`, and — when `raw_attributes` is `Some` — the attribute block
/// verbatim.
///
/// - `_text`: unused by this function.
/// - `link_text`: text between `[` and `]`.
/// - `dest`: text between `(` and `)`, emitted unmodified.
/// - `raw_attributes`: raw trailing attribute text, if any.
/// - `config`: forwarded to `parse_inline_text` for the link text.
pub fn emit_inline_link(
    builder: &mut GreenNodeBuilder,
    _text: &str,
    link_text: &str,
    dest: &str,
    raw_attributes: Option<&str>,
    config: &ParserOptions,
) {
    builder.start_node(SyntaxKind::LINK.into());

    // Opening [
    builder.start_node(SyntaxKind::LINK_START.into());
    builder.token(SyntaxKind::LINK_START.into(), "[");
    builder.finish_node();

    // Link text (recursively parse inline elements). Pandoc-native:
    // links cannot contain other links, so suppress inner LINK / ref-link
    // recognition during the recursion. Images, emphasis, code, etc. are
    // still recognised. CommonMark relies on outer-level process_brackets
    // to prevent nested links, but the flag is harmless under CM.
    // NOTE(review): the `LinkScanContext` docs in this file state that
    // Pandoc-markdown *allows* nested links — confirm which statement
    // reflects the current behaviour.
    builder.start_node(SyntaxKind::LINK_TEXT.into());
    parse_inline_text(builder, link_text, config, true);
    builder.finish_node();

    // Closing ]
    builder.token(SyntaxKind::LINK_TEXT_END.into(), "]");

    // Opening (
    builder.token(SyntaxKind::LINK_DEST_START.into(), "(");

    // Destination
    builder.start_node(SyntaxKind::LINK_DEST.into());
    builder.token(SyntaxKind::TEXT.into(), dest);
    builder.finish_node();

    // Closing )
    builder.token(SyntaxKind::LINK_DEST_END.into(), ")");

    // Emit raw attributes if present (preserve original formatting)
    if let Some(raw_attrs) = raw_attributes {
        builder.start_node(SyntaxKind::ATTRIBUTE.into());
        builder.token(SyntaxKind::ATTRIBUTE.into(), raw_attrs);
        builder.finish_node();
    }

    builder.finish_node();
}
842
/// Emit a `LINK` node for a bare URI.
///
/// The node has the same shape as an inline link — `[`, link text, `]`,
/// `(`, destination, `)` — with `uri` emitted as both the link text and the
/// destination. The bracket and paren tokens are emitted as literal strings
/// by this function rather than taken from `uri`.
///
/// NOTE(review): because those delimiter tokens are emitted here, the node's
/// text is longer than `uri` itself — confirm this is what consumers of the
/// tree expect.
///
/// - `uri`: the bare URI text.
/// - `_config`: unused by this function.
pub fn emit_bare_uri_link(builder: &mut GreenNodeBuilder, uri: &str, _config: &ParserOptions) {
    builder.start_node(SyntaxKind::LINK.into());

    // Opening [
    builder.start_node(SyntaxKind::LINK_START.into());
    builder.token(SyntaxKind::LINK_START.into(), "[");
    builder.finish_node();

    // Link text: the URI itself, as a plain text token (no recursive parse)
    builder.start_node(SyntaxKind::LINK_TEXT.into());
    builder.token(SyntaxKind::TEXT.into(), uri);
    builder.finish_node();

    // Closing ] and opening (
    builder.token(SyntaxKind::LINK_TEXT_END.into(), "]");
    builder.token(SyntaxKind::LINK_DEST_START.into(), "(");

    // Destination: the same URI again
    builder.start_node(SyntaxKind::LINK_DEST.into());
    builder.token(SyntaxKind::TEXT.into(), uri);
    builder.finish_node();

    // Closing )
    builder.token(SyntaxKind::LINK_DEST_END.into(), ")");

    builder.finish_node();
}
865
866/// Try to parse a reference link starting at the current position.
867///
868/// Reference links have three forms:
869/// - Explicit: `[text][label]`
870/// - Implicit: `[text][]` (label = text)
871/// - Shortcut: `[text]` (if shortcut_reference_links enabled)
872///
873/// Returns Some((length, text_content, label, is_shortcut)) if a valid reference link is found.
874/// The label is what should be looked up in the registry.
875pub fn try_parse_reference_link(
876    text: &str,
877    allow_shortcut: bool,
878    inline_link_attempted: bool,
879    ctx: LinkScanContext,
880) -> Option<(usize, &str, String, bool)> {
881    if !text.starts_with('[') {
882        return None;
883    }
884
885    // Don't match citations (which start with [@) or suppress-author citations (which start with [-@)
886    if text.len() > 1 {
887        let bytes = text.as_bytes();
888        if bytes[1] == b'@' {
889            return None;
890        }
891        if bytes[1] == b'-' && text.len() > 2 && bytes[2] == b'@' {
892            return None;
893        }
894    }
895
896    // Find the closing ] for the text. Uses the shared helper so that a
897    // `]` inside a code span doesn't terminate the link text (CommonMark
898    // §6 — code spans bind tighter than links). See spec examples #342
899    // and #525. Raw HTML and (CommonMark-only) autolink spans are also
900    // opaque per `ctx`.
901    let close_bracket = find_link_close_bracket(text, 1, ctx)?;
902    let link_text = &text[1..close_bracket];
903
904    // CommonMark §6.4: outer reference link is rejected when its text contains
905    // a valid inner inline link/image (spec example #532). Reference-link
906    // nesting (#533/#569/#571) is not handled here; it requires resolving
907    // labels against the document refdef map.
908    if ctx.disallow_inner_links
909        && link_text_contains_inner_link(link_text, ctx, ctx.disallow_inner_links)
910    {
911        return None;
912    }
913
914    // Check what follows the ]
915    let after_bracket = close_bracket + 1;
916
917    // `[content]{...}` is reserved for bracketed spans / attribute
918    // trailers, never a shortcut.
919    if after_bracket < text.len() && text[after_bracket..].starts_with('{') {
920        return None;
921    }
922
923    // `[text](...)` is the inline-link shape. CommonMark spec example
924    // #568 (`[foo](not a link)` with `[foo]: /url`) requires the shortcut
925    // to succeed for `[foo]`, leaving `(not a link)` as literal text when
926    // the upstream inline-link parse was rejected by `strict_dest`. We
927    // only fall through to shortcut here when the caller has already
928    // tried the inline-link form (`inline_link_attempted`) — otherwise
929    // disabling the `inline_links` extension would silently let
930    // `[text](url)` become a shortcut + literal text, which the
931    // `inline_links_disabled_keeps_inline_link_literal` test guards
932    // against.
933    if after_bracket < text.len()
934        && text[after_bracket..].starts_with('(')
935        && (!allow_shortcut || !inline_link_attempted)
936    {
937        return None;
938    }
939
940    // Check for explicit reference [text][label] or implicit [text][]
941    if after_bracket < text.len() && text[after_bracket..].starts_with('[') {
942        // Find the closing ] for the label
943        let label_start = after_bracket + 1;
944        let mut label_end = None;
945
946        for (i, ch) in text[label_start..].char_indices() {
947            if ch == ']' {
948                label_end = Some(i + label_start);
949                break;
950            }
951            // Labels can't contain newlines
952            if ch == '\n' {
953                return None;
954            }
955        }
956
957        let label_end = label_end?;
958        let label = &text[label_start..label_end];
959
960        // Total length includes both bracket pairs
961        let total_len = label_end + 1;
962
963        // Implicit reference: empty label means emit [text][]
964        if label.is_empty() {
965            return Some((total_len, link_text, String::new(), false));
966        }
967
968        // Explicit reference: use the provided label
969        Some((total_len, link_text, label.to_string(), false))
970    } else if allow_shortcut {
971        // Shortcut reference: [text] with no second bracket pair
972        // The text is both the display text and the label
973        if link_text.is_empty() {
974            return None;
975        }
976        Some((after_bracket, link_text, link_text.to_string(), true))
977    } else {
978        // No second bracket pair and shortcut not allowed - not a reference link
979        None
980    }
981}
982
983/// Emit a reference link node to the builder.
984/// Preserves the original reference syntax (explicit [text][ref], implicit [text][], or shortcut [text]).
985pub fn emit_reference_link(
986    builder: &mut GreenNodeBuilder,
987    link_text: &str,
988    label: &str,
989    is_shortcut: bool,
990    config: &ParserOptions,
991) {
992    builder.start_node(SyntaxKind::LINK.into());
993
994    // Opening [
995    builder.start_node(SyntaxKind::LINK_START.into());
996    builder.token(SyntaxKind::LINK_START.into(), "[");
997    builder.finish_node();
998
999    // Link text (recursively parse inline elements). Pandoc-native:
1000    // links cannot contain other links, so suppress inner LINK / ref-link
1001    // recognition during the recursion. Images, emphasis, code, etc. are
1002    // still recognised.
1003    builder.start_node(SyntaxKind::LINK_TEXT.into());
1004    parse_inline_text(builder, link_text, config, true);
1005    builder.finish_node();
1006
1007    // Closing ] and reference label
1008    builder.token(SyntaxKind::TEXT.into(), "]");
1009
1010    if !is_shortcut {
1011        // Explicit or implicit reference: [text][label] or [text][]
1012        builder.token(SyntaxKind::TEXT.into(), "[");
1013        builder.start_node(SyntaxKind::LINK_REF.into());
1014        // For implicit references, label is empty and we emit [text][]
1015        // For explicit references, emit the label to get [text][label]
1016        if !label.is_empty() {
1017            builder.token(SyntaxKind::TEXT.into(), label);
1018        }
1019        builder.finish_node();
1020        builder.token(SyntaxKind::TEXT.into(), "]");
1021    }
1022    // For shortcut references, just [text] - no second bracket pair
1023
1024    builder.finish_node();
1025}
1026
/// Try to parse a reference-style image: `![alt][ref]`, `![alt][]`, or `![alt]`.
///
/// Returns `Some((total_len, alt_text, label, is_shortcut))` if successful:
/// - `total_len` is the number of bytes consumed from `text`,
/// - `alt_text` is the slice between `![` and its matching `]` (nested
///   brackets are balanced, backslash-escaped characters are skipped),
/// - `label` is the explicit label verbatim, or a copy of `alt_text` for the
///   implicit (`![alt][]`) and shortcut (`![alt]`) forms,
/// - `is_shortcut` is `true` only for the `![alt]` form.
///
/// Returns `None` when `text` is shorter than four bytes, does not start
/// with `![`, has unbalanced alt brackets, has an unterminated label or one
/// containing `\n` / `\r`, is followed by `(` (inline image shape), or is a
/// shortcut while `allow_shortcut` is `false`.
///
/// A shortcut image is recognised even when `![alt]` is the very last thing
/// in `text`, mirroring how shortcut reference links behave at end of input.
pub fn try_parse_reference_image(
    text: &str,
    allow_shortcut: bool,
) -> Option<(usize, &str, String, bool)> {
    let bytes = text.as_bytes();
    if bytes.len() < 4 || !text.starts_with("![") {
        return None;
    }

    let alt_start = 2;
    let mut pos = alt_start;
    let mut bracket_depth = 1;

    // Find the end of the alt text (allowing nested brackets).
    while pos < bytes.len() && bracket_depth > 0 {
        match bytes[pos] {
            b'[' => bracket_depth += 1,
            b']' => bracket_depth -= 1,
            b'\\' if pos + 1 < bytes.len() => pos += 1, // skip escaped char
            _ => {}
        }
        pos += 1;
    }

    if bracket_depth > 0 {
        return None; // Unclosed brackets
    }

    // `pos` is one past the closing `]`, which is ASCII, so `pos - 1` is a
    // valid char boundary.
    let alt_text = &text[alt_start..pos - 1];

    match bytes.get(pos) {
        // Explicit `![alt][label]` or implicit `![alt][]`.
        Some(b'[') => {
            let label_start = pos + 1;
            let label_and_tail = &text[label_start..];

            // Stop at the closing `]` or a line ending, whichever is first
            // (no nested brackets, no newlines inside a label).
            let label_len = label_and_tail.find(|c: char| matches!(c, ']' | '\n' | '\r'))?;
            if !label_and_tail[label_len..].starts_with(']') {
                return None;
            }
            let label_text = &label_and_tail[..label_len];

            // Empty label means implicit reference: use the alt text as the
            // label so the emitter can detect the implicit form by equality.
            // Otherwise preserve the original label (including its case).
            let label = if label_text.is_empty() {
                alt_text
            } else {
                label_text
            };

            Some((label_start + label_len + 1, alt_text, label.to_string(), false))
        }
        // `![alt](...)` is the inline image shape, never a reference.
        Some(b'(') => None,
        // Shortcut `![alt]`, followed by anything else or by end of input.
        _ if allow_shortcut => Some((pos, alt_text, alt_text.to_string(), true)),
        _ => None,
    }
}
1108
1109/// Emit a reference image node with registry lookup.
1110pub fn emit_reference_image(
1111    builder: &mut GreenNodeBuilder,
1112    alt_text: &str,
1113    label: &str,
1114    is_shortcut: bool,
1115    config: &ParserOptions,
1116) {
1117    builder.start_node(SyntaxKind::IMAGE_LINK.into());
1118
1119    // Emit as reference image (preserve original syntax)
1120    builder.start_node(SyntaxKind::IMAGE_LINK_START.into());
1121    builder.token(SyntaxKind::IMAGE_LINK_START.into(), "![");
1122    builder.finish_node();
1123
1124    // Alt text (recursively parse inline elements)
1125    builder.start_node(SyntaxKind::IMAGE_ALT.into());
1126    parse_inline_text(builder, alt_text, config, false);
1127    builder.finish_node();
1128
1129    // Closing ] and reference label
1130    builder.token(SyntaxKind::TEXT.into(), "]");
1131
1132    if !is_shortcut {
1133        // Explicit or implicit reference: ![alt][label] or ![alt][]
1134        builder.token(SyntaxKind::TEXT.into(), "[");
1135        builder.start_node(SyntaxKind::LINK_REF.into());
1136        // For implicit references, emit empty label (label == alt means implicit from parser)
1137        if label != alt_text {
1138            builder.token(SyntaxKind::TEXT.into(), label);
1139        }
1140        builder.finish_node();
1141        builder.token(SyntaxKind::TEXT.into(), "]");
1142    }
1143    // For shortcut references, just ![alt] - no second bracket pair
1144
1145    builder.finish_node();
1146}
1147
1148/// Emit an `UNRESOLVED_REFERENCE` node for a Pandoc bracket-shape
1149/// pattern whose label didn't resolve. The wrapper covers the original
1150/// bracket bytes; the inner text recurses through normal inline
1151/// parsing (with inner-link suppression so a stray inner inline link
1152/// doesn't reorder semantics relative to pandoc-native).
1153///
1154/// `source` is `text[start..end]` — the full bracket-shape pattern.
1155/// `text_content` is the inner text between the outer `[` and `]`
1156/// (the bytes used for inline recursion). `label_suffix` carries the
1157/// `[label]` / `[]` suffix bytes verbatim, or `None` for shortcut form.
1158pub fn emit_unresolved_reference(
1159    builder: &mut GreenNodeBuilder,
1160    is_image: bool,
1161    text_content: &str,
1162    label_suffix: Option<&str>,
1163    config: &ParserOptions,
1164) {
1165    builder.start_node(SyntaxKind::UNRESOLVED_REFERENCE.into());
1166
1167    if is_image {
1168        builder.start_node(SyntaxKind::IMAGE_LINK_START.into());
1169        builder.token(SyntaxKind::IMAGE_LINK_START.into(), "![");
1170        builder.finish_node();
1171        builder.start_node(SyntaxKind::IMAGE_ALT.into());
1172        parse_inline_text(builder, text_content, config, false);
1173        builder.finish_node();
1174    } else {
1175        builder.start_node(SyntaxKind::LINK_START.into());
1176        builder.token(SyntaxKind::LINK_START.into(), "[");
1177        builder.finish_node();
1178        builder.start_node(SyntaxKind::LINK_TEXT.into());
1179        parse_inline_text(builder, text_content, config, true);
1180        builder.finish_node();
1181    }
1182
1183    builder.token(SyntaxKind::TEXT.into(), "]");
1184
1185    if let Some(suffix) = label_suffix {
1186        // suffix is either "[label]" or "[]"; preserve original bytes.
1187        // Split as `[` + LINK_REF(label) + `]` so wrapper accessors find
1188        // the label via `support::child::<LinkRef>()`.
1189        debug_assert!(suffix.starts_with('[') && suffix.ends_with(']'));
1190        builder.token(SyntaxKind::TEXT.into(), "[");
1191        let label = &suffix[1..suffix.len() - 1];
1192        builder.start_node(SyntaxKind::LINK_REF.into());
1193        if !label.is_empty() {
1194            builder.token(SyntaxKind::TEXT.into(), label);
1195        }
1196        builder.finish_node();
1197        builder.token(SyntaxKind::TEXT.into(), "]");
1198    }
1199
1200    builder.finish_node();
1201}
1202
/// Unit tests for the string-level link/image scanners in this module.
///
/// These exercise the `try_parse_*` functions directly on `&str` input and
/// check the returned `(length, text, destination/label, …)` tuples; they do
/// not build syntax trees.
#[cfg(test)]
mod tests {
    use super::*;

    // Automatic link tests. The boolean argument toggles the strict
    // (CommonMark) validation exercised by the `commonmark_*` tests below.
    #[test]
    fn test_parse_autolink_url() {
        let input = "<https://example.com>";
        assert_eq!(
            try_parse_autolink(input, false),
            Some((21, "https://example.com"))
        );
        assert_eq!(
            try_parse_autolink(input, true),
            Some((21, "https://example.com"))
        );
    }

    #[test]
    fn test_parse_autolink_email() {
        let input = "<user@example.com>";
        assert_eq!(
            try_parse_autolink(input, false),
            Some((18, "user@example.com"))
        );
        assert_eq!(
            try_parse_autolink(input, true),
            Some((18, "user@example.com"))
        );
    }

    #[test]
    fn test_parse_autolink_no_close() {
        let input = "<https://example.com";
        assert_eq!(try_parse_autolink(input, false), None);
        assert_eq!(try_parse_autolink(input, true), None);
    }

    #[test]
    fn test_parse_autolink_with_space() {
        let input = "<https://example.com >";
        assert_eq!(try_parse_autolink(input, false), None);
        assert_eq!(try_parse_autolink(input, true), None);
    }

    #[test]
    fn test_parse_autolink_not_url_or_email() {
        let input = "<notaurl>";
        assert_eq!(try_parse_autolink(input, false), None);
        assert_eq!(try_parse_autolink(input, true), None);
    }

    #[test]
    fn test_parse_autolink_commonmark_strict_scheme() {
        // Scheme too short (1 char) — invalid under CommonMark, lax-accepted
        // under Pandoc dialect (matches historical behavior).
        let input = "<m:abc>";
        assert_eq!(try_parse_autolink(input, true), None);
        assert_eq!(try_parse_autolink(input, false), Some((7, "m:abc")));
    }

    #[test]
    fn test_parse_autolink_commonmark_email_disallows_backslash() {
        let input = "<foo\\+@bar.example.com>";
        assert_eq!(try_parse_autolink(input, true), None);
        assert_eq!(
            try_parse_autolink(input, false),
            Some((23, "foo\\+@bar.example.com"))
        );
    }

    // Inline link tests
    #[test]
    fn test_parse_inline_link_simple() {
        let input = "[text](url)";
        let result = try_parse_inline_link(input, false, LinkScanContext::default());
        assert_eq!(result, Some((11, "text", "url", None)));
    }

    #[test]
    fn test_parse_inline_link_with_title() {
        let input = r#"[text](url "title")"#;
        let result = try_parse_inline_link(input, false, LinkScanContext::default());
        assert_eq!(result, Some((19, "text", r#"url "title""#, None)));
    }

    #[test]
    fn test_parse_inline_link_with_nested_brackets() {
        let input = "[outer [inner] text](url)";
        let result = try_parse_inline_link(input, false, LinkScanContext::default());
        assert_eq!(result, Some((25, "outer [inner] text", "url", None)));
    }

    #[test]
    fn test_parse_inline_link_no_space_between_brackets_and_parens() {
        let input = "[text] (url)";
        let result = try_parse_inline_link(input, false, LinkScanContext::default());
        assert_eq!(result, None);
    }

    #[test]
    fn test_parse_inline_link_no_closing_bracket() {
        let input = "[text(url)";
        let result = try_parse_inline_link(input, false, LinkScanContext::default());
        assert_eq!(result, None);
    }

    #[test]
    fn test_parse_inline_link_no_closing_paren() {
        let input = "[text](url";
        let result = try_parse_inline_link(input, false, LinkScanContext::default());
        assert_eq!(result, None);
    }

    #[test]
    fn test_parse_inline_link_escaped_bracket() {
        let input = r"[text\]more](url)";
        let result = try_parse_inline_link(input, false, LinkScanContext::default());
        assert_eq!(result, Some((17, r"text\]more", "url", None)));
    }

    #[test]
    fn test_parse_inline_link_parens_in_url() {
        let input = "[text](url(with)parens)";
        let result = try_parse_inline_link(input, false, LinkScanContext::default());
        assert_eq!(result, Some((23, "text", "url(with)parens", None)));
    }

    // Inline image tests
    #[test]
    fn test_parse_inline_image_simple() {
        let input = "![alt](image.jpg)";
        let result = try_parse_inline_image(input, LinkScanContext::default());
        assert_eq!(result, Some((17, "alt", "image.jpg", None)));
    }

    #[test]
    fn test_parse_inline_image_with_title() {
        let input = r#"![alt](image.jpg "A title")"#;
        let result = try_parse_inline_image(input, LinkScanContext::default());
        assert_eq!(result, Some((27, "alt", r#"image.jpg "A title""#, None)));
    }

    #[test]
    fn test_parse_inline_image_with_nested_brackets() {
        let input = "![outer [inner] alt](image.jpg)";
        let result = try_parse_inline_image(input, LinkScanContext::default());
        assert_eq!(result, Some((31, "outer [inner] alt", "image.jpg", None)));
    }

    #[test]
    fn test_parse_bare_uri_rejects_dangling_backslash_after_trim() {
        let input = r"a:\]";
        let result = try_parse_bare_uri(input);
        assert_eq!(result, None);
    }

    #[test]
    fn test_parse_inline_image_no_space_between_brackets_and_parens() {
        let input = "![alt] (image.jpg)";
        let result = try_parse_inline_image(input, LinkScanContext::default());
        assert_eq!(result, None);
    }

    #[test]
    fn test_parse_inline_image_no_closing_bracket() {
        let input = "![alt(image.jpg)";
        let result = try_parse_inline_image(input, LinkScanContext::default());
        assert_eq!(result, None);
    }

    #[test]
    fn test_parse_inline_image_no_closing_paren() {
        let input = "![alt](image.jpg";
        let result = try_parse_inline_image(input, LinkScanContext::default());
        assert_eq!(result, None);
    }

    #[test]
    fn test_parse_inline_image_with_simple_class() {
        let input = "![alt](img.png){.large}";
        let result = try_parse_inline_image(input, LinkScanContext::default());
        let (len, alt, dest, attrs) = result.unwrap();
        assert_eq!(len, 23);
        assert_eq!(alt, "alt");
        assert_eq!(dest, "img.png");
        assert!(attrs.is_some());
        let attrs = attrs.unwrap();
        assert_eq!(attrs, "{.large}");
    }

    #[test]
    fn test_parse_inline_image_with_id() {
        let input = "![Figure 1](fig1.png){#fig-1}";
        let result = try_parse_inline_image(input, LinkScanContext::default());
        let (len, alt, dest, attrs) = result.unwrap();
        assert_eq!(len, 29);
        assert_eq!(alt, "Figure 1");
        assert_eq!(dest, "fig1.png");
        assert!(attrs.is_some());
        let attrs = attrs.unwrap();
        assert_eq!(attrs, "{#fig-1}");
    }

    #[test]
    fn test_parse_inline_image_with_full_attributes() {
        let input = "![alt](img.png){#fig .large width=\"80%\"}";
        let result = try_parse_inline_image(input, LinkScanContext::default());
        let (len, alt, dest, attrs) = result.unwrap();
        assert_eq!(len, 40);
        assert_eq!(alt, "alt");
        assert_eq!(dest, "img.png");
        assert!(attrs.is_some());
        let attrs = attrs.unwrap();
        assert_eq!(attrs, "{#fig .large width=\"80%\"}");
    }

    #[test]
    fn test_parse_inline_image_attributes_must_be_adjacent() {
        // Space between ) and { should not parse as attributes
        let input = "![alt](img.png) {.large}";
        let result = try_parse_inline_image(input, LinkScanContext::default());
        assert_eq!(result, Some((15, "alt", "img.png", None)));
    }

    // Link attribute tests
    #[test]
    fn test_parse_inline_link_with_id() {
        let input = "[text](url){#link-1}";
        let result = try_parse_inline_link(input, false, LinkScanContext::default());
        let (len, text, dest, attrs) = result.unwrap();
        assert_eq!(len, 20);
        assert_eq!(text, "text");
        assert_eq!(dest, "url");
        assert!(attrs.is_some());
        let attrs = attrs.unwrap();
        assert_eq!(attrs, "{#link-1}");
    }

    #[test]
    fn test_parse_inline_link_with_full_attributes() {
        let input = "[text](url){#link .external target=\"_blank\"}";
        let result = try_parse_inline_link(input, false, LinkScanContext::default());
        let (len, text, dest, attrs) = result.unwrap();
        assert_eq!(len, 44);
        assert_eq!(text, "text");
        assert_eq!(dest, "url");
        assert!(attrs.is_some());
        let attrs = attrs.unwrap();
        assert_eq!(attrs, "{#link .external target=\"_blank\"}");
    }

    #[test]
    fn test_parse_inline_link_attributes_must_be_adjacent() {
        // Space between ) and { should not parse as attributes
        let input = "[text](url) {.class}";
        let result = try_parse_inline_link(input, false, LinkScanContext::default());
        assert_eq!(result, Some((11, "text", "url", None)));
    }

    #[test]
    fn test_parse_inline_link_with_title_and_attributes() {
        let input = r#"[text](url "title"){.external}"#;
        let result = try_parse_inline_link(input, false, LinkScanContext::default());
        let (len, text, dest, attrs) = result.unwrap();
        assert_eq!(len, 30);
        assert_eq!(text, "text");
        assert_eq!(dest, r#"url "title""#);
        assert!(attrs.is_some());
        let attrs = attrs.unwrap();
        assert_eq!(attrs, "{.external}");
    }

    // Reference link tests
    #[test]
    fn test_parse_reference_link_explicit() {
        let input = "[link text][label]";
        let result = try_parse_reference_link(input, false, true, LinkScanContext::default());
        assert_eq!(result, Some((18, "link text", "label".to_string(), false)));
    }

    #[test]
    fn test_parse_reference_link_implicit() {
        let input = "[link text][]";
        let result = try_parse_reference_link(input, false, true, LinkScanContext::default());
        assert_eq!(result, Some((13, "link text", String::new(), false)));
    }

    #[test]
    fn test_parse_reference_link_explicit_same_label_as_text() {
        let input = "[stack][stack]";
        let result = try_parse_reference_link(input, false, true, LinkScanContext::default());
        assert_eq!(result, Some((14, "stack", "stack".to_string(), false)));
    }

    #[test]
    fn test_parse_reference_link_shortcut() {
        let input = "[link text] rest";
        let result = try_parse_reference_link(input, true, true, LinkScanContext::default());
        assert_eq!(
            result,
            Some((11, "link text", "link text".to_string(), true))
        );
    }

    #[test]
    fn test_parse_reference_link_shortcut_rejects_empty_label() {
        let input = "[] rest";
        let result = try_parse_reference_link(input, true, true, LinkScanContext::default());
        assert_eq!(result, None);
    }

    #[test]
    fn test_parse_reference_link_shortcut_disabled() {
        let input = "[link text] rest";
        let result = try_parse_reference_link(input, false, true, LinkScanContext::default());
        assert_eq!(result, None);
    }

    #[test]
    fn test_parse_reference_link_not_inline_link() {
        // With shortcut disabled, `[text](url)` is rejected so the inline
        // link form upstream gets exclusive ownership.
        let input = "[text](url)";
        let result = try_parse_reference_link(input, false, true, LinkScanContext::default());
        assert_eq!(result, None);
    }

    #[test]
    fn test_parse_reference_link_shortcut_falls_through_inline_link() {
        // CommonMark spec example #568: when an inline-link attempt would
        // fail (here we model the reachability — the caller tries inline
        // link first; if that returns None, we should still see `[text]`
        // as a shortcut and leave `(url)` to be parsed as following text).
        let input = "[text](url)";
        let result = try_parse_reference_link(input, true, true, LinkScanContext::default());
        assert_eq!(result, Some((6, "text", "text".to_string(), true)));
    }

    #[test]
    fn test_parse_reference_link_with_nested_brackets() {
        let input = "[outer [inner] text][ref]";
        let result = try_parse_reference_link(input, false, true, LinkScanContext::default());
        assert_eq!(
            result,
            Some((25, "outer [inner] text", "ref".to_string(), false))
        );
    }

    #[test]
    fn test_parse_reference_link_label_no_newline() {
        let input = "[text][label\nmore]";
        let result = try_parse_reference_link(input, false, true, LinkScanContext::default());
        assert_eq!(result, None);
    }

    // Reference image tests
    #[test]
    fn test_parse_reference_image_explicit() {
        let input = "![alt text][label]";
        let result = try_parse_reference_image(input, false);
        assert_eq!(result, Some((18, "alt text", "label".to_string(), false)));
    }

    #[test]
    fn test_parse_reference_image_implicit() {
        let input = "![alt text][]";
        let result = try_parse_reference_image(input, false);
        assert_eq!(
            result,
            Some((13, "alt text", "alt text".to_string(), false))
        );
    }

    #[test]
    fn test_parse_reference_image_shortcut() {
        let input = "![alt text] rest";
        let result = try_parse_reference_image(input, true);
        assert_eq!(result, Some((11, "alt text", "alt text".to_string(), true)));
    }

    #[test]
    fn test_parse_reference_image_shortcut_disabled() {
        let input = "![alt text] rest";
        let result = try_parse_reference_image(input, false);
        assert_eq!(result, None);
    }

    #[test]
    fn test_parse_reference_image_not_inline() {
        // Should not match inline images with (url)
        let input = "![alt](url)";
        let result = try_parse_reference_image(input, true);
        assert_eq!(result, None);
    }

    #[test]
    fn test_parse_reference_image_with_nested_brackets() {
        let input = "![alt [nested] text][ref]";
        let result = try_parse_reference_image(input, false);
        assert_eq!(
            result,
            Some((25, "alt [nested] text", "ref".to_string(), false))
        );
    }

    #[test]
    fn test_reference_link_label_with_crlf() {
        // Reference link labels should not span lines with CRLF
        let input = "[foo\r\nbar]";
        let result = try_parse_reference_link(input, false, true, LinkScanContext::default());

        // Should fail to parse because label contains line break
        assert_eq!(
            result, None,
            "Should not parse reference link with CRLF in label"
        );

        // The bracket-only input above is rejected with shortcuts disabled
        // no matter what it contains, so also check the explicit form, which
        // actually reaches the label scan.
        let input = "[text][foo\r\nbar]";
        let result = try_parse_reference_link(input, false, true, LinkScanContext::default());
        assert_eq!(
            result, None,
            "Should not parse explicit reference link with CRLF in label"
        );
    }

    #[test]
    fn test_reference_link_label_with_lf() {
        // Reference link labels should not span lines with LF either
        let input = "[foo\nbar]";
        let result = try_parse_reference_link(input, false, true, LinkScanContext::default());

        // Should fail to parse because label contains line break
        assert_eq!(
            result, None,
            "Should not parse reference link with LF in label"
        );

        // As above: the explicit form is what exercises the label scan.
        let input = "[text][foo\nbar]";
        let result = try_parse_reference_link(input, false, true, LinkScanContext::default());
        assert_eq!(
            result, None,
            "Should not parse explicit reference link with LF in label"
        );
    }

    // Multiline link text tests
    #[test]
    fn test_parse_inline_link_multiline_text() {
        // Per Pandoc spec, link text CAN contain newlines (soft breaks)
        let input = "[text on\nline two](url)";
        let result = try_parse_inline_link(input, false, LinkScanContext::default());
        assert_eq!(
            result,
            Some((23, "text on\nline two", "url", None)),
            "Link text should allow newlines"
        );
    }

    #[test]
    fn test_parse_inline_link_multiline_with_formatting() {
        // Link text with newlines and other inline elements
        let input =
            "[A network graph. Different edges\nwith probability](../images/networkfig.png)";
        let result = try_parse_inline_link(input, false, LinkScanContext::default());
        assert!(result.is_some(), "Link text with newlines should parse");
        let (len, text, _dest, _attrs) = result.unwrap();
        assert!(text.contains('\n'), "Link text should preserve newline");
        assert_eq!(len, input.len());
    }

    #[test]
    fn test_parse_inline_image_multiline_alt() {
        // Per Pandoc spec, image alt text CAN contain newlines
        let input = "![alt on\nline two](img.png)";
        let result = try_parse_inline_image(input, LinkScanContext::default());
        assert_eq!(
            result,
            Some((27, "alt on\nline two", "img.png", None)),
            "Image alt text should allow newlines"
        );
    }

    #[test]
    fn test_parse_inline_image_multiline_with_attributes() {
        // Image with multiline alt text and attributes
        let input = "![network graph\ndiagram](../images/fig.png){width=70%}";
        let result = try_parse_inline_image(input, LinkScanContext::default());
        assert!(
            result.is_some(),
            "Image alt with newlines and attributes should parse"
        );
        let (len, alt, dest, attrs) = result.unwrap();
        assert!(alt.contains('\n'), "Alt text should preserve newline");
        assert_eq!(dest, "../images/fig.png");
        assert_eq!(attrs, Some("{width=70%}"));
        assert_eq!(len, input.len());
    }

    #[test]
    fn test_parse_inline_link_with_attributes_after_newline() {
        // Test for regression: when text is concatenated with newlines,
        // attributes after ) should still be recognized
        let input = "[A network graph.](../images/networkfig.png){width=70%}\nA word\n";
        let result = try_parse_inline_link(input, false, LinkScanContext::default());
        assert!(
            result.is_some(),
            "Link with attributes should parse even with following text"
        );
        let (len, text, dest, attrs) = result.unwrap();
        assert_eq!(text, "A network graph.");
        assert_eq!(dest, "../images/networkfig.png");
        assert_eq!(attrs, Some("{width=70%}"), "Attributes should be captured");
        assert_eq!(
            len, 55,
            "Length should include attributes (up to closing brace)"
        );
    }
}