Skip to main content

panache_parser/parser/inlines/
links.rs

1//! Parsing for links, images, and automatic links.
2//!
3//! Implements:
4//! - Automatic links: `<http://example.com>` and `<user@example.com>`
5//! - Inline links: `[text](url)` and `[text](url "title")`
6//! - Link attributes: `[text](url){#id .class key=value}`
7//! - Inline images: `![alt](url)` and `![alt](url "title")`
8//! - Image attributes: `![alt](url){#id .class key=value}`
9//! - Reference links: `[text][ref]`, `[text][]`, `[text]`
10//! - Reference images: `![alt][ref]`, `![alt][]`, `![alt]`
11
12use super::core::parse_inline_text;
13use crate::options::ParserOptions;
14use crate::syntax::SyntaxKind;
15use rowan::GreenNodeBuilder;
16
17// Import attribute parsing
18use crate::parser::utils::attributes::try_parse_trailing_attributes;
19
20/// Try to parse an inline image starting at the current position.
21///
22/// Inline images have the form `![alt](url)` or `![alt](url "title")`.
23/// Can also have trailing attributes: `![alt](url){#id .class}`.
24/// Returns Some((length, alt_text, dest_content, raw_attributes)) if a valid image is found.
25pub fn try_parse_inline_image(text: &str) -> Option<(usize, &str, &str, Option<&str>)> {
26    if !text.starts_with("![") {
27        return None;
28    }
29
30    // Find the closing ]
31    let mut bracket_depth = 0;
32    let mut escape_next = false;
33    let mut close_bracket_pos = None;
34
35    for (i, ch) in text[2..].char_indices() {
36        if escape_next {
37            escape_next = false;
38            continue;
39        }
40
41        match ch {
42            '\\' => escape_next = true,
43            '[' => bracket_depth += 1,
44            ']' => {
45                if bracket_depth == 0 {
46                    close_bracket_pos = Some(i + 2);
47                    break;
48                }
49                bracket_depth -= 1;
50            }
51            _ => {}
52        }
53    }
54
55    let close_bracket = close_bracket_pos?;
56    let alt_text = &text[2..close_bracket];
57
58    // Check for immediate ( after ]
59    let after_bracket = close_bracket + 1;
60    if text.len() <= after_bracket || !text[after_bracket..].starts_with('(') {
61        return None;
62    }
63
64    // Find closing ) for destination (reuse same logic as links)
65    let dest_start = after_bracket + 1;
66    let remaining = &text[dest_start..];
67
68    let mut paren_depth = 0;
69    let mut escape_next = false;
70    let mut in_quotes = false;
71    let mut close_paren_pos = None;
72
73    for (i, ch) in remaining.char_indices() {
74        if escape_next {
75            escape_next = false;
76            continue;
77        }
78
79        match ch {
80            '\\' => escape_next = true,
81            '"' => in_quotes = !in_quotes,
82            '(' if !in_quotes => paren_depth += 1,
83            ')' if !in_quotes => {
84                if paren_depth == 0 {
85                    close_paren_pos = Some(i);
86                    break;
87                }
88                paren_depth -= 1;
89            }
90            _ => {}
91        }
92    }
93
94    let close_paren = close_paren_pos?;
95    let dest_content = &remaining[..close_paren];
96
97    // Check for trailing attributes {#id .class key=value}
98    let after_paren = dest_start + close_paren + 1;
99    let after_close = &text[after_paren..];
100
101    // Attributes must start immediately after closing paren (no whitespace/newlines)
102    if after_close.starts_with('{') {
103        // Find the closing brace
104        if let Some(close_brace_pos) = after_close.find('}') {
105            let attr_text = &after_close[..=close_brace_pos];
106            // Try to parse as attributes to validate
107            if let Some((_attrs, _)) = try_parse_trailing_attributes(attr_text) {
108                let total_len = after_paren + close_brace_pos + 1;
109                // Return raw attribute string for lossless parsing
110                let raw_attrs = attr_text;
111                return Some((total_len, alt_text, dest_content, Some(raw_attrs)));
112            }
113        }
114    }
115
116    // No attributes, just return the image
117    let total_len = after_paren;
118    Some((total_len, alt_text, dest_content, None))
119}
120
121/// Emit an inline image node to the builder.
122/// Note: alt_text may contain inline elements and should be parsed recursively.
123pub fn emit_inline_image(
124    builder: &mut GreenNodeBuilder,
125    _text: &str,
126    alt_text: &str,
127    dest: &str,
128    raw_attributes: Option<&str>,
129    config: &ParserOptions,
130) {
131    builder.start_node(SyntaxKind::IMAGE_LINK.into());
132
133    // Opening ![
134    builder.start_node(SyntaxKind::IMAGE_LINK_START.into());
135    builder.token(SyntaxKind::IMAGE_LINK_START.into(), "![");
136    builder.finish_node();
137
138    // Alt text (recursively parse inline elements)
139    builder.start_node(SyntaxKind::IMAGE_ALT.into());
140    // Use the standalone parse_inline_text function for recursive parsing
141    // Note: nested contexts don't resolve references
142    parse_inline_text(builder, alt_text, config, false);
143    builder.finish_node();
144
145    // Closing ]
146    builder.token(SyntaxKind::IMAGE_ALT_END.into(), "]");
147
148    // Opening (
149    builder.token(SyntaxKind::IMAGE_DEST_START.into(), "(");
150
151    // Destination
152    builder.start_node(SyntaxKind::LINK_DEST.into());
153    builder.token(SyntaxKind::TEXT.into(), dest);
154    builder.finish_node();
155
156    // Closing )
157    builder.token(SyntaxKind::IMAGE_DEST_END.into(), ")");
158
159    // Emit raw attributes if present (preserve original formatting)
160    if let Some(raw_attrs) = raw_attributes {
161        builder.start_node(SyntaxKind::ATTRIBUTE.into());
162        builder.token(SyntaxKind::ATTRIBUTE.into(), raw_attrs);
163        builder.finish_node();
164    }
165
166    builder.finish_node();
167}
168
/// Try to parse an automatic link at the very start of `text`.
///
/// Automatic links look like `<url>` or `<user@example.com>`. The content
/// between the angle brackets must be non-empty, must not contain any
/// whitespace, and must contain a `:` (URL-like) or an `@` (email-like).
///
/// Returns `Some((length, content))`, where `length` counts both angle
/// brackets, or `None` when `text` does not start with a valid autolink.
pub fn try_parse_autolink(text: &str) -> Option<(usize, &str)> {
    let after_open = text.strip_prefix('<')?;

    // Everything up to the first `>` is the candidate link content.
    let close = after_open.find('>')?;
    let content = &after_open[..close];

    let has_whitespace = content.chars().any(char::is_whitespace);
    // A `:` covers both `scheme://...` and `scheme:...`; `@` covers emails.
    let looks_like_link = content.contains(|c: char| c == ':' || c == '@');

    if content.is_empty() || has_whitespace || !looks_like_link {
        return None;
    }

    // `<` + content + `>`
    Some((content.len() + 2, content))
}
205
206/// Emit an automatic link node to the builder.
207pub fn emit_autolink(builder: &mut GreenNodeBuilder, _text: &str, url: &str) {
208    builder.start_node(SyntaxKind::AUTO_LINK.into());
209
210    // Opening <
211    builder.start_node(SyntaxKind::AUTO_LINK_MARKER.into());
212    builder.token(SyntaxKind::AUTO_LINK_MARKER.into(), "<");
213    builder.finish_node();
214
215    // URL content
216    builder.token(SyntaxKind::TEXT.into(), url);
217
218    // Closing >
219    builder.start_node(SyntaxKind::AUTO_LINK_MARKER.into());
220    builder.token(SyntaxKind::AUTO_LINK_MARKER.into(), ">");
221    builder.finish_node();
222
223    builder.finish_node();
224}
225
/// Try to parse a bare URI (`scheme:rest`) at the very start of `text`.
///
/// The scheme must start with an ASCII letter and may continue with ASCII
/// letters, digits, `+`, `-` or `.` up to the first `:`. The part after the
/// colon runs until ASCII whitespace or one of `<`, `>`, `` ` ``, `"`, `'`,
/// and then has trailing `.`, `,`, `;`, `:`, `)`, `]`, `}` characters removed.
///
/// Returns `Some((length, uri))` with `uri == &text[..length]`, or `None`
/// when there is no scheme, nothing is left after the colon, or the
/// remaining URI ends in a backslash.
pub fn try_parse_bare_uri(text: &str) -> Option<(usize, &str)> {
    if !text.chars().next()?.is_ascii_alphabetic() {
        return None;
    }

    // The first character outside the scheme alphabet has to be the colon.
    let is_scheme_char = |c: char| c.is_ascii_alphanumeric() || matches!(c, '+' | '-' | '.');
    let scheme_len = text.find(|c: char| !is_scheme_char(c))?;
    if !text[scheme_len..].starts_with(':') {
        return None;
    }

    // Everything after the colon up to the first terminator character.
    let body_start = scheme_len + 1;
    let rest = &text[body_start..];
    let is_terminator =
        |c: char| c.is_ascii_whitespace() || matches!(c, '<' | '>' | '`' | '"' | '\'');
    let body = &rest[..rest.find(is_terminator).unwrap_or(rest.len())];

    // Sentence punctuation and closing brackets at the end are not part of the URI.
    let body =
        body.trim_end_matches(|c: char| matches!(c, '.' | ',' | ';' | ':' | ')' | ']' | '}'));

    // If trimming leaves a dangling backslash, the match came from escaped
    // punctuation (e.g. `a:\]`) and should stay literal.
    if body.is_empty() || body.ends_with('\\') {
        return None;
    }

    let length = body_start + body.len();
    Some((length, &text[..length]))
}
287
288/// Try to parse an inline link starting at the current position.
289///
290/// Inline links have the form `[text](url)` or `[text](url "title")`.
291/// Can also have trailing attributes: `[text](url){#id .class}`.
292/// Returns Some((length, text_content, dest_content, raw_attributes)) if a valid link is found.
293pub fn try_parse_inline_link(text: &str) -> Option<(usize, &str, &str, Option<&str>)> {
294    if !text.starts_with('[') {
295        return None;
296    }
297
298    // Find the closing ]
299    let mut bracket_depth = 0;
300    let mut escape_next = false;
301    let mut close_bracket_pos = None;
302
303    for (i, ch) in text[1..].char_indices() {
304        if escape_next {
305            escape_next = false;
306            continue;
307        }
308
309        match ch {
310            '\\' => escape_next = true,
311            '[' => bracket_depth += 1,
312            ']' => {
313                if bracket_depth == 0 {
314                    close_bracket_pos = Some(i + 1);
315                    break;
316                }
317                bracket_depth -= 1;
318            }
319            _ => {}
320        }
321    }
322
323    let close_bracket = close_bracket_pos?;
324    let link_text = &text[1..close_bracket];
325
326    // Check for immediate ( after ]
327    let after_bracket = close_bracket + 1;
328    if text.len() <= after_bracket || !text[after_bracket..].starts_with('(') {
329        return None;
330    }
331
332    // Find closing ) for destination
333    let dest_start = after_bracket + 1;
334    let remaining = &text[dest_start..];
335
336    let mut paren_depth = 0;
337    let mut escape_next = false;
338    let mut in_quotes = false;
339    let mut close_paren_pos = None;
340
341    for (i, ch) in remaining.char_indices() {
342        if escape_next {
343            escape_next = false;
344            continue;
345        }
346
347        match ch {
348            '\\' => escape_next = true,
349            '"' => in_quotes = !in_quotes,
350            '(' if !in_quotes => paren_depth += 1,
351            ')' if !in_quotes => {
352                if paren_depth == 0 {
353                    close_paren_pos = Some(i);
354                    break;
355                }
356                paren_depth -= 1;
357            }
358            _ => {}
359        }
360    }
361
362    let close_paren = close_paren_pos?;
363    let dest_content = &remaining[..close_paren];
364
365    // Check for trailing attributes {#id .class key=value}
366    let after_paren = dest_start + close_paren + 1;
367    let after_close = &text[after_paren..];
368
369    // Attributes must start immediately after closing paren (no whitespace/newlines)
370    if after_close.starts_with('{') {
371        // Find the closing brace
372        if let Some(close_brace_pos) = after_close.find('}') {
373            let attr_text = &after_close[..=close_brace_pos];
374            // Try to parse as attributes to validate
375            if let Some((_attrs, _)) = try_parse_trailing_attributes(attr_text) {
376                let total_len = after_paren + close_brace_pos + 1;
377                // Return raw attribute string for lossless parsing
378                let raw_attrs = attr_text;
379                return Some((total_len, link_text, dest_content, Some(raw_attrs)));
380            }
381        }
382    }
383
384    // No attributes, just return the link
385    let total_len = after_paren;
386    Some((total_len, link_text, dest_content, None))
387}
388
389/// Emit an inline link node to the builder.
390/// Note: link_text may contain inline elements and should be parsed recursively.
391pub fn emit_inline_link(
392    builder: &mut GreenNodeBuilder,
393    _text: &str,
394    link_text: &str,
395    dest: &str,
396    raw_attributes: Option<&str>,
397    config: &ParserOptions,
398) {
399    builder.start_node(SyntaxKind::LINK.into());
400
401    // Opening [
402    builder.start_node(SyntaxKind::LINK_START.into());
403    builder.token(SyntaxKind::LINK_START.into(), "[");
404    builder.finish_node();
405
406    // Link text (recursively parse inline elements)
407    builder.start_node(SyntaxKind::LINK_TEXT.into());
408    // Use the standalone parse_inline_text function for recursive parsing
409    parse_inline_text(builder, link_text, config, false);
410    builder.finish_node();
411
412    // Closing ]
413    builder.token(SyntaxKind::LINK_TEXT_END.into(), "]");
414
415    // Opening (
416    builder.token(SyntaxKind::LINK_DEST_START.into(), "(");
417
418    // Destination
419    builder.start_node(SyntaxKind::LINK_DEST.into());
420    builder.token(SyntaxKind::TEXT.into(), dest);
421    builder.finish_node();
422
423    // Closing )
424    builder.token(SyntaxKind::LINK_DEST_END.into(), ")");
425
426    // Emit raw attributes if present (preserve original formatting)
427    if let Some(raw_attrs) = raw_attributes {
428        builder.start_node(SyntaxKind::ATTRIBUTE.into());
429        builder.token(SyntaxKind::ATTRIBUTE.into(), raw_attrs);
430        builder.finish_node();
431    }
432
433    builder.finish_node();
434}
435
436pub fn emit_bare_uri_link(builder: &mut GreenNodeBuilder, uri: &str, _config: &ParserOptions) {
437    builder.start_node(SyntaxKind::LINK.into());
438
439    builder.start_node(SyntaxKind::LINK_START.into());
440    builder.token(SyntaxKind::LINK_START.into(), "[");
441    builder.finish_node();
442
443    builder.start_node(SyntaxKind::LINK_TEXT.into());
444    builder.token(SyntaxKind::TEXT.into(), uri);
445    builder.finish_node();
446
447    builder.token(SyntaxKind::LINK_TEXT_END.into(), "]");
448    builder.token(SyntaxKind::LINK_DEST_START.into(), "(");
449
450    builder.start_node(SyntaxKind::LINK_DEST.into());
451    builder.token(SyntaxKind::TEXT.into(), uri);
452    builder.finish_node();
453
454    builder.token(SyntaxKind::LINK_DEST_END.into(), ")");
455
456    builder.finish_node();
457}
458
/// Try to parse a reference link at the very start of `text`.
///
/// Three forms are recognised:
/// - explicit: `[text][label]`
/// - implicit: `[text][]` (reported with an empty label)
/// - shortcut: `[text]`, only when `allow_shortcut` is true and the text is
///   non-empty (reported with the text as the label)
///
/// Input starting with `[@` or `[-@` is rejected (citations), as is a first
/// bracket pair directly followed by `(` (inline link) or `{` (bracketed
/// span).
///
/// Returns `Some((length, text_content, label, is_shortcut))`, where `length`
/// is the number of bytes consumed, or `None` when nothing matches.
pub fn try_parse_reference_link(
    text: &str,
    allow_shortcut: bool,
) -> Option<(usize, &str, String, bool)> {
    let inner = text.strip_prefix('[')?;

    // `[@...]` and `[-@...]` belong to the citation syntax.
    if inner.starts_with('@') || inner.starts_with("-@") {
        return None;
    }

    // Locate the `]` that closes the link text. Nested `[...]` pairs are
    // balanced and a backslash hides the character that follows it.
    let mut scan = inner.char_indices();
    let mut nesting = 0usize;
    let text_len = loop {
        let (offset, ch) = scan.next()?;
        match ch {
            '\\' => {
                scan.next();
            }
            '[' => nesting += 1,
            ']' if nesting == 0 => break offset,
            ']' => nesting -= 1,
            _ => {}
        }
    };
    let link_text = &inner[..text_len];

    // Bytes consumed by the first bracket pair: "[" + text + "]".
    let first_pair_len = text_len + 2;
    let tail = &inner[text_len + 1..];

    // `(` means inline link, `{` means bracketed span; neither is ours.
    if tail.starts_with('(') || tail.starts_with('{') {
        return None;
    }

    match tail.strip_prefix('[') {
        Some(label_region) => {
            // The label ends at the first `]`; hitting a newline first (or
            // running out of input) means this is not a reference.
            let stop = label_region.find(|c: char| c == ']' || c == '\n')?;
            if !label_region[stop..].starts_with(']') {
                return None;
            }
            let label = &label_region[..stop];

            // first pair + "[" + label + "]"
            let total_len = first_pair_len + stop + 2;
            Some((total_len, link_text, label.to_string(), false))
        }
        // Shortcut form: the text doubles as the label and must not be empty.
        None if allow_shortcut && !link_text.is_empty() => {
            Some((first_pair_len, link_text, link_text.to_string(), true))
        }
        None => None,
    }
}
570
571/// Emit a reference link node to the builder.
572/// Preserves the original reference syntax (explicit [text][ref], implicit [text][], or shortcut [text]).
573pub fn emit_reference_link(
574    builder: &mut GreenNodeBuilder,
575    link_text: &str,
576    label: &str,
577    is_shortcut: bool,
578    config: &ParserOptions,
579) {
580    builder.start_node(SyntaxKind::LINK.into());
581
582    // Opening [
583    builder.start_node(SyntaxKind::LINK_START.into());
584    builder.token(SyntaxKind::LINK_START.into(), "[");
585    builder.finish_node();
586
587    // Link text (recursively parse inline elements)
588    builder.start_node(SyntaxKind::LINK_TEXT.into());
589    parse_inline_text(builder, link_text, config, false);
590    builder.finish_node();
591
592    // Closing ] and reference label
593    builder.token(SyntaxKind::TEXT.into(), "]");
594
595    if !is_shortcut {
596        // Explicit or implicit reference: [text][label] or [text][]
597        builder.token(SyntaxKind::TEXT.into(), "[");
598        builder.start_node(SyntaxKind::LINK_REF.into());
599        // For implicit references, label is empty and we emit [text][]
600        // For explicit references, emit the label to get [text][label]
601        if !label.is_empty() {
602            builder.token(SyntaxKind::TEXT.into(), label);
603        }
604        builder.finish_node();
605        builder.token(SyntaxKind::TEXT.into(), "]");
606    }
607    // For shortcut references, just [text] - no second bracket pair
608
609    builder.finish_node();
610}
611
/// Try to parse a reference-style image at the very start of `text`.
///
/// Three forms are recognised:
/// - explicit: `![alt][label]`
/// - implicit: `![alt][]` (the alt text is reported as the label)
/// - shortcut: `![alt]`, only when `allow_shortcut` is true and the closing
///   `]` is not directly followed by `(` (that would be an inline image).
///   A shortcut image is accepted both in the middle and at the very end of
///   `text`.
///
/// Inputs shorter than four bytes, or not starting with `![`, are rejected.
///
/// Returns `Some((total_len, alt_text, label, is_shortcut))`, where
/// `total_len` is the number of bytes consumed, or `None` when nothing
/// matches.
///
/// NOTE(review): because an empty label is reported as the alt text,
/// `![foo][]` and `![foo][foo]` yield the same `label`; callers cannot tell
/// the two spellings apart from the return value alone.
pub fn try_parse_reference_image(
    text: &str,
    allow_shortcut: bool,
) -> Option<(usize, &str, String, bool)> {
    let bytes = text.as_bytes();
    if bytes.len() < 4 || !text.starts_with("![") {
        return None;
    }

    // Walk to the `]` that closes the alt text. Nested brackets are balanced
    // and a backslash hides the byte that follows it. Working on bytes is
    // safe here: every byte we react to is ASCII, so `pos` always lands on a
    // character boundary when we slice.
    let alt_start = 2;
    let mut pos = alt_start;
    let mut bracket_depth = 1usize;
    while pos < bytes.len() && bracket_depth > 0 {
        match bytes[pos] {
            b'[' => bracket_depth += 1,
            b']' => bracket_depth -= 1,
            b'\\' if pos + 1 < bytes.len() => pos += 1, // skip escaped char
            _ => {}
        }
        pos += 1;
    }
    if bracket_depth > 0 {
        return None; // Unclosed brackets
    }

    // `pos` now sits just past the closing `]`.
    let alt_text = &text[alt_start..pos - 1];

    match bytes.get(pos) {
        // Explicit `![alt][label]` or implicit `![alt][]`.
        Some(b'[') => {
            let label_start = pos + 1;
            // The label ends at the first `]`; a line break before it (or
            // running out of input) means this is not a reference.
            let label_len = bytes[label_start..]
                .iter()
                .position(|b| matches!(b, b']' | b'\n' | b'\r'))?;
            let label_end = label_start + label_len;
            if bytes[label_end] != b']' {
                return None;
            }

            let label_text = &text[label_start..label_end];
            // Implicit references report the alt text as the label; explicit
            // labels are returned exactly as written.
            let label = if label_text.is_empty() {
                alt_text
            } else {
                label_text
            };
            Some((label_end + 1, alt_text, label.to_string(), false))
        }
        // `![alt](...)` is an inline image, never a reference.
        Some(b'(') => None,
        // Shortcut `![alt]`. `bytes.get(pos)` may be `None` here: the image
        // is then the last thing in `text`, which is still a valid shortcut.
        _ if allow_shortcut => Some((pos, alt_text, alt_text.to_string(), true)),
        _ => None,
    }
}
693
694/// Emit a reference image node with registry lookup.
695pub fn emit_reference_image(
696    builder: &mut GreenNodeBuilder,
697    alt_text: &str,
698    label: &str,
699    is_shortcut: bool,
700    config: &ParserOptions,
701) {
702    builder.start_node(SyntaxKind::IMAGE_LINK.into());
703
704    // Emit as reference image (preserve original syntax)
705    builder.start_node(SyntaxKind::IMAGE_LINK_START.into());
706    builder.token(SyntaxKind::IMAGE_LINK_START.into(), "![");
707    builder.finish_node();
708
709    // Alt text (recursively parse inline elements)
710    builder.start_node(SyntaxKind::IMAGE_ALT.into());
711    parse_inline_text(builder, alt_text, config, false);
712    builder.finish_node();
713
714    // Closing ] and reference label
715    builder.token(SyntaxKind::TEXT.into(), "]");
716
717    if !is_shortcut {
718        // Explicit or implicit reference: ![alt][label] or ![alt][]
719        builder.token(SyntaxKind::TEXT.into(), "[");
720        builder.start_node(SyntaxKind::LINK_REF.into());
721        // For implicit references, emit empty label (label == alt means implicit from parser)
722        if label != alt_text {
723            builder.token(SyntaxKind::TEXT.into(), label);
724        }
725        builder.finish_node();
726        builder.token(SyntaxKind::TEXT.into(), "]");
727    }
728    // For shortcut references, just ![alt] - no second bracket pair
729
730    builder.finish_node();
731}
732
733#[cfg(test)]
734mod tests {
735    use super::*;
736
737    #[test]
738    fn test_parse_autolink_url() {
739        let input = "<https://example.com>";
740        let result = try_parse_autolink(input);
741        assert_eq!(result, Some((21, "https://example.com")));
742    }
743
744    #[test]
745    fn test_parse_autolink_email() {
746        let input = "<user@example.com>";
747        let result = try_parse_autolink(input);
748        assert_eq!(result, Some((18, "user@example.com")));
749    }
750
751    #[test]
752    fn test_parse_autolink_no_close() {
753        let input = "<https://example.com";
754        let result = try_parse_autolink(input);
755        assert_eq!(result, None);
756    }
757
758    #[test]
759    fn test_parse_autolink_with_space() {
760        let input = "<https://example.com >";
761        let result = try_parse_autolink(input);
762        assert_eq!(result, None);
763    }
764
765    #[test]
766    fn test_parse_autolink_not_url_or_email() {
767        let input = "<notaurl>";
768        let result = try_parse_autolink(input);
769        assert_eq!(result, None);
770    }
771
772    #[test]
773    fn test_parse_inline_link_simple() {
774        let input = "[text](url)";
775        let result = try_parse_inline_link(input);
776        assert_eq!(result, Some((11, "text", "url", None)));
777    }
778
779    #[test]
780    fn test_parse_inline_link_with_title() {
781        let input = r#"[text](url "title")"#;
782        let result = try_parse_inline_link(input);
783        assert_eq!(result, Some((19, "text", r#"url "title""#, None)));
784    }
785
786    #[test]
787    fn test_parse_inline_link_with_nested_brackets() {
788        let input = "[outer [inner] text](url)";
789        let result = try_parse_inline_link(input);
790        assert_eq!(result, Some((25, "outer [inner] text", "url", None)));
791    }
792
793    #[test]
794    fn test_parse_inline_link_no_space_between_brackets_and_parens() {
795        let input = "[text] (url)";
796        let result = try_parse_inline_link(input);
797        assert_eq!(result, None);
798    }
799
800    #[test]
801    fn test_parse_inline_link_no_closing_bracket() {
802        let input = "[text(url)";
803        let result = try_parse_inline_link(input);
804        assert_eq!(result, None);
805    }
806
807    #[test]
808    fn test_parse_inline_link_no_closing_paren() {
809        let input = "[text](url";
810        let result = try_parse_inline_link(input);
811        assert_eq!(result, None);
812    }
813
814    #[test]
815    fn test_parse_inline_link_escaped_bracket() {
816        let input = r"[text\]more](url)";
817        let result = try_parse_inline_link(input);
818        assert_eq!(result, Some((17, r"text\]more", "url", None)));
819    }
820
821    #[test]
822    fn test_parse_inline_link_parens_in_url() {
823        let input = "[text](url(with)parens)";
824        let result = try_parse_inline_link(input);
825        assert_eq!(result, Some((23, "text", "url(with)parens", None)));
826    }
827
828    #[test]
829    fn test_parse_inline_image_simple() {
830        let input = "![alt](image.jpg)";
831        let result = try_parse_inline_image(input);
832        assert_eq!(result, Some((17, "alt", "image.jpg", None)));
833    }
834
835    #[test]
836    fn test_parse_inline_image_with_title() {
837        let input = r#"![alt](image.jpg "A title")"#;
838        let result = try_parse_inline_image(input);
839        assert_eq!(result, Some((27, "alt", r#"image.jpg "A title""#, None)));
840    }
841
842    #[test]
843    fn test_parse_inline_image_with_nested_brackets() {
844        let input = "![outer [inner] alt](image.jpg)";
845        let result = try_parse_inline_image(input);
846        assert_eq!(result, Some((31, "outer [inner] alt", "image.jpg", None)));
847    }
848
849    #[test]
850    fn test_parse_bare_uri_rejects_dangling_backslash_after_trim() {
851        let input = r"a:\]";
852        let result = try_parse_bare_uri(input);
853        assert_eq!(result, None);
854    }
855
856    #[test]
857    fn test_parse_inline_image_no_space_between_brackets_and_parens() {
858        let input = "![alt] (image.jpg)";
859        let result = try_parse_inline_image(input);
860        assert_eq!(result, None);
861    }
862
863    #[test]
864    fn test_parse_inline_image_no_closing_bracket() {
865        let input = "![alt(image.jpg)";
866        let result = try_parse_inline_image(input);
867        assert_eq!(result, None);
868    }
869
870    #[test]
871    fn test_parse_inline_image_no_closing_paren() {
872        let input = "![alt](image.jpg";
873        let result = try_parse_inline_image(input);
874        assert_eq!(result, None);
875    }
876
877    #[test]
878    fn test_parse_inline_image_with_simple_class() {
879        let input = "![alt](img.png){.large}";
880        let result = try_parse_inline_image(input);
881        let (len, alt, dest, attrs) = result.unwrap();
882        assert_eq!(len, 23);
883        assert_eq!(alt, "alt");
884        assert_eq!(dest, "img.png");
885        assert!(attrs.is_some());
886        let attrs = attrs.unwrap();
887        assert_eq!(attrs, "{.large}");
888    }
889
890    #[test]
891    fn test_parse_inline_image_with_id() {
892        let input = "![Figure 1](fig1.png){#fig-1}";
893        let result = try_parse_inline_image(input);
894        let (len, alt, dest, attrs) = result.unwrap();
895        assert_eq!(len, 29);
896        assert_eq!(alt, "Figure 1");
897        assert_eq!(dest, "fig1.png");
898        assert!(attrs.is_some());
899        let attrs = attrs.unwrap();
900        assert_eq!(attrs, "{#fig-1}");
901    }
902
903    #[test]
904    fn test_parse_inline_image_with_full_attributes() {
905        let input = "![alt](img.png){#fig .large width=\"80%\"}";
906        let result = try_parse_inline_image(input);
907        let (len, alt, dest, attrs) = result.unwrap();
908        assert_eq!(len, 40);
909        assert_eq!(alt, "alt");
910        assert_eq!(dest, "img.png");
911        assert!(attrs.is_some());
912        let attrs = attrs.unwrap();
913        assert_eq!(attrs, "{#fig .large width=\"80%\"}");
914    }
915
916    #[test]
917    fn test_parse_inline_image_attributes_must_be_adjacent() {
918        // Space between ) and { should not parse as attributes
919        let input = "![alt](img.png) {.large}";
920        let result = try_parse_inline_image(input);
921        assert_eq!(result, Some((15, "alt", "img.png", None)));
922    }
923
924    // Link attribute tests
925    #[test]
926    fn test_parse_inline_link_with_id() {
927        let input = "[text](url){#link-1}";
928        let result = try_parse_inline_link(input);
929        let (len, text, dest, attrs) = result.unwrap();
930        assert_eq!(len, 20);
931        assert_eq!(text, "text");
932        assert_eq!(dest, "url");
933        assert!(attrs.is_some());
934        let attrs = attrs.unwrap();
935        assert_eq!(attrs, "{#link-1}");
936    }
937
938    #[test]
939    fn test_parse_inline_link_with_full_attributes() {
940        let input = "[text](url){#link .external target=\"_blank\"}";
941        let result = try_parse_inline_link(input);
942        let (len, text, dest, attrs) = result.unwrap();
943        assert_eq!(len, 44);
944        assert_eq!(text, "text");
945        assert_eq!(dest, "url");
946        assert!(attrs.is_some());
947        let attrs = attrs.unwrap();
948        assert_eq!(attrs, "{#link .external target=\"_blank\"}");
949    }
950
951    #[test]
952    fn test_parse_inline_link_attributes_must_be_adjacent() {
953        // Space between ) and { should not parse as attributes
954        let input = "[text](url) {.class}";
955        let result = try_parse_inline_link(input);
956        assert_eq!(result, Some((11, "text", "url", None)));
957    }
958
959    #[test]
960    fn test_parse_inline_link_with_title_and_attributes() {
961        let input = r#"[text](url "title"){.external}"#;
962        let result = try_parse_inline_link(input);
963        let (len, text, dest, attrs) = result.unwrap();
964        assert_eq!(len, 30);
965        assert_eq!(text, "text");
966        assert_eq!(dest, r#"url "title""#);
967        assert!(attrs.is_some());
968        let attrs = attrs.unwrap();
969        assert_eq!(attrs, "{.external}");
970    }
971
972    // Reference link tests
973    #[test]
974    fn test_parse_reference_link_explicit() {
975        let input = "[link text][label]";
976        let result = try_parse_reference_link(input, false);
977        assert_eq!(result, Some((18, "link text", "label".to_string(), false)));
978    }
979
980    #[test]
981    fn test_parse_reference_link_implicit() {
982        let input = "[link text][]";
983        let result = try_parse_reference_link(input, false);
984        assert_eq!(result, Some((13, "link text", String::new(), false)));
985    }
986
987    #[test]
988    fn test_parse_reference_link_explicit_same_label_as_text() {
989        let input = "[stack][stack]";
990        let result = try_parse_reference_link(input, false);
991        assert_eq!(result, Some((14, "stack", "stack".to_string(), false)));
992    }
993
994    #[test]
995    fn test_parse_reference_link_shortcut() {
996        let input = "[link text] rest";
997        let result = try_parse_reference_link(input, true);
998        assert_eq!(
999            result,
1000            Some((11, "link text", "link text".to_string(), true))
1001        );
1002    }
1003
1004    #[test]
1005    fn test_parse_reference_link_shortcut_rejects_empty_label() {
1006        let input = "[] rest";
1007        let result = try_parse_reference_link(input, true);
1008        assert_eq!(result, None);
1009    }
1010
1011    #[test]
1012    fn test_parse_reference_link_shortcut_disabled() {
1013        let input = "[link text] rest";
1014        let result = try_parse_reference_link(input, false);
1015        assert_eq!(result, None);
1016    }
1017
1018    #[test]
1019    fn test_parse_reference_link_not_inline_link() {
1020        // Should not match inline links with (url)
1021        let input = "[text](url)";
1022        let result = try_parse_reference_link(input, true);
1023        assert_eq!(result, None);
1024    }
1025
1026    #[test]
1027    fn test_parse_reference_link_with_nested_brackets() {
1028        let input = "[outer [inner] text][ref]";
1029        let result = try_parse_reference_link(input, false);
1030        assert_eq!(
1031            result,
1032            Some((25, "outer [inner] text", "ref".to_string(), false))
1033        );
1034    }
1035
1036    #[test]
1037    fn test_parse_reference_link_label_no_newline() {
1038        let input = "[text][label\nmore]";
1039        let result = try_parse_reference_link(input, false);
1040        assert_eq!(result, None);
1041    }
1042
1043    // Reference image tests
1044    #[test]
1045    fn test_parse_reference_image_explicit() {
1046        let input = "![alt text][label]";
1047        let result = try_parse_reference_image(input, false);
1048        assert_eq!(result, Some((18, "alt text", "label".to_string(), false)));
1049    }
1050
1051    #[test]
1052    fn test_parse_reference_image_implicit() {
1053        let input = "![alt text][]";
1054        let result = try_parse_reference_image(input, false);
1055        assert_eq!(
1056            result,
1057            Some((13, "alt text", "alt text".to_string(), false))
1058        );
1059    }
1060
1061    #[test]
1062    fn test_parse_reference_image_shortcut() {
1063        let input = "![alt text] rest";
1064        let result = try_parse_reference_image(input, true);
1065        assert_eq!(result, Some((11, "alt text", "alt text".to_string(), true)));
1066    }
1067
1068    #[test]
1069    fn test_parse_reference_image_shortcut_disabled() {
1070        let input = "![alt text] rest";
1071        let result = try_parse_reference_image(input, false);
1072        assert_eq!(result, None);
1073    }
1074
1075    #[test]
1076    fn test_parse_reference_image_not_inline() {
1077        // Should not match inline images with (url)
1078        let input = "![alt](url)";
1079        let result = try_parse_reference_image(input, true);
1080        assert_eq!(result, None);
1081    }
1082
1083    #[test]
1084    fn test_parse_reference_image_with_nested_brackets() {
1085        let input = "![alt [nested] text][ref]";
1086        let result = try_parse_reference_image(input, false);
1087        assert_eq!(
1088            result,
1089            Some((25, "alt [nested] text", "ref".to_string(), false))
1090        );
1091    }
1092
1093    #[test]
1094    fn test_reference_link_label_with_crlf() {
1095        // Reference link labels should not span lines with CRLF
1096        let input = "[foo\r\nbar]";
1097        let result = try_parse_reference_link(input, false);
1098
1099        // Should fail to parse because label contains line break
1100        assert_eq!(
1101            result, None,
1102            "Should not parse reference link with CRLF in label"
1103        );
1104    }
1105
1106    #[test]
1107    fn test_reference_link_label_with_lf() {
1108        // Reference link labels should not span lines with LF either
1109        let input = "[foo\nbar]";
1110        let result = try_parse_reference_link(input, false);
1111
1112        // Should fail to parse because label contains line break
1113        assert_eq!(
1114            result, None,
1115            "Should not parse reference link with LF in label"
1116        );
1117    }
1118
1119    // Multiline link text tests
1120    #[test]
1121    fn test_parse_inline_link_multiline_text() {
1122        // Per Pandoc spec, link text CAN contain newlines (soft breaks)
1123        let input = "[text on\nline two](url)";
1124        let result = try_parse_inline_link(input);
1125        assert_eq!(
1126            result,
1127            Some((23, "text on\nline two", "url", None)),
1128            "Link text should allow newlines"
1129        );
1130    }
1131
1132    #[test]
1133    fn test_parse_inline_link_multiline_with_formatting() {
1134        // Link text with newlines and other inline elements
1135        let input =
1136            "[A network graph. Different edges\nwith probability](../images/networkfig.png)";
1137        let result = try_parse_inline_link(input);
1138        assert!(result.is_some(), "Link text with newlines should parse");
1139        let (len, text, _dest, _attrs) = result.unwrap();
1140        assert!(text.contains('\n'), "Link text should preserve newline");
1141        assert_eq!(len, input.len());
1142    }
1143
1144    #[test]
1145    fn test_parse_inline_image_multiline_alt() {
1146        // Per Pandoc spec, image alt text CAN contain newlines
1147        let input = "![alt on\nline two](img.png)";
1148        let result = try_parse_inline_image(input);
1149        assert_eq!(
1150            result,
1151            Some((27, "alt on\nline two", "img.png", None)),
1152            "Image alt text should allow newlines"
1153        );
1154    }
1155
1156    #[test]
1157    fn test_parse_inline_image_multiline_with_attributes() {
1158        // Image with multiline alt text and attributes
1159        let input = "![network graph\ndiagram](../images/fig.png){width=70%}";
1160        let result = try_parse_inline_image(input);
1161        assert!(
1162            result.is_some(),
1163            "Image alt with newlines and attributes should parse"
1164        );
1165        let (len, alt, dest, attrs) = result.unwrap();
1166        assert!(alt.contains('\n'), "Alt text should preserve newline");
1167        assert_eq!(dest, "../images/fig.png");
1168        assert_eq!(attrs, Some("{width=70%}"));
1169        assert_eq!(len, input.len());
1170    }
1171
1172    #[test]
1173    fn test_parse_inline_link_with_attributes_after_newline() {
1174        // Test for regression: when text is concatenated with newlines,
1175        // attributes after ) should still be recognized
1176        let input = "[A network graph.](../images/networkfig.png){width=70%}\nA word\n";
1177        let result = try_parse_inline_link(input);
1178        assert!(
1179            result.is_some(),
1180            "Link with attributes should parse even with following text"
1181        );
1182        let (len, text, dest, attrs) = result.unwrap();
1183        assert_eq!(text, "A network graph.");
1184        assert_eq!(dest, "../images/networkfig.png");
1185        assert_eq!(attrs, Some("{width=70%}"), "Attributes should be captured");
1186        assert_eq!(
1187            len, 55,
1188            "Length should include attributes (up to closing brace)"
1189        );
1190    }
1191}