Skip to main content

panache_parser/parser/inlines/
links.rs

//! Parsing for links, images, automatic links, and bare URIs.
//!
//! Implements:
//! - Automatic links: `<http://example.com>` and `<user@example.com>`
//! - Bare URIs: `scheme:rest` written directly in running text
//! - Inline links: `[text](url)` and `[text](url "title")`
//! - Link attributes: `[text](url){#id .class key=value}`
//! - Inline images: `![alt](url)` and `![alt](url "title")`
//! - Image attributes: `![alt](url){#id .class key=value}`
//! - Reference links: `[text][ref]`, `[text][]`, `[text]`
//! - Reference images: `![alt][ref]`, `![alt][]`, `![alt]`
11
12use super::core::parse_inline_text;
13use crate::config::Config;
14use crate::syntax::SyntaxKind;
15use rowan::GreenNodeBuilder;
16
17// Import attribute parsing
18use crate::parser::utils::attributes::try_parse_trailing_attributes;
19
20/// Try to parse an inline image starting at the current position.
21///
22/// Inline images have the form `![alt](url)` or `![alt](url "title")`.
23/// Can also have trailing attributes: `![alt](url){#id .class}`.
24/// Returns Some((length, alt_text, dest_content, raw_attributes)) if a valid image is found.
25pub fn try_parse_inline_image(text: &str) -> Option<(usize, &str, &str, Option<&str>)> {
26    if !text.starts_with("![") {
27        return None;
28    }
29
30    // Find the closing ]
31    let mut bracket_depth = 0;
32    let mut escape_next = false;
33    let mut close_bracket_pos = None;
34
35    for (i, ch) in text[2..].char_indices() {
36        if escape_next {
37            escape_next = false;
38            continue;
39        }
40
41        match ch {
42            '\\' => escape_next = true,
43            '[' => bracket_depth += 1,
44            ']' => {
45                if bracket_depth == 0 {
46                    close_bracket_pos = Some(i + 2);
47                    break;
48                }
49                bracket_depth -= 1;
50            }
51            _ => {}
52        }
53    }
54
55    let close_bracket = close_bracket_pos?;
56    let alt_text = &text[2..close_bracket];
57
58    // Check for immediate ( after ]
59    let after_bracket = close_bracket + 1;
60    if text.len() <= after_bracket || !text[after_bracket..].starts_with('(') {
61        return None;
62    }
63
64    // Find closing ) for destination (reuse same logic as links)
65    let dest_start = after_bracket + 1;
66    let remaining = &text[dest_start..];
67
68    let mut paren_depth = 0;
69    let mut escape_next = false;
70    let mut in_quotes = false;
71    let mut close_paren_pos = None;
72
73    for (i, ch) in remaining.char_indices() {
74        if escape_next {
75            escape_next = false;
76            continue;
77        }
78
79        match ch {
80            '\\' => escape_next = true,
81            '"' => in_quotes = !in_quotes,
82            '(' if !in_quotes => paren_depth += 1,
83            ')' if !in_quotes => {
84                if paren_depth == 0 {
85                    close_paren_pos = Some(i);
86                    break;
87                }
88                paren_depth -= 1;
89            }
90            _ => {}
91        }
92    }
93
94    let close_paren = close_paren_pos?;
95    let dest_content = &remaining[..close_paren];
96
97    // Check for trailing attributes {#id .class key=value}
98    let after_paren = dest_start + close_paren + 1;
99    let after_close = &text[after_paren..];
100
101    // Attributes must start immediately after closing paren (no whitespace/newlines)
102    if after_close.starts_with('{') {
103        // Find the closing brace
104        if let Some(close_brace_pos) = after_close.find('}') {
105            let attr_text = &after_close[..=close_brace_pos];
106            // Try to parse as attributes to validate
107            if let Some((_attrs, _)) = try_parse_trailing_attributes(attr_text) {
108                let total_len = after_paren + close_brace_pos + 1;
109                // Return raw attribute string for lossless parsing
110                let raw_attrs = attr_text;
111                return Some((total_len, alt_text, dest_content, Some(raw_attrs)));
112            }
113        }
114    }
115
116    // No attributes, just return the image
117    let total_len = after_paren;
118    Some((total_len, alt_text, dest_content, None))
119}
120
121/// Emit an inline image node to the builder.
122/// Note: alt_text may contain inline elements and should be parsed recursively.
123pub fn emit_inline_image(
124    builder: &mut GreenNodeBuilder,
125    _text: &str,
126    alt_text: &str,
127    dest: &str,
128    raw_attributes: Option<&str>,
129    config: &Config,
130) {
131    builder.start_node(SyntaxKind::IMAGE_LINK.into());
132
133    // Opening ![
134    builder.start_node(SyntaxKind::IMAGE_LINK_START.into());
135    builder.token(SyntaxKind::IMAGE_LINK_START.into(), "![");
136    builder.finish_node();
137
138    // Alt text (recursively parse inline elements)
139    builder.start_node(SyntaxKind::IMAGE_ALT.into());
140    // Use the standalone parse_inline_text function for recursive parsing
141    // Note: nested contexts don't resolve references
142    parse_inline_text(builder, alt_text, config, false);
143    builder.finish_node();
144
145    // Closing ]
146    builder.token(SyntaxKind::IMAGE_ALT_END.into(), "]");
147
148    // Opening (
149    builder.token(SyntaxKind::IMAGE_DEST_START.into(), "(");
150
151    // Destination
152    builder.start_node(SyntaxKind::LINK_DEST.into());
153    builder.token(SyntaxKind::TEXT.into(), dest);
154    builder.finish_node();
155
156    // Closing )
157    builder.token(SyntaxKind::IMAGE_DEST_END.into(), ")");
158
159    // Emit raw attributes if present (preserve original formatting)
160    if let Some(raw_attrs) = raw_attributes {
161        builder.start_node(SyntaxKind::ATTRIBUTE.into());
162        builder.token(SyntaxKind::ATTRIBUTE.into(), raw_attrs);
163        builder.finish_node();
164    }
165
166    builder.finish_node();
167}
168
/// Recognise an automatic link (`<url>` or `<user@host>`) at the start of `text`.
///
/// The content between `<` and the first `>` must be non-empty, free of any
/// whitespace, and contain either a `:` (treated as a URL) or an `@`
/// (treated as an email address).
///
/// Returns `Some((length, content))`, where `length` counts both angle
/// brackets and `content` is the text between them; `None` otherwise.
pub fn try_parse_autolink(text: &str) -> Option<(usize, &str)> {
    // Everything between the leading `<` and the first `>` after it.
    let (content, _) = text.strip_prefix('<')?.split_once('>')?;

    if content.is_empty() || content.chars().any(char::is_whitespace) {
        return None;
    }

    // Loose sanity check only: a colon marks a URL-like target, an at-sign an
    // email-like one. Nothing stricter is validated here.
    let looks_like_target = content.contains(|c: char| matches!(c, ':' | '@'));
    if !looks_like_target {
        return None;
    }

    // `+ 2` accounts for the surrounding `<` and `>`.
    Some((content.len() + 2, content))
}
205
206/// Emit an automatic link node to the builder.
207pub fn emit_autolink(builder: &mut GreenNodeBuilder, _text: &str, url: &str) {
208    builder.start_node(SyntaxKind::AUTO_LINK.into());
209
210    // Opening <
211    builder.start_node(SyntaxKind::AUTO_LINK_MARKER.into());
212    builder.token(SyntaxKind::AUTO_LINK_MARKER.into(), "<");
213    builder.finish_node();
214
215    // URL content
216    builder.token(SyntaxKind::TEXT.into(), url);
217
218    // Closing >
219    builder.start_node(SyntaxKind::AUTO_LINK_MARKER.into());
220    builder.token(SyntaxKind::AUTO_LINK_MARKER.into(), ">");
221    builder.finish_node();
222
223    builder.finish_node();
224}
225
/// Recognise a bare URI (`scheme:rest`) at the start of `text`.
///
/// The scheme must begin with an ASCII letter and may continue with ASCII
/// letters, digits, `+`, `-` or `.` up to the first `:`. The part after the
/// colon runs until ASCII whitespace or one of `<`, `>`, `` ` ``, `"`, `'`,
/// and then loses any trailing run of `.`, `,`, `;`, `:`, `)`, `]`, `}`.
///
/// Returns `Some((length, uri))` where `uri == &text[..length]`, or `None`
/// when there is no valid scheme or nothing is left after the colon once the
/// trailing punctuation has been removed.
pub fn try_parse_bare_uri(text: &str) -> Option<(usize, &str)> {
    if !text.chars().next()?.is_ascii_alphabetic() {
        return None;
    }

    // The first character that cannot belong to a scheme has to be the colon.
    let colon = text
        .find(|c: char| !(c.is_ascii_alphanumeric() || matches!(c, '+' | '-' | '.')))?;
    if text.as_bytes()[colon] != b':' {
        return None;
    }

    // Everything after the colon, cut at the first terminating character.
    let after_colon = &text[colon + 1..];
    let stop = after_colon
        .find(|c: char| c.is_ascii_whitespace() || matches!(c, '<' | '>' | '`' | '"' | '\''))
        .unwrap_or(after_colon.len());

    // Trailing punctuation is far more likely to belong to the surrounding
    // sentence than to the URI, so it is peeled off.
    let body = after_colon[..stop]
        .trim_end_matches(|c: char| matches!(c, '.' | ',' | ';' | ':' | ')' | ']' | '}'));
    if body.is_empty() {
        return None;
    }

    let length = colon + 1 + body.len();
    Some((length, &text[..length]))
}
281
282/// Try to parse an inline link starting at the current position.
283///
284/// Inline links have the form `[text](url)` or `[text](url "title")`.
285/// Can also have trailing attributes: `[text](url){#id .class}`.
286/// Returns Some((length, text_content, dest_content, raw_attributes)) if a valid link is found.
287pub fn try_parse_inline_link(text: &str) -> Option<(usize, &str, &str, Option<&str>)> {
288    if !text.starts_with('[') {
289        return None;
290    }
291
292    // Find the closing ]
293    let mut bracket_depth = 0;
294    let mut escape_next = false;
295    let mut close_bracket_pos = None;
296
297    for (i, ch) in text[1..].char_indices() {
298        if escape_next {
299            escape_next = false;
300            continue;
301        }
302
303        match ch {
304            '\\' => escape_next = true,
305            '[' => bracket_depth += 1,
306            ']' => {
307                if bracket_depth == 0 {
308                    close_bracket_pos = Some(i + 1);
309                    break;
310                }
311                bracket_depth -= 1;
312            }
313            _ => {}
314        }
315    }
316
317    let close_bracket = close_bracket_pos?;
318    let link_text = &text[1..close_bracket];
319
320    // Check for immediate ( after ]
321    let after_bracket = close_bracket + 1;
322    if text.len() <= after_bracket || !text[after_bracket..].starts_with('(') {
323        return None;
324    }
325
326    // Find closing ) for destination
327    let dest_start = after_bracket + 1;
328    let remaining = &text[dest_start..];
329
330    let mut paren_depth = 0;
331    let mut escape_next = false;
332    let mut in_quotes = false;
333    let mut close_paren_pos = None;
334
335    for (i, ch) in remaining.char_indices() {
336        if escape_next {
337            escape_next = false;
338            continue;
339        }
340
341        match ch {
342            '\\' => escape_next = true,
343            '"' => in_quotes = !in_quotes,
344            '(' if !in_quotes => paren_depth += 1,
345            ')' if !in_quotes => {
346                if paren_depth == 0 {
347                    close_paren_pos = Some(i);
348                    break;
349                }
350                paren_depth -= 1;
351            }
352            _ => {}
353        }
354    }
355
356    let close_paren = close_paren_pos?;
357    let dest_content = &remaining[..close_paren];
358
359    // Check for trailing attributes {#id .class key=value}
360    let after_paren = dest_start + close_paren + 1;
361    let after_close = &text[after_paren..];
362
363    // Attributes must start immediately after closing paren (no whitespace/newlines)
364    if after_close.starts_with('{') {
365        // Find the closing brace
366        if let Some(close_brace_pos) = after_close.find('}') {
367            let attr_text = &after_close[..=close_brace_pos];
368            // Try to parse as attributes to validate
369            if let Some((_attrs, _)) = try_parse_trailing_attributes(attr_text) {
370                let total_len = after_paren + close_brace_pos + 1;
371                // Return raw attribute string for lossless parsing
372                let raw_attrs = attr_text;
373                return Some((total_len, link_text, dest_content, Some(raw_attrs)));
374            }
375        }
376    }
377
378    // No attributes, just return the link
379    let total_len = after_paren;
380    Some((total_len, link_text, dest_content, None))
381}
382
383/// Emit an inline link node to the builder.
384/// Note: link_text may contain inline elements and should be parsed recursively.
385pub fn emit_inline_link(
386    builder: &mut GreenNodeBuilder,
387    _text: &str,
388    link_text: &str,
389    dest: &str,
390    raw_attributes: Option<&str>,
391    config: &Config,
392) {
393    builder.start_node(SyntaxKind::LINK.into());
394
395    // Opening [
396    builder.start_node(SyntaxKind::LINK_START.into());
397    builder.token(SyntaxKind::LINK_START.into(), "[");
398    builder.finish_node();
399
400    // Link text (recursively parse inline elements)
401    builder.start_node(SyntaxKind::LINK_TEXT.into());
402    // Use the standalone parse_inline_text function for recursive parsing
403    parse_inline_text(builder, link_text, config, false);
404    builder.finish_node();
405
406    // Closing ]
407    builder.token(SyntaxKind::LINK_TEXT_END.into(), "]");
408
409    // Opening (
410    builder.token(SyntaxKind::LINK_DEST_START.into(), "(");
411
412    // Destination
413    builder.start_node(SyntaxKind::LINK_DEST.into());
414    builder.token(SyntaxKind::TEXT.into(), dest);
415    builder.finish_node();
416
417    // Closing )
418    builder.token(SyntaxKind::LINK_DEST_END.into(), ")");
419
420    // Emit raw attributes if present (preserve original formatting)
421    if let Some(raw_attrs) = raw_attributes {
422        builder.start_node(SyntaxKind::ATTRIBUTE.into());
423        builder.token(SyntaxKind::ATTRIBUTE.into(), raw_attrs);
424        builder.finish_node();
425    }
426
427    builder.finish_node();
428}
429
430pub fn emit_bare_uri_link(builder: &mut GreenNodeBuilder, uri: &str, _config: &Config) {
431    builder.start_node(SyntaxKind::LINK.into());
432
433    builder.start_node(SyntaxKind::LINK_START.into());
434    builder.token(SyntaxKind::LINK_START.into(), "[");
435    builder.finish_node();
436
437    builder.start_node(SyntaxKind::LINK_TEXT.into());
438    builder.token(SyntaxKind::TEXT.into(), uri);
439    builder.finish_node();
440
441    builder.token(SyntaxKind::LINK_TEXT_END.into(), "]");
442    builder.token(SyntaxKind::LINK_DEST_START.into(), "(");
443
444    builder.start_node(SyntaxKind::LINK_DEST.into());
445    builder.token(SyntaxKind::TEXT.into(), uri);
446    builder.finish_node();
447
448    builder.token(SyntaxKind::LINK_DEST_END.into(), ")");
449
450    builder.finish_node();
451}
452
/// Recognise a reference link at the very start of `text`.
///
/// Three spellings exist:
/// - explicit: `[text][label]`
/// - implicit: `[text][]`
/// - shortcut: `[text]` (only when `allow_shortcut` is `true`)
///
/// Returns `Some((length, text_content, label, is_shortcut))`:
/// - `length` is the number of bytes consumed,
/// - `text_content` is the content of the first bracket pair,
/// - `label` is the content of the second bracket pair (empty for the
///   implicit form) or, for a shortcut, a copy of `text_content`,
/// - `is_shortcut` tells whether the second bracket pair was absent.
///
/// Returns `None` for citations (`[@...` / `[-@...`), for anything followed
/// directly by `(` (inline link) or `{` (bracketed span), for unclosed
/// brackets, for labels containing a newline, and for an empty shortcut `[]`.
pub fn try_parse_reference_link(
    text: &str,
    allow_shortcut: bool,
) -> Option<(usize, &str, String, bool)> {
    let inner = text.strip_prefix('[')?;

    // `[@key]` and `[-@key]` are citations, not links.
    if inner.starts_with('@') || inner.starts_with("-@") {
        return None;
    }

    // Find the `]` matching the opening bracket; nested pairs are balanced
    // and a backslash hides the character after it.
    let mut nesting = 0usize;
    let mut escaped = false;
    let mut text_len = None;
    for (offset, ch) in inner.char_indices() {
        if escaped {
            escaped = false;
            continue;
        }
        match ch {
            '\\' => escaped = true,
            '[' => nesting += 1,
            ']' if nesting == 0 => {
                text_len = Some(offset);
                break;
            }
            ']' => nesting -= 1,
            _ => {}
        }
    }
    let text_len = text_len?;
    let link_text = &inner[..text_len];

    // What comes after the first bracket pair decides the kind of construct.
    let follower = &inner[text_len + 1..];
    let first_pair_len = text.len() - follower.len();

    match follower.chars().next() {
        // `[text](...)` is an inline link and `[text]{...}` a bracketed span.
        Some('(') | Some('{') => None,

        // Second bracket pair: explicit `[text][label]` or implicit `[text][]`.
        Some('[') => {
            let label_and_rest = &follower[1..];
            // The label ends at the first `]`; meeting a newline first (or
            // running out of input) disqualifies it.
            let stop = label_and_rest.find(|c: char| c == ']' || c == '\n')?;
            if label_and_rest.as_bytes()[stop] != b']' {
                return None;
            }
            let label = &label_and_rest[..stop];
            // First pair + `[` + label + `]`.
            let total_len = first_pair_len + 1 + stop + 1;
            Some((total_len, link_text, label.to_string(), false))
        }

        // Shortcut form: the text also serves as the label.
        _ if allow_shortcut && !link_text.is_empty() => {
            Some((first_pair_len, link_text, link_text.to_string(), true))
        }

        _ => None,
    }
}
564
565/// Emit a reference link node to the builder.
566/// Preserves the original reference syntax (explicit [text][ref], implicit [text][], or shortcut [text]).
567pub fn emit_reference_link(
568    builder: &mut GreenNodeBuilder,
569    link_text: &str,
570    label: &str,
571    is_shortcut: bool,
572    config: &Config,
573) {
574    builder.start_node(SyntaxKind::LINK.into());
575
576    // Opening [
577    builder.start_node(SyntaxKind::LINK_START.into());
578    builder.token(SyntaxKind::LINK_START.into(), "[");
579    builder.finish_node();
580
581    // Link text (recursively parse inline elements)
582    builder.start_node(SyntaxKind::LINK_TEXT.into());
583    parse_inline_text(builder, link_text, config, false);
584    builder.finish_node();
585
586    // Closing ] and reference label
587    builder.token(SyntaxKind::TEXT.into(), "]");
588
589    if !is_shortcut {
590        // Explicit or implicit reference: [text][label] or [text][]
591        builder.token(SyntaxKind::TEXT.into(), "[");
592        builder.start_node(SyntaxKind::LINK_REF.into());
593        // For implicit references, label is empty and we emit [text][]
594        // For explicit references, emit the label to get [text][label]
595        if !label.is_empty() {
596            builder.token(SyntaxKind::TEXT.into(), label);
597        }
598        builder.finish_node();
599        builder.token(SyntaxKind::TEXT.into(), "]");
600    }
601    // For shortcut references, just [text] - no second bracket pair
602
603    builder.finish_node();
604}
605
/// Try to parse a reference-style image at the start of `text`.
///
/// Three spellings exist:
/// - explicit: `![alt][label]`
/// - implicit: `![alt][]`
/// - shortcut: `![alt]` (only when `allow_shortcut` is `true`)
///
/// Returns `Some((total_len, alt_text, label, is_shortcut))`:
/// - `total_len` is the number of bytes consumed,
/// - `alt_text` is the content of the first bracket pair (nested brackets are
///   balanced, a backslash hides the byte after it),
/// - `label` is the explicit label as written; for the implicit and shortcut
///   forms it is a copy of `alt_text`,
/// - `is_shortcut` tells whether the second bracket pair was absent.
///
/// Returns `None` when `text` does not start with `![`, when the alt text is
/// not closed, when the label is not closed on the same line, when the alt
/// text is followed directly by `(` (that is an inline image), or when no
/// second bracket pair follows and shortcuts are disabled.
///
/// A shortcut image that ends exactly at the end of `text` (for example the
/// whole input being `![alt]`) is accepted, matching the behaviour of
/// `try_parse_reference_link`. As in that function, a shortcut with empty
/// alt text (`![]`) is rejected because it carries no label to look up.
pub fn try_parse_reference_image(
    text: &str,
    allow_shortcut: bool,
) -> Option<(usize, &str, String, bool)> {
    if !text.starts_with("![") {
        return None;
    }
    let bytes = text.as_bytes();

    // Walk to the `]` matching the opening bracket. The delimiters are all
    // ASCII, so scanning bytes never stops inside a multi-byte character.
    let alt_start = 2;
    let mut pos = alt_start;
    let mut bracket_depth = 1usize;
    while pos < bytes.len() && bracket_depth > 0 {
        match bytes[pos] {
            b'[' => bracket_depth += 1,
            b']' => bracket_depth -= 1,
            // Skip the escaped byte so `\]` and `\[` do not affect nesting.
            b'\\' if pos + 1 < bytes.len() => pos += 1,
            _ => {}
        }
        pos += 1;
    }
    if bracket_depth > 0 {
        return None; // Unclosed brackets
    }

    // `pos` now sits just past the closing `]`.
    let alt_text = &text[alt_start..pos - 1];

    match bytes.get(pos) {
        // Explicit `![alt][label]` or implicit `![alt][]`.
        Some(b'[') => {
            let label_start = pos + 1;
            // The label ends at the first `]`; a line break before it (or
            // running out of input) means this is not a reference.
            let stop = text[label_start..].find(|c: char| matches!(c, ']' | '\n' | '\r'))?;
            let label_end = label_start + stop;
            if bytes[label_end] != b']' {
                return None;
            }

            let label_text = &text[label_start..label_end];
            // An empty label means implicit reference: the alt text is used
            // as the label. Explicit labels are returned exactly as written.
            let label = if label_text.is_empty() {
                alt_text.to_string()
            } else {
                label_text.to_string()
            };
            Some((label_end + 1, alt_text, label, false))
        }

        // `![alt](url)` is an inline image, never a reference.
        Some(b'(') => None,

        // Shortcut `![alt]`, followed by anything else or by end of input.
        _ if allow_shortcut && !alt_text.is_empty() => {
            Some((pos, alt_text, alt_text.to_string(), true))
        }

        _ => None,
    }
}
687
688/// Emit a reference image node with registry lookup.
689pub fn emit_reference_image(
690    builder: &mut GreenNodeBuilder,
691    alt_text: &str,
692    label: &str,
693    is_shortcut: bool,
694    config: &Config,
695) {
696    builder.start_node(SyntaxKind::IMAGE_LINK.into());
697
698    // Emit as reference image (preserve original syntax)
699    builder.start_node(SyntaxKind::IMAGE_LINK_START.into());
700    builder.token(SyntaxKind::IMAGE_LINK_START.into(), "![");
701    builder.finish_node();
702
703    // Alt text (recursively parse inline elements)
704    builder.start_node(SyntaxKind::IMAGE_ALT.into());
705    parse_inline_text(builder, alt_text, config, false);
706    builder.finish_node();
707
708    // Closing ] and reference label
709    builder.token(SyntaxKind::TEXT.into(), "]");
710
711    if !is_shortcut {
712        // Explicit or implicit reference: ![alt][label] or ![alt][]
713        builder.token(SyntaxKind::TEXT.into(), "[");
714        builder.start_node(SyntaxKind::LINK_REF.into());
715        // For implicit references, emit empty label (label == alt means implicit from parser)
716        if label != alt_text {
717            builder.token(SyntaxKind::TEXT.into(), label);
718        }
719        builder.finish_node();
720        builder.token(SyntaxKind::TEXT.into(), "]");
721    }
722    // For shortcut references, just ![alt] - no second bracket pair
723
724    builder.finish_node();
725}
726
727#[cfg(test)]
728mod tests {
729    use super::*;
730
731    #[test]
732    fn test_parse_autolink_url() {
733        let input = "<https://example.com>";
734        let result = try_parse_autolink(input);
735        assert_eq!(result, Some((21, "https://example.com")));
736    }
737
738    #[test]
739    fn test_parse_autolink_email() {
740        let input = "<user@example.com>";
741        let result = try_parse_autolink(input);
742        assert_eq!(result, Some((18, "user@example.com")));
743    }
744
745    #[test]
746    fn test_parse_autolink_no_close() {
747        let input = "<https://example.com";
748        let result = try_parse_autolink(input);
749        assert_eq!(result, None);
750    }
751
752    #[test]
753    fn test_parse_autolink_with_space() {
754        let input = "<https://example.com >";
755        let result = try_parse_autolink(input);
756        assert_eq!(result, None);
757    }
758
759    #[test]
760    fn test_parse_autolink_not_url_or_email() {
761        let input = "<notaurl>";
762        let result = try_parse_autolink(input);
763        assert_eq!(result, None);
764    }
765
766    #[test]
767    fn test_parse_inline_link_simple() {
768        let input = "[text](url)";
769        let result = try_parse_inline_link(input);
770        assert_eq!(result, Some((11, "text", "url", None)));
771    }
772
773    #[test]
774    fn test_parse_inline_link_with_title() {
775        let input = r#"[text](url "title")"#;
776        let result = try_parse_inline_link(input);
777        assert_eq!(result, Some((19, "text", r#"url "title""#, None)));
778    }
779
780    #[test]
781    fn test_parse_inline_link_with_nested_brackets() {
782        let input = "[outer [inner] text](url)";
783        let result = try_parse_inline_link(input);
784        assert_eq!(result, Some((25, "outer [inner] text", "url", None)));
785    }
786
787    #[test]
788    fn test_parse_inline_link_no_space_between_brackets_and_parens() {
789        let input = "[text] (url)";
790        let result = try_parse_inline_link(input);
791        assert_eq!(result, None);
792    }
793
794    #[test]
795    fn test_parse_inline_link_no_closing_bracket() {
796        let input = "[text(url)";
797        let result = try_parse_inline_link(input);
798        assert_eq!(result, None);
799    }
800
801    #[test]
802    fn test_parse_inline_link_no_closing_paren() {
803        let input = "[text](url";
804        let result = try_parse_inline_link(input);
805        assert_eq!(result, None);
806    }
807
808    #[test]
809    fn test_parse_inline_link_escaped_bracket() {
810        let input = r"[text\]more](url)";
811        let result = try_parse_inline_link(input);
812        assert_eq!(result, Some((17, r"text\]more", "url", None)));
813    }
814
815    #[test]
816    fn test_parse_inline_link_parens_in_url() {
817        let input = "[text](url(with)parens)";
818        let result = try_parse_inline_link(input);
819        assert_eq!(result, Some((23, "text", "url(with)parens", None)));
820    }
821
822    #[test]
823    fn test_parse_inline_image_simple() {
824        let input = "![alt](image.jpg)";
825        let result = try_parse_inline_image(input);
826        assert_eq!(result, Some((17, "alt", "image.jpg", None)));
827    }
828
829    #[test]
830    fn test_parse_inline_image_with_title() {
831        let input = r#"![alt](image.jpg "A title")"#;
832        let result = try_parse_inline_image(input);
833        assert_eq!(result, Some((27, "alt", r#"image.jpg "A title""#, None)));
834    }
835
836    #[test]
837    fn test_parse_inline_image_with_nested_brackets() {
838        let input = "![outer [inner] alt](image.jpg)";
839        let result = try_parse_inline_image(input);
840        assert_eq!(result, Some((31, "outer [inner] alt", "image.jpg", None)));
841    }
842
843    #[test]
844    fn test_parse_inline_image_no_space_between_brackets_and_parens() {
845        let input = "![alt] (image.jpg)";
846        let result = try_parse_inline_image(input);
847        assert_eq!(result, None);
848    }
849
850    #[test]
851    fn test_parse_inline_image_no_closing_bracket() {
852        let input = "![alt(image.jpg)";
853        let result = try_parse_inline_image(input);
854        assert_eq!(result, None);
855    }
856
857    #[test]
858    fn test_parse_inline_image_no_closing_paren() {
859        let input = "![alt](image.jpg";
860        let result = try_parse_inline_image(input);
861        assert_eq!(result, None);
862    }
863
864    #[test]
865    fn test_parse_inline_image_with_simple_class() {
866        let input = "![alt](img.png){.large}";
867        let result = try_parse_inline_image(input);
868        let (len, alt, dest, attrs) = result.unwrap();
869        assert_eq!(len, 23);
870        assert_eq!(alt, "alt");
871        assert_eq!(dest, "img.png");
872        assert!(attrs.is_some());
873        let attrs = attrs.unwrap();
874        assert_eq!(attrs, "{.large}");
875    }
876
877    #[test]
878    fn test_parse_inline_image_with_id() {
879        let input = "![Figure 1](fig1.png){#fig-1}";
880        let result = try_parse_inline_image(input);
881        let (len, alt, dest, attrs) = result.unwrap();
882        assert_eq!(len, 29);
883        assert_eq!(alt, "Figure 1");
884        assert_eq!(dest, "fig1.png");
885        assert!(attrs.is_some());
886        let attrs = attrs.unwrap();
887        assert_eq!(attrs, "{#fig-1}");
888    }
889
890    #[test]
891    fn test_parse_inline_image_with_full_attributes() {
892        let input = "![alt](img.png){#fig .large width=\"80%\"}";
893        let result = try_parse_inline_image(input);
894        let (len, alt, dest, attrs) = result.unwrap();
895        assert_eq!(len, 40);
896        assert_eq!(alt, "alt");
897        assert_eq!(dest, "img.png");
898        assert!(attrs.is_some());
899        let attrs = attrs.unwrap();
900        assert_eq!(attrs, "{#fig .large width=\"80%\"}");
901    }
902
903    #[test]
904    fn test_parse_inline_image_attributes_must_be_adjacent() {
905        // Space between ) and { should not parse as attributes
906        let input = "![alt](img.png) {.large}";
907        let result = try_parse_inline_image(input);
908        assert_eq!(result, Some((15, "alt", "img.png", None)));
909    }
910
911    // Link attribute tests
912    #[test]
913    fn test_parse_inline_link_with_id() {
914        let input = "[text](url){#link-1}";
915        let result = try_parse_inline_link(input);
916        let (len, text, dest, attrs) = result.unwrap();
917        assert_eq!(len, 20);
918        assert_eq!(text, "text");
919        assert_eq!(dest, "url");
920        assert!(attrs.is_some());
921        let attrs = attrs.unwrap();
922        assert_eq!(attrs, "{#link-1}");
923    }
924
925    #[test]
926    fn test_parse_inline_link_with_full_attributes() {
927        let input = "[text](url){#link .external target=\"_blank\"}";
928        let result = try_parse_inline_link(input);
929        let (len, text, dest, attrs) = result.unwrap();
930        assert_eq!(len, 44);
931        assert_eq!(text, "text");
932        assert_eq!(dest, "url");
933        assert!(attrs.is_some());
934        let attrs = attrs.unwrap();
935        assert_eq!(attrs, "{#link .external target=\"_blank\"}");
936    }
937
938    #[test]
939    fn test_parse_inline_link_attributes_must_be_adjacent() {
940        // Space between ) and { should not parse as attributes
941        let input = "[text](url) {.class}";
942        let result = try_parse_inline_link(input);
943        assert_eq!(result, Some((11, "text", "url", None)));
944    }
945
946    #[test]
947    fn test_parse_inline_link_with_title_and_attributes() {
948        let input = r#"[text](url "title"){.external}"#;
949        let result = try_parse_inline_link(input);
950        let (len, text, dest, attrs) = result.unwrap();
951        assert_eq!(len, 30);
952        assert_eq!(text, "text");
953        assert_eq!(dest, r#"url "title""#);
954        assert!(attrs.is_some());
955        let attrs = attrs.unwrap();
956        assert_eq!(attrs, "{.external}");
957    }
958
959    // Reference link tests
960    #[test]
961    fn test_parse_reference_link_explicit() {
962        let input = "[link text][label]";
963        let result = try_parse_reference_link(input, false);
964        assert_eq!(result, Some((18, "link text", "label".to_string(), false)));
965    }
966
967    #[test]
968    fn test_parse_reference_link_implicit() {
969        let input = "[link text][]";
970        let result = try_parse_reference_link(input, false);
971        assert_eq!(result, Some((13, "link text", String::new(), false)));
972    }
973
974    #[test]
975    fn test_parse_reference_link_explicit_same_label_as_text() {
976        let input = "[stack][stack]";
977        let result = try_parse_reference_link(input, false);
978        assert_eq!(result, Some((14, "stack", "stack".to_string(), false)));
979    }
980
981    #[test]
982    fn test_parse_reference_link_shortcut() {
983        let input = "[link text] rest";
984        let result = try_parse_reference_link(input, true);
985        assert_eq!(
986            result,
987            Some((11, "link text", "link text".to_string(), true))
988        );
989    }
990
991    #[test]
992    fn test_parse_reference_link_shortcut_rejects_empty_label() {
993        let input = "[] rest";
994        let result = try_parse_reference_link(input, true);
995        assert_eq!(result, None);
996    }
997
998    #[test]
999    fn test_parse_reference_link_shortcut_disabled() {
1000        let input = "[link text] rest";
1001        let result = try_parse_reference_link(input, false);
1002        assert_eq!(result, None);
1003    }
1004
1005    #[test]
1006    fn test_parse_reference_link_not_inline_link() {
1007        // Should not match inline links with (url)
1008        let input = "[text](url)";
1009        let result = try_parse_reference_link(input, true);
1010        assert_eq!(result, None);
1011    }
1012
1013    #[test]
1014    fn test_parse_reference_link_with_nested_brackets() {
1015        let input = "[outer [inner] text][ref]";
1016        let result = try_parse_reference_link(input, false);
1017        assert_eq!(
1018            result,
1019            Some((25, "outer [inner] text", "ref".to_string(), false))
1020        );
1021    }
1022
1023    #[test]
1024    fn test_parse_reference_link_label_no_newline() {
1025        let input = "[text][label\nmore]";
1026        let result = try_parse_reference_link(input, false);
1027        assert_eq!(result, None);
1028    }
1029
1030    // Reference image tests
1031    #[test]
1032    fn test_parse_reference_image_explicit() {
1033        let input = "![alt text][label]";
1034        let result = try_parse_reference_image(input, false);
1035        assert_eq!(result, Some((18, "alt text", "label".to_string(), false)));
1036    }
1037
1038    #[test]
1039    fn test_parse_reference_image_implicit() {
1040        let input = "![alt text][]";
1041        let result = try_parse_reference_image(input, false);
1042        assert_eq!(
1043            result,
1044            Some((13, "alt text", "alt text".to_string(), false))
1045        );
1046    }
1047
1048    #[test]
1049    fn test_parse_reference_image_shortcut() {
1050        let input = "![alt text] rest";
1051        let result = try_parse_reference_image(input, true);
1052        assert_eq!(result, Some((11, "alt text", "alt text".to_string(), true)));
1053    }
1054
1055    #[test]
1056    fn test_parse_reference_image_shortcut_disabled() {
1057        let input = "![alt text] rest";
1058        let result = try_parse_reference_image(input, false);
1059        assert_eq!(result, None);
1060    }
1061
1062    #[test]
1063    fn test_parse_reference_image_not_inline() {
1064        // Should not match inline images with (url)
1065        let input = "![alt](url)";
1066        let result = try_parse_reference_image(input, true);
1067        assert_eq!(result, None);
1068    }
1069
1070    #[test]
1071    fn test_parse_reference_image_with_nested_brackets() {
1072        let input = "![alt [nested] text][ref]";
1073        let result = try_parse_reference_image(input, false);
1074        assert_eq!(
1075            result,
1076            Some((25, "alt [nested] text", "ref".to_string(), false))
1077        );
1078    }
1079
1080    #[test]
1081    fn test_reference_link_label_with_crlf() {
1082        // Reference link labels should not span lines with CRLF
1083        let input = "[foo\r\nbar]";
1084        let result = try_parse_reference_link(input, false);
1085
1086        // Should fail to parse because label contains line break
1087        assert_eq!(
1088            result, None,
1089            "Should not parse reference link with CRLF in label"
1090        );
1091    }
1092
1093    #[test]
1094    fn test_reference_link_label_with_lf() {
1095        // Reference link labels should not span lines with LF either
1096        let input = "[foo\nbar]";
1097        let result = try_parse_reference_link(input, false);
1098
1099        // Should fail to parse because label contains line break
1100        assert_eq!(
1101            result, None,
1102            "Should not parse reference link with LF in label"
1103        );
1104    }
1105
1106    // Multiline link text tests
1107    #[test]
1108    fn test_parse_inline_link_multiline_text() {
1109        // Per Pandoc spec, link text CAN contain newlines (soft breaks)
1110        let input = "[text on\nline two](url)";
1111        let result = try_parse_inline_link(input);
1112        assert_eq!(
1113            result,
1114            Some((23, "text on\nline two", "url", None)),
1115            "Link text should allow newlines"
1116        );
1117    }
1118
1119    #[test]
1120    fn test_parse_inline_link_multiline_with_formatting() {
1121        // Link text with newlines and other inline elements
1122        let input =
1123            "[A network graph. Different edges\nwith probability](../images/networkfig.png)";
1124        let result = try_parse_inline_link(input);
1125        assert!(result.is_some(), "Link text with newlines should parse");
1126        let (len, text, _dest, _attrs) = result.unwrap();
1127        assert!(text.contains('\n'), "Link text should preserve newline");
1128        assert_eq!(len, input.len());
1129    }
1130
1131    #[test]
1132    fn test_parse_inline_image_multiline_alt() {
1133        // Per Pandoc spec, image alt text CAN contain newlines
1134        let input = "![alt on\nline two](img.png)";
1135        let result = try_parse_inline_image(input);
1136        assert_eq!(
1137            result,
1138            Some((27, "alt on\nline two", "img.png", None)),
1139            "Image alt text should allow newlines"
1140        );
1141    }
1142
1143    #[test]
1144    fn test_parse_inline_image_multiline_with_attributes() {
1145        // Image with multiline alt text and attributes
1146        let input = "![network graph\ndiagram](../images/fig.png){width=70%}";
1147        let result = try_parse_inline_image(input);
1148        assert!(
1149            result.is_some(),
1150            "Image alt with newlines and attributes should parse"
1151        );
1152        let (len, alt, dest, attrs) = result.unwrap();
1153        assert!(alt.contains('\n'), "Alt text should preserve newline");
1154        assert_eq!(dest, "../images/fig.png");
1155        assert_eq!(attrs, Some("{width=70%}"));
1156        assert_eq!(len, input.len());
1157    }
1158
1159    #[test]
1160    fn test_parse_inline_link_with_attributes_after_newline() {
1161        // Test for regression: when text is concatenated with newlines,
1162        // attributes after ) should still be recognized
1163        let input = "[A network graph.](../images/networkfig.png){width=70%}\nA word\n";
1164        let result = try_parse_inline_link(input);
1165        assert!(
1166            result.is_some(),
1167            "Link with attributes should parse even with following text"
1168        );
1169        let (len, text, dest, attrs) = result.unwrap();
1170        assert_eq!(text, "A network graph.");
1171        assert_eq!(dest, "../images/networkfig.png");
1172        assert_eq!(attrs, Some("{width=70%}"), "Attributes should be captured");
1173        assert_eq!(
1174            len, 55,
1175            "Length should include attributes (up to closing brace)"
1176        );
1177    }
1178}