Skip to main content

panache_parser/parser/blocks/
reference_links.rs

1//! Reference definition and footnote parsing functions.
2//!
3//! Reference definitions have the form:
4//! ```markdown
5//! [label]: url "optional title"
6//! [label]: url 'optional title'
7//! [label]: url (optional title)
8//! [label]: <url> "title"
9//! ```
10//!
11//! Footnote definitions have the form:
12//! ```markdown
13//! [^id]: Footnote content here.
14//!     Can continue on multiple lines
15//!     as long as they're indented.
16//! ```
17
18/// Try to parse a reference definition starting at the current position.
19/// Returns Some((length, label, url, title)) if successful.
20///
21/// Syntax:
22/// ```markdown
23/// [label]: url "title"
24/// [label]: <url> 'title'
25/// [label]: url
26///          (title on next line)
27/// ```
28pub fn try_parse_reference_definition(
29    text: &str,
30) -> Option<(usize, String, String, Option<String>)> {
31    let leading_spaces = text.chars().take_while(|&c| c == ' ').count();
32    if leading_spaces > 3 {
33        return None;
34    }
35    let text = &text[leading_spaces..];
36    let bytes = text.as_bytes();
37
38    // Must start at beginning of line with [
39    if bytes.is_empty() || bytes[0] != b'[' {
40        return None;
41    }
42
43    // Check if it's a footnote definition [^id]: - not a reference definition
44    if bytes.len() >= 2 && bytes[1] == b'^' {
45        return None;
46    }
47
48    // Find the closing ] for the label
49    let mut pos = 1;
50    let mut escape_next = false;
51
52    while pos < bytes.len() {
53        if escape_next {
54            escape_next = false;
55            pos += 1;
56            continue;
57        }
58
59        match bytes[pos] {
60            b'\\' => {
61                escape_next = true;
62                pos += 1;
63            }
64            b']' => {
65                break;
66            }
67            b'\n' => {
68                // Labels can't span lines
69                return None;
70            }
71            _ => {
72                pos += 1;
73            }
74        }
75    }
76
77    if pos >= bytes.len() || bytes[pos] != b']' {
78        return None;
79    }
80
81    let label = &text[1..pos];
82    if label.is_empty() {
83        return None;
84    }
85
86    pos += 1; // Skip ]
87
88    // Must be followed by :
89    if pos >= bytes.len() || bytes[pos] != b':' {
90        return None;
91    }
92    pos += 1;
93
94    // Skip whitespace
95    while pos < bytes.len() && matches!(bytes[pos], b' ' | b'\t') {
96        pos += 1;
97    }
98
99    // Parse URL
100    let url_start = pos;
101    let url_end;
102
103    // Check for angle-bracketed URL <url>
104    if pos < bytes.len() && bytes[pos] == b'<' {
105        pos += 1;
106        let url_content_start = pos;
107        // Find closing >
108        while pos < bytes.len() && bytes[pos] != b'>' && bytes[pos] != b'\n' && bytes[pos] != b'\r'
109        {
110            pos += 1;
111        }
112        if pos >= bytes.len() || bytes[pos] != b'>' {
113            return None;
114        }
115        url_end = pos;
116        let url = text[url_content_start..url_end].to_string();
117        pos += 1; // Skip >
118
119        // Parse optional title
120        let title = parse_title(text, bytes, &mut pos)?;
121
122        Some((pos, label.to_string(), url, title))
123    } else {
124        // Parse unbracketed URL (until whitespace or newline)
125        while pos < bytes.len() && !matches!(bytes[pos], b' ' | b'\t' | b'\n' | b'\r') {
126            pos += 1;
127        }
128
129        url_end = pos;
130        if url_start == url_end {
131            return None; // No URL found
132        }
133
134        let url = text[url_start..url_end].to_string();
135
136        // Parse optional title
137        let title = parse_title(text, bytes, &mut pos)?;
138
139        Some((pos, label.to_string(), url, title))
140    }
141}
142
/// Report whether `line` consists solely of `key=value` attribute pairs on an
/// indented line.
///
/// The line must begin with a space or tab. After trimming, it must be a
/// sequence of one or more `key=value` tokens separated by spaces/tabs, where
/// each key is non-empty and each value is either a non-empty unquoted run or
/// a non-empty string wrapped in matching `"` or `'` quotes.
pub fn line_is_mmd_link_attribute_continuation(line: &str) -> bool {
    fn is_blank(byte: u8) -> bool {
        byte == b' ' || byte == b'\t'
    }

    // Continuation lines are always indented.
    match line.as_bytes().first() {
        Some(&first) if is_blank(first) => {}
        _ => return false,
    }

    // `rest` is the not-yet-consumed tail of the trimmed line.
    let mut rest = line.trim().as_bytes();
    let mut found_pair = false;

    loop {
        // Drop separators between tokens; running dry ends the scan.
        let gap = rest.iter().take_while(|&&b| is_blank(b)).count();
        rest = &rest[gap..];
        if rest.is_empty() {
            return found_pair;
        }

        // Key: non-empty, and terminated by `=` (not by whitespace or EOL).
        let key_len = rest
            .iter()
            .take_while(|&&b| b != b'=' && !is_blank(b))
            .count();
        if key_len == 0 || rest.get(key_len) != Some(&b'=') {
            return false;
        }
        rest = &rest[key_len + 1..];

        // Value: something must follow the `=`.
        let opener = match rest.first() {
            Some(&b) => b,
            None => return false,
        };

        if opener == b'"' || opener == b'\'' {
            // Quoted value: needs a matching closing quote and a non-empty body.
            let quoted = &rest[1..];
            match quoted.iter().position(|&b| b == opener) {
                Some(close) if close > 0 => rest = &quoted[close + 1..],
                _ => return false,
            }
        } else {
            // Bare value: a non-empty run up to the next space/tab.
            let value_len = rest.iter().take_while(|&&b| !is_blank(b)).count();
            if value_len == 0 {
                return false;
            }
            rest = &rest[value_len..];
        }

        found_pair = true;
    }
}
207
/// Parse an optional title after the URL.
///
/// `bytes` is expected to be `text.as_bytes()`; `pos` is the byte offset just
/// past the link destination and is updated in place.
///
/// Titles may be wrapped in double quotes, single quotes, or parentheses. A
/// backslash shields the byte that follows it, so an escaped delimiter does
/// not close the title. Titles may contain line breaks. The returned title is
/// the raw text between the delimiters (escapes are not processed).
///
/// The title may sit on the same line as the destination or on the line
/// directly below it. If more than one line ending separates the two (i.e.
/// there is a blank line in between), whatever follows is not treated as a
/// title, so a following paragraph that happens to start with a quote or
/// parenthesis is never swallowed.
///
/// Returns:
/// - `Some(Some(title))` when a title was found; `pos` then points past the
///   closing delimiter and any spaces/tabs that trail it.
/// - `Some(None)` when there is no title. If only whitespace remained until
///   the end of input, `pos` is left at the end of input; otherwise `pos` is
///   restored to the value it had on entry.
/// - `None` when a title was opened but never closed.
fn parse_title(text: &str, bytes: &[u8], pos: &mut usize) -> Option<Option<String>> {
    let base_pos = *pos;

    // Skip whitespace before a potential title, remembering how many line
    // endings were crossed so a blank line can be detected afterwards.
    let mut line_endings = 0usize;
    while *pos < bytes.len() && matches!(bytes[*pos], b' ' | b'\t' | b'\n' | b'\r') {
        if bytes[*pos] == b'\n' {
            line_endings += 1;
        }
        *pos += 1;
    }

    // Nothing but whitespace until the end of input: no title.
    if *pos >= bytes.len() {
        return Some(None);
    }

    let quote_char = bytes[*pos];
    if line_endings > 1 || !matches!(quote_char, b'"' | b'\'' | b'(') {
        // Either the next token is not a title opener, or it is separated from
        // the destination by a blank line and therefore belongs to whatever
        // comes next. Both cases mean "no title"; give the whitespace back.
        *pos = base_pos;
        return Some(None);
    }

    let closing_char = if quote_char == b'(' { b')' } else { quote_char };
    let title_start = *pos + 1;

    // Scan for the unescaped closing delimiter.
    let mut cursor = title_start;
    while cursor < bytes.len() {
        match bytes[cursor] {
            // Skip the backslash together with the byte it escapes.
            b'\\' => cursor += 2,
            c if c == closing_char => {
                let title_end = cursor;
                *pos = cursor + 1; // Step over the closing delimiter.

                // Swallow spaces/tabs that trail the title on its line.
                while *pos < bytes.len() && matches!(bytes[*pos], b' ' | b'\t') {
                    *pos += 1;
                }

                // Both bounds sit next to ASCII delimiters, so the slice is
                // always on character boundaries.
                return Some(Some(text[title_start..title_end].to_string()));
            }
            _ => cursor += 1,
        }
    }

    // Opened but never closed: malformed. The cursor is left at end of input.
    *pos = bytes.len();
    None
}
276
/// Recognise a footnote definition marker (`[^id]:`) at the very start of
/// `line`.
///
/// ```markdown
/// [^id]: Footnote content.
/// ```
///
/// Returns `Some((id, content_start))` where `id` is the text between `[^`
/// and `]`, and `content_start` is the byte offset in `line` of the first
/// byte after the colon and any spaces/tabs that follow it (which equals the
/// position of the end of the line content when nothing follows).
///
/// Returns `None` when the line does not start with `[^`, the id is empty,
/// the id is not closed by `]` before a line break, or `]` is not directly
/// followed by `:`.
pub fn try_parse_footnote_marker(line: &str) -> Option<(String, usize)> {
    // Everything after the mandatory `[^` opener.
    let after_opener = line.strip_prefix("[^")?;

    // The id runs up to the closing bracket and may not cross a line break.
    let id_len = after_opener
        .bytes()
        .take_while(|&b| !matches!(b, b']' | b'\n' | b'\r'))
        .count();
    if id_len == 0 || after_opener.as_bytes().get(id_len) != Some(&b']') {
        return None;
    }
    let id = &after_opener[..id_len];

    // The colon must come straight after the closing bracket.
    let after_colon = after_opener[id_len + 1..].strip_prefix(':')?;

    // Spaces/tabs between the colon and the content are part of the marker.
    let padding = after_colon
        .bytes()
        .take_while(|&b| matches!(b, b' ' | b'\t'))
        .count();
    let content_start = line.len() - after_colon.len() + padding;

    Some((id.to_string(), content_start))
}
322
#[cfg(test)]
mod tests {
    use super::{line_is_mmd_link_attribute_continuation, try_parse_reference_definition};
    use crate::syntax::SyntaxKind;

    /// A footnote whose body starts on the following line must round-trip
    /// through the parser without losing or altering any text.
    #[test]
    fn test_footnote_definition_body_layout_is_lossless() {
        let source = "[^note-on-refs]:\n    Note that if `--file-scope` is used,\n";
        let options = Some(crate::ParserOptions::default());
        let parsed = crate::parse(source, options);
        assert_eq!(parsed.text().to_string(), source);
    }

    /// The marker of a footnote definition is split into its structural
    /// tokens, all of which are direct children of the definition node.
    #[test]
    fn test_footnote_definition_marker_emits_structural_tokens() {
        let source = "[^note-on-refs]: body\n";
        let parsed = crate::parse(source, Some(crate::ParserOptions::default()));
        let definition = parsed
            .descendants()
            .find(|node| node.kind() == SyntaxKind::FOOTNOTE_DEFINITION)
            .expect("footnote definition");

        let mut direct_token_kinds = Vec::new();
        for element in definition.children_with_tokens() {
            if let Some(token) = element.into_token() {
                direct_token_kinds.push(token.kind());
            }
        }

        assert!(direct_token_kinds.contains(&SyntaxKind::FOOTNOTE_LABEL_START));
        assert!(direct_token_kinds.contains(&SyntaxKind::FOOTNOTE_LABEL_ID));
        assert!(direct_token_kinds.contains(&SyntaxKind::FOOTNOTE_LABEL_END));
        assert!(direct_token_kinds.contains(&SyntaxKind::FOOTNOTE_LABEL_COLON));
    }

    /// `$$ … $$` inside a footnote body is display math even when its content
    /// contains a LaTeX environment.
    #[test]
    fn footnote_multiline_dollar_math_parses_as_display_math_not_tex_block() {
        let source = "[^note]: Intro line before math:\n    $$\n    \\begin{aligned} a &= b \\\\ c &= d \\end{aligned}\n    $$\n";
        let parsed = crate::parse(source, Some(crate::ParserOptions::default()));

        let definition = parsed
            .descendants()
            .find(|node| node.kind() == SyntaxKind::FOOTNOTE_DEFINITION)
            .expect("footnote definition");

        let contains_kind =
            |wanted: SyntaxKind| definition.descendants().any(|node| node.kind() == wanted);
        let found_display_math = contains_kind(SyntaxKind::DISPLAY_MATH);
        let found_tex_block = contains_kind(SyntaxKind::TEX_BLOCK);

        assert!(
            found_display_math,
            "Expected DISPLAY_MATH in footnote definition, got:\n{}",
            parsed
        );
        assert!(
            !found_tex_block,
            "Did not expect TEX_BLOCK in footnote definition for $$...$$ math, got:\n{}",
            parsed
        );
    }

    /// Three leading spaces are still a definition; four are not.
    #[test]
    fn test_reference_definition_with_up_to_three_leading_spaces() {
        let three_spaces = try_parse_reference_definition("   [foo]: #bar");
        let four_spaces = try_parse_reference_definition("    [foo]: #bar");
        assert!(three_spaces.is_some());
        assert!(four_spaces.is_none());
    }

    /// Indented lines made only of `key=value` pairs are continuations.
    #[test]
    fn mmd_link_attribute_continuation_detects_valid_tokens() {
        let accepted = [
            "    width=20px height=30px id=myId",
            "\tclass=\"myClass1 myClass2\"",
        ];
        for line in accepted.iter() {
            assert!(line_is_mmd_link_attribute_continuation(line));
        }
    }

    /// Unindented lines and lines with non-attribute tokens are rejected.
    #[test]
    fn mmd_link_attribute_continuation_rejects_non_attribute_lines() {
        let rejected = ["not-indented width=20px", "    not-an-attr token"];
        for line in rejected.iter() {
            assert!(!line_is_mmd_link_attribute_continuation(line));
        }
    }
}