Skip to main content

panache_parser/syntax/
references.rs

1//! Reference link and footnote AST node wrappers.
2
3use super::ast::support;
4use super::links::Link;
5use super::{AstNode, PanacheLanguage, SyntaxKind, SyntaxNode};
6
/// Removes one layer of matching title delimiters from `raw`.
///
/// Recognised pairs are `"…"`, `'…'`, and `(…)`. The opening and closing
/// delimiter must be distinct characters of the string (a lone `"` is left
/// alone). Input that is not wrapped in one of these pairs is returned as-is.
fn strip_title_delimiters(raw: &str) -> String {
    // (opening, closing) delimiter pairs accepted around a title.
    const DELIMITERS: [(char, char); 3] = [('"', '"'), ('\'', '\''), ('(', ')')];

    DELIMITERS
        .iter()
        // Stripping the prefix first guarantees the suffix is a different
        // character position, so one-character inputs never match.
        .find_map(|&(open, close)| raw.strip_prefix(open)?.strip_suffix(close))
        .unwrap_or(raw)
        .to_string()
}
20
21pub struct ReferenceDefinition(SyntaxNode);
22
23impl AstNode for ReferenceDefinition {
24    type Language = PanacheLanguage;
25
26    fn can_cast(kind: SyntaxKind) -> bool {
27        kind == SyntaxKind::REFERENCE_DEFINITION
28    }
29
30    fn cast(syntax: SyntaxNode) -> Option<Self> {
31        if Self::can_cast(syntax.kind()) {
32            Some(Self(syntax))
33        } else {
34            None
35        }
36    }
37
38    fn syntax(&self) -> &SyntaxNode {
39        &self.0
40    }
41}
42
43impl ReferenceDefinition {
44    /// Returns the link containing the label and URL.
45    pub fn link(&self) -> Option<Link> {
46        support::child(&self.0)
47    }
48
49    /// Extracts the label text.
50    pub fn label(&self) -> String {
51        self.link()
52            .and_then(|link| link.text())
53            .map(|text| text.text_content())
54            .unwrap_or_default()
55    }
56
57    /// Extracts the destination URL, with `<…>` angle brackets stripped.
58    ///
59    /// Reads the structured `REFERENCE_URL` node emitted by the parser rather
60    /// than re-parsing the post-`]` tail.
61    pub fn url(&self) -> Option<String> {
62        let node = self
63            .0
64            .children()
65            .find(|n| n.kind() == SyntaxKind::REFERENCE_URL)?;
66        let raw = node.text().to_string();
67        let stripped = raw
68            .strip_prefix('<')
69            .and_then(|r| r.strip_suffix('>'))
70            .unwrap_or(&raw);
71        Some(stripped.to_string())
72    }
73
74    /// Extracts the title with its surrounding `"`/`'`/`()` delimiters stripped,
75    /// or `None` when the definition has no title.
76    ///
77    /// Reads the structured `REFERENCE_TITLE` node.
78    pub fn title(&self) -> Option<String> {
79        let node = self
80            .0
81            .children()
82            .find(|n| n.kind() == SyntaxKind::REFERENCE_TITLE)?;
83        Some(strip_title_delimiters(&node.text().to_string()))
84    }
85
86    /// Raw destination text, including any `<…>` angle brackets (but no
87    /// trailing title). Retained for back-compat with consumers that strip
88    /// brackets / whitespace themselves (LSP document links, hover); prefer
89    /// [`url`](Self::url) for the angle-stripped destination.
90    pub fn destination(&self) -> Option<String> {
91        self.0
92            .children()
93            .find(|n| n.kind() == SyntaxKind::REFERENCE_URL)
94            .map(|n| n.text().to_string())
95    }
96
97    /// Returns the text range for the definition label value.
98    pub fn label_value_range(&self) -> Option<rowan::TextRange> {
99        let link = self.link()?;
100
101        if let Some(range) = link
102            .reference()
103            .and_then(|reference| reference.label_value_range())
104        {
105            return Some(range);
106        }
107
108        link.text()?
109            .syntax()
110            .descendants_with_tokens()
111            .find_map(|elem| {
112                elem.into_token()
113                    .filter(|token| token.kind() == SyntaxKind::TEXT)
114                    .map(|token| token.text_range())
115            })
116    }
117}
118
119pub struct FootnoteReference(SyntaxNode);
120
121impl AstNode for FootnoteReference {
122    type Language = PanacheLanguage;
123
124    fn can_cast(kind: SyntaxKind) -> bool {
125        kind == SyntaxKind::FOOTNOTE_REFERENCE
126    }
127
128    fn cast(syntax: SyntaxNode) -> Option<Self> {
129        if Self::can_cast(syntax.kind()) {
130            Some(Self(syntax))
131        } else {
132            None
133        }
134    }
135
136    fn syntax(&self) -> &SyntaxNode {
137        &self.0
138    }
139}
140
141impl FootnoteReference {
142    /// Extracts the footnote ID (e.g., "1" from a footnote reference).
143    pub fn id(&self) -> String {
144        if let Some(id) = self
145            .0
146            .children_with_tokens()
147            .filter_map(|child| child.into_token())
148            .find(|token| token.kind() == SyntaxKind::FOOTNOTE_LABEL_ID)
149        {
150            return id.text().to_string();
151        }
152
153        let tokens: Vec<_> = self
154            .0
155            .children_with_tokens()
156            .filter_map(|child| child.into_token())
157            .filter(|token| token.kind() == SyntaxKind::TEXT)
158            .map(|token| token.text().to_string())
159            .collect();
160
161        if tokens.len() >= 2 && tokens[0] == "[^" {
162            tokens[1].clone()
163        } else {
164            String::new()
165        }
166    }
167
168    /// Returns the full text range of this reference token.
169    pub fn id_range(&self) -> rowan::TextRange {
170        self.0.text_range()
171    }
172
173    /// Returns the text range for the footnote ID only (excluding `[^` and `]`).
174    pub fn id_value_range(&self) -> Option<rowan::TextRange> {
175        if let Some(id) = self
176            .0
177            .children_with_tokens()
178            .filter_map(|child| child.into_token())
179            .find(|token| token.kind() == SyntaxKind::FOOTNOTE_LABEL_ID)
180        {
181            return Some(id.text_range());
182        }
183
184        let tokens: Vec<_> = self
185            .0
186            .children_with_tokens()
187            .filter_map(|child| child.into_token())
188            .filter(|token| token.kind() == SyntaxKind::TEXT)
189            .collect();
190
191        if tokens.len() >= 2 && tokens[0].text() == "[^" {
192            Some(tokens[1].text_range())
193        } else {
194            None
195        }
196    }
197}
198
199pub struct FootnoteDefinition(SyntaxNode);
200
201impl AstNode for FootnoteDefinition {
202    type Language = PanacheLanguage;
203
204    fn can_cast(kind: SyntaxKind) -> bool {
205        kind == SyntaxKind::FOOTNOTE_DEFINITION
206    }
207
208    fn cast(syntax: SyntaxNode) -> Option<Self> {
209        if Self::can_cast(syntax.kind()) {
210            Some(Self(syntax))
211        } else {
212            None
213        }
214    }
215
216    fn syntax(&self) -> &SyntaxNode {
217        &self.0
218    }
219}
220
221impl FootnoteDefinition {
222    /// Extracts the footnote ID from the definition marker.
223    pub fn id(&self) -> String {
224        if let Some(id) = self
225            .0
226            .children_with_tokens()
227            .filter_map(|child| child.into_token())
228            .find(|token| token.kind() == SyntaxKind::FOOTNOTE_LABEL_ID)
229        {
230            return id.text().to_string();
231        }
232
233        self.0
234            .children_with_tokens()
235            .filter_map(|child| child.into_token())
236            .find(|token| token.kind() == SyntaxKind::FOOTNOTE_REFERENCE)
237            .and_then(|token| {
238                let text = token.text();
239                if text.starts_with("[^") && text.contains("]:") {
240                    text.trim_start_matches("[^")
241                        .split(']')
242                        .next()
243                        .map(String::from)
244                } else {
245                    None
246                }
247            })
248            .unwrap_or_default()
249    }
250
251    /// Returns the text range for the footnote ID only (excluding `[^`, `]`, and `:`).
252    pub fn id_value_range(&self) -> Option<rowan::TextRange> {
253        if let Some(id) = self
254            .0
255            .children_with_tokens()
256            .filter_map(|child| child.into_token())
257            .find(|token| token.kind() == SyntaxKind::FOOTNOTE_LABEL_ID)
258        {
259            return Some(id.text_range());
260        }
261
262        let marker = self
263            .0
264            .children_with_tokens()
265            .filter_map(|child| child.into_token())
266            .find(|token| token.kind() == SyntaxKind::FOOTNOTE_REFERENCE)?;
267
268        let marker_text = marker.text();
269        if !marker_text.starts_with("[^") {
270            return None;
271        }
272
273        let close_bracket = marker_text.find(']')?;
274        if close_bracket <= 2 {
275            return None;
276        }
277
278        if marker_text.as_bytes().get(close_bracket + 1) != Some(&b':') {
279            return None;
280        }
281
282        let token_start = marker.text_range().start();
283        let id_start = token_start + rowan::TextSize::from(2);
284        let id_end = token_start + rowan::TextSize::from(close_bracket as u32);
285        Some(rowan::TextRange::new(id_start, id_end))
286    }
287
288    /// Extracts the content of the footnote definition.
289    /// Returns the text content after the `[^id]:` marker.
290    pub fn content(&self) -> String {
291        // Skip the definition marker tokens and collect all other content
292        self.0
293            .children_with_tokens()
294            .filter_map(|child| match child {
295                rowan::NodeOrToken::Node(node) => Some(node.text().to_string()),
296                rowan::NodeOrToken::Token(token)
297                    if !matches!(
298                        token.kind(),
299                        SyntaxKind::FOOTNOTE_REFERENCE
300                            | SyntaxKind::FOOTNOTE_LABEL_START
301                            | SyntaxKind::FOOTNOTE_LABEL_ID
302                            | SyntaxKind::FOOTNOTE_LABEL_END
303                            | SyntaxKind::FOOTNOTE_LABEL_COLON
304                    ) =>
305                {
306                    Some(token.text().to_string())
307                }
308                _ => None,
309            })
310            .collect::<Vec<_>>()
311            .join("")
312    }
313
314    /// Check if this footnote definition is simple (single paragraph, no complex blocks).
315    /// Simple footnotes can be converted to inline style.
316    pub fn is_simple(&self) -> bool {
317        // Simple footnote has:
318        // - No blank lines in content (single paragraph)
319        // - No code blocks, lists, or other block elements
320        let content = self.content();
321
322        // Check for blank lines (indicates multi-paragraph)
323        if content.contains("\n\n") {
324            return false;
325        }
326
327        // Check for code blocks (need to distinguish from continuation lines)
328        // Code blocks have 8+ spaces (4 for footnote + 4 for code)
329        if content
330            .lines()
331            .skip(1)
332            .any(|line| line.len() > 8 && line.starts_with("        "))
333        {
334            return false;
335        }
336
337        // Check for list markers in continuation lines (after first line)
338        for line in content.lines().skip(1) {
339            let trimmed = line.trim_start();
340            if trimmed.starts_with("- ")
341                || trimmed.starts_with("* ")
342                || trimmed.starts_with("+ ")
343                || (trimmed
344                    .chars()
345                    .next()
346                    .map(|c| c.is_ascii_digit())
347                    .unwrap_or(false)
348                    && trimmed.chars().skip(1).any(|c| c == '.'))
349            {
350                return false;
351            }
352        }
353
354        // Check for list nodes in the CST (handles nested lists reliably).
355        if self
356            .0
357            .descendants()
358            .any(|node| node.kind() == SyntaxKind::LIST)
359        {
360            return false;
361        }
362
363        true
364    }
365}
366
367pub struct InlineFootnote(SyntaxNode);
368
369impl AstNode for InlineFootnote {
370    type Language = PanacheLanguage;
371
372    fn can_cast(kind: SyntaxKind) -> bool {
373        kind == SyntaxKind::INLINE_FOOTNOTE
374    }
375
376    fn cast(syntax: SyntaxNode) -> Option<Self> {
377        if Self::can_cast(syntax.kind()) {
378            Some(Self(syntax))
379        } else {
380            None
381        }
382    }
383
384    fn syntax(&self) -> &SyntaxNode {
385        &self.0
386    }
387}
388
389impl InlineFootnote {
390    /// Extracts the content of the inline footnote (text between ^[ and ]).
391    pub fn content(&self) -> String {
392        self.0
393            .children_with_tokens()
394            .filter_map(|child| {
395                if let Some(token) = child.as_token() {
396                    // Skip the start and end markers
397                    if token.kind() != SyntaxKind::INLINE_FOOTNOTE_START
398                        && token.kind() != SyntaxKind::INLINE_FOOTNOTE_END
399                    {
400                        Some(token.text().to_string())
401                    } else {
402                        None
403                    }
404                } else {
405                    // Include nested nodes (emphasis, code, etc.)
406                    child.as_node().map(|node| node.text().to_string())
407                }
408            })
409            .collect::<Vec<_>>()
410            .join("")
411    }
412}
413
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parse;

    /// Returns the part of `input` covered by `range`.
    fn covered_text(input: &str, range: rowan::TextRange) -> &str {
        let start: usize = range.start().into();
        let end: usize = range.end().into();
        &input[start..end]
    }

    #[test]
    fn test_reference_definition_destination() {
        let source = "[ref]: https://example.com \"Title\"";
        let tree = parse(source, None);
        let definition = tree
            .descendants()
            .find_map(ReferenceDefinition::cast)
            .expect("Should find ReferenceDefinition");

        assert_eq!(definition.label(), "ref");
        // Both accessors read the structured REFERENCE_URL node, so neither
        // carries the trailing title.
        assert_eq!(definition.url().as_deref(), Some("https://example.com"));
        assert_eq!(
            definition.destination().as_deref(),
            Some("https://example.com")
        );
        assert_eq!(definition.title().as_deref(), Some("Title"));
        assert!(definition.label_value_range().is_some());
    }

    #[test]
    fn test_reference_definition_angle_url_and_no_title() {
        let source = "[ref]: <https://example.com/path>";
        let tree = parse(source, None);
        let definition = tree
            .descendants()
            .find_map(ReferenceDefinition::cast)
            .expect("Should find ReferenceDefinition");

        // url() drops the angle brackets while destination() preserves them,
        // which lets LSP consumers still recover URLs that contain spaces.
        assert_eq!(
            definition.url().as_deref(),
            Some("https://example.com/path")
        );
        assert_eq!(
            definition.destination().as_deref(),
            Some("<https://example.com/path>")
        );
        assert_eq!(definition.title(), None);
    }

    #[test]
    fn test_footnote_definition_single_line() {
        let source = "[^1]: This is a simple footnote.";
        let tree = parse(source, None);
        let definition = tree
            .descendants()
            .find_map(FootnoteDefinition::cast)
            .expect("Should find FootnoteDefinition");

        assert_eq!(definition.id(), "1");
        assert_eq!(
            definition
                .id_value_range()
                .map(|range| covered_text(source, range)),
            Some("1")
        );
        assert_eq!(definition.content().trim(), "This is a simple footnote.");
        assert!(
            definition.is_simple(),
            "Single line footnote should be simple"
        );
    }

    #[test]
    fn test_footnote_definition_multiline() {
        let source = "[^1]: First line\n    Second line";
        let tree = parse(source, None);
        let definition = tree
            .descendants()
            .find_map(FootnoteDefinition::cast)
            .expect("Should find FootnoteDefinition");

        assert_eq!(definition.id(), "1");
        let body = definition.content();
        assert!(body.contains("First line"));
        assert!(body.contains("Second line"));
        assert!(
            definition.is_simple(),
            "Continuation lines should still be simple"
        );
    }

    #[test]
    fn test_footnote_definition_with_formatting() {
        let source = "[^note]: Text with *emphasis* and `code`.";
        let tree = parse(source, None);
        let definition = tree
            .descendants()
            .find_map(FootnoteDefinition::cast)
            .expect("Should find FootnoteDefinition");

        assert_eq!(definition.id(), "note");
        assert_eq!(
            definition
                .id_value_range()
                .map(|range| covered_text(source, range)),
            Some("note")
        );
        let body = definition.content();
        assert!(body.contains("*emphasis*"));
        assert!(body.contains("`code`"));
    }

    #[test]
    fn test_footnote_definition_empty() {
        let source = "[^1]: ";
        let tree = parse(source, None);
        let definition = tree
            .descendants()
            .find_map(FootnoteDefinition::cast)
            .expect("Should find FootnoteDefinition");

        assert_eq!(definition.id(), "1");
        assert!(definition.content().trim().is_empty());
    }

    #[test]
    fn test_footnote_reference_id() {
        let source = "[^test]";
        let tree = parse(source, None);
        let reference = tree
            .descendants()
            .find_map(FootnoteReference::cast)
            .expect("Should find FootnoteReference");

        assert_eq!(reference.id(), "test");
        assert_eq!(
            reference
                .id_value_range()
                .map(|range| covered_text(source, range)),
            Some("test")
        );
    }

    #[test]
    fn test_footnote_definition_is_simple() {
        // A single line is simple.
        let single_line = parse("[^1]: Simple text.", None)
            .descendants()
            .find_map(FootnoteDefinition::cast)
            .unwrap();
        assert!(single_line.is_simple());

        // A plain continuation line keeps it simple.
        let continued = parse("[^1]: First line\n    continuation.", None)
            .descendants()
            .find_map(FootnoteDefinition::cast)
            .unwrap();
        assert!(continued.is_simple());
    }

    #[test]
    fn test_footnote_definition_is_complex() {
        // A blank line introduces a second paragraph.
        let multi_paragraph = parse("[^1]: First para.\n\n    Second para.", None)
            .descendants()
            .find_map(FootnoteDefinition::cast)
            .unwrap();
        assert!(
            !multi_paragraph.is_simple(),
            "Multi-paragraph should not be simple"
        );

        // A nested list.
        let with_list = parse("[^1]: Text\n    - Item 1\n    - Item 2", None)
            .descendants()
            .find_map(FootnoteDefinition::cast)
            .unwrap();
        assert!(
            !with_list.is_simple(),
            "Footnote with list should not be simple"
        );

        // An indented code block.
        let with_code = parse("[^1]: Text\n\n        code block", None)
            .descendants()
            .find_map(FootnoteDefinition::cast)
            .unwrap();
        assert!(
            !with_code.is_simple(),
            "Footnote with code block should not be simple"
        );
    }

    #[test]
    fn test_inline_footnote_content() {
        let source = "Text^[This is an inline note] more text.";
        let tree = parse(source, None);
        let footnote = tree
            .descendants()
            .find_map(InlineFootnote::cast)
            .expect("Should find InlineFootnote");

        assert_eq!(footnote.content(), "This is an inline note");
    }

    #[test]
    fn test_inline_footnote_with_formatting() {
        let source = "Text^[Note with *emphasis* and `code`] more.";
        let tree = parse(source, None);
        let footnote = tree
            .descendants()
            .find_map(InlineFootnote::cast)
            .expect("Should find InlineFootnote");

        let body = footnote.content();
        assert!(body.contains("emphasis"));
        assert!(body.contains("code"));
    }
}