Skip to main content

supersigil_parser/
markdown_fences.rs

1//! Markdown fence extraction for supersigil documents.
2//!
3//! Parses a Markdown body (standard Markdown constructs) and extracts
4//! `supersigil-xml` fenced code blocks containing XML component markup.
5
6use markdown::mdast;
7use supersigil_core::SUPERSIGIL_XML_LANG;
8
9// ---------------------------------------------------------------------------
10// Types
11// ---------------------------------------------------------------------------
12
/// Fenced code blocks collected while scanning a Markdown body.
///
/// The default value holds no fences.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct MarkdownFences {
    /// Content of `supersigil-xml` fenced code blocks with byte offsets.
    pub xml_fences: Vec<XmlFence>,
}

/// A fenced code block with language `supersigil-xml`.
///
/// All offsets are byte offsets into the normalized source.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct XmlFence {
    /// The raw content between the fences.
    pub content: String,
    /// Byte offset of the content start in the normalized source (after the
    /// opening delimiter line).
    pub content_offset: usize,
    /// Byte offset of the opening fence delimiter (`` ``` `` line) in the
    /// normalized source.
    pub fence_start: usize,
    /// Byte offset of the end of the closing fence delimiter in the normalized
    /// source.
    pub fence_end: usize,
}
35
36// ---------------------------------------------------------------------------
37// Parsing
38// ---------------------------------------------------------------------------
39
40/// Parse a Markdown body and extract `supersigil-xml` fenced code blocks.
41///
42/// `body` is the document content after the front-matter block.
43/// `body_offset` is the byte offset of `body` within the full normalized
44/// source, used to produce file-absolute offsets.
45///
46/// Uses default Markdown constructs.
47#[must_use]
48pub fn extract_markdown_fences(body: &str, body_offset: usize) -> MarkdownFences {
49    let options = markdown::ParseOptions::default();
50
51    let Ok(ast) = markdown::to_mdast(body, &options) else {
52        return MarkdownFences::default();
53    };
54
55    let mut fences = MarkdownFences::default();
56    collect_fences(&ast, body, body_offset, &mut fences);
57    fences
58}
59
60/// Recursively walk the AST collecting `Code` nodes.
61fn collect_fences(node: &mdast::Node, body: &str, body_offset: usize, fences: &mut MarkdownFences) {
62    match node {
63        mdast::Node::Code(code) => {
64            // `pos.start.offset` points to the opening fence delimiter (``` line).
65            // The actual content begins on the next line, so we advance past the
66            // first newline to get the byte offset of the code block content.
67            let (offset, fence_start_abs, fence_end_abs) =
68                code.position.as_ref().map_or((0, 0, 0), |pos| {
69                    let fence_start = pos.start.offset;
70                    let content_offset = body[fence_start..]
71                        .find('\n')
72                        .map_or(body_offset + fence_start, |nl| {
73                            body_offset + fence_start + nl + 1
74                        });
75                    (
76                        content_offset,
77                        body_offset + fence_start,
78                        body_offset + pos.end.offset,
79                    )
80                });
81
82            if code.lang.as_deref() == Some(SUPERSIGIL_XML_LANG) {
83                fences.xml_fences.push(XmlFence {
84                    content: code.value.clone(),
85                    content_offset: offset,
86                    fence_start: fence_start_abs,
87                    fence_end: fence_end_abs,
88                });
89            }
90        }
91        other => {
92            if let Some(children) = other.children() {
93                for child in children {
94                    collect_fences(child, body, body_offset, fences);
95                }
96            }
97        }
98    }
99}
100
101// ---------------------------------------------------------------------------
102// Tests
103// ---------------------------------------------------------------------------
104
#[cfg(test)]
mod tests {
    use super::*;

    /// Parse `body` with a zero body offset and return the content of every
    /// collected `supersigil-xml` fence, in collection order.
    fn fence_contents(body: &str) -> Vec<String> {
        extract_markdown_fences(body, 0)
            .xml_fences
            .into_iter()
            .map(|fence| fence.content)
            .collect()
    }

    // -- Fence detection ---------------------------------------------------

    // supersigil: md-fence-detection
    #[test]
    fn no_fences_returns_empty() {
        assert!(fence_contents("# Hello\n\nSome paragraph text.\n").is_empty());
    }

    // supersigil: md-fence-detection
    // supersigil: md-xml-fence-collection
    #[test]
    fn detects_supersigil_xml_fence() {
        let body = "# Title\n\n```supersigil-xml\n<Spec id=\"s1\">hello</Spec>\n```\n";
        assert_eq!(fence_contents(body), ["<Spec id=\"s1\">hello</Spec>"]);
    }

    // supersigil: md-xml-fence-collection
    #[test]
    fn detects_multiple_xml_fences() {
        let body = concat!(
            "```supersigil-xml\n",
            "<A/>\n",
            "```\n",
            "\n",
            "```supersigil-xml\n",
            "<B/>\n",
            "```\n",
        );
        assert_eq!(fence_contents(body), ["<A/>", "<B/>"]);
    }

    // supersigil: md-xml-fence-collection
    #[test]
    fn xml_fence_offset_includes_body_offset() {
        let body_offset = 42;
        let fences = extract_markdown_fences("```supersigil-xml\n<X/>\n```\n", body_offset).xml_fences;
        assert_eq!(fences.len(), 1);
        // The content starts right after the opening fence line
        // "```supersigil-xml\n" (18 bytes), shifted by `body_offset`.
        let opening_line_len = "```supersigil-xml\n".len();
        assert_eq!(fences[0].content_offset, body_offset + opening_line_len);
    }

    // -- Language matching -------------------------------------------------

    #[test]
    fn non_supersigil_xml_lang_ignored() {
        assert!(fence_contents("```rust\nfn main() {}\n```\n").is_empty());
    }

    #[test]
    fn supersigil_xml_is_case_sensitive() {
        assert!(fence_contents("```Supersigil-xml\n<X/>\n```\n").is_empty());
    }

    #[test]
    fn supersigil_xml_with_meta_still_detected() {
        // Meta after the language must not prevent detection: markdown
        // parses the first word of the info string as `lang`.
        let body = "```supersigil-xml some-meta\n<X/>\n```\n";
        assert_eq!(fence_contents(body), ["<X/>"]);
    }

    // -- Fences with no meta -----------------------------------------------

    #[test]
    fn code_fence_with_lang_but_no_meta() {
        assert!(fence_contents("```python\nprint('hello')\n```\n").is_empty());
    }

    #[test]
    fn code_fence_with_no_lang_no_meta() {
        assert!(fence_contents("```\nplain text\n```\n").is_empty());
    }

    // -- Mixed fences -------------------------------------------------------

    #[test]
    fn non_supersigil_fences_ignored_alongside_xml() {
        let body = concat!(
            "Some text.\n",
            "\n",
            "```supersigil-xml\n",
            "<Spec id=\"s1\">content</Spec>\n",
            "```\n",
            "\n",
            "```sh\n",
            "echo hello\n",
            "```\n",
            "\n",
            "```rust\n",
            "fn main() {}\n",
            "```\n",
        );
        assert_eq!(fence_contents(body), ["<Spec id=\"s1\">content</Spec>"]);
    }
}