Skip to main content

rdx_parser/
lib.rs

1pub use rdx_ast::*;
2
3mod attributes;
4mod frontmatter;
5mod markdown;
6mod scanner;
7mod source_map;
8mod tags;
9mod text;
10
11use scanner::Segment;
12use source_map::SourceMap;
13
14/// Parse an RDX document string into a compliant AST.
15///
16/// This is the primary entry point for the parser. It handles:
17/// - YAML frontmatter extraction (spec 2.1)
18/// - CommonMark block structure via pulldown-cmark
19/// - RDX component tags with typed attributes (spec 2.2, 2.3)
20/// - Variable interpolation in text (spec 2.4)
21/// - Escape sequences (spec 2.5)
22/// - HTML pass-through for lowercase tags (spec 2.6)
23/// - Error nodes for malformed constructs (spec 3)
24pub fn parse(input: &str) -> Root {
25    let sm = SourceMap::new(input);
26    let (frontmatter, body_start) = frontmatter::extract_frontmatter(input);
27    let body = &input[body_start..];
28    let children = parse_body(body, body_start, &sm, input);
29
30    Root {
31        node_type: RootType::Root,
32        frontmatter,
33        children,
34        position: sm.position(0, input.len()),
35    }
36}
37
/// Check a variable path against the spec 2.4.1 grammar:
/// `[a-zA-Z_][a-zA-Z0-9_]*(\.[a-zA-Z_][a-zA-Z0-9_]*)*`
///
/// Returns `true` only when every dot-separated segment is a non-empty ASCII
/// identifier (leading letter or underscore, then letters, digits or
/// underscores). The empty string, leading/trailing dots and consecutive dots
/// are all rejected.
pub(crate) fn is_valid_variable_path(path: &str) -> bool {
    // Identifier-continue bytes: ASCII letters, digits and underscore.
    let is_ident_continue = |byte: u8| byte.is_ascii_alphanumeric() || byte == b'_';

    // An empty path still yields exactly one (empty) segment from `split`,
    // which the `None` arm below rejects along with `..`, `.foo` and `foo.`.
    path.split('.').all(|segment| {
        let mut bytes = segment.bytes();
        match bytes.next() {
            // First byte must be a letter or underscore; the rest may add digits.
            Some(head) if head.is_ascii_alphabetic() || head == b'_' => {
                bytes.all(is_ident_continue)
            }
            _ => false,
        }
    })
}
60
61/// Recursively parse a body region into AST nodes.
62/// Splits into markdown vs block-component segments, processes each accordingly.
63fn parse_body(body: &str, base_offset: usize, sm: &SourceMap, full_input: &str) -> Vec<Node> {
64    let segments = scanner::scan_segments(body, base_offset, sm);
65    let mut nodes = Vec::new();
66
67    for seg in segments {
68        match seg {
69            Segment::Markdown { start, end } => {
70                let text = &full_input[start..end];
71                nodes.extend(markdown::parse_markdown_region(text, start, sm, full_input));
72            }
73            Segment::BlockComponent {
74                tag,
75                body_start,
76                body_end,
77                close_end,
78            } => {
79                let inner = if body_start <= body_end {
80                    &full_input[body_start..body_end]
81                } else {
82                    ""
83                };
84                let children = parse_body(inner, body_start, sm, full_input);
85                nodes.push(Node::Component(ComponentNode {
86                    name: tag.name,
87                    is_inline: false,
88                    attributes: tag.attributes,
89                    children,
90                    position: sm.position(tag.start, close_end),
91                }));
92            }
93            Segment::BlockSelfClosing { tag } => {
94                nodes.push(Node::Component(ComponentNode {
95                    name: tag.name,
96                    is_inline: false,
97                    attributes: tag.attributes,
98                    children: vec![],
99                    position: sm.position(tag.start, tag.end),
100                }));
101            }
102            Segment::MathBlock {
103                value_start,
104                value_end,
105                block_end,
106            } => {
107                let value = if value_start <= value_end {
108                    full_input[value_start..value_end].to_string()
109                } else {
110                    String::new()
111                };
112                nodes.push(Node::MathDisplay(TextNode {
113                    value,
114                    position: sm.position(value_start.saturating_sub(3), block_end), // include $$
115                }));
116            }
117            Segment::Error {
118                message,
119                raw,
120                start,
121                end,
122            } => {
123                nodes.push(Node::Error(ErrorNode {
124                    message,
125                    raw_content: raw,
126                    position: sm.position(start, end),
127                }));
128            }
129        }
130    }
131
132    nodes
133}
134
#[cfg(test)]
mod tests {
    use super::*;

    // True when at least one top-level child of `root` satisfies `pred`.
    fn any_child(root: &Root, pred: impl Fn(&Node) -> bool) -> bool {
        root.children.iter().any(pred)
    }

    // ---- Frontmatter -------------------------------------------------------

    #[test]
    fn empty_document() {
        let doc = parse("");
        assert_eq!(doc.node_type, RootType::Root);
        assert_eq!(doc.frontmatter, None);
        assert!(doc.children.is_empty());
    }

    #[test]
    fn frontmatter_only() {
        let doc = parse("---\ntitle: Hello\nversion: 2\n---\n");
        assert!(doc.frontmatter.is_some());
        let meta = doc.frontmatter.unwrap();
        assert_eq!(meta["title"], "Hello");
        assert_eq!(meta["version"], 2);
    }

    #[test]
    fn frontmatter_no_trailing_content() {
        // The closing fence is the last thing in the input, with no newline after it.
        let doc = parse("---\nfoo: bar\n---");
        assert!(doc.frontmatter.is_some());
        let meta = doc.frontmatter.unwrap();
        assert_eq!(meta["foo"], "bar");
    }

    #[test]
    fn no_frontmatter_when_not_at_line1() {
        let doc = parse("\n---\ntitle: Hello\n---\n");
        assert_eq!(doc.frontmatter, None);
    }

    #[test]
    fn frontmatter_plus_content() {
        let doc = parse("---\ntitle: Test\n---\n# Hello\n");
        assert!(doc.frontmatter.is_some());
        assert!(matches!(&doc.children[0], Node::Heading(_)));
        let meta = doc.frontmatter.unwrap();
        assert_eq!(meta["title"], "Test");
    }

    // ---- Plain markdown ----------------------------------------------------

    #[test]
    fn pure_markdown_heading() {
        let doc = parse("# Hello World\n");
        let Node::Heading(heading) = &doc.children[0] else {
            panic!("Expected heading, got {:?}", doc.children[0]);
        };
        assert_eq!(heading.depth, Some(1));
        let Node::Text(text) = &heading.children[0] else {
            panic!("Expected text, got {:?}", heading.children[0]);
        };
        assert_eq!(text.value, "Hello World");
    }

    #[test]
    fn paragraph_with_emphasis() {
        let doc = parse("This is **bold** text.\n");
        let Node::Paragraph(para) = &doc.children[0] else {
            panic!("Expected paragraph, got {:?}", doc.children[0]);
        };
        assert!(para.children.len() >= 3);
        assert!(matches!(&para.children[1], Node::Strong(_)));
    }

    // ---- Components --------------------------------------------------------

    #[test]
    fn self_closing_block_component() {
        let doc = parse("<Badge status=\"beta\" />\n");
        let Node::Component(badge) = &doc.children[0] else {
            panic!("Expected component, got {:?}", doc.children[0]);
        };
        assert_eq!(badge.name, "Badge");
        assert!(!badge.is_inline);
        assert!(badge.children.is_empty());
        let first_attr = &badge.attributes[0];
        assert_eq!(first_attr.name, "status");
        assert_eq!(first_attr.value, AttributeValue::String("beta".into()));
    }

    #[test]
    fn block_component_with_children() {
        let doc = parse("<Notice type=\"warning\">\nThis is a warning.\n</Notice>\n");
        let Node::Component(notice) = &doc.children[0] else {
            panic!("Expected component, got {:?}", doc.children[0]);
        };
        assert_eq!(notice.name, "Notice");
        assert!(!notice.is_inline);
        assert!(!notice.children.is_empty());
    }

    #[test]
    fn nested_components() {
        let doc = parse("<Outer>\n<Inner>\nText\n</Inner>\n</Outer>\n");
        let Node::Component(outer) = &doc.children[0] else {
            panic!("Expected outer, got {:?}", doc.children[0]);
        };
        assert_eq!(outer.name, "Outer");
        let Node::Component(inner) = &outer.children[0] else {
            panic!("Expected inner, got {:?}", outer.children[0]);
        };
        assert_eq!(inner.name, "Inner");
    }

    #[test]
    fn inline_self_closing_component() {
        let doc = parse("Text with <Badge /> inline.\n");
        let Node::Paragraph(para) = &doc.children[0] else {
            panic!("Expected paragraph, got {:?}", doc.children[0]);
        };
        let has_badge = para
            .children
            .iter()
            .any(|n| matches!(n, Node::Component(c) if c.name == "Badge" && c.is_inline));
        assert!(has_badge, "Should contain inline Badge: {:?}", para.children);
    }

    // ---- Malformed input ---------------------------------------------------

    #[test]
    fn unclosed_block_component() {
        let doc = parse("<Notice>\nContent\n");
        assert!(
            any_child(&doc, |n| matches!(n, Node::Error(_))),
            "Should have error for unclosed tag: {:?}",
            doc.children
        );
    }

    #[test]
    fn stray_close_tag() {
        let doc = parse("</Notice>\n");
        assert!(
            any_child(&doc, |n| matches!(n, Node::Error(_))),
            "Should have error for stray close tag: {:?}",
            doc.children
        );
    }

    #[test]
    fn html_passthrough() {
        // Lowercase tags are HTML, so no component node may appear.
        let doc = parse("<div>hello</div>\n");
        assert!(
            !any_child(&doc, |n| matches!(n, Node::Component(_))),
            "Lowercase HTML should not be component: {:?}",
            doc.children
        );
    }

    #[test]
    fn thematic_break_not_frontmatter() {
        let doc = parse("Hello\n\n---\n\nWorld\n");
        assert_eq!(doc.frontmatter, None);
        assert!(any_child(&doc, |n| matches!(n, Node::ThematicBreak(_))));
    }

    #[test]
    fn position_tracking() {
        let doc = parse("# Hi\n");
        let start = &doc.position.start;
        assert_eq!(start.line, 1);
        assert_eq!(start.column, 1);
        assert_eq!(start.offset, 0);
    }

    #[test]
    fn mixed_markdown_and_components() {
        let doc = parse(
            "# Title\n\n<Notice type=\"info\">\nSome **bold** content.\n</Notice>\n\nMore text.\n",
        );
        assert!(doc.children.len() >= 3);
        assert!(matches!(&doc.children[0], Node::Heading(_)));
        assert!(matches!(&doc.children[1], Node::Component(_)));
        assert!(matches!(&doc.children[2], Node::Paragraph(_)));
    }

    #[test]
    fn component_with_markdown_children() {
        let doc = parse("<Notice>\n**Bold** and *italic*.\n</Notice>\n");
        let Node::Component(notice) = &doc.children[0] else {
            panic!("Expected component, got {:?}", doc.children[0]);
        };
        assert_eq!(notice.name, "Notice");
        assert!(!notice.children.is_empty());
    }

    // ---- Lists, quotes and inline extensions --------------------------------

    #[test]
    fn list_parsing() {
        let doc = parse("- item 1\n- item 2\n");
        let Node::List(list) = &doc.children[0] else {
            panic!("Expected list, got {:?}", doc.children[0]);
        };
        assert_eq!(list.ordered, Some(false));
        assert_eq!(list.children.len(), 2);
    }

    #[test]
    fn ordered_list() {
        let doc = parse("1. first\n2. second\n");
        let Node::List(list) = &doc.children[0] else {
            panic!("Expected list, got {:?}", doc.children[0]);
        };
        assert_eq!(list.ordered, Some(true));
    }

    #[test]
    fn blockquote() {
        let doc = parse("> quoted text\n");
        assert!(matches!(&doc.children[0], Node::Blockquote(_)));
    }

    #[test]
    fn strikethrough() {
        let doc = parse("~~deleted~~\n");
        let Node::Paragraph(para) = &doc.children[0] else {
            panic!("Expected paragraph, got {:?}", doc.children[0]);
        };
        assert!(
            para.children
                .iter()
                .any(|n| matches!(n, Node::Strikethrough(_)))
        );
    }

    #[test]
    fn task_list() {
        let doc = parse("- [x] done\n- [ ] todo\n");
        let Node::List(list) = &doc.children[0] else {
            panic!("Expected list, got {:?}", doc.children[0]);
        };
        assert_eq!(list.children.len(), 2);
        // First item is ticked, second is not.
        for (item, expected) in list.children.iter().zip([Some(true), Some(false)]) {
            let Node::ListItem(li) = item else {
                panic!("Expected list item, got {:?}", item);
            };
            assert_eq!(li.checked, expected);
        }
    }

    #[test]
    fn link_with_url_and_title() {
        let doc = parse("[click](https://example.com \"My Title\")\n");
        let Node::Paragraph(para) = &doc.children[0] else {
            panic!("Expected paragraph, got {:?}", doc.children[0]);
        };
        let Node::Link(link) = &para.children[0] else {
            panic!("Expected link, got {:?}", para.children[0]);
        };
        assert_eq!(link.url, "https://example.com");
        assert_eq!(link.title.as_deref(), Some("My Title"));
        assert!(!link.children.is_empty());
    }

    #[test]
    fn image_with_url() {
        let doc = parse("![alt text](image.png)\n");
        let Node::Paragraph(para) = &doc.children[0] else {
            panic!("Expected paragraph, got {:?}", doc.children[0]);
        };
        let Node::Image(image) = &para.children[0] else {
            panic!("Expected image, got {:?}", para.children[0]);
        };
        assert_eq!(image.url, "image.png");
    }

    #[test]
    fn code_block_with_language() {
        let doc = parse("```rust\nlet x = 1;\n```\n");
        let Node::CodeBlock(code) = &doc.children[0] else {
            panic!("Expected code block, got {:?}", doc.children[0]);
        };
        assert_eq!(code.lang.as_deref(), Some("rust"));
        assert_eq!(code.value, "let x = 1;\n");
    }

    #[test]
    fn footnote() {
        let doc = parse("Text[^1].\n\n[^1]: Footnote content.\n");
        // The reference lives inside a paragraph; the definition is top-level.
        let has_ref = any_child(&doc, |n| {
            matches!(
                n,
                Node::Paragraph(p)
                    if p.children
                        .iter()
                        .any(|c| matches!(c, Node::FootnoteReference(_)))
            )
        });
        let has_def = any_child(&doc, |n| matches!(n, Node::FootnoteDefinition(_)));
        assert!(
            has_ref,
            "Should have footnote reference: {:?}",
            doc.children
        );
        assert!(
            has_def,
            "Should have footnote definition: {:?}",
            doc.children
        );
    }

    // ---- Math and variables ------------------------------------------------

    #[test]
    fn inline_math() {
        let doc = parse("The equation $x^2 + y^2 = z^2$ is famous.\n");
        let Node::Paragraph(para) = &doc.children[0] else {
            panic!("Expected paragraph, got {:?}", doc.children[0]);
        };
        let has_math = para
            .children
            .iter()
            .any(|n| matches!(n, Node::MathInline(t) if t.value == "x^2 + y^2 = z^2"));
        assert!(has_math, "Should contain inline math: {:?}", para.children);
    }

    #[test]
    fn display_math_block() {
        let doc = parse("$$\nE = mc^2\n$$\n");
        let has_math = any_child(
            &doc,
            |n| matches!(n, Node::MathDisplay(t) if t.value.contains("E = mc^2")),
        );
        assert!(has_math, "Should contain display math: {:?}", doc.children);
    }

    #[test]
    fn math_does_not_conflict_with_variables() {
        // `{$amount}` is a variable reference; its `$` must not start inline math.
        let doc = parse("Price is {$amount} dollars.\n");
        let Node::Paragraph(para) = &doc.children[0] else {
            panic!("Expected paragraph, got {:?}", doc.children[0]);
        };
        let inline = &para.children;
        let has_var = inline
            .iter()
            .any(|n| matches!(n, Node::Variable(v) if v.path == "amount"));
        let has_math = inline.iter().any(|n| matches!(n, Node::MathInline(_)));
        assert!(has_var, "Should contain variable: {:?}", para.children);
        assert!(!has_math, "Should NOT contain math: {:?}", para.children);
    }

    #[test]
    fn variable_path_validation() {
        for accepted in ["title", "frontmatter.title", "config.theme_name", "_private"] {
            assert!(is_valid_variable_path(accepted));
        }
        for rejected in ["", "123abc", "foo..bar", ".foo", "foo."] {
            assert!(!is_valid_variable_path(rejected));
        }
    }

    // ---- Code fences -------------------------------------------------------

    #[test]
    fn mixed_fence_chars_not_cross_closed() {
        // A ~~~ fence should NOT be closed by ```: the backtick line inside is
        // content, and the block only ends at the matching ~~~.
        let doc = parse("~~~\nstill fenced\n```\nstill fenced\n~~~\n\nAfter fence.\n");
        let Node::CodeBlock(code) = &doc.children[0] else {
            panic!("Expected code block, got {:?}", doc.children[0]);
        };
        assert!(
            code.value.contains("```"),
            "``` should be content inside ~~~ fence: {:?}",
            code.value
        );
        assert!(
            code.value.contains("still fenced"),
            "Content should be preserved: {:?}",
            code.value
        );
    }
}
549}