Skip to main content

rdx_parser/
lib.rs

1pub use rdx_ast::*;
2
3mod attributes;
4mod frontmatter;
5mod markdown;
6mod scanner;
7mod source_map;
8mod tags;
9mod text;
10
11use scanner::Segment;
12use source_map::SourceMap;
13
14/// Parse an RDX document string into a compliant AST.
15///
16/// This is the primary entry point for the parser. It handles:
17/// - YAML frontmatter extraction (spec 2.1)
18/// - CommonMark block structure via pulldown-cmark
19/// - RDX component tags with typed attributes (spec 2.2, 2.3)
20/// - Variable interpolation in text (spec 2.4)
21/// - Escape sequences (spec 2.5)
22/// - HTML pass-through for lowercase tags (spec 2.6)
23/// - Error nodes for malformed constructs (spec 3)
24pub fn parse(input: &str) -> Root {
25    let sm = SourceMap::new(input);
26    let (frontmatter, body_start) = frontmatter::extract_frontmatter(input);
27    let body = &input[body_start..];
28    let children = parse_body(body, body_start, &sm, input);
29
30    Root {
31        node_type: RootType::Root,
32        frontmatter,
33        children,
34        position: sm.position(0, input.len()),
35    }
36}
37
/// Check a variable path against the spec 2.4.1 grammar:
/// `[a-zA-Z_][a-zA-Z0-9_]*(\.[a-zA-Z_][a-zA-Z0-9_]*)*`
///
/// Returns `true` only when every dot-separated segment is a non-empty ASCII
/// identifier (leading letter or underscore, then letters, digits or
/// underscores). The empty string, leading/trailing dots and doubled dots are
/// all rejected because they produce an empty segment.
pub(crate) fn is_valid_variable_path(path: &str) -> bool {
    // One segment is valid when its first byte is an identifier start and all
    // remaining bytes are identifier continuations. An empty segment has no
    // first byte and therefore fails.
    let is_identifier = |segment: &str| {
        let mut rest = segment.bytes();
        match rest.next() {
            Some(head) if head.is_ascii_alphabetic() || head == b'_' => {
                rest.all(|b| b.is_ascii_alphanumeric() || b == b'_')
            }
            _ => false,
        }
    };

    // Splitting "" yields a single empty segment, so the empty path is
    // rejected without a separate check.
    path.split('.').all(is_identifier)
}
60
61/// Recursively parse a body region into AST nodes.
62/// Splits into markdown vs block-component segments, processes each accordingly.
63fn parse_body(body: &str, base_offset: usize, sm: &SourceMap, full_input: &str) -> Vec<Node> {
64    let segments = scanner::scan_segments(body, base_offset, sm);
65    let mut nodes = Vec::new();
66
67    for seg in segments {
68        match seg {
69            Segment::Markdown { start, end } => {
70                let text = &full_input[start..end];
71                nodes.extend(markdown::parse_markdown_region(text, start, sm, full_input));
72            }
73            Segment::BlockComponent {
74                tag,
75                body_start,
76                body_end,
77                close_end,
78            } => {
79                let inner = if body_start <= body_end {
80                    &full_input[body_start..body_end]
81                } else {
82                    ""
83                };
84                let children = parse_body(inner, body_start, sm, full_input);
85                let raw_content = if body_start <= body_end {
86                    full_input[body_start..body_end].to_string()
87                } else {
88                    String::new()
89                };
90                nodes.push(Node::Component(ComponentNode {
91                    name: tag.name,
92                    is_inline: false,
93                    attributes: tag.attributes,
94                    children,
95                    raw_content,
96                    position: sm.position(tag.start, close_end),
97                }));
98            }
99            Segment::BlockSelfClosing { tag } => {
100                nodes.push(Node::Component(ComponentNode {
101                    name: tag.name,
102                    is_inline: false,
103                    attributes: tag.attributes,
104                    children: vec![],
105                    raw_content: String::new(),
106                    position: sm.position(tag.start, tag.end),
107                }));
108            }
109            Segment::MathBlock {
110                value_start,
111                value_end,
112                block_end,
113            } => {
114                let value = if value_start <= value_end {
115                    full_input[value_start..value_end].to_string()
116                } else {
117                    String::new()
118                };
119                nodes.push(Node::MathDisplay(TextNode {
120                    value,
121                    position: sm.position(value_start.saturating_sub(3), block_end), // include $$
122                }));
123            }
124            Segment::Error {
125                message,
126                raw,
127                start,
128                end,
129            } => {
130                nodes.push(Node::Error(ErrorNode {
131                    message,
132                    raw_content: raw,
133                    position: sm.position(start, end),
134                }));
135            }
136        }
137    }
138
139    nodes
140}
141
#[cfg(test)]
mod tests {
    use super::*;

    /// The empty string parses to a bare root: no frontmatter, no children.
    #[test]
    fn empty_document() {
        let doc = parse("");
        assert_eq!(doc.node_type, RootType::Root);
        assert_eq!(doc.frontmatter, None);
        assert!(doc.children.is_empty());
    }

    /// A document consisting only of a frontmatter block exposes its keys.
    #[test]
    fn frontmatter_only() {
        let doc = parse("---\ntitle: Hello\nversion: 2\n---\n");
        assert!(doc.frontmatter.is_some());
        let meta = doc.frontmatter.unwrap();
        assert_eq!(meta["title"], "Hello");
        assert_eq!(meta["version"], 2);
    }

    /// Frontmatter is recognised even when the closing fence ends the input.
    #[test]
    fn frontmatter_no_trailing_content() {
        let doc = parse("---\nfoo: bar\n---");
        assert!(doc.frontmatter.is_some());
        assert_eq!(doc.frontmatter.unwrap()["foo"], "bar");
    }

    /// A `---` fence that does not start on the first line is not frontmatter.
    #[test]
    fn no_frontmatter_when_not_at_line1() {
        let doc = parse("\n---\ntitle: Hello\n---\n");
        assert_eq!(doc.frontmatter, None);
    }

    /// Frontmatter followed by markdown yields both metadata and body nodes.
    #[test]
    fn frontmatter_plus_content() {
        let source = "---\ntitle: Test\n---\n# Hello\n";
        let doc = parse(source);
        assert!(doc.frontmatter.is_some());
        assert_eq!(doc.frontmatter.unwrap()["title"], "Test");
        assert!(matches!(&doc.children[0], Node::Heading(_)));
    }

    /// A level-one ATX heading becomes a heading node wrapping a text node.
    #[test]
    fn pure_markdown_heading() {
        let doc = parse("# Hello World\n");
        let first = &doc.children[0];
        let Node::Heading(heading) = first else {
            panic!("Expected heading, got {:?}", first);
        };
        assert_eq!(heading.depth, Some(1));

        let label = &heading.children[0];
        let Node::Text(text) = label else {
            panic!("Expected text, got {:?}", label);
        };
        assert_eq!(text.value, "Hello World");
    }

    /// `**bold**` in the middle of a sentence shows up as the second child.
    #[test]
    fn paragraph_with_emphasis() {
        let doc = parse("This is **bold** text.\n");
        let first = &doc.children[0];
        let Node::Paragraph(para) = first else {
            panic!("Expected paragraph, got {:?}", first);
        };
        assert!(para.children.len() >= 3);
        assert!(matches!(&para.children[1], Node::Strong(_)));
    }

    /// A self-closing tag on its own line is a block component with attributes.
    #[test]
    fn self_closing_block_component() {
        let doc = parse("<Badge status=\"beta\" />\n");
        let first = &doc.children[0];
        let Node::Component(badge) = first else {
            panic!("Expected component, got {:?}", first);
        };
        assert_eq!(badge.name, "Badge");
        assert!(!badge.is_inline);
        assert!(badge.children.is_empty());
        assert_eq!(badge.attributes[0].name, "status");
        assert_eq!(
            badge.attributes[0].value,
            AttributeValue::String("beta".into())
        );
    }

    /// An open/close tag pair wrapping text produces a component with children.
    #[test]
    fn block_component_with_children() {
        let doc = parse("<Notice type=\"warning\">\nThis is a warning.\n</Notice>\n");
        let first = &doc.children[0];
        let Node::Component(notice) = first else {
            panic!("Expected component, got {:?}", first);
        };
        assert_eq!(notice.name, "Notice");
        assert!(!notice.is_inline);
        assert!(!notice.children.is_empty());
    }

    /// A component nested inside another appears as the outer one's first child.
    #[test]
    fn nested_components() {
        let doc = parse("<Outer>\n<Inner>\nText\n</Inner>\n</Outer>\n");
        let first = &doc.children[0];
        let Node::Component(outer) = first else {
            panic!("Expected outer, got {:?}", first);
        };
        assert_eq!(outer.name, "Outer");

        let nested = &outer.children[0];
        let Node::Component(inner) = nested else {
            panic!("Expected inner, got {:?}", nested);
        };
        assert_eq!(inner.name, "Inner");
    }

    /// A self-closing tag inside a sentence is an inline component of the paragraph.
    #[test]
    fn inline_self_closing_component() {
        let doc = parse("Text with <Badge /> inline.\n");
        let first = &doc.children[0];
        let Node::Paragraph(para) = first else {
            panic!("Expected paragraph, got {:?}", first);
        };
        let has_badge = para
            .children
            .iter()
            .any(|n| matches!(n, Node::Component(c) if c.name == "Badge" && c.is_inline));
        assert!(has_badge, "Should contain inline Badge: {:?}", para.children);
    }

    /// An opening tag with no matching close tag yields an error node.
    #[test]
    fn unclosed_block_component() {
        let doc = parse("<Notice>\nContent\n");
        let has_error = doc.children.iter().any(|n| matches!(n, Node::Error(_)));
        assert!(
            has_error,
            "Should have error for unclosed tag: {:?}",
            doc.children
        );
    }

    /// A close tag with no preceding open tag yields an error node.
    #[test]
    fn stray_close_tag() {
        let doc = parse("</Notice>\n");
        let has_error = doc.children.iter().any(|n| matches!(n, Node::Error(_)));
        assert!(
            has_error,
            "Should have error for stray close tag: {:?}",
            doc.children
        );
    }

    /// Lowercase HTML tags are never turned into component nodes.
    #[test]
    fn html_passthrough() {
        let doc = parse("<div>hello</div>\n");
        let has_component = doc
            .children
            .iter()
            .any(|n| matches!(n, Node::Component(_)));
        assert!(
            !has_component,
            "Lowercase HTML should not be component: {:?}",
            doc.children
        );
    }

    /// A `---` in the middle of a document is a thematic break, not frontmatter.
    #[test]
    fn thematic_break_not_frontmatter() {
        let doc = parse("Hello\n\n---\n\nWorld\n");
        assert_eq!(doc.frontmatter, None);
        let has_break = doc
            .children
            .iter()
            .any(|n| matches!(n, Node::ThematicBreak(_)));
        assert!(has_break);
    }

    /// The root position starts at line 1, column 1, offset 0.
    #[test]
    fn position_tracking() {
        let doc = parse("# Hi\n");
        let origin = &doc.position.start;
        assert_eq!(origin.line, 1);
        assert_eq!(origin.column, 1);
        assert_eq!(origin.offset, 0);
    }

    /// Markdown before and after a block component keeps document order.
    #[test]
    fn mixed_markdown_and_components() {
        let source =
            "# Title\n\n<Notice type=\"info\">\nSome **bold** content.\n</Notice>\n\nMore text.\n";
        let doc = parse(source);
        assert!(doc.children.len() >= 3);
        assert!(matches!(&doc.children[0], Node::Heading(_)));
        assert!(matches!(&doc.children[1], Node::Component(_)));
        assert!(matches!(&doc.children[2], Node::Paragraph(_)));
    }

    /// Markdown inside a component body is parsed into child nodes.
    #[test]
    fn component_with_markdown_children() {
        let doc = parse("<Notice>\n**Bold** and *italic*.\n</Notice>\n");
        let first = &doc.children[0];
        let Node::Component(notice) = first else {
            panic!("Expected component, got {:?}", first);
        };
        assert_eq!(notice.name, "Notice");
        assert!(!notice.children.is_empty());
    }

    /// A two-item bullet list parses as an unordered list with two children.
    #[test]
    fn list_parsing() {
        let doc = parse("- item 1\n- item 2\n");
        let first = &doc.children[0];
        let Node::List(list) = first else {
            panic!("Expected list, got {:?}", first);
        };
        assert_eq!(list.ordered, Some(false));
        assert_eq!(list.children.len(), 2);
    }

    /// A numbered list is flagged as ordered.
    #[test]
    fn ordered_list() {
        let doc = parse("1. first\n2. second\n");
        let first = &doc.children[0];
        let Node::List(list) = first else {
            panic!("Expected list, got {:?}", first);
        };
        assert_eq!(list.ordered, Some(true));
    }

    /// A `>`-prefixed line parses as a blockquote.
    #[test]
    fn blockquote() {
        let doc = parse("> quoted text\n");
        assert!(matches!(&doc.children[0], Node::Blockquote(_)));
    }

    /// `~~text~~` produces a strikethrough node inside the paragraph.
    #[test]
    fn strikethrough() {
        let doc = parse("~~deleted~~\n");
        let first = &doc.children[0];
        let Node::Paragraph(para) = first else {
            panic!("Expected paragraph, got {:?}", first);
        };
        let has_strike = para
            .children
            .iter()
            .any(|n| matches!(n, Node::Strikethrough(_)));
        assert!(has_strike);
    }

    /// Task-list markers set `checked` on each list item.
    #[test]
    fn task_list() {
        let doc = parse("- [x] done\n- [ ] todo\n");
        let first = &doc.children[0];
        let Node::List(list) = first else {
            panic!("Expected list, got {:?}", first);
        };
        assert_eq!(list.children.len(), 2);

        let done = &list.children[0];
        let Node::ListItem(done_item) = done else {
            panic!("Expected list item, got {:?}", done);
        };
        assert_eq!(done_item.checked, Some(true));

        let todo = &list.children[1];
        let Node::ListItem(todo_item) = todo else {
            panic!("Expected list item, got {:?}", todo);
        };
        assert_eq!(todo_item.checked, Some(false));
    }

    /// An inline link keeps its URL, its title and its link text children.
    #[test]
    fn link_with_url_and_title() {
        let doc = parse("[click](https://example.com \"My Title\")\n");
        let first = &doc.children[0];
        let Node::Paragraph(para) = first else {
            panic!("Expected paragraph, got {:?}", first);
        };

        let inline = &para.children[0];
        let Node::Link(link) = inline else {
            panic!("Expected link, got {:?}", inline);
        };
        assert_eq!(link.url, "https://example.com");
        assert_eq!(link.title.as_deref(), Some("My Title"));
        assert!(!link.children.is_empty());
    }

    /// An inline image keeps its URL.
    #[test]
    fn image_with_url() {
        let doc = parse("![alt text](image.png)\n");
        let first = &doc.children[0];
        let Node::Paragraph(para) = first else {
            panic!("Expected paragraph, got {:?}", first);
        };

        let inline = &para.children[0];
        let Node::Image(image) = inline else {
            panic!("Expected image, got {:?}", inline);
        };
        assert_eq!(image.url, "image.png");
    }

    /// A fenced code block records its info-string language and raw content.
    #[test]
    fn code_block_with_language() {
        let doc = parse("```rust\nlet x = 1;\n```\n");
        let first = &doc.children[0];
        let Node::CodeBlock(code) = first else {
            panic!("Expected code block, got {:?}", first);
        };
        assert_eq!(code.lang.as_deref(), Some("rust"));
        assert_eq!(code.value, "let x = 1;\n");
    }

    /// A footnote produces both an inline reference and a top-level definition.
    #[test]
    fn footnote() {
        let doc = parse("Text[^1].\n\n[^1]: Footnote content.\n");
        let has_ref = doc.children.iter().any(|n| {
            matches!(
                n,
                Node::Paragraph(p)
                    if p.children
                        .iter()
                        .any(|c| matches!(c, Node::FootnoteReference(_)))
            )
        });
        let has_def = doc
            .children
            .iter()
            .any(|n| matches!(n, Node::FootnoteDefinition(_)));
        assert!(
            has_ref,
            "Should have footnote reference: {:?}",
            doc.children
        );
        assert!(
            has_def,
            "Should have footnote definition: {:?}",
            doc.children
        );
    }

    /// `$…$` inside a sentence becomes an inline math node with the bare formula.
    #[test]
    fn inline_math() {
        let doc = parse("The equation $x^2 + y^2 = z^2$ is famous.\n");
        let first = &doc.children[0];
        let Node::Paragraph(para) = first else {
            panic!("Expected paragraph, got {:?}", first);
        };
        let has_math = para
            .children
            .iter()
            .any(|n| matches!(n, Node::MathInline(t) if t.value == "x^2 + y^2 = z^2"));
        assert!(has_math, "Should contain inline math: {:?}", para.children);
    }

    /// A `$$` fenced block becomes a display math node at the top level.
    #[test]
    fn display_math_block() {
        let doc = parse("$$\nE = mc^2\n$$\n");
        let has_math = doc
            .children
            .iter()
            .any(|n| matches!(n, Node::MathDisplay(t) if t.value.contains("E = mc^2")));
        assert!(has_math, "Should contain display math: {:?}", doc.children);
    }

    /// `{$name}` is a variable interpolation and must not be read as math.
    #[test]
    fn math_does_not_conflict_with_variables() {
        let doc = parse("Price is {$amount} dollars.\n");
        let first = &doc.children[0];
        let Node::Paragraph(para) = first else {
            panic!("Expected paragraph, got {:?}", first);
        };
        let has_var = para
            .children
            .iter()
            .any(|n| matches!(n, Node::Variable(v) if v.path == "amount"));
        let has_math = para
            .children
            .iter()
            .any(|n| matches!(n, Node::MathInline(_)));
        assert!(has_var, "Should contain variable: {:?}", para.children);
        assert!(!has_math, "Should NOT contain math: {:?}", para.children);
    }

    /// Accepted and rejected examples for the variable path grammar.
    #[test]
    fn variable_path_validation() {
        assert!(is_valid_variable_path("title"));
        assert!(is_valid_variable_path("frontmatter.title"));
        assert!(is_valid_variable_path("config.theme_name"));
        assert!(is_valid_variable_path("_private"));
        assert!(!is_valid_variable_path(""));
        assert!(!is_valid_variable_path("123abc"));
        assert!(!is_valid_variable_path("foo..bar"));
        assert!(!is_valid_variable_path(".foo"));
        assert!(!is_valid_variable_path("foo."));
    }

    /// A `~~~` fence must not be closed by a ``` line: the backticks stay
    /// inside the code block as content.
    #[test]
    fn mixed_fence_chars_not_cross_closed() {
        let doc = parse("~~~\nstill fenced\n```\nstill fenced\n~~~\n\nAfter fence.\n");
        let first = &doc.children[0];
        let Node::CodeBlock(code) = first else {
            panic!("Expected code block, got {:?}", first);
        };
        assert!(
            code.value.contains("```"),
            "``` should be content inside ~~~ fence: {:?}",
            code.value
        );
        assert!(
            code.value.contains("still fenced"),
            "Content should be preserved: {:?}",
            code.value
        );
    }
}