Skip to main content

rdx_parser/
lib.rs

1pub use rdx_ast::*;
2
3mod attributes;
4mod frontmatter;
5mod markdown;
6mod scanner;
7mod source_map;
8mod tags;
9mod text;
10
11use scanner::Segment;
12use source_map::SourceMap;
13
14/// Parse an RDX document string into a compliant AST.
15///
16/// This is the primary entry point for the parser. It handles:
17/// - YAML frontmatter extraction (spec 2.1)
18/// - CommonMark block structure via pulldown-cmark
19/// - RDX component tags with typed attributes (spec 2.2, 2.3)
20/// - Variable interpolation in text (spec 2.4)
21/// - Escape sequences (spec 2.5)
22/// - HTML pass-through for lowercase tags (spec 2.6)
23/// - Error nodes for malformed constructs (spec 3)
24pub fn parse(input: &str) -> Root {
25    let sm = SourceMap::new(input);
26    let (frontmatter, body_start) = frontmatter::extract_frontmatter(input);
27    let body = &input[body_start..];
28    let children = parse_body(body, body_start, &sm, input);
29
30    Root {
31        node_type: RootType::Root,
32        frontmatter,
33        children,
34        position: sm.position(0, input.len()),
35    }
36}
37
/// Validate a variable path against spec 2.4.1 grammar:
/// `[a-zA-Z_][a-zA-Z0-9_]*(\.[a-zA-Z_][a-zA-Z0-9_]*)*`
///
/// Returns `true` only when `path` is a non-empty, dot-separated sequence of
/// ASCII identifiers. Empty paths, empty segments (leading, trailing or doubled
/// dots), segments starting with a digit, and any non-ASCII or punctuation
/// byte are rejected.
pub(crate) fn is_valid_variable_path(path: &str) -> bool {
    /// A single segment: `[a-zA-Z_][a-zA-Z0-9_]*`.
    fn is_identifier(segment: &str) -> bool {
        let mut bytes = segment.bytes();
        match bytes.next() {
            Some(first) if first == b'_' || first.is_ascii_alphabetic() => {
                bytes.all(|b| b == b'_' || b.is_ascii_alphanumeric())
            }
            // Empty segment or an invalid leading byte.
            _ => false,
        }
    }

    // `"".split('.')` yields one empty segment, which `is_identifier` already
    // rejects; the explicit emptiness check just states the intent up front.
    !path.is_empty() && path.split('.').all(is_identifier)
}
60
61/// Recursively parse a body region into AST nodes.
62/// Splits into markdown vs block-component segments, processes each accordingly.
63fn parse_body(body: &str, base_offset: usize, sm: &SourceMap, full_input: &str) -> Vec<Node> {
64    let segments = scanner::scan_segments(body, base_offset, sm);
65    let mut nodes = Vec::new();
66
67    for seg in segments {
68        match seg {
69            Segment::Markdown { start, end } => {
70                let text = &full_input[start..end];
71                nodes.extend(markdown::parse_markdown_region(text, start, sm, full_input));
72            }
73            Segment::BlockComponent {
74                tag,
75                body_start,
76                body_end,
77                close_end,
78            } => {
79                let inner = if body_start <= body_end {
80                    &full_input[body_start..body_end]
81                } else {
82                    ""
83                };
84                let children = parse_body(inner, body_start, sm, full_input);
85                let raw_content = if body_start <= body_end {
86                    full_input[body_start..body_end].to_string()
87                } else {
88                    String::new()
89                };
90                nodes.push(Node::Component(ComponentNode {
91                    name: tag.name,
92                    is_inline: false,
93                    attributes: tag.attributes,
94                    children,
95                    raw_content,
96                    position: sm.position(tag.start, close_end),
97                }));
98            }
99            Segment::BlockSelfClosing { tag } => {
100                nodes.push(Node::Component(ComponentNode {
101                    name: tag.name,
102                    is_inline: false,
103                    attributes: tag.attributes,
104                    children: vec![],
105                    raw_content: String::new(),
106                    position: sm.position(tag.start, tag.end),
107                }));
108            }
109            Segment::MathBlock {
110                value_start,
111                value_end,
112                block_end,
113                label,
114            } => {
115                let raw = if value_start <= value_end {
116                    full_input[value_start..value_end].to_string()
117                } else {
118                    String::new()
119                };
120                let tree = rdx_math::parse(&raw);
121                nodes.push(Node::MathDisplay(MathDisplayNode {
122                    raw,
123                    tree,
124                    label,
125                    position: sm.position(value_start.saturating_sub(3), block_end), // include $$
126                }));
127            }
128            Segment::Error {
129                message,
130                raw,
131                start,
132                end,
133            } => {
134                nodes.push(Node::Error(ErrorNode {
135                    message,
136                    raw_content: raw,
137                    position: sm.position(start, end),
138                }));
139            }
140        }
141    }
142
143    nodes
144}
145
/// Tests that drive `parse` on small documents and inspect the resulting AST,
/// plus a direct unit test for `is_valid_variable_path`.
#[cfg(test)]
mod tests {
    use super::*;

    // --- Frontmatter -------------------------------------------------------

    #[test]
    fn empty_document() {
        let root = parse("");
        assert_eq!(root.node_type, RootType::Root);
        assert_eq!(root.frontmatter, None);
        assert!(root.children.is_empty());
    }

    #[test]
    fn frontmatter_only() {
        let root = parse("---\ntitle: Hello\nversion: 2\n---\n");
        let fm = root.frontmatter.expect("frontmatter should be present");
        assert_eq!(fm["title"], "Hello");
        assert_eq!(fm["version"], 2);
    }

    #[test]
    fn frontmatter_no_trailing_content() {
        let root = parse("---\nfoo: bar\n---");
        let fm = root.frontmatter.expect("frontmatter should be present");
        assert_eq!(fm["foo"], "bar");
    }

    #[test]
    fn no_frontmatter_when_not_at_line1() {
        let root = parse("\n---\ntitle: Hello\n---\n");
        assert_eq!(root.frontmatter, None);
    }

    #[test]
    fn frontmatter_plus_content() {
        let input = "---\ntitle: Test\n---\n# Hello\n";
        let root = parse(input);
        let fm = root.frontmatter.expect("frontmatter should be present");
        assert_eq!(fm["title"], "Test");
        assert!(matches!(&root.children[0], Node::Heading(_)));
    }

    // --- Basic markdown ----------------------------------------------------

    #[test]
    fn pure_markdown_heading() {
        let root = parse("# Hello World\n");
        match &root.children[0] {
            Node::Heading(block) => {
                assert_eq!(block.depth, Some(1));
                match &block.children[0] {
                    Node::Text(t) => assert_eq!(t.value, "Hello World"),
                    other => panic!("Expected text, got {:?}", other),
                }
            }
            other => panic!("Expected heading, got {:?}", other),
        }
    }

    #[test]
    fn paragraph_with_emphasis() {
        let root = parse("This is **bold** text.\n");
        match &root.children[0] {
            Node::Paragraph(block) => {
                assert!(block.children.len() >= 3);
                assert!(matches!(&block.children[1], Node::Strong(_)));
            }
            other => panic!("Expected paragraph, got {:?}", other),
        }
    }

    // --- Components --------------------------------------------------------

    #[test]
    fn self_closing_block_component() {
        let root = parse("<Badge status=\"beta\" />\n");
        match &root.children[0] {
            Node::Component(c) => {
                assert_eq!(c.name, "Badge");
                assert!(!c.is_inline);
                assert!(c.children.is_empty());
                assert_eq!(c.attributes[0].name, "status");
                assert_eq!(c.attributes[0].value, AttributeValue::String("beta".into()));
            }
            other => panic!("Expected component, got {:?}", other),
        }
    }

    #[test]
    fn block_component_with_children() {
        let root = parse("<Notice type=\"warning\">\nThis is a warning.\n</Notice>\n");
        match &root.children[0] {
            Node::Component(c) => {
                assert_eq!(c.name, "Notice");
                assert!(!c.is_inline);
                assert!(!c.children.is_empty());
            }
            other => panic!("Expected component, got {:?}", other),
        }
    }

    #[test]
    fn nested_components() {
        let root = parse("<Outer>\n<Inner>\nText\n</Inner>\n</Outer>\n");
        match &root.children[0] {
            Node::Component(outer) => {
                assert_eq!(outer.name, "Outer");
                match &outer.children[0] {
                    Node::Component(inner) => assert_eq!(inner.name, "Inner"),
                    other => panic!("Expected inner, got {:?}", other),
                }
            }
            other => panic!("Expected outer, got {:?}", other),
        }
    }

    #[test]
    fn inline_self_closing_component() {
        let root = parse("Text with <Badge /> inline.\n");
        match &root.children[0] {
            Node::Paragraph(p) => {
                let has_badge = p
                    .children
                    .iter()
                    .any(|n| matches!(n, Node::Component(c) if c.name == "Badge" && c.is_inline));
                assert!(has_badge, "Should contain inline Badge: {:?}", p.children);
            }
            other => panic!("Expected paragraph, got {:?}", other),
        }
    }

    #[test]
    fn unclosed_block_component() {
        let root = parse("<Notice>\nContent\n");
        let has_error = root.children.iter().any(|n| matches!(n, Node::Error(_)));
        assert!(
            has_error,
            "Should have error for unclosed tag: {:?}",
            root.children
        );
    }

    #[test]
    fn stray_close_tag() {
        let root = parse("</Notice>\n");
        let has_error = root.children.iter().any(|n| matches!(n, Node::Error(_)));
        assert!(
            has_error,
            "Should have error for stray close tag: {:?}",
            root.children
        );
    }

    #[test]
    fn html_passthrough() {
        let root = parse("<div>hello</div>\n");
        let has_component = root
            .children
            .iter()
            .any(|n| matches!(n, Node::Component(_)));
        assert!(
            !has_component,
            "Lowercase HTML should not be component: {:?}",
            root.children
        );
    }

    #[test]
    fn thematic_break_not_frontmatter() {
        let root = parse("Hello\n\n---\n\nWorld\n");
        assert_eq!(root.frontmatter, None);
        let has_break = root
            .children
            .iter()
            .any(|n| matches!(n, Node::ThematicBreak(_)));
        assert!(has_break);
    }

    #[test]
    fn position_tracking() {
        let root = parse("# Hi\n");
        assert_eq!(root.position.start.line, 1);
        assert_eq!(root.position.start.column, 1);
        assert_eq!(root.position.start.offset, 0);
    }

    #[test]
    fn mixed_markdown_and_components() {
        let input =
            "# Title\n\n<Notice type=\"info\">\nSome **bold** content.\n</Notice>\n\nMore text.\n";
        let root = parse(input);
        assert!(root.children.len() >= 3);
        assert!(matches!(&root.children[0], Node::Heading(_)));
        assert!(matches!(&root.children[1], Node::Component(_)));
        assert!(matches!(&root.children[2], Node::Paragraph(_)));
    }

    #[test]
    fn component_with_markdown_children() {
        let root = parse("<Notice>\n**Bold** and *italic*.\n</Notice>\n");
        match &root.children[0] {
            Node::Component(c) => {
                assert_eq!(c.name, "Notice");
                assert!(!c.children.is_empty());
            }
            other => panic!("Expected component, got {:?}", other),
        }
    }

    // --- Lists, quotes, inline formatting ----------------------------------

    #[test]
    fn list_parsing() {
        let root = parse("- item 1\n- item 2\n");
        match &root.children[0] {
            Node::List(l) => {
                assert_eq!(l.ordered, Some(false));
                assert_eq!(l.children.len(), 2);
            }
            other => panic!("Expected list, got {:?}", other),
        }
    }

    #[test]
    fn ordered_list() {
        let root = parse("1. first\n2. second\n");
        match &root.children[0] {
            Node::List(l) => assert_eq!(l.ordered, Some(true)),
            other => panic!("Expected list, got {:?}", other),
        }
    }

    #[test]
    fn blockquote() {
        let root = parse("> quoted text\n");
        assert!(matches!(&root.children[0], Node::Blockquote(_)));
    }

    #[test]
    fn strikethrough() {
        let root = parse("~~deleted~~\n");
        match &root.children[0] {
            Node::Paragraph(p) => {
                assert!(
                    p.children
                        .iter()
                        .any(|n| matches!(n, Node::Strikethrough(_)))
                );
            }
            other => panic!("Expected paragraph, got {:?}", other),
        }
    }

    #[test]
    fn task_list() {
        let root = parse("- [x] done\n- [ ] todo\n");
        match &root.children[0] {
            Node::List(l) => {
                assert_eq!(l.children.len(), 2);
                match &l.children[0] {
                    Node::ListItem(li) => assert_eq!(li.checked, Some(true)),
                    other => panic!("Expected list item, got {:?}", other),
                }
                match &l.children[1] {
                    Node::ListItem(li) => assert_eq!(li.checked, Some(false)),
                    other => panic!("Expected list item, got {:?}", other),
                }
            }
            other => panic!("Expected list, got {:?}", other),
        }
    }

    #[test]
    fn link_with_url_and_title() {
        let root = parse("[click](https://example.com \"My Title\")\n");
        match &root.children[0] {
            Node::Paragraph(p) => match &p.children[0] {
                Node::Link(l) => {
                    assert_eq!(l.url, "https://example.com");
                    assert_eq!(l.title.as_deref(), Some("My Title"));
                    assert!(!l.children.is_empty());
                }
                other => panic!("Expected link, got {:?}", other),
            },
            other => panic!("Expected paragraph, got {:?}", other),
        }
    }

    #[test]
    fn image_with_url() {
        let root = parse("![alt text](image.png)\n");
        match &root.children[0] {
            Node::Paragraph(p) => match &p.children[0] {
                Node::Image(i) => {
                    assert_eq!(i.url, "image.png");
                }
                other => panic!("Expected image, got {:?}", other),
            },
            other => panic!("Expected paragraph, got {:?}", other),
        }
    }

    #[test]
    fn code_block_with_language() {
        let root = parse("```rust\nlet x = 1;\n```\n");
        match &root.children[0] {
            Node::CodeBlock(cb) => {
                assert_eq!(cb.lang.as_deref(), Some("rust"));
                assert_eq!(cb.value, "let x = 1;\n");
            }
            other => panic!("Expected code block, got {:?}", other),
        }
    }

    #[test]
    fn footnote() {
        let root = parse("Text[^1].\n\n[^1]: Footnote content.\n");
        let has_ref = root.children.iter().any(|n| {
            if let Node::Paragraph(p) = n {
                p.children
                    .iter()
                    .any(|c| matches!(c, Node::FootnoteReference(_)))
            } else {
                false
            }
        });
        let has_def = root
            .children
            .iter()
            .any(|n| matches!(n, Node::FootnoteDefinition(_)));
        assert!(
            has_ref,
            "Should have footnote reference: {:?}",
            root.children
        );
        assert!(
            has_def,
            "Should have footnote definition: {:?}",
            root.children
        );
    }

    // --- Math and variables ------------------------------------------------

    #[test]
    fn inline_math() {
        let root = parse("The equation $x^2 + y^2 = z^2$ is famous.\n");
        match &root.children[0] {
            Node::Paragraph(p) => {
                let has_math = p
                    .children
                    .iter()
                    .any(|n| matches!(n, Node::MathInline(t) if t.raw == "x^2 + y^2 = z^2"));
                assert!(has_math, "Should contain inline math: {:?}", p.children);
            }
            other => panic!("Expected paragraph, got {:?}", other),
        }
    }

    #[test]
    fn display_math_block() {
        let root = parse("$$\nE = mc^2\n$$\n");
        let has_math = root
            .children
            .iter()
            .any(|n| matches!(n, Node::MathDisplay(t) if t.raw.contains("E = mc^2")));
        assert!(has_math, "Should contain display math: {:?}", root.children);
    }

    #[test]
    fn math_does_not_conflict_with_variables() {
        let root = parse("Price is {$amount} dollars.\n");
        match &root.children[0] {
            Node::Paragraph(p) => {
                let has_var = p
                    .children
                    .iter()
                    .any(|n| matches!(n, Node::Variable(v) if v.path == "amount"));
                let has_math = p.children.iter().any(|n| matches!(n, Node::MathInline(_)));
                assert!(has_var, "Should contain variable: {:?}", p.children);
                assert!(!has_math, "Should NOT contain math: {:?}", p.children);
            }
            other => panic!("Expected paragraph, got {:?}", other),
        }
    }

    #[test]
    fn variable_path_validation() {
        assert!(is_valid_variable_path("title"));
        assert!(is_valid_variable_path("frontmatter.title"));
        assert!(is_valid_variable_path("config.theme_name"));
        assert!(is_valid_variable_path("_private"));
        assert!(!is_valid_variable_path(""));
        assert!(!is_valid_variable_path("123abc"));
        assert!(!is_valid_variable_path("foo..bar"));
        assert!(!is_valid_variable_path(".foo"));
        assert!(!is_valid_variable_path("foo."));
    }

    #[test]
    fn mixed_fence_chars_not_cross_closed() {
        // A ~~~ fence should NOT be closed by ```
        let root = parse("~~~\nstill fenced\n```\nstill fenced\n~~~\n\nAfter fence.\n");
        // The ``` inside should be treated as content, not close the fence
        // "After fence." should be a separate paragraph, not part of code block content
        match &root.children[0] {
            Node::CodeBlock(cb) => {
                assert!(
                    cb.value.contains("```"),
                    "``` should be content inside ~~~ fence: {:?}",
                    cb.value
                );
                assert!(
                    cb.value.contains("still fenced"),
                    "Content should be preserved: {:?}",
                    cb.value
                );
            }
            other => panic!("Expected code block, got {:?}", other),
        }
    }

    #[test]
    fn display_math_block_with_label() {
        let root = parse("$$ {#eq:euler}\nE = mc^2\n$$\n");
        let math = root.children.iter().find_map(|n| {
            if let Node::MathDisplay(m) = n {
                Some(m)
            } else {
                None
            }
        });
        assert!(
            math.is_some(),
            "Should have display math: {:?}",
            root.children
        );
        let m = math.unwrap();
        assert!(
            m.raw.contains("E = mc^2"),
            "Math raw should contain content"
        );
        assert_eq!(m.label.as_deref(), Some("eq:euler"), "Should have label");
    }

    #[test]
    fn display_math_block_without_label() {
        let root = parse("$$\nE = mc^2\n$$\n");
        let math = root.children.iter().find_map(|n| {
            if let Node::MathDisplay(m) = n {
                Some(m)
            } else {
                None
            }
        });
        assert!(
            math.is_some(),
            "Should have display math: {:?}",
            root.children
        );
        let m = math.unwrap();
        assert!(m.label.is_none(), "Should have no label");
    }

    // --- Code block metadata and inline code -------------------------------

    #[test]
    fn code_block_meta_title() {
        let root = parse("```rust title=\"main.rs\"\nfn main() {}\n```\n");
        match &root.children[0] {
            Node::CodeBlock(cb) => {
                assert_eq!(cb.lang.as_deref(), Some("rust"));
                assert_eq!(cb.title.as_deref(), Some("main.rs"));
            }
            other => panic!("Expected code block, got {:?}", other),
        }
    }

    #[test]
    fn code_block_meta_highlight_lines() {
        let root = parse("```rust {3-5,12}\ncode\n```\n");
        match &root.children[0] {
            Node::CodeBlock(cb) => {
                assert_eq!(cb.lang.as_deref(), Some("rust"));
                let hl = cb.highlight.as_ref().expect("Should have highlight");
                assert_eq!(hl, &[3, 4, 5, 12]);
            }
            other => panic!("Expected code block, got {:?}", other),
        }
    }

    #[test]
    fn code_block_meta_show_line_numbers() {
        let root = parse("```rust showLineNumbers\ncode\n```\n");
        match &root.children[0] {
            Node::CodeBlock(cb) => {
                assert_eq!(cb.show_line_numbers, Some(true));
            }
            other => panic!("Expected code block, got {:?}", other),
        }
    }

    #[test]
    fn code_block_meta_diff_flag() {
        let root = parse("```rust diff\ncode\n```\n");
        match &root.children[0] {
            Node::CodeBlock(cb) => {
                assert_eq!(cb.diff, Some(true));
            }
            other => panic!("Expected code block, got {:?}", other),
        }
    }

    #[test]
    fn code_block_meta_caption() {
        let root = parse("```rust caption=\"Listing 1\"\ncode\n```\n");
        match &root.children[0] {
            Node::CodeBlock(cb) => {
                assert_eq!(cb.caption.as_deref(), Some("Listing 1"));
            }
            other => panic!("Expected code block, got {:?}", other),
        }
    }

    #[test]
    fn inline_code_with_lang_hint() {
        let root = parse("Use `SELECT *`{sql} here.\n");
        match &root.children[0] {
            Node::Paragraph(p) => {
                let code = p.children.iter().find_map(|n| {
                    if let Node::CodeInline(c) = n {
                        Some(c)
                    } else {
                        None
                    }
                });
                assert!(code.is_some(), "Should have inline code: {:?}", p.children);
                let c = code.unwrap();
                assert_eq!(c.value, "SELECT *");
                assert_eq!(c.lang.as_deref(), Some("sql"));
            }
            other => panic!("Expected paragraph, got {:?}", other),
        }
    }

    #[test]
    fn definition_list_basic() {
        let root = parse("Term\n: Definition text\n");
        let dl = root.children.iter().find_map(|n| {
            if let Node::DefinitionList(d) = n {
                Some(d)
            } else {
                None
            }
        });
        assert!(
            dl.is_some(),
            "Should have definition list: {:?}",
            root.children
        );
        let dl = dl.unwrap();
        let has_term = dl
            .children
            .iter()
            .any(|n| matches!(n, Node::DefinitionTerm(_)));
        let has_desc = dl
            .children
            .iter()
            .any(|n| matches!(n, Node::DefinitionDescription(_)));
        assert!(has_term, "Should have definition term: {:?}", dl.children);
        assert!(
            has_desc,
            "Should have definition description: {:?}",
            dl.children
        );
    }

    #[test]
    fn inline_code_without_lang_hint() {
        let root = parse("Use `SELECT *` here.\n");
        match &root.children[0] {
            Node::Paragraph(p) => {
                let code = p.children.iter().find_map(|n| {
                    if let Node::CodeInline(c) = n {
                        Some(c)
                    } else {
                        None
                    }
                });
                assert!(code.is_some(), "Should have inline code: {:?}", p.children);
                let c = code.unwrap();
                assert_eq!(c.value, "SELECT *");
                assert!(c.lang.is_none());
            }
            other => panic!("Expected paragraph, got {:?}", other),
        }
    }
}