Skip to main content

mimium_lang/compiler/parser/
green.rs

1/// Green Tree - Concrete Syntax Tree (CST) for full-fidelity representation
2/// Based on the Red-Green Syntax Tree pattern
3///
4/// Green nodes are immutable and position-independent. They represent
5/// the structure of the syntax tree without absolute positions.
6/// Green trees use interning via SlotMap for efficient sharing and caching.
7use slotmap::{SlotMap, new_key_type};
8
9new_key_type! {
10    /// A handle to an interned GreenNode
11    pub struct GreenNodeId;
12}
13
14/// Green node - represents a CST node without position information
15/// This is the "Green" part of Red-Green Syntax Tree
16#[derive(Debug, Clone, PartialEq, Eq)]
17pub enum GreenNode {
18    /// A token node (leaf)
19    Token {
20        token_index: usize,
21        width: usize, // Length in bytes
22    },
23
24    /// An internal node with children
25    Internal {
26        kind: SyntaxKind,
27        children: Vec<GreenNodeId>,
28        width: usize, // Total width of all children
29    },
30}
31
32impl GreenNode {
33    /// Get the width (length in bytes) of this node
34    pub fn width(&self) -> usize {
35        match self {
36            GreenNode::Token { width, .. } => *width,
37            GreenNode::Internal { width, .. } => *width,
38        }
39    }
40
41    /// Get the kind of this node
42    pub fn kind(&self) -> Option<SyntaxKind> {
43        match self {
44            GreenNode::Token { .. } => None,
45            GreenNode::Internal { kind, .. } => Some(*kind),
46        }
47    }
48
49    /// Get children if this is an internal node
50    pub fn children(&self) -> Option<&[GreenNodeId]> {
51        match self {
52            GreenNode::Token { .. } => None,
53            GreenNode::Internal { children, .. } => Some(children),
54        }
55    }
56}
57
58/// Arena for interning Green nodes
59#[derive(Debug, Default, Clone)]
60pub struct GreenNodeArena {
61    nodes: SlotMap<GreenNodeId, GreenNode>,
62}
63
64impl GreenNodeArena {
65    /// Create a new arena
66    pub fn new() -> Self {
67        Self {
68            nodes: SlotMap::with_key(),
69        }
70    }
71
72    /// Intern a token node
73    pub fn alloc_token(&mut self, token_index: usize, width: usize) -> GreenNodeId {
74        self.nodes.insert(GreenNode::Token { token_index, width })
75    }
76
77    /// Intern an internal node
78    pub fn alloc_internal(&mut self, kind: SyntaxKind, children: Vec<GreenNodeId>) -> GreenNodeId {
79        let width = children.iter().map(|&id| self.nodes[id].width()).sum();
80        self.nodes.insert(GreenNode::Internal {
81            kind,
82            children,
83            width,
84        })
85    }
86
87    /// Get a node by its ID
88    pub fn get(&self, id: GreenNodeId) -> &GreenNode {
89        &self.nodes[id]
90    }
91
92    /// Get the width of a node
93    pub fn width(&self, id: GreenNodeId) -> usize {
94        self.nodes[id].width()
95    }
96
97    /// Get the kind of a node
98    pub fn kind(&self, id: GreenNodeId) -> Option<SyntaxKind> {
99        self.nodes[id].kind()
100    }
101
102    /// Get children of a node
103    pub fn children(&self, id: GreenNodeId) -> Option<&[GreenNodeId]> {
104        self.nodes[id].children()
105    }
106
107    /// Pretty-print the CST tree structure with indentation.
108    ///
109    /// # Arguments
110    /// * `node_id` - The root node to print from
111    /// * `tokens` - Token slice for extracting token text
112    /// * `source` - Original source code
113    /// * `indent` - Current indentation level
114    ///
115    /// # Returns
116    /// A formatted string representation of the tree
117    pub fn print_tree(
118        &self,
119        node_id: GreenNodeId,
120        tokens: &[crate::compiler::parser::Token],
121        source: &str,
122        indent: usize,
123    ) -> String {
124        use std::fmt::Write;
125        let mut result = String::new();
126        let indent_str = "  ".repeat(indent);
127
128        match self.get(node_id) {
129            GreenNode::Token { token_index, .. } => {
130                let token = &tokens[*token_index];
131                let text = token.text(source);
132                // Escape newlines and special characters for display
133                let escaped_text = text
134                    .replace('\\', "\\\\")
135                    .replace('\n', "\\n")
136                    .replace('\r', "\\r")
137                    .replace('\t', "\\t");
138                let _ = writeln!(result, "{indent_str}{:?} \"{}\"", token.kind, escaped_text);
139            }
140            GreenNode::Internal { kind, children, .. } => {
141                let _ = writeln!(result, "{indent_str}{kind}");
142                for &child in children {
143                    result.push_str(&self.print_tree(child, tokens, source, indent + 1));
144                }
145            }
146        }
147        result
148    }
149}
150
151/// Syntax kinds - types of CST nodes
152#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
153pub enum SyntaxKind {
154    // Top-level
155    Program,
156    Statement,
157
158    // Declarations
159    FunctionDecl,
160    LetDecl,
161    LetRecDecl,
162
163    // Expressions
164    BinaryExpr,
165    UnaryExpr,
166    ParenExpr,
167    CallExpr,
168    FieldAccess,
169    IndexExpr,
170    AssignExpr,
171    ArrayExpr,
172    MacroExpansion,
173    BracketExpr,
174    EscapeExpr,
175    LambdaExpr,
176    IfExpr,
177    MatchExpr,
178    MatchArm,
179    MatchArmList,
180    MatchPattern,
181    ConstructorPattern,
182    BlockExpr,
183    TupleExpr,
184    RecordExpr,
185
186    // Literals
187    IntLiteral,
188    FloatLiteral,
189    StringLiteral,
190    SelfLiteral,
191    NowLiteral,
192    SampleRateLiteral,
193    PlaceHolderLiteral,
194
195    // Names and paths
196    Identifier,
197
198    // Types
199    TypeAnnotation,
200    PrimitiveType,
201    UnitType,
202    FunctionType,
203    TupleType,
204    RecordType,
205    ArrayType,
206    CodeType,
207    UnionType,
208    TypeIdent,
209
210    // Patterns
211    Pattern,
212    SinglePattern,
213    TuplePattern,
214    RecordPattern,
215
216    // Top-level statements
217    IncludeStmt,
218    StageDecl,
219    ModuleDecl,
220    UseStmt,
221    TypeDecl,
222    VariantDef,
223
224    // Module-related
225    QualifiedPath,
226    VisibilityPub,
227    UseTargetMultiple, // use foo::{bar, baz}
228    UseTargetWildcard, // use foo::*
229
230    // Lists and sequences
231    ParamList,
232    ArgList,
233    ExprList,
234    ParamDefault,
235
236    // Other
237    Error, // For error recovery
238}
239
240impl std::fmt::Display for SyntaxKind {
241    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
242        match self {
243            SyntaxKind::Program => write!(f, "Program"),
244            SyntaxKind::Statement => write!(f, "Statement"),
245            SyntaxKind::FunctionDecl => write!(f, "FunctionDecl"),
246            SyntaxKind::LetDecl => write!(f, "LetDecl"),
247            SyntaxKind::LetRecDecl => write!(f, "LetRecDecl"),
248            SyntaxKind::BinaryExpr => write!(f, "BinaryExpr"),
249            SyntaxKind::UnaryExpr => write!(f, "UnaryExpr"),
250            SyntaxKind::ParenExpr => write!(f, "ParenExpr"),
251            SyntaxKind::CallExpr => write!(f, "CallExpr"),
252            SyntaxKind::FieldAccess => write!(f, "FieldAccess"),
253            SyntaxKind::IndexExpr => write!(f, "IndexExpr"),
254            SyntaxKind::AssignExpr => write!(f, "AssignExpr"),
255            SyntaxKind::ArrayExpr => write!(f, "ArrayExpr"),
256            SyntaxKind::MacroExpansion => write!(f, "MacroExpansion"),
257            SyntaxKind::BracketExpr => write!(f, "BracketExpr"),
258            SyntaxKind::EscapeExpr => write!(f, "EscapeExpr"),
259            SyntaxKind::LambdaExpr => write!(f, "LambdaExpr"),
260            SyntaxKind::IfExpr => write!(f, "IfExpr"),
261            SyntaxKind::MatchExpr => write!(f, "MatchExpr"),
262            SyntaxKind::MatchArm => write!(f, "MatchArm"),
263            SyntaxKind::MatchArmList => write!(f, "MatchArmList"),
264            SyntaxKind::MatchPattern => write!(f, "MatchPattern"),
265            SyntaxKind::ConstructorPattern => write!(f, "ConstructorPattern"),
266            SyntaxKind::BlockExpr => write!(f, "BlockExpr"),
267            SyntaxKind::TupleExpr => write!(f, "TupleExpr"),
268            SyntaxKind::RecordExpr => write!(f, "RecordExpr"),
269            SyntaxKind::IntLiteral => write!(f, "IntLiteral"),
270            SyntaxKind::FloatLiteral => write!(f, "FloatLiteral"),
271            SyntaxKind::StringLiteral => write!(f, "StringLiteral"),
272            SyntaxKind::SelfLiteral => write!(f, "SelfLiteral"),
273            SyntaxKind::NowLiteral => write!(f, "NowLiteral"),
274            SyntaxKind::SampleRateLiteral => write!(f, "SampleRateLiteral"),
275            SyntaxKind::PlaceHolderLiteral => write!(f, "PlaceHolderLiteral"),
276            SyntaxKind::Identifier => write!(f, "Identifier"),
277            SyntaxKind::TypeAnnotation => write!(f, "TypeAnnotation"),
278            SyntaxKind::PrimitiveType => write!(f, "PrimitiveType"),
279            SyntaxKind::UnitType => write!(f, "UnitType"),
280            SyntaxKind::FunctionType => write!(f, "FunctionType"),
281            SyntaxKind::TupleType => write!(f, "TupleType"),
282            SyntaxKind::RecordType => write!(f, "RecordType"),
283            SyntaxKind::ArrayType => write!(f, "ArrayType"),
284            SyntaxKind::CodeType => write!(f, "CodeType"),
285            SyntaxKind::UnionType => write!(f, "UnionType"),
286            SyntaxKind::TypeIdent => write!(f, "TypeIdent"),
287            SyntaxKind::Pattern => write!(f, "Pattern"),
288            SyntaxKind::SinglePattern => write!(f, "SinglePattern"),
289            SyntaxKind::TuplePattern => write!(f, "TuplePattern"),
290            SyntaxKind::RecordPattern => write!(f, "RecordPattern"),
291            SyntaxKind::IncludeStmt => write!(f, "IncludeStmt"),
292            SyntaxKind::StageDecl => write!(f, "StageDecl"),
293            SyntaxKind::ModuleDecl => write!(f, "ModuleDecl"),
294            SyntaxKind::UseStmt => write!(f, "UseStmt"),
295            SyntaxKind::TypeDecl => write!(f, "TypeDecl"),
296            SyntaxKind::VariantDef => write!(f, "VariantDef"),
297            SyntaxKind::QualifiedPath => write!(f, "QualifiedPath"),
298            SyntaxKind::VisibilityPub => write!(f, "VisibilityPub"),
299            SyntaxKind::UseTargetMultiple => write!(f, "UseTargetMultiple"),
300            SyntaxKind::UseTargetWildcard => write!(f, "UseTargetWildcard"),
301            SyntaxKind::ParamList => write!(f, "ParamList"),
302            SyntaxKind::ArgList => write!(f, "ArgList"),
303            SyntaxKind::ExprList => write!(f, "ExprList"),
304            SyntaxKind::ParamDefault => write!(f, "ParamDefault"),
305            SyntaxKind::Error => write!(f, "Error"),
306        }
307    }
308}
309
310/// A marker representing a position in the parse tree building process.
311/// Used to wrap previously parsed nodes into a new parent node.
312#[derive(Clone, Copy, Debug)]
313pub struct Marker {
314    /// Position in the children list of the current node when the marker was created
315    pub pos: usize,
316}
317
318/// A builder for constructing Green Trees with an arena
319pub struct GreenTreeBuilder {
320    arena: GreenNodeArena,
321    stack: Vec<(SyntaxKind, Vec<GreenNodeId>)>,
322}
323
324impl GreenTreeBuilder {
325    pub fn new() -> Self {
326        Self {
327            arena: GreenNodeArena::new(),
328            stack: Vec::new(),
329        }
330    }
331
332    /// Get a reference to the arena
333    pub fn arena(&self) -> &GreenNodeArena {
334        &self.arena
335    }
336
337    /// Take ownership of the arena
338    pub fn into_arena(self) -> GreenNodeArena {
339        self.arena
340    }
341
342    /// Create a marker at the current position.
343    /// This marker can be used later to wrap nodes parsed after this point
344    /// into a new parent node using `start_node_at`.
345    pub fn marker(&self) -> Marker {
346        let pos = self
347            .stack
348            .last()
349            .map(|(_, children)| children.len())
350            .unwrap_or(0);
351        Marker { pos }
352    }
353
354    /// Start a new internal node
355    pub fn start_node(&mut self, kind: SyntaxKind) {
356        self.stack.push((kind, Vec::new()));
357    }
358
359    /// Start a new node at a previously saved marker position.
360    /// This effectively wraps all nodes parsed since the marker was created
361    /// into a new parent node.
362    pub fn start_node_at(&mut self, marker: Marker, kind: SyntaxKind) {
363        if let Some((_, children)) = self.stack.last_mut() {
364            // Extract children from marker position onwards
365            let wrapped_children: Vec<_> = children.drain(marker.pos..).collect();
366            // Push new node with those children
367            self.stack.push((kind, wrapped_children));
368        } else {
369            // At root level, just start a new node
370            self.stack.push((kind, Vec::new()));
371        }
372    }
373
374    /// Add a token as a child
375    pub fn add_token(&mut self, token_index: usize, width: usize) {
376        let token_id = self.arena.alloc_token(token_index, width);
377        if let Some((_, children)) = self.stack.last_mut() {
378            children.push(token_id);
379        }
380    }
381
382    /// Finish the current node and add it to its parent
383    pub fn finish_node(&mut self) -> Option<GreenNodeId> {
384        if let Some((kind, children)) = self.stack.pop() {
385            let node_id = self.arena.alloc_internal(kind, children);
386
387            // Add to parent if exists
388            if let Some((_, parent_children)) = self.stack.last_mut() {
389                parent_children.push(node_id);
390            }
391
392            Some(node_id)
393        } else {
394            None
395        }
396    }
397
398    /// Check if the builder is at the root level
399    pub fn is_root(&self) -> bool {
400        self.stack.len() <= 1
401    }
402}
403
404impl Default for GreenTreeBuilder {
405    fn default() -> Self {
406        Self::new()
407    }
408}
409
410#[cfg(test)]
411mod tests {
412    use super::*;
413
414    #[test]
415    fn test_green_node_token() {
416        let mut arena = GreenNodeArena::new();
417        let token_id = arena.alloc_token(0, 5);
418        assert_eq!(arena.width(token_id), 5);
419        assert_eq!(arena.kind(token_id), None);
420    }
421
422    #[test]
423    fn test_green_node_internal() {
424        let mut arena = GreenNodeArena::new();
425        let token1 = arena.alloc_token(0, 5);
426        let token2 = arena.alloc_token(1, 3);
427        let internal = arena.alloc_internal(SyntaxKind::BinaryExpr, vec![token1, token2]);
428
429        assert_eq!(arena.width(internal), 8);
430        assert_eq!(arena.kind(internal), Some(SyntaxKind::BinaryExpr));
431        assert_eq!(arena.children(internal).unwrap().len(), 2);
432    }
433
434    #[test]
435    fn test_builder() {
436        let mut builder = GreenTreeBuilder::new();
437
438        builder.start_node(SyntaxKind::Program);
439        builder.start_node(SyntaxKind::IntLiteral);
440        builder.add_token(0, 2); // "42"
441        builder.finish_node();
442        let root_id = builder.finish_node().unwrap();
443
444        let arena = builder.arena();
445        assert_eq!(arena.kind(root_id), Some(SyntaxKind::Program));
446        assert_eq!(arena.width(root_id), 2);
447    }
448
449    #[test]
450    fn test_nested_nodes() {
451        let mut builder = GreenTreeBuilder::new();
452
453        builder.start_node(SyntaxKind::BinaryExpr);
454        builder.add_token(0, 1); // "1"
455        builder.add_token(1, 1); // "+"
456        builder.add_token(2, 1); // "2"
457        let node_id = builder.finish_node().unwrap();
458
459        let arena = builder.arena();
460        assert_eq!(arena.width(node_id), 3);
461        assert_eq!(arena.children(node_id).unwrap().len(), 3);
462    }
463}