Skip to main content

mimium_lang/compiler/parser/
red.rs

1/// Red Tree - Abstract Syntax Tree (AST) without trivia
2/// Based on the Red-Green Syntax Tree pattern
3///
4/// Red nodes have absolute positions and are created from Green nodes.
5/// They represent the actual AST without comments and whitespace.
6/// Red nodes maintain parent references for bottom-up traversal.
7use super::green::{GreenNodeArena, GreenNodeId, SyntaxKind};
8use super::token::{Token, TokenKind};
9use std::sync::{Arc, Weak};
10
11/// Red node - represents an AST node with position information
12/// This is the "Red" part of Red-Green Syntax Tree
13#[derive(Debug, Clone)]
14pub struct RedNode {
15    /// The underlying green node ID
16    green_id: GreenNodeId,
17    /// Absolute position in the source
18    offset: usize,
19    /// Parent node (weak reference to avoid cycles)
20    parent: Option<Weak<RedNode>>,
21}
22
23impl RedNode {
24    /// Create a new red node from a green node ID
25    pub fn new(green_id: GreenNodeId, offset: usize) -> Arc<Self> {
26        Arc::new(RedNode {
27            green_id,
28            offset,
29            parent: None,
30        })
31    }
32
33    /// Create a new red node with a parent reference
34    pub fn new_with_parent(
35        green_id: GreenNodeId,
36        offset: usize,
37        parent: Weak<RedNode>,
38    ) -> Arc<Self> {
39        Arc::new(RedNode {
40            green_id,
41            offset,
42            parent: Some(parent),
43        })
44    }
45
46    /// Get the absolute position of this node
47    pub fn offset(&self) -> usize {
48        self.offset
49    }
50
51    /// Get the width (length) of this node
52    pub fn width(&self, arena: &GreenNodeArena) -> usize {
53        arena.width(self.green_id)
54    }
55
56    /// Get the end position of this node
57    pub fn end(&self, arena: &GreenNodeArena) -> usize {
58        self.offset + self.width(arena)
59    }
60
61    /// Get the syntax kind of this node
62    pub fn kind(&self, arena: &GreenNodeArena) -> Option<SyntaxKind> {
63        arena.kind(self.green_id)
64    }
65
66    /// Get the underlying green node ID
67    pub fn green_id(&self) -> GreenNodeId {
68        self.green_id
69    }
70
71    /// Get the parent node if it exists
72    pub fn parent(&self) -> Option<Arc<RedNode>> {
73        self.parent.as_ref().and_then(|weak| weak.upgrade())
74    }
75
76    /// Get children as red nodes with parent references
77    pub fn children(self: &Arc<Self>, arena: &GreenNodeArena) -> Vec<Arc<RedNode>> {
78        if let Some(green_children) = arena.children(self.green_id) {
79            let mut offset = self.offset;
80
81            green_children
82                .iter()
83                .map(|&child_id| {
84                    let child = RedNode::new_with_parent(child_id, offset, Arc::downgrade(self));
85                    offset += arena.width(child_id);
86                    child
87                })
88                .collect()
89        } else {
90            Vec::new()
91        }
92    }
93
94    /// Get ancestors of this node (bottom-up traversal)
95    pub fn ancestors(&self) -> Vec<Arc<RedNode>> {
96        let mut result = Vec::new();
97        let mut current = self.parent();
98
99        while let Some(node) = current {
100            result.push(node.clone());
101            current = node.parent();
102        }
103
104        result
105    }
106
107    /// Check if this node is a descendant of another node
108    pub fn is_descendant_of(&self, ancestor: &RedNode) -> bool {
109        self.ancestors()
110            .iter()
111            .any(|node| node.green_id == ancestor.green_id && node.offset == ancestor.offset)
112    }
113
114    /// Get the text of this node from the source
115    pub fn text<'a>(&self, source: &'a str, arena: &GreenNodeArena) -> &'a str {
116        &source[self.offset..self.end(arena)]
117    }
118}
119
120/// AST representation - simplified from Red Tree
121#[derive(Debug, Clone)]
122pub enum AstNode {
123    Program {
124        statements: Vec<AstNode>,
125    },
126
127    FunctionDecl {
128        name: String,
129        params: Vec<String>,
130        body: Box<AstNode>,
131    },
132
133    LetDecl {
134        name: String,
135        value: Box<AstNode>,
136    },
137
138    LetRecDecl {
139        name: String,
140        value: Box<AstNode>,
141    },
142
143    BinaryExpr {
144        op: String,
145        left: Box<AstNode>,
146        right: Box<AstNode>,
147    },
148
149    CallExpr {
150        callee: Box<AstNode>,
151        args: Vec<AstNode>,
152    },
153
154    IfExpr {
155        condition: Box<AstNode>,
156        then_branch: Box<AstNode>,
157        else_branch: Option<Box<AstNode>>,
158    },
159
160    BlockExpr {
161        statements: Vec<AstNode>,
162    },
163
164    TupleExpr {
165        elements: Vec<AstNode>,
166    },
167
168    RecordExpr {
169        fields: Vec<(String, AstNode)>,
170    },
171
172    IntLiteral(i64),
173    FloatLiteral(f64),
174    StringLiteral(String),
175    Identifier(String),
176
177    Error,
178}
179
180/// Convert Red Tree to AST
181pub fn red_to_ast(
182    red: &Arc<RedNode>,
183    source: &str,
184    tokens: &[Token],
185    arena: &GreenNodeArena,
186) -> AstNode {
187    match red.kind(arena) {
188        Some(SyntaxKind::Program) => {
189            let children = red.children(arena);
190            let mut statements: Vec<AstNode> = children
191                .iter()
192                .map(|child| red_to_ast(child, source, tokens, arena))
193                .collect();
194
195            // Transform flat statement list into Let-body-then chain
196            statements = vec![transform_let_chain(statements)];
197
198            AstNode::Program { statements }
199        }
200
201        Some(SyntaxKind::Statement) => {
202            let children = red.children(arena);
203            if let Some(first) = children.first() {
204                red_to_ast(first, source, tokens, arena)
205            } else {
206                AstNode::Error
207            }
208        }
209
210        Some(SyntaxKind::FunctionDecl) => {
211            let children = red.children(arena);
212            let mut name = String::new();
213            let mut params = Vec::new();
214            let mut body = None;
215
216            for (i, child) in children.iter().enumerate() {
217                let green = arena.get(child.green_id());
218                match green {
219                    super::green::GreenNode::Token { token_index, .. } => {
220                        if let Some(token) = tokens.get(*token_index)
221                            && matches!(token.kind, TokenKind::Ident | TokenKind::IdentFunction)
222                            && i == 1
223                        {
224                            name = token.text(source).to_string();
225                        }
226                    }
227                    _ => {
228                        if child.kind(arena) == Some(SyntaxKind::ParamList) {
229                            params = extract_params(child, source, tokens, arena);
230                        } else if child.kind(arena) == Some(SyntaxKind::BlockExpr) {
231                            body = Some(Box::new(red_to_ast(child, source, tokens, arena)));
232                        }
233                    }
234                }
235            }
236
237            AstNode::FunctionDecl {
238                name,
239                params,
240                body: body.unwrap_or_else(|| Box::new(AstNode::Error)),
241            }
242        }
243
244        Some(SyntaxKind::LetDecl) => {
245            let children = red.children(arena);
246            let mut name = String::new();
247            let mut value = None;
248
249            for (i, child) in children.iter().enumerate() {
250                let green = arena.get(child.green_id());
251                match green {
252                    super::green::GreenNode::Token { token_index, .. } => {
253                        if let Some(token) = tokens.get(*token_index)
254                            && matches!(token.kind, TokenKind::Ident | TokenKind::IdentVariable)
255                            && i == 1
256                        {
257                            name = token.text(source).to_string();
258                        }
259                    }
260                    _ => {
261                        // Attempt to extract identifier from pattern subtree if name is empty
262                        if name.is_empty()
263                            && matches!(
264                                child.kind(arena),
265                                Some(SyntaxKind::Pattern)
266                                    | Some(SyntaxKind::SinglePattern)
267                                    | Some(SyntaxKind::TuplePattern)
268                                    | Some(SyntaxKind::RecordPattern)
269                            )
270                        {
271                            // DFS over subtree to find first identifier token
272                            let mut stack = vec![child.clone()];
273                            while let Some(node) = stack.pop() {
274                                let g = arena.get(node.green_id());
275                                if let super::green::GreenNode::Token { token_index, .. } = g {
276                                    if let Some(tok) = tokens.get(*token_index)
277                                        && matches!(
278                                            tok.kind,
279                                            TokenKind::Ident | TokenKind::IdentVariable
280                                        )
281                                    {
282                                        name = tok.text(source).to_string();
283                                        break;
284                                    }
285                                } else {
286                                    stack.extend(node.children(arena));
287                                }
288                            }
289                        } else if value.is_none() {
290                            value = Some(Box::new(red_to_ast(child, source, tokens, arena)));
291                        }
292                    }
293                }
294            }
295
296            AstNode::LetDecl {
297                name,
298                value: value.unwrap_or_else(|| Box::new(AstNode::Error)),
299            }
300        }
301
302        Some(SyntaxKind::BlockExpr) => {
303            let children = red.children(arena);
304            let statements = children
305                .iter()
306                .filter_map(|child| {
307                    let green = arena.get(child.green_id());
308                    if matches!(green, super::green::GreenNode::Token { .. }) {
309                        None
310                    } else {
311                        Some(red_to_ast(child, source, tokens, arena))
312                    }
313                })
314                .collect();
315            AstNode::BlockExpr { statements }
316        }
317
318        Some(SyntaxKind::IntLiteral) => {
319            let children = red.children(arena);
320            if let Some(child) = children.first() {
321                let green = arena.get(child.green_id());
322                if let super::green::GreenNode::Token { token_index, .. } = green
323                    && let Some(token) = tokens.get(*token_index)
324                {
325                    let text = token.text(source);
326                    if let Ok(value) = text.parse::<i64>() {
327                        return AstNode::IntLiteral(value);
328                    }
329                }
330            }
331            AstNode::Error
332        }
333
334        Some(SyntaxKind::FloatLiteral) => {
335            let children = red.children(arena);
336            if let Some(child) = children.first() {
337                let green = arena.get(child.green_id());
338                if let super::green::GreenNode::Token { token_index, .. } = green
339                    && let Some(token) = tokens.get(*token_index)
340                {
341                    let text = token.text(source);
342                    if let Ok(value) = text.parse::<f64>() {
343                        return AstNode::FloatLiteral(value);
344                    }
345                }
346            }
347            AstNode::Error
348        }
349
350        Some(SyntaxKind::StringLiteral) => {
351            let children = red.children(arena);
352            if let Some(child) = children.first() {
353                let green = arena.get(child.green_id());
354                if let super::green::GreenNode::Token { token_index, .. } = green
355                    && let Some(token) = tokens.get(*token_index)
356                {
357                    let text = token.text(source);
358                    // Remove quotes
359                    let unquoted = text.trim_matches('"');
360                    return AstNode::StringLiteral(unquoted.to_string());
361                }
362            }
363            AstNode::Error
364        }
365
366        Some(SyntaxKind::Identifier) => {
367            let children = red.children(arena);
368            if let Some(child) = children.first() {
369                let green = arena.get(child.green_id());
370                if let super::green::GreenNode::Token { token_index, .. } = green
371                    && let Some(token) = tokens.get(*token_index)
372                {
373                    return AstNode::Identifier(token.text(source).to_string());
374                }
375            }
376            AstNode::Error
377        }
378
379        Some(SyntaxKind::TupleExpr) => {
380            let children = red.children(arena);
381            let elements = children
382                .iter()
383                .filter_map(|child| {
384                    // Skip tokens (parens, commas), only process expressions
385                    if child.kind(arena).is_some() {
386                        Some(red_to_ast(child, source, tokens, arena))
387                    } else {
388                        None
389                    }
390                })
391                .collect();
392            AstNode::TupleExpr { elements }
393        }
394
395        Some(SyntaxKind::RecordExpr) => {
396            let children = red.children(arena);
397            let mut fields = Vec::new();
398            let mut current_field_name = None;
399
400            for child in children.iter() {
401                let green = arena.get(child.green_id());
402                match green {
403                    super::green::GreenNode::Token { token_index, .. } => {
404                        if let Some(token) = tokens.get(*token_index)
405                            && matches!(token.kind, TokenKind::Ident | TokenKind::IdentVariable)
406                        {
407                            current_field_name = Some(token.text(source).to_string());
408                        }
409                    }
410                    _ => {
411                        // This is an expression node
412                        if let Some(field_name) = current_field_name.take() {
413                            let value = red_to_ast(child, source, tokens, arena);
414                            fields.push((field_name, value));
415                        }
416                    }
417                }
418            }
419            AstNode::RecordExpr { fields }
420        }
421
422        _ => AstNode::Error,
423    }
424}
425
426/// Extract parameter names from ParamList node
427fn extract_params(
428    red: &Arc<RedNode>,
429    source: &str,
430    tokens: &[Token],
431    arena: &GreenNodeArena,
432) -> Vec<String> {
433    let mut params = Vec::new();
434
435    for child in red.children(arena) {
436        let green = arena.get(child.green_id());
437        if let super::green::GreenNode::Token { token_index, .. } = green
438            && let Some(token) = tokens.get(*token_index)
439            && matches!(token.kind, TokenKind::Ident | TokenKind::IdentParameter)
440        {
441            params.push(token.text(source).to_string());
442        }
443    }
444
445    params
446}
447
448/// Transform a flat list of statements into a Let-body-then chain
449///
450/// For example:
451/// ```text
452/// [LetDecl(x, 1), LetDecl(y, 2), Expr(x+y)]
453/// ```
454/// becomes:
455/// ```text
456/// LetDecl(x, 1, LetDecl(y, 2, Expr(x+y)))
457/// ```
458fn transform_let_chain(statements: Vec<AstNode>) -> AstNode {
459    if statements.is_empty() {
460        return AstNode::Error;
461    }
462
463    // Work backwards through the statements, building the chain from the inside out
464    let result = statements.into_iter().rev().reduce(|body, stmt| {
465        match stmt {
466            AstNode::LetDecl { name, value } => {
467                // Transform LetDecl into a nested structure with body
468                // We destructure the Box, so value is AstNode, not Box<AstNode>
469                AstNode::LetDecl {
470                    name,
471                    value: Box::new(AstNode::BlockExpr {
472                        statements: vec![*value, body],
473                    }),
474                }
475            }
476            AstNode::LetRecDecl { name, value } => {
477                // Same for LetRecDecl
478                AstNode::LetRecDecl {
479                    name,
480                    value: Box::new(AstNode::BlockExpr {
481                        statements: vec![*value, body],
482                    }),
483                }
484            }
485            _ => {
486                // For non-Let statements, wrap with the body in a block
487                AstNode::BlockExpr {
488                    statements: vec![stmt, body],
489                }
490            }
491        }
492    });
493
494    result.unwrap_or(AstNode::Error)
495}
496
497#[cfg(test)]
498mod tests {
499    use super::*;
500    use crate::compiler::parser::cst_parser::parse_cst;
501    use crate::compiler::parser::preparser::preparse;
502    use crate::compiler::parser::tokenizer::tokenize;
503
504    #[test]
505    fn test_red_node_creation() {
506        let source = "42";
507        let tokens = tokenize(source);
508        let preparsed = preparse(&tokens);
509        let (root_id, arena, _tokens, errors) = parse_cst(tokens, &preparsed);
510        let red = RedNode::new(root_id, 0);
511
512        assert_eq!(red.offset(), 0);
513        assert!(red.width(&arena) > 0);
514        assert!(errors.is_empty(), "Expected no errors, got {errors:?}");
515    }
516
517    #[test]
518    fn test_red_to_ast_simple() {
519        let source = "42";
520        let tokens = tokenize(source);
521        let preparsed = preparse(&tokens);
522        let (root_id, arena, annotated_tokens, errors) = parse_cst(tokens, &preparsed);
523        let red = RedNode::new(root_id, 0);
524        let ast = red_to_ast(&red, source, &annotated_tokens, &arena);
525
526        match ast {
527            AstNode::Program { .. } => {} // Expected
528            _ => panic!("Expected Program node"),
529        }
530
531        assert!(errors.is_empty(), "Expected no errors, got {errors:?}");
532    }
533
534    #[test]
535    fn test_red_to_ast_function() {
536        let source = "fn add(x, y) { 42 }";
537        let tokens = tokenize(source);
538        let preparsed = preparse(&tokens);
539        let (root_id, arena, annotated_tokens, errors) = parse_cst(tokens, &preparsed);
540        let red = RedNode::new(root_id, 0);
541        let ast = red_to_ast(&red, source, &annotated_tokens, &arena);
542
543        match ast {
544            AstNode::Program { statements } => {
545                assert!(!statements.is_empty());
546            }
547            _ => panic!("Expected Program node"),
548        }
549
550        assert!(errors.is_empty(), "Expected no errors, got {errors:?}");
551    }
552
553    #[test]
554    fn test_parent_references() {
555        let source = "fn add(x, y) { 42 }";
556        let tokens = tokenize(source);
557        let preparsed = preparse(&tokens);
558        let (root_id, arena, _tokens, errors) = parse_cst(tokens, &preparsed);
559        let root = RedNode::new(root_id, 0);
560
561        // Root should have no parent
562        assert!(root.parent().is_none());
563
564        // Get children with parent references
565        let children = root.children(&arena);
566
567        // Children should have parent references pointing to root
568        for child in children.iter() {
569            let parent = child.parent();
570            assert!(parent.is_some(), "Child should have a parent reference");
571
572            let parent = parent.unwrap();
573            assert_eq!(parent.offset(), root.offset());
574            assert_eq!(parent.green_id(), root.green_id());
575        }
576
577        assert!(errors.is_empty(), "Expected no errors, got {errors:?}");
578    }
579
580    #[test]
581    fn test_ancestors() {
582        let source = "fn add(x, y) { let z = 42 }";
583        let tokens = tokenize(source);
584        let preparsed = preparse(&tokens);
585        let (root_id, arena, _tokens, errors) = parse_cst(tokens, &preparsed);
586        let root = RedNode::new(root_id, 0);
587
588        // Get first child (statement)
589        let children = root.children(&arena);
590        if let Some(statement) = children.first() {
591            // Get ancestors
592            let ancestors = statement.ancestors();
593
594            // Should have at least the root as ancestor
595            assert!(!ancestors.is_empty(), "Statement should have ancestors");
596
597            // First ancestor should be the root
598            if let Some(first_ancestor) = ancestors.first() {
599                assert_eq!(first_ancestor.offset(), root.offset());
600            }
601        }
602
603        assert!(errors.is_empty(), "Expected no errors, got {errors:?}");
604    }
605
606    #[test]
607    fn test_is_descendant_of() {
608        let source = "fn add(x, y) { 42 }";
609        let tokens = tokenize(source);
610        let preparsed = preparse(&tokens);
611        let (root_id, arena, _tokens, errors) = parse_cst(tokens, &preparsed);
612        let root = RedNode::new(root_id, 0);
613
614        let children = root.children(&arena);
615        if let Some(child) = children.first() {
616            // Child should be descendant of root
617            assert!(
618                child.is_descendant_of(&root),
619                "Child should be descendant of root"
620            );
621
622            // Root should not be descendant of child
623            assert!(
624                !root.is_descendant_of(child),
625                "Root should not be descendant of child"
626            );
627        }
628
629        assert!(errors.is_empty(), "Expected no errors, got {errors:?}");
630    }
631
632    #[test]
633    fn test_let_chain_transformation() {
634        let source = "let x = 1\nlet y = 2\nx";
635        let tokens = tokenize(source);
636        let preparsed = preparse(&tokens);
637        let (root_id, arena, annotated_tokens, errors) = parse_cst(tokens, &preparsed);
638        let red = RedNode::new(root_id, 0);
639        let ast = red_to_ast(&red, source, &annotated_tokens, &arena);
640
641        // The AST should have transformed the flat statement list into a chain
642        match ast {
643            AstNode::Program { statements } => {
644                assert_eq!(statements.len(), 1);
645                // The single statement should be a nested structure
646                // of Let bindings, not a flat list
647            }
648            _ => panic!("Expected Program node"),
649        }
650
651        assert!(errors.is_empty(), "Expected no errors, got {errors:?}");
652    }
653}