Skip to main content

mimium_lang/compiler/parser/
mod.rs

1mod cst_parser;
2mod cst_test_helpers;
3pub mod green;
4mod lower;
5mod preparser;
6mod red;
7/// Parser for mimium
8///
9/// This module implements a parser based on the Red-Green Syntax Tree pattern.
10/// The parser preserves all information from the source code including comments and whitespace.
11///
12/// For the formal syntax specification in EBNF, see `ebnf.md` in this directory.
13///
14/// ## Architecture
15///
16/// 1. **Tokenizer** (`tokenizer.rs`): Converts source text into position-aware tokens
17///    - Input: Source text (`&str`)
18///    - Output: Token sequence with position information
19///    - Tokens store kind, start position, and length (not the actual value)
20///
21/// 2. **Pre-parser** (`preparser.rs`): Separates trivia from syntax tokens
22///    - Input: Token sequence
23///    - Output: Index sequence with trivia maps
24///    - Comments and whitespace are stored in leading/trailing trivia maps
25///
26/// 3. **CST Parser** (`cst_parser.rs`): Parses into Green Tree (Concrete Syntax Tree)
27///    - Input: Token indices from pre-parser
28///    - Output: Green Tree (position-independent, immutable CST)
29///    - Represents the complete syntactic structure
30///
31/// 4. **AST Lowering** (`red.rs`, `lower.rs`): Converts Green Tree to Red Tree/AST
32///    - Input: Green Tree
33///    - Output: Red Tree (position-aware) or AST (without trivia)
34///    - Red nodes have absolute positions
35///    - AST removes trivia for semantic analysis
36///
37/// ## Usage Example
38///
39/// ```rust
40/// use mimium_lang::compiler::parser;
41///
42/// let source = "fn dsp() { 42 }";
43///
44/// // Step 1: Tokenize
45/// let tokens = parser::tokenize(source);
46///
47/// // Step 2: Pre-parse (separate trivia)
48/// let preparsed = parser::preparse(&tokens);
49///
50/// // Step 3: Parse to CST (Green Tree) and get annotated tokens with error collection
51/// let (green_id, arena, tokens, errors) = parser::parse_cst(tokens, &preparsed);
52/// if !errors.is_empty() {
53///     eprintln!("Parse errors: {:?}", errors);
54/// }
55///
/// // Step 4: Wrap the Green Tree in a Red Tree, then lower it to an AST
57/// let red = parser::green_to_red(green_id, 0);
58/// let ast = parser::red_to_ast(&red, source, &tokens, &arena);
59/// ```
60pub mod token;
61pub mod tokenizer;
62
63// Re-export main types and functions
64use crate::utils::error::{ReportableError, SimpleError};
65use crate::utils::metadata::Location;
66pub use cst_parser::{ParserError, parse_cst};
67pub use green::{GreenNodeArena, GreenNodeId, SyntaxKind};
68pub use lower::{add_global_context, parse_program, parse_to_expr};
69pub use preparser::{PreParsedTokens, preparse};
70pub use red::{AstNode, RedNode, red_to_ast};
71pub use token::{Token, TokenKind};
72pub use tokenizer::tokenize;
73
74/// Convenience function to create a Red node from a Green node
75pub fn green_to_red(green_id: GreenNodeId, offset: usize) -> std::sync::Arc<RedNode> {
76    RedNode::new(green_id, offset)
77}
78
79/// Complete parsing pipeline from source to AST with error collection
80pub fn parse(
81    source: &str,
82) -> (
83    AstNode,
84    Vec<Token>,
85    PreParsedTokens,
86    GreenNodeArena,
87    Vec<cst_parser::ParserError>,
88) {
89    let tokens = tokenize(source);
90    let preparsed = preparse(&tokens);
91    let (green_id, arena, tokens, errors) = parse_cst(tokens, &preparsed);
92    let red = green_to_red(green_id, 0);
93    let ast = red_to_ast(&red, source, &tokens, &arena);
94
95    (ast, tokens, preparsed, arena, errors)
96}
97
98/// Convert parser errors to ReportableError with source span.
99pub fn parser_errors_to_reportable(
100    source: &str,
101    file_path: std::path::PathBuf,
102    errors: Vec<cst_parser::ParserError>,
103) -> Vec<Box<dyn ReportableError>> {
104    let tokens = tokenize(source);
105    let fallback_span = tokens.last().map(|t| t.start..t.end()).unwrap_or(0..0);
106
107    errors
108        .into_iter()
109        .map(|err| {
110            let span = tokens
111                .get(err.token_index)
112                .map(|t| t.start..t.end())
113                .unwrap_or_else(|| fallback_span.clone());
114            Box::new(SimpleError {
115                message: format!("Parse error: {err}"),
116                span: Location {
117                    span,
118                    path: file_path.clone(),
119                },
120            }) as Box<dyn ReportableError>
121        })
122        .collect()
123}
124
#[cfg(test)]
mod tests {
    use super::*;

    /// Module source shared by the two "exact zipwith" tests: a function with
    /// nested function types in its parameter list, declared inside a `mod`.
    const ZIPWITH_MODULE_SOURCE: &str = "mod fdn{\n    #stage(main)\n    fn zipwith(fun:(((float)->float),float)->float,left:[(float)->float],right:[float])->[float]{\n        0\n    }\n}\n";

    /// The whole pipeline yields a non-empty program, tokens, and no errors.
    #[test]
    fn test_full_pipeline() {
        let (ast, tokens, _preparsed, _arena, errors) = parse("fn dsp() { 42 }");

        let AstNode::Program { statements } = ast else {
            panic!("Expected Program node");
        };
        assert!(!statements.is_empty());

        assert!(!tokens.is_empty());
        assert!(errors.is_empty(), "Expected no errors, got {errors:?}");
    }

    /// Comments stay in the returned token stream and do not break parsing.
    #[test]
    fn test_with_comments() {
        let source = r#"
            // This is a comment
            fn dsp() {
                /* multi-line
                   comment */
                42
            }
        "#;

        let (ast, tokens, _preparsed, _arena, errors) = parse(source);

        // Comment tokens must still be present in the token stream.
        let is_comment = |kind: &TokenKind| {
            matches!(
                kind,
                TokenKind::SingleLineComment | TokenKind::MultiLineComment
            )
        };
        assert!(tokens.iter().any(|tok| is_comment(&tok.kind)));

        // The AST root must still be a program.
        assert!(
            matches!(ast, AstNode::Program { .. }),
            "Expected Program node"
        );

        assert!(errors.is_empty(), "Expected no errors, got {errors:?}");
    }

    /// `let x = 42` lowers to a single `LetDecl` holding an integer literal.
    #[test]
    fn test_let_binding() {
        let (ast, _tokens, _preparsed, _arena, errors) = parse("let x = 42");

        let AstNode::Program { statements } = ast else {
            panic!("Expected Program node");
        };
        assert_eq!(statements.len(), 1);

        let AstNode::LetDecl { name, value } = &statements[0] else {
            panic!("Expected LetDecl");
        };
        assert_eq!(name, "x");

        let AstNode::IntLiteral(n) = value.as_ref() else {
            panic!("Expected IntLiteral");
        };
        assert_eq!(*n, 42);

        assert!(errors.is_empty(), "Expected no errors, got {errors:?}");
    }

    /// A function declaration keeps its name and parameter names in order.
    #[test]
    fn test_function_with_params() {
        let (ast, _tokens, _preparsed, _arena, errors) = parse("fn add(x, y) { x }");

        let AstNode::Program { statements } = ast else {
            panic!("Expected Program node");
        };
        assert_eq!(statements.len(), 1);

        let AstNode::FunctionDecl {
            name,
            params,
            body: _,
        } = &statements[0]
        else {
            panic!("Expected FunctionDecl");
        };
        assert_eq!(name, "add");
        assert_eq!(params, &vec!["x".to_string(), "y".to_string()]);

        assert!(errors.is_empty(), "Expected no errors, got {errors:?}");
    }

    /// Tokens record where they start and how long they are in the source.
    #[test]
    fn test_position_information() {
        let source = "fn dsp() { 42 }";
        let tokens = tokenize(source);

        // The first token is the `fn` keyword at the very start of the input.
        let first = &tokens[0];
        assert_eq!(first.kind, TokenKind::Function);
        assert_eq!(first.text(source), "fn");
        assert_eq!(first.start, 0);
        assert_eq!(first.length, 2);

        // The integer literal resolves back to its source text.
        let int_token = tokens
            .iter()
            .find(|tok| tok.kind == TokenKind::Int)
            .unwrap();
        assert_eq!(int_token.text(source), "42");
    }

    /// A stray `}` must terminate with errors instead of hanging the parser.
    #[test]
    fn test_unexpected_closing_token_does_not_stall() {
        let (ast, tokens, _preparsed, _arena, errors) = parse("}");

        assert!(
            matches!(ast, AstNode::Program { .. }),
            "Expected Program node"
        );

        assert!(!tokens.is_empty());
        assert!(
            !errors.is_empty(),
            "Expected parser errors for malformed input"
        );
    }

    /// Nested function types in parameters of a module-level function parse cleanly.
    #[test]
    fn test_nested_function_types_in_module_params() {
        let source = r#"
            mod fdn{
                #stage(main)
                fn zipwith(fun:(((float)->float),float)->float,left:[(float)->float],right:[float])->[float]{
                    0
                }
            }
        "#;
        let (ast, tokens, _preparsed, _arena, errors) = parse(source);

        let AstNode::Program { statements } = ast else {
            panic!("Expected Program node");
        };
        assert!(!statements.is_empty());

        assert!(!tokens.is_empty());
        assert!(errors.is_empty(), "Expected no errors, got {errors:?}");
    }

    /// A broken parameter list inside a module reports errors and still terminates.
    #[test]
    fn test_malformed_module_body_does_not_stall() {
        let source = r#"
            mod fdn{
                fn broken(x:){
                    0
                }
            }
        "#;
        let (ast, tokens, _preparsed, _arena, errors) = parse(source);

        assert!(
            matches!(ast, AstNode::Program { .. }),
            "Expected Program node"
        );

        assert!(!tokens.is_empty());
        assert!(
            !errors.is_empty(),
            "Expected parser errors for malformed module input"
        );
    }

    /// The exact zipwith module text goes through the full pipeline without errors.
    #[test]
    fn test_exact_module_zipwith_source() {
        let (ast, tokens, _preparsed, _arena, errors) = parse(ZIPWITH_MODULE_SOURCE);

        let AstNode::Program { statements } = ast else {
            panic!("Expected Program node");
        };
        assert!(!statements.is_empty());

        assert!(!tokens.is_empty());
        assert!(errors.is_empty(), "Expected no errors, got {errors:?}");
    }

    /// The printed CST of the zipwith module mentions the function declaration.
    #[test]
    fn test_exact_module_zipwith_cst_print() {
        let raw_tokens = tokenize(ZIPWITH_MODULE_SOURCE);
        let preparsed = preparse(&raw_tokens);
        let (green_id, arena, tokens, errors) = parse_cst(raw_tokens, &preparsed);

        assert!(errors.is_empty(), "Expected no errors, got {errors:?}");

        let rendered = arena.print_tree(green_id, &tokens, ZIPWITH_MODULE_SOURCE, 0);
        assert!(rendered.contains("FunctionDecl"));
        assert!(rendered.contains("zipwith"));
    }
}