Skip to main content

lumen_compiler/
lib.rs

1//! Lumen Compiler
2//!
3//! Transforms Lumen source files (`.lm`, `.lumen`, `.lm.md`, `.lumen.md`) into LIR modules.
4
5pub mod compiler;
6pub mod diagnostics;
7pub mod markdown;
8
9use compiler::ast::{Directive, ImportDecl, ImportList, Item};
10use compiler::lir::LirModule;
11use compiler::resolve::SymbolTable;
12use std::collections::HashSet;
13
14use thiserror::Error;
15
16#[derive(Debug, Error)]
17pub enum CompileError {
18    #[error("lex error: {0}")]
19    Lex(#[from] compiler::lexer::LexError),
20    #[error("parse errors: {0:?}")]
21    Parse(Vec<compiler::parser::ParseError>),
22    #[error("resolve errors: {0:?}")]
23    Resolve(Vec<compiler::resolve::ResolveError>),
24    #[error("type errors: {0:?}")]
25    Type(Vec<compiler::typecheck::TypeError>),
26    #[error("constraint errors: {0:?}")]
27    Constraint(Vec<compiler::constraints::ConstraintError>),
28}
29
30impl From<compiler::parser::ParseError> for CompileError {
31    fn from(err: compiler::parser::ParseError) -> Self {
32        CompileError::Parse(vec![err])
33    }
34}
35
36/// Compile with access to external modules for import resolution.
37///
38/// The `resolve_import` callback takes a module path (e.g., "mathlib") and returns
39/// the source content of that module if it exists, or None if not found.
40pub fn compile_with_imports(
41    source: &str,
42    resolve_import: &dyn Fn(&str) -> Option<String>,
43) -> Result<LirModule, CompileError> {
44    let mut compilation_stack = HashSet::new();
45    compile_with_imports_internal(source, resolve_import, &mut compilation_stack, None)
46}
47
48/// Internal implementation that tracks the compilation stack for circular import detection
49fn compile_with_imports_internal(
50    source: &str,
51    resolve_import: &dyn Fn(&str) -> Option<String>,
52    compilation_stack: &mut HashSet<String>,
53    _current_module: Option<&str>,
54) -> Result<LirModule, CompileError> {
55    // 1. Extract Markdown blocks
56    let extracted = markdown::extract::extract_blocks(source);
57
58    // 2. Build directives
59    let directives: Vec<Directive> = extracted
60        .directives
61        .iter()
62        .map(|d| Directive {
63            name: d.name.clone(),
64            value: d.value.clone(),
65            span: d.span,
66        })
67        .collect();
68
69    // 3. Concatenate all code blocks
70    // 3. Concatenate all code blocks preserving line numbers
71    let mut full_code = String::new();
72    let mut current_line = 1;
73
74    for block in extracted.code_blocks.iter() {
75        // Pad with newlines to reach the block's start line
76        while current_line < block.code_start_line {
77            full_code.push('\n');
78            current_line += 1;
79        }
80
81        full_code.push_str(&block.code);
82
83        let lines_in_block = block.code.chars().filter(|&c| c == '\n').count();
84        current_line += lines_in_block;
85    }
86
87    if full_code.trim().is_empty() {
88        return Ok(LirModule::new("sha256:empty".to_string()));
89    }
90
91    // 4. Lex
92    // We start at line 1 because we padded the code to match the file structure
93    let mut lexer = compiler::lexer::Lexer::new(&full_code, 1, 0);
94    let tokens = lexer.tokenize()?;
95
96    // 5. Parse
97    let mut parser = compiler::parser::Parser::new(tokens);
98    let (program, parse_errors) = parser.parse_program_with_recovery(directives);
99    if !parse_errors.is_empty() {
100        return Err(CompileError::Parse(parse_errors));
101    }
102
103    // 6. Process imports before resolution
104    let mut base_symbols = SymbolTable::new();
105    let mut import_errors = Vec::new();
106    let mut imported_modules: Vec<LirModule> = Vec::new();
107
108    // Collect all imports
109    let imports: Vec<&ImportDecl> = program
110        .items
111        .iter()
112        .filter_map(|item| {
113            if let Item::Import(imp) = item {
114                Some(imp)
115            } else {
116                None
117            }
118        })
119        .collect();
120
121    // Process each import
122    for import in imports {
123        let module_path = import.path.join(".");
124
125        // Check for circular imports
126        if compilation_stack.contains(&module_path) {
127            let chain: Vec<String> = compilation_stack.iter().cloned().collect();
128            let chain_str = format!("{} -> {}", chain.join(" -> "), module_path);
129            import_errors.push(compiler::resolve::ResolveError::CircularImport {
130                module: module_path.clone(),
131                chain: chain_str,
132            });
133            continue;
134        }
135
136        // Resolve the module source
137        let imported_source = match resolve_import(&module_path) {
138            Some(src) => src,
139            None => {
140                import_errors.push(compiler::resolve::ResolveError::ModuleNotFound {
141                    module: module_path.clone(),
142                    line: import.span.line,
143                });
144                continue;
145            }
146        };
147
148        // Track this module in the compilation stack
149        compilation_stack.insert(module_path.clone());
150
151        // Recursively compile the imported module. The markdown pipeline now
152        // supports fenced and unfenced source forms.
153        let imported_module = compile_with_imports_internal(
154            &imported_source,
155            resolve_import,
156            compilation_stack,
157            Some(&module_path),
158        )?;
159
160        // Remove from stack after compilation
161        compilation_stack.remove(&module_path);
162
163        // Keep the compiled module for later merging
164        imported_modules.push(imported_module);
165
166        // Extract symbols from the imported module by parsing it as markdown if it has
167        // fenced lumen blocks, otherwise as raw source.
168        let imported_extracted = markdown::extract::extract_blocks(&imported_source);
169        let (imported_code, imported_directives, imported_line, imported_offset) =
170            if imported_extracted.code_blocks.is_empty() {
171                (imported_source.clone(), vec![], 1, 0)
172            } else {
173                let mut code = String::new();
174                let mut first_line = 1;
175                let mut first_offset = 0;
176                for (i, block) in imported_extracted.code_blocks.iter().enumerate() {
177                    if i == 0 {
178                        first_line = block.code_start_line;
179                        first_offset = block.code_offset;
180                    }
181                    if !code.is_empty() {
182                        code.push('\n');
183                    }
184                    code.push_str(&block.code);
185                }
186                let directives: Vec<Directive> = imported_extracted
187                    .directives
188                    .iter()
189                    .map(|d| Directive {
190                        name: d.name.clone(),
191                        value: d.value.clone(),
192                        span: d.span,
193                    })
194                    .collect();
195                (code, directives, first_line, first_offset)
196            };
197
198        let mut imported_lexer =
199            compiler::lexer::Lexer::new(&imported_code, imported_line, imported_offset);
200        if let Ok(imported_tokens) = imported_lexer.tokenize() {
201            let mut imported_parser = compiler::parser::Parser::new(imported_tokens);
202            if let Ok(imported_program) = imported_parser.parse_program(imported_directives) {
203                if let Ok(imported_symbols) = compiler::resolve::resolve(&imported_program) {
204                    // Import the requested symbols
205                    match &import.names {
206                        ImportList::Wildcard => {
207                            // Import all top-level definitions
208                            for (name, info) in imported_symbols.cells {
209                                base_symbols.import_cell(name, info);
210                            }
211                            for (name, info) in imported_symbols.types {
212                                base_symbols.import_type(name, info);
213                            }
214                            for (name, type_expr) in imported_symbols.type_aliases {
215                                base_symbols.import_type_alias(name, type_expr);
216                            }
217                        }
218                        ImportList::Names(names) => {
219                            for import_name in names {
220                                let symbol_name = &import_name.name;
221                                let local_name = import_name.alias.as_ref().unwrap_or(symbol_name);
222
223                                // Try to find the symbol in cells, types, or type aliases
224                                let mut found = false;
225
226                                if let Some(cell_info) = imported_symbols.cells.get(symbol_name) {
227                                    base_symbols.import_cell(local_name.clone(), cell_info.clone());
228                                    found = true;
229                                }
230
231                                if let Some(type_info) = imported_symbols.types.get(symbol_name) {
232                                    base_symbols.import_type(local_name.clone(), type_info.clone());
233                                    found = true;
234                                }
235
236                                if let Some(type_expr) =
237                                    imported_symbols.type_aliases.get(symbol_name)
238                                {
239                                    base_symbols
240                                        .import_type_alias(local_name.clone(), type_expr.clone());
241                                    found = true;
242                                }
243
244                                if !found {
245                                    import_errors.push(
246                                        compiler::resolve::ResolveError::ImportedSymbolNotFound {
247                                            symbol: symbol_name.clone(),
248                                            module: module_path.clone(),
249                                            line: import_name.span.line,
250                                        },
251                                    );
252                                }
253                            }
254                        }
255                    }
256                }
257            }
258        }
259    }
260
261    if !import_errors.is_empty() {
262        return Err(CompileError::Resolve(import_errors));
263    }
264
265    // 7. Resolve with imported symbols pre-populated
266    // Use resolve_with_base so imported symbols are available during resolution
267    let symbols = compiler::resolve::resolve_with_base(&program, base_symbols)
268        .map_err(CompileError::Resolve)?;
269
270    // 8. Typecheck
271    compiler::typecheck::typecheck(&program, &symbols).map_err(CompileError::Type)?;
272
273    // 9. Validate constraints
274    compiler::constraints::validate_constraints(&program).map_err(CompileError::Constraint)?;
275
276    // 10. Lower to LIR
277    let mut module = compiler::lower::lower(&program, &symbols, source);
278
279    // 11. Merge imported modules
280    for imported_module in imported_modules {
281        module.merge(&imported_module);
282    }
283
284    Ok(module)
285}
286
287/// Compile raw .lm source with access to external modules for import resolution.
288///
289/// The `resolve_import` callback takes a module path (e.g., "mathlib") and returns
290/// the source content of that module if it exists, or None if not found.
291pub fn compile_raw_with_imports(
292    source: &str,
293    resolve_import: &dyn Fn(&str) -> Option<String>,
294) -> Result<LirModule, CompileError> {
295    let mut compilation_stack = HashSet::new();
296    compile_raw_with_imports_internal(source, resolve_import, &mut compilation_stack, None)
297}
298
299/// Internal implementation for raw source compilation with imports
300fn compile_raw_with_imports_internal(
301    source: &str,
302    resolve_import: &dyn Fn(&str) -> Option<String>,
303    compilation_stack: &mut HashSet<String>,
304    _current_module: Option<&str>,
305) -> Result<LirModule, CompileError> {
306    if source.trim().is_empty() {
307        return Ok(LirModule::new("sha256:empty".to_string()));
308    }
309
310    // 1. Lex (start at line 1, offset 0)
311    let mut lexer = compiler::lexer::Lexer::new(source, 1, 0);
312    let tokens = lexer.tokenize()?;
313
314    // 2. Parse (no directives for raw source)
315    let mut parser = compiler::parser::Parser::new(tokens);
316    let (program, parse_errors) = parser.parse_program_with_recovery(vec![]);
317    if !parse_errors.is_empty() {
318        return Err(CompileError::Parse(parse_errors));
319    }
320
321    // 3. Process imports before resolution
322    let mut base_symbols = SymbolTable::new();
323    let mut import_errors = Vec::new();
324    let mut imported_modules: Vec<LirModule> = Vec::new();
325
326    // Collect all imports
327    let imports: Vec<&ImportDecl> = program
328        .items
329        .iter()
330        .filter_map(|item| {
331            if let Item::Import(imp) = item {
332                Some(imp)
333            } else {
334                None
335            }
336        })
337        .collect();
338
339    // Process each import
340    for import in imports {
341        let module_path = import.path.join(".");
342
343        // Check for circular imports
344        if compilation_stack.contains(&module_path) {
345            let chain: Vec<String> = compilation_stack.iter().cloned().collect();
346            let chain_str = format!("{} -> {}", chain.join(" -> "), module_path);
347            import_errors.push(compiler::resolve::ResolveError::CircularImport {
348                module: module_path.clone(),
349                chain: chain_str,
350            });
351            continue;
352        }
353
354        // Resolve the module source
355        let imported_source = match resolve_import(&module_path) {
356            Some(src) => src,
357            None => {
358                import_errors.push(compiler::resolve::ResolveError::ModuleNotFound {
359                    module: module_path.clone(),
360                    line: import.span.line,
361                });
362                continue;
363            }
364        };
365
366        // Track this module in the compilation stack
367        compilation_stack.insert(module_path.clone());
368
369        // Recursively compile the imported module through the markdown pipeline,
370        // which also handles unfenced source.
371        let imported_module = compile_with_imports_internal(
372            &imported_source,
373            resolve_import,
374            compilation_stack,
375            Some(&module_path),
376        )?;
377
378        // Remove from stack after compilation
379        compilation_stack.remove(&module_path);
380
381        // Keep the compiled module for later merging
382        imported_modules.push(imported_module);
383
384        // Extract symbols from the imported module by parsing it as markdown if it has
385        // fenced lumen blocks, otherwise as raw source.
386        let imported_extracted = markdown::extract::extract_blocks(&imported_source);
387        let (imported_code, imported_directives, imported_line, imported_offset) =
388            if imported_extracted.code_blocks.is_empty() {
389                (imported_source.clone(), vec![], 1, 0)
390            } else {
391                let mut code = String::new();
392                let mut first_line = 1;
393                let mut first_offset = 0;
394                for (i, block) in imported_extracted.code_blocks.iter().enumerate() {
395                    if i == 0 {
396                        first_line = block.code_start_line;
397                        first_offset = block.code_offset;
398                    }
399                    if !code.is_empty() {
400                        code.push('\n');
401                    }
402                    code.push_str(&block.code);
403                }
404                let directives: Vec<Directive> = imported_extracted
405                    .directives
406                    .iter()
407                    .map(|d| Directive {
408                        name: d.name.clone(),
409                        value: d.value.clone(),
410                        span: d.span,
411                    })
412                    .collect();
413                (code, directives, first_line, first_offset)
414            };
415
416        let mut imported_lexer =
417            compiler::lexer::Lexer::new(&imported_code, imported_line, imported_offset);
418        if let Ok(imported_tokens) = imported_lexer.tokenize() {
419            let mut imported_parser = compiler::parser::Parser::new(imported_tokens);
420            if let Ok(imported_program) = imported_parser.parse_program(imported_directives) {
421                if let Ok(imported_symbols) = compiler::resolve::resolve(&imported_program) {
422                    // Import the requested symbols
423                    match &import.names {
424                        ImportList::Wildcard => {
425                            // Import all top-level definitions
426                            for (name, info) in imported_symbols.cells {
427                                base_symbols.import_cell(name, info);
428                            }
429                            for (name, info) in imported_symbols.types {
430                                base_symbols.import_type(name, info);
431                            }
432                            for (name, type_expr) in imported_symbols.type_aliases {
433                                base_symbols.import_type_alias(name, type_expr);
434                            }
435                        }
436                        ImportList::Names(names) => {
437                            for import_name in names {
438                                let symbol_name = &import_name.name;
439                                let local_name = import_name.alias.as_ref().unwrap_or(symbol_name);
440
441                                // Try to find the symbol in cells, types, or type aliases
442                                let mut found = false;
443
444                                if let Some(cell_info) = imported_symbols.cells.get(symbol_name) {
445                                    base_symbols.import_cell(local_name.clone(), cell_info.clone());
446                                    found = true;
447                                }
448
449                                if let Some(type_info) = imported_symbols.types.get(symbol_name) {
450                                    base_symbols.import_type(local_name.clone(), type_info.clone());
451                                    found = true;
452                                }
453
454                                if let Some(type_expr) =
455                                    imported_symbols.type_aliases.get(symbol_name)
456                                {
457                                    base_symbols
458                                        .import_type_alias(local_name.clone(), type_expr.clone());
459                                    found = true;
460                                }
461
462                                if !found {
463                                    import_errors.push(
464                                        compiler::resolve::ResolveError::ImportedSymbolNotFound {
465                                            symbol: symbol_name.clone(),
466                                            module: module_path.clone(),
467                                            line: import_name.span.line,
468                                        },
469                                    );
470                                }
471                            }
472                        }
473                    }
474                }
475            }
476        }
477    }
478
479    if !import_errors.is_empty() {
480        return Err(CompileError::Resolve(import_errors));
481    }
482
483    // 4. Resolve with imported symbols pre-populated
484    let symbols = compiler::resolve::resolve_with_base(&program, base_symbols)
485        .map_err(CompileError::Resolve)?;
486
487    // 5. Typecheck
488    compiler::typecheck::typecheck(&program, &symbols).map_err(CompileError::Type)?;
489
490    // 6. Validate constraints
491    compiler::constraints::validate_constraints(&program).map_err(CompileError::Constraint)?;
492
493    // 7. Lower to LIR
494    let mut module = compiler::lower::lower(&program, &symbols, source);
495
496    // 8. Merge imported modules
497    for imported_module in imported_modules {
498        module.merge(&imported_module);
499    }
500
501    Ok(module)
502}
503
504/// Compile a `.lm` raw Lumen source file to a LIR module.
505/// This skips markdown extraction and processes the source directly.
506pub fn compile_raw(source: &str) -> Result<LirModule, CompileError> {
507    if source.trim().is_empty() {
508        return Ok(LirModule::new("sha256:empty".to_string()));
509    }
510
511    // 1. Lex (start at line 1, offset 0)
512    let mut lexer = compiler::lexer::Lexer::new(source, 1, 0);
513    let tokens = lexer.tokenize()?;
514
515    // 2. Parse (no directives for raw source)
516    let mut parser = compiler::parser::Parser::new(tokens);
517    let (program, parse_errors) = parser.parse_program_with_recovery(vec![]);
518    if !parse_errors.is_empty() {
519        return Err(CompileError::Parse(parse_errors));
520    }
521
522    // 3. Resolve
523    let symbols = compiler::resolve::resolve(&program).map_err(CompileError::Resolve)?;
524
525    // 4. Typecheck
526    compiler::typecheck::typecheck(&program, &symbols).map_err(CompileError::Type)?;
527
528    // 5. Validate constraints
529    compiler::constraints::validate_constraints(&program).map_err(CompileError::Constraint)?;
530
531    // 6. Lower to LIR
532    let module = compiler::lower::lower(&program, &symbols, source);
533
534    Ok(module)
535}
536
537pub fn compile(source: &str) -> Result<LirModule, CompileError> {
538    // 1. Extract Markdown blocks
539    let extracted = markdown::extract::extract_blocks(source);
540
541    // 2. Build directives
542    let directives: Vec<Directive> = extracted
543        .directives
544        .iter()
545        .map(|d| Directive {
546            name: d.name.clone(),
547            value: d.value.clone(),
548            span: d.span,
549        })
550        .collect();
551
552    // 3. Concatenate all code blocks preserving line numbers
553    let mut full_code = String::new();
554    let mut current_line = 1;
555
556    for block in extracted.code_blocks.iter() {
557        while current_line < block.code_start_line {
558            full_code.push('\n');
559            current_line += 1;
560        }
561        full_code.push_str(&block.code);
562        let lines_in_block = block.code.chars().filter(|&c| c == '\n').count();
563        current_line += lines_in_block;
564    }
565
566    if full_code.trim().is_empty() {
567        return Ok(LirModule::new("sha256:empty".to_string()));
568    }
569
570    // 4. Lex
571    let mut lexer = compiler::lexer::Lexer::new(&full_code, 1, 0);
572    let tokens = lexer.tokenize()?;
573
574    // 5. Parse
575    let mut parser = compiler::parser::Parser::new(tokens);
576    let (program, parse_errors) = parser.parse_program_with_recovery(directives);
577    if !parse_errors.is_empty() {
578        return Err(CompileError::Parse(parse_errors));
579    }
580
581    // 6. Resolve
582    let symbols = compiler::resolve::resolve(&program).map_err(CompileError::Resolve)?;
583
584    // 7. Typecheck
585    compiler::typecheck::typecheck(&program, &symbols).map_err(CompileError::Type)?;
586
587    // 8. Validate constraints
588    compiler::constraints::validate_constraints(&program).map_err(CompileError::Constraint)?;
589
590    // 9. Lower to LIR
591    let module = compiler::lower::lower(&program, &symbols, source);
592
593    Ok(module)
594}
595
596/// Format a compile error with rich diagnostics (colors, source snippets, suggestions).
597///
598/// This is a convenience function that wraps `diagnostics::format_compile_error`
599/// and renders all diagnostics with ANSI colors for terminal display.
600pub fn format_error(error: &CompileError, source: &str, filename: &str) -> String {
601    diagnostics::format_compile_error(error, source, filename)
602        .iter()
603        .map(|d| d.render_ansi())
604        .collect::<Vec<_>>()
605        .join("\n")
606}
607
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `err` is a parse failure carrying at least three errors.
    fn assert_at_least_three_parse_errors(err: CompileError) {
        match err {
            CompileError::Parse(errors) => {
                assert!(
                    errors.len() >= 3,
                    "expected at least 3 parse errors, got {}",
                    errors.len()
                );
            }
            other => panic!("expected parse errors, got {:?}", other),
        }
    }

    /// A single fenced cell compiles to a module with exactly that cell.
    #[test]
    fn test_compile_simple() {
        let document = r#"# Test

```lumen
cell main() -> Int
  return 42
end
```
"#;
        let lir = compile(document).unwrap();
        assert_eq!(lir.cells.len(), 1);
        assert_eq!(lir.cells[0].name, "main");
    }

    /// A record and a cell declared in separate fenced blocks both end up in the module.
    #[test]
    fn test_compile_with_record() {
        let document = r#"# Test

```lumen
record Point
  x: Int
  y: Int
end
```

```lumen
cell origin() -> Point
  return Point(x: 0, y: 0)
end
```
"#;
        let lir = compile(document).unwrap();
        assert_eq!(lir.types.len(), 1);
        assert_eq!(lir.cells.len(), 1);
    }

    /// Directives, prose and several code blocks together; also checks the module version.
    #[test]
    fn test_compile_full_example() {
        let document = r#"@lumen 1
@package "test"

# Hello World

```lumen
record Greeting
  message: String
end
```

```lumen
cell greet(name: String) -> Greeting
  let msg = "Hello, " + name
  return Greeting(message: msg)
end
```
"#;
        let lir = compile(document).unwrap();
        assert_eq!(lir.types.len(), 1);
        assert_eq!(lir.cells.len(), 1);
        assert_eq!(lir.version, "1.0.0");
    }

    /// The raw pipeline reports several syntax errors from one compile.
    #[test]
    fn test_compile_raw_collects_multiple_parse_errors() {
        let broken = r#"
cell bad1() -> Int
  let x =
  return 1
end

cell bad2(param Int) -> Int
  return param
end

record Broken
  x:
end

cell bad3() -> Int
  return
end
"#;

        let failure = compile_raw(broken).expect_err("expected parse errors");
        assert_at_least_three_parse_errors(failure);
    }

    /// The markdown pipeline reports several syntax errors from one compile.
    #[test]
    fn test_compile_markdown_collects_multiple_parse_errors() {
        let broken = r#"# Broken

```lumen
cell bad1() -> Int
  let x =
  return 1
end

cell bad2(param Int) -> Int
  return param
end

record Broken
  x:
end

cell bad3() -> Int
  return
end
```
"#;

        let failure = compile(broken).expect_err("expected parse errors");
        assert_at_least_three_parse_errors(failure);
    }
}