Skip to main content

mimium_lang/ast/
program.rs

1use std::collections::{HashMap, HashSet};
2use std::path::{Path, PathBuf};
3
4use serde::{Deserialize, Serialize};
5
6use super::resolve_include::resolve_include;
7use super::statement::Statement;
8use crate::ast::Expr;
9use crate::ast::statement::into_then_expr;
10use crate::interner::{ExprNodeId, Symbol, ToSymbol, TypeNodeId};
11use crate::pattern::TypedId;
12use crate::types::{PType, RecordTypeField, Type};
13use crate::utils::error::{ReportableError, SimpleError};
14use crate::utils::metadata::{Location, Span};
15
16use super::StageKind;
17
/// Visibility modifier for module members.
///
/// `Private` is the `Default` value of this enum.
#[derive(Clone, Debug, PartialEq, Default, Serialize, Deserialize)]
pub enum Visibility {
    /// Not exported. This is the default variant.
    #[default]
    Private,
    /// Exported. Items with this visibility are recorded as `true` in
    /// `ModuleInfo::visibility_map`.
    Public,
}
25
26/// Qualified path for module references (e.g., modA::modB::func)
27#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
28pub struct QualifiedPath {
29    pub segments: Vec<Symbol>,
30}
31
32impl QualifiedPath {
33    pub fn new(segments: Vec<Symbol>) -> Self {
34        Self { segments }
35    }
36
37    pub fn single(name: Symbol) -> Self {
38        Self {
39            segments: vec![name],
40        }
41    }
42}
43
/// Target of a use statement: single symbol, multiple symbols, or wildcard.
///
/// The target is interpreted together with the `path` of the enclosing
/// `ProgramStatement::UseStatement`.
#[derive(Clone, Debug, PartialEq)]
pub enum UseTarget {
    /// Single import: `use foo::bar`.
    /// The imported name is the last segment of the statement's path.
    Single,
    /// Multiple imports: `use foo::{bar, baz}`.
    /// Each listed name is appended to the statement's path to form the full item path.
    Multiple(Vec<Symbol>),
    /// Wildcard import: `use foo::*`
    Wildcard,
}
54
55/// A variant definition for user-defined sum types
56#[derive(Clone, Debug, PartialEq)]
57pub struct VariantDef {
58    /// The name of the variant constructor
59    pub name: Symbol,
60    /// Optional payload type for the variant
61    pub payload: Option<TypeNodeId>,
62}
63
64impl VariantDef {
65    pub fn new(name: Symbol, payload: Option<TypeNodeId>) -> Self {
66        Self { name, payload }
67    }
68}
69
/// A top-level (or module-level) statement of a parsed program, before it is
/// flattened into `Statement`s by `stmts_from_program_with_prefix`.
#[derive(Clone, Debug, PartialEq)]
pub enum ProgramStatement {
    /// Function definition. Lowered to a `Statement::LetRec` binding the
    /// (possibly module-mangled) name to a lambda.
    FnDefinition {
        visibility: Visibility,
        name: Symbol,
        /// Parameters, plus the location attached to the synthesized argument type.
        args: (Vec<TypedId>, Location),
        /// Declared return type; `None` is lowered to `Type::Unknown`.
        return_type: Option<TypeNodeId>,
        body: ExprNodeId,
    },
    /// Stage switch. Lowered to `Statement::DeclareStage` and remembered as the
    /// stage to restore after a following module/use expansion.
    StageDeclaration {
        stage: StageKind,
    },
    /// An ordinary statement, passed through unchanged.
    GlobalStatement(Statement),
    /// File inclusion; the symbol is the file name handed to `resolve_include`.
    Import(Symbol),
    /// Module definition: mod name { ... } (inline) or mod name; (external file)
    ModuleDefinition {
        visibility: Visibility,
        name: Symbol,
        /// Body of the module. None means external file module (mod foo;)
        body: Option<Vec<(ProgramStatement, Span)>>,
    },
    /// Use statement: `use path::to::item`, `use path::{a, b}`, or `use path::*`
    /// Can be prefixed with `pub` for re-exporting
    UseStatement {
        /// Visibility of the re-export (pub use for re-exporting)
        visibility: Visibility,
        /// The base path (for `use foo::bar`, this is `[foo, bar]`; for `use foo::{a, b}`, this is `[foo]`)
        path: QualifiedPath,
        /// The import target type
        target: UseTarget,
    },
    /// Type alias: type Alias = BaseType
    TypeAlias {
        visibility: Visibility,
        name: Symbol,
        target_type: crate::interner::TypeNodeId,
    },
    /// Type declaration: type Name = Variant1 | Variant2 | ...
    TypeDeclaration {
        visibility: Visibility,
        name: Symbol,
        variants: Vec<VariantDef>,
        /// Whether this type was declared with `type rec` (allows recursive references)
        is_recursive: bool,
    },
    /// Comment; produces no statement when the program is flattened.
    Comment(Symbol),
    /// Documentation comment; produces no statement when the program is flattened.
    DocComment(Symbol),
    /// Placeholder for an erroneous statement. Lowered to `Statement::Error`.
    Error,
}
119
/// Information about a type declaration, including its variants and recursion flag.
/// Stored in `ModuleInfo::type_declarations` for each `ProgramStatement::TypeDeclaration`.
#[derive(Clone, Debug, PartialEq)]
pub struct TypeDeclInfo {
    /// The variants of the declared sum type, as written in the declaration.
    pub variants: Vec<VariantDef>,
    /// Whether this type was declared with `type rec` (allows recursive references)
    pub is_recursive: bool,
}
127
/// Map from type name to type declaration info.
/// Keys are mangled with the enclosing module prefix (e.g. `foo$MyType`).
pub type TypeDeclarationMap = HashMap<Symbol, TypeDeclInfo>;
/// Map from type alias name to target type.
/// Keys are mangled with the enclosing module prefix (e.g. `foo$MyAlias`).
pub type TypeAliasMap = HashMap<Symbol, crate::interner::TypeNodeId>;
132
/// A parsed program: the list of program-level statements in source order.
#[derive(Clone, Debug, PartialEq, Default)]
pub struct Program {
    /// Each statement paired with its source span.
    pub statements: Vec<(ProgramStatement, Span)>,
}
137
138/// Convert a qualified path to a mangled symbol name.
139/// For example, `foo::bar::baz` becomes `foo$bar$baz`.
140fn mangle_qualified_name(prefix: &[Symbol], name: Symbol) -> Symbol {
141    use crate::interner::ToSymbol;
142    if prefix.is_empty() {
143        name
144    } else {
145        let path_str = prefix
146            .iter()
147            .map(|s| s.as_str())
148            .collect::<Vec<_>>()
149            .join("$");
150        format!("{}${}", path_str, name.as_str()).to_symbol()
151    }
152}
153
154fn is_reserved_type_param_name(name: Symbol) -> bool {
155    let s = name.as_str();
156    s.len() == 1 && s.as_bytes()[0].is_ascii_lowercase()
157}
158
159/// Convert a full qualified path (all segments) to a mangled symbol name.
160/// For example, `[foo, bar, baz]` becomes `foo$bar$baz`.
161fn mangle_qualified_path(segments: &[Symbol]) -> Symbol {
162    use crate::interner::ToSymbol;
163    segments
164        .iter()
165        .map(|s| s.as_str())
166        .collect::<Vec<_>>()
167        .join("$")
168        .to_symbol()
169}
170
171/// Resolve an external file module (`mod foo;` syntax).
172/// Looks for `{name}.mmm` in the same directory as the current file.
173fn resolve_external_module(
174    name: Symbol,
175    file_path: &Path,
176    span: Span,
177    errs: &mut Vec<Box<dyn ReportableError>>,
178    module_prefix: &[Symbol],
179    module_info: &mut ModuleInfo,
180) -> Vec<(Statement, Location)> {
181    let module_filename = format!("{}.mmm", name.as_str());
182    let (imported, mut new_errs) =
183        resolve_include(file_path.to_str().unwrap(), &module_filename, span);
184    errs.append(&mut new_errs);
185
186    // Process imported program with the module prefix
187    stmts_from_program_with_prefix(
188        imported.program.statements,
189        imported.resolved_path,
190        errs,
191        module_prefix,
192        module_info,
193    )
194}
195
/// Map from mangled symbol name to whether it's public.
/// Entries are recorded for functions, type aliases and type declarations at
/// every nesting level (top-level definitions included), and for names
/// re-exported through `pub use`.
pub type VisibilityMap = HashMap<Symbol, bool>;

/// Map from alias name to the mangled name it refers to.
/// Created from `use` statements, e.g., `use foo::bar` creates `bar -> foo$bar`.
/// A `pub use` additionally adds an entry keyed by the alias mangled with the
/// current module prefix.
pub type UseAliasMap = HashMap<Symbol, Symbol>;

/// Map from a name defined inside a module to its module context (prefix).
/// Keys are mangled names for functions, type aliases and type declarations;
/// bindings introduced by global statements inside a module are keyed by the
/// name exactly as collected from the statement.
/// Used for relative path resolution within modules.
pub type ModuleContextMap = HashMap<Symbol, Vec<Symbol>>;
207
/// Module-related information collected while a `Program` is flattened into statements.
/// Contains visibility information, use aliases, module contexts and type definitions.
#[derive(Clone, Debug, Default)]
pub struct ModuleInfo {
    /// Map from mangled symbol name to whether it's public.
    /// Filled for functions, type aliases and type declarations (top-level ones
    /// included) and for names re-exported with `pub use`.
    pub visibility_map: VisibilityMap,
    /// Map from alias name to mangled name (from use statements)
    pub use_alias_map: UseAliasMap,
    /// Map from a module member's name to its module context (for relative path resolution).
    /// Only filled for items defined under a non-empty module prefix.
    pub module_context_map: ModuleContextMap,
    /// List of wildcard import base paths (e.g., `use foo::*` stores "foo")
    pub wildcard_imports: Vec<Symbol>,
    /// Type declarations for user-defined sum types
    pub type_declarations: TypeDeclarationMap,
    /// Type aliases for simple type aliases
    pub type_aliases: TypeAliasMap,
    /// Mangled names of modules that have already been processed. Every `mod`
    /// definition (inline or external) is recorded here, as is every module
    /// loaded on demand for a `use` statement, so that `use` does not load the
    /// same module a second time.
    pub loaded_external_modules: HashSet<Symbol>,
}

impl ModuleInfo {
    /// Create an empty `ModuleInfo` (equivalent to `ModuleInfo::default()`).
    pub fn new() -> Self {
        Self::default()
    }
}
233
234/// Resolve a qualified path, trying relative resolution from current module context.
235/// Returns `(mangled_name, resolved_path_segments)`.
236///
237/// For example, if current context is `[outer]` and path is `[inner, secret]`:
238/// 1. First tries `inner$secret` (absolute path)
239/// 2. If not found, tries `outer$inner$secret` (relative path from current module)
240///
241/// The `exists` closure should return `true` if the given mangled name exists in the environment.
242pub fn resolve_qualified_path<F>(
243    path_segments: &[Symbol],
244    absolute_mangled: Symbol,
245    current_module_context: &[Symbol],
246    exists: F,
247) -> (Symbol, Vec<Symbol>)
248where
249    F: Fn(&Symbol) -> bool,
250{
251    // First, try the absolute path
252    if exists(&absolute_mangled) {
253        return (absolute_mangled, path_segments.to_vec());
254    }
255
256    // If not found and we have a module context, try relative path
257    if !current_module_context.is_empty() {
258        // Build the relative path: context + path_segments
259        let mut relative_path = current_module_context.to_vec();
260        relative_path.extend(path_segments.iter().copied());
261
262        let relative_mangled = relative_path
263            .iter()
264            .map(|s| s.as_str())
265            .collect::<Vec<_>>()
266            .join("$")
267            .to_symbol();
268
269        if exists(&relative_mangled) {
270            return (relative_mangled, relative_path);
271        }
272    }
273
274    // Return absolute path if relative resolution failed
275    (absolute_mangled, path_segments.to_vec())
276}
277
278fn stmts_from_program(
279    program: Program,
280    file_path: PathBuf,
281    errs: &mut Vec<Box<dyn ReportableError>>,
282    module_info: &mut ModuleInfo,
283) -> Vec<(Statement, Location)> {
284    stmts_from_program_with_prefix(program.statements, file_path, errs, &[], module_info)
285}
286
287fn stmts_from_program_with_prefix(
288    statements: Vec<(ProgramStatement, Span)>,
289    file_path: PathBuf,
290    errs: &mut Vec<Box<dyn ReportableError>>,
291    module_prefix: &[Symbol],
292    module_info: &mut ModuleInfo,
293) -> Vec<(Statement, Location)> {
294    // Track the current stage so that module/use wrappers can restore it correctly.
295    let mut current_stage = StageKind::Main;
296    let mut result = Vec::new();
297
298    for (stmt, span) in statements {
299        let stmts: Option<Vec<(Statement, Location)>> = match stmt {
300            ProgramStatement::FnDefinition {
301                visibility,
302                name,
303                args,
304                return_type,
305                body,
306            } => {
307                let loc = Location::new(span, file_path.clone());
308                let argloc = args.1.clone();
309                let argsty = args
310                    .clone()
311                    .0
312                    .into_iter()
313                    .map(RecordTypeField::from)
314                    .collect::<Vec<_>>();
315                let argty = match argsty.as_slice() {
316                    [] => Type::Primitive(PType::Unit).into_id_with_location(argloc.clone()),
317                    [arg] => arg.ty,
318                    _ => Type::Record(argsty).into_id_with_location(argloc),
319                };
320                let fnty = Type::Function {
321                    arg: argty,
322                    ret: return_type.unwrap_or(Type::Unknown.into_id_with_location(loc.clone())),
323                }
324                .into_id_with_location(loc.clone());
325                // Use mangled name if inside a module
326                let mangled_name = mangle_qualified_name(module_prefix, name);
327                // Track visibility for all functions (both module members and top-level)
328                module_info
329                    .visibility_map
330                    .insert(mangled_name, visibility == Visibility::Public);
331                // Track module context for relative path resolution
332                if !module_prefix.is_empty() {
333                    module_info
334                        .module_context_map
335                        .insert(mangled_name, module_prefix.to_vec());
336                }
337                Some(vec![(
338                    Statement::LetRec(
339                        TypedId::new(mangled_name, fnty),
340                        Expr::Lambda(args.0, return_type, body).into_id(loc.clone()),
341                    ),
342                    loc,
343                )])
344            }
345            ProgramStatement::GlobalStatement(statement) => {
346                if !module_prefix.is_empty() {
347                    collect_statement_bindings(&statement)
348                        .into_iter()
349                        .for_each(|name| {
350                            module_info
351                                .module_context_map
352                                .insert(name, module_prefix.to_vec());
353                        });
354                }
355                Some(vec![(statement, Location::new(span, file_path.clone()))])
356            }
357            ProgramStatement::Comment(_) | ProgramStatement::DocComment(_) => None,
358            ProgramStatement::Import(filename) => {
359                let (imported, mut new_errs) =
360                    resolve_include(file_path.to_str().unwrap(), filename.as_str(), span.clone());
361                errs.append(&mut new_errs);
362                let res =
363                    stmts_from_program(imported.program, imported.resolved_path, errs, module_info);
364                Some(res)
365            }
366            ProgramStatement::StageDeclaration { stage } => {
367                current_stage = stage.clone();
368                Some(vec![(
369                    Statement::DeclareStage(stage),
370                    Location::new(span, file_path.clone()),
371                )])
372            }
373            ProgramStatement::ModuleDefinition {
374                visibility: _,
375                name,
376                body,
377            } => {
378                let module_symbol = mangle_qualified_name(module_prefix, name);
379                module_info.loaded_external_modules.insert(module_symbol);
380
381                // Flatten module contents with qualified names
382                let mut new_prefix = module_prefix.to_vec();
383                new_prefix.push(name);
384
385                let inner_stmts = match body {
386                    Some(inline_body) => {
387                        // Inline module: mod foo { ... }
388                        stmts_from_program_with_prefix(
389                            inline_body,
390                            file_path.clone(),
391                            errs,
392                            &new_prefix,
393                            module_info,
394                        )
395                    }
396                    None => {
397                        // External file module: mod foo;
398                        resolve_external_module(
399                            name,
400                            &file_path,
401                            span.clone(),
402                            errs,
403                            &new_prefix,
404                            module_info,
405                        )
406                    }
407                };
408
409                // Wrap module contents with stage boundary:
410                // - Start with #stage(main) to isolate module from consumer's stage context
411                // - End with the consumer's current stage to restore it after module processing
412                let module_loc = Location::new(span, file_path.clone());
413                let start_decl = (Statement::DeclareStage(StageKind::Main), module_loc.clone());
414                let restore_decl = (Statement::DeclareStage(current_stage.clone()), module_loc);
415                let result = [vec![start_decl], inner_stmts, vec![restore_decl]].concat();
416
417                Some(result)
418            }
419            ProgramStatement::UseStatement {
420                visibility,
421                path,
422                target,
423            } => {
424                let imported_stmts = if let Some(base_module) = path.segments.first().copied() {
425                    let local_module_symbol = mangle_qualified_name(module_prefix, base_module);
426                    let absolute_module_symbol = mangle_qualified_name(&[], base_module);
427                    if module_info
428                        .loaded_external_modules
429                        .contains(&local_module_symbol)
430                        || module_info
431                            .loaded_external_modules
432                            .contains(&absolute_module_symbol)
433                    {
434                        vec![]
435                    } else {
436                        module_info
437                            .loaded_external_modules
438                            .insert(absolute_module_symbol);
439                        let new_prefix = vec![base_module];
440                        let inner_stmts = resolve_external_module(
441                            base_module,
442                            &file_path,
443                            span.clone(),
444                            errs,
445                            &new_prefix,
446                            module_info,
447                        );
448                        let module_loc = Location::new(span.clone(), file_path.clone());
449                        let start_decl =
450                            (Statement::DeclareStage(StageKind::Main), module_loc.clone());
451                        let restore_decl =
452                            (Statement::DeclareStage(current_stage.clone()), module_loc);
453                        [vec![start_decl], inner_stmts, vec![restore_decl]].concat()
454                    }
455                } else {
456                    vec![]
457                };
458
459                process_use_statement(&visibility, &path, &target, module_prefix, module_info);
460                (!imported_stmts.is_empty()).then_some(imported_stmts)
461            }
462            ProgramStatement::TypeAlias {
463                visibility,
464                name,
465                target_type,
466            } => {
467                if is_reserved_type_param_name(name) {
468                    errs.push(Box::new(SimpleError {
469                        message: format!(
470                            "type name '{}' is reserved for explicit type parameters (single lowercase letter)",
471                            name.as_str()
472                        ),
473                        span: Location::new(span.clone(), file_path.clone()),
474                    }));
475                    continue;
476                }
477                // Store type alias for later use in type environment
478                let mangled_name = mangle_qualified_name(module_prefix, name);
479                module_info.type_aliases.insert(mangled_name, target_type);
480                // Track visibility for all type aliases (both module members and top-level)
481                module_info
482                    .visibility_map
483                    .insert(mangled_name, visibility == Visibility::Public);
484                // Track module context for relative path resolution
485                if !module_prefix.is_empty() {
486                    module_info
487                        .module_context_map
488                        .insert(mangled_name, module_prefix.to_vec());
489                }
490                None
491            }
492            ProgramStatement::TypeDeclaration {
493                visibility,
494                name,
495                variants,
496                is_recursive,
497            } => {
498                if is_reserved_type_param_name(name) {
499                    errs.push(Box::new(SimpleError {
500                        message: format!(
501                            "type name '{}' is reserved for explicit type parameters (single lowercase letter)",
502                            name.as_str()
503                        ),
504                        span: Location::new(span.clone(), file_path.clone()),
505                    }));
506                    continue;
507                }
508                // Store type declaration for later use in type environment
509                let mangled_name = mangle_qualified_name(module_prefix, name);
510                module_info.type_declarations.insert(
511                    mangled_name,
512                    TypeDeclInfo {
513                        variants,
514                        is_recursive,
515                    },
516                );
517                // Track visibility for type declarations
518                module_info
519                    .visibility_map
520                    .insert(mangled_name, visibility == Visibility::Public);
521                if !module_prefix.is_empty() {
522                    // Track module context for relative path resolution
523                    module_info
524                        .module_context_map
525                        .insert(mangled_name, module_prefix.to_vec());
526                }
527                None
528            }
529            ProgramStatement::Error => Some(vec![(
530                Statement::Error,
531                Location::new(span, file_path.clone()),
532            )]),
533        };
534        if let Some(stmts) = stmts {
535            result.extend(stmts);
536        }
537    }
538    result
539}
540
541fn collect_pattern_bindings(pat: &crate::pattern::Pattern, out: &mut Vec<Symbol>) {
542    match pat {
543        crate::pattern::Pattern::Single(name) => out.push(*name),
544        crate::pattern::Pattern::Tuple(items) => {
545            items.iter().for_each(|p| collect_pattern_bindings(p, out));
546        }
547        crate::pattern::Pattern::Record(fields) => {
548            fields
549                .iter()
550                .for_each(|(_, p)| collect_pattern_bindings(p, out));
551        }
552        crate::pattern::Pattern::Placeholder | crate::pattern::Pattern::Error => {}
553    }
554}
555
556fn collect_statement_bindings(stmt: &Statement) -> Vec<Symbol> {
557    match stmt {
558        Statement::Let(typed_pat, _) => {
559            let mut symbols = vec![];
560            collect_pattern_bindings(&typed_pat.pat, &mut symbols);
561            symbols
562        }
563        Statement::LetRec(id, _) => vec![id.id],
564        Statement::Single(expr) => collect_expr_bindings(*expr),
565        _ => vec![],
566    }
567}
568
569fn collect_expr_bindings(expr: ExprNodeId) -> Vec<Symbol> {
570    match expr.to_expr() {
571        Expr::Let(typed_pat, _body, then_opt) => {
572            let mut symbols = vec![];
573            collect_pattern_bindings(&typed_pat.pat, &mut symbols);
574            if let Some(then) = then_opt {
575                symbols.extend(collect_expr_bindings(then));
576            }
577            symbols
578        }
579        Expr::LetRec(id, _body, then_opt) => {
580            let mut symbols = vec![id.id];
581            if let Some(then) = then_opt {
582                symbols.extend(collect_expr_bindings(then));
583            }
584            symbols
585        }
586        _ => vec![],
587    }
588}
589
590/// Process a use statement, registering aliases in module_info.
591/// - Single: `use foo::bar` → alias `bar` → `foo$bar`
592/// - Multiple: `use foo::{a, b}` → alias `a` → `foo$a`, `b` → `foo$b`
593/// - Wildcard: `use foo::*` → import all public members from foo
594/// If visibility is Public, the imported names are re-exported as public.
595fn process_use_statement(
596    visibility: &Visibility,
597    path: &QualifiedPath,
598    target: &UseTarget,
599    module_prefix: &[Symbol],
600    module_info: &mut ModuleInfo,
601) {
602    let resolve_use_mangled = |segments: &[Symbol], info: &ModuleInfo| {
603        let absolute_mangled = mangle_qualified_path(segments);
604        let (resolved, _) =
605            resolve_qualified_path(segments, absolute_mangled, module_prefix, |name| {
606                info.visibility_map.contains_key(name)
607                    || info.use_alias_map.contains_key(name)
608                    || info.module_context_map.contains_key(name)
609                    || info.type_aliases.contains_key(name)
610                    || info.type_declarations.contains_key(name)
611            });
612        resolved
613    };
614
615    fn register_alias(
616        module_info: &mut ModuleInfo,
617        visibility: &Visibility,
618        module_prefix: &[Symbol],
619        alias_name: Symbol,
620        mangled: Symbol,
621    ) {
622        module_info.use_alias_map.insert(alias_name, mangled);
623
624        if *visibility == Visibility::Public {
625            let exported_name = mangle_qualified_name(module_prefix, alias_name);
626            module_info.visibility_map.insert(exported_name, true);
627            module_info.use_alias_map.insert(exported_name, mangled);
628        }
629    }
630
631    match target {
632        UseTarget::Single => {
633            // use foo::bar creates an alias: bar -> foo$bar
634            if let Some(alias_name) = path.segments.last().copied() {
635                let mangled = resolve_use_mangled(&path.segments, module_info);
636                register_alias(module_info, visibility, module_prefix, alias_name, mangled);
637            }
638        }
639        UseTarget::Multiple(names) => {
640            // use foo::{bar, baz} creates:
641            //   bar -> foo$bar
642            //   baz -> foo$baz
643            for name in names {
644                let mut full_path = path.segments.clone();
645                full_path.push(*name);
646                let mangled = resolve_use_mangled(&full_path, module_info);
647                register_alias(module_info, visibility, module_prefix, *name, mangled);
648            }
649        }
650        UseTarget::Wildcard => {
651            // use foo::* imports all public members from foo
652            // We need to defer this until we know all the public members
653            // For now, store the base path for later resolution
654            let base_mangled = if path.segments.is_empty() {
655                // use ::* at module level means import from parent (current prefix)
656                module_prefix
657                    .iter()
658                    .map(|s| s.as_str())
659                    .collect::<Vec<_>>()
660                    .join("$")
661            } else {
662                mangle_qualified_path(&path.segments).as_str().to_string()
663            };
664
665            // Store wildcard import for later resolution
666            // The key is the base path (e.g., "foo"), value is the prefix for resolving members
667            module_info.wildcard_imports.push(base_mangled.to_symbol());
668
669            // Note: For pub use foo::*, we can't know all exported names at this point.
670            // Wildcard re-exports would require a second pass or runtime resolution.
671            // For now, wildcard imports with pub are stored but individual re-exports
672            // need to be resolved later when the symbols are actually accessed.
673        }
674    }
675}
676
677pub(crate) fn expr_from_program(
678    program: Program,
679    file_path: PathBuf,
680) -> (ExprNodeId, ModuleInfo, Vec<Box<dyn ReportableError>>) {
681    let mut errs = vec![];
682    let mut module_info = ModuleInfo::new();
683    let stmts = stmts_from_program(program, file_path.clone(), &mut errs, &mut module_info);
684
685    let res = into_then_expr(stmts.as_slice()).unwrap_or(Expr::Error.into_id_without_span());
686
687    (res, module_info, errs)
688}