Skip to main content

aver/codegen/
mod.rs

//! Aver → target language transpilation.
//!
//! The codegen module transforms a type-checked Aver AST into source code
//! for a target language. Current backends: Rust deployment and Lean proof
//! export. Further feature-gated submodules (`dafny`, `wasip2`, `wasm_gc`)
//! are declared below.
5pub(crate) mod builtin_helpers;
6pub(crate) mod builtin_records;
7pub(crate) mod builtins;
8pub mod common;
9#[cfg(feature = "runtime")]
10pub mod dafny;
11#[cfg(feature = "runtime")]
12pub mod lean;
13#[cfg(feature = "runtime")]
14pub mod recursion;
15#[cfg(feature = "runtime")]
16pub mod rust;
17#[cfg(feature = "wasip2")]
18pub mod wasip2;
19#[cfg(feature = "wasm-compile")]
20pub mod wasm_gc;
21
22use std::collections::{HashMap, HashSet};
23
24use crate::ast::{FnDef, TopLevel, TypeDef};
25use crate::types::checker::TypeCheckResult;
26
27/// Information about a dependent module loaded for codegen.
28pub struct ModuleInfo {
29    /// Qualified module path, e.g. "Models.User".
30    pub prefix: String,
31    /// Direct `depends [...]` entries from the source module.
32    pub depends: Vec<String>,
33    /// Type definitions from the module.
34    pub type_defs: Vec<TypeDef>,
35    /// Function definitions from the module (excluding `main`).
36    pub fn_defs: Vec<FnDef>,
37    /// IR-level analysis facts produced by the dep module's pipeline run
38    /// (`analyze` stage). `None` for modules loaded via paths that skip
39    /// the analyze stage (none in production today; left optional for
40    /// future ad-hoc loaders). Aver's module DAG invariant makes per-module
41    /// analysis sufficient — see `project_aver_module_dag` memory and
42    /// `src/ir/analyze.rs` for why cross-module SCCs are impossible.
43    pub analysis: Option<crate::ir::AnalysisResult>,
44}
45
46/// Collected context from the Aver program, shared across all backends.
47pub struct CodegenContext {
48    /// All top-level items (post-TCO transform, post-typecheck).
49    pub items: Vec<TopLevel>,
50    /// Function signatures: name → (param_types, return_type, effects).
51    pub fn_sigs: HashMap<String, (Vec<crate::types::Type>, crate::types::Type, Vec<String>)>,
52    /// Functions eligible for auto-memoization.
53    pub memo_fns: HashSet<String>,
54    /// Set of type names whose values are memo-safe.
55    pub memo_safe_types: HashSet<String>,
56    /// User-defined type definitions (for struct/enum generation).
57    pub type_defs: Vec<TypeDef>,
58    /// User-defined function definitions.
59    pub fn_defs: Vec<FnDef>,
60    /// Project/binary name.
61    pub project_name: String,
62    /// Dependent modules loaded for inlining.
63    pub modules: Vec<ModuleInfo>,
64    /// Set of module prefixes for qualified name resolution (e.g. "Models.User").
65    pub module_prefixes: HashSet<String>,
66    /// Embedded runtime policy from `aver.toml` for generated code.
67    #[cfg(feature = "runtime")]
68    pub policy: Option<crate::config::ProjectConfig>,
69    /// Emit generated scoped runtime support (replay and/or runtime-loaded policy).
70    pub emit_replay_runtime: bool,
71    /// Load runtime policy from the active module root instead of embedding it.
72    pub runtime_policy_from_env: bool,
73    /// Explicit guest entry boundary for scoped replay/policy.
74    pub guest_entry: Option<String>,
75    /// Emit extra generated helpers needed only by the cached self-host helper.
76    pub emit_self_host_support: bool,
77    /// Extra fn_defs visible during current module emission (not in `fn_defs` or `modules`).
78    /// Set temporarily by the Rust backend when emitting a dependent module so that
79    /// `find_fn_def_by_name` can resolve same-module calls.
80    pub extra_fn_defs: Vec<FnDef>,
81    /// Functions that are part of a mutual-TCO SCC group (emitted as trampoline + wrappers).
82    /// Functions NOT in this set but with TailCalls are emitted as plain self-TCO loops.
83    pub mutual_tco_members: HashSet<String>,
84    /// Functions that call themselves directly or transitively. Set-form
85    /// union of `entry_analysis.recursive_fns` plus each module's
86    /// `analysis.recursive_fns`. Used by codegen sites that previously
87    /// called `call_graph::find_recursive_fns` ad-hoc (Lean recursion
88    /// planning, type checker flow, etc.).
89    pub recursive_fns: HashSet<String>,
90    /// Per-fn analysis facts unioned from entry + every dep module's
91    /// `AnalysisResult.fn_analyses`. WASM emitter / VM compiler /
92    /// future inliner read `allocates`, `thin_kind`, `body_shape`,
93    /// `local_count`, etc. from here instead of recomputing.
94    pub fn_analyses: HashMap<String, crate::ir::FnAnalysis>,
95    /// Buffer-build sink fns (`List.prepend`/`reverse` builders consumed
96    /// by `String.join`). The Rust backend emits a `<fn>__buffered`
97    /// variant alongside each entry; the WASM backend rewrites bodies
98    /// to call `rt_buffer_*` helpers. Detection lives in `ir::buffer_build`.
99    pub buffer_build_sinks: HashMap<String, crate::ir::BufferBuildShape>,
100    /// Fusion sites detected for `String.join(<sink>(...), sep)` calls.
101    /// Each entry pairs an enclosing fn + line + sink fn name; the
102    /// emitter rewrites these call expressions to use buffered variants
103    /// in place of the producer + consumer chain.
104    pub buffer_fusion_sites: Vec<crate::ir::FusionSite>,
105    /// Synthesized `<fn>__buffered` variants for every buffer-build
106    /// sink, produced by `ir::synthesize_buffered_variants`. These are
107    /// real `FnDef`s with proper body AST; backends iterate over them
108    /// alongside `fn_defs` so they reach codegen through the same
109    /// pipeline (TCO / no-alloc / mutual-recursion all apply
110    /// identically). Empty when no sinks are detected.
111    pub synthesized_buffered_fns: Vec<FnDef>,
112}
113
/// What a codegen backend hands back: the set of files to write.
pub struct ProjectOutput {
    /// One `(relative_path, content)` pair per file to write.
    pub files: Vec<(String, String)>,
}
119
120/// Build a CodegenContext from parsed + type-checked items.
121///
122/// `entry_analysis` is the `analyze` stage output for `items` (entry
123/// module). When provided, codegen reads `mutual_tco_members`,
124/// `recursive_fns`, and per-fn `FnAnalysis` from it instead of recomputing.
125/// Each `ModuleInfo` in `modules` carries its own per-module analysis;
126/// codegen unions the per-module sets to build a global view (sound
127/// under Aver's module DAG invariant — no cross-module SCCs possible,
128/// see `src/ir/analyze.rs` doc).
129pub fn build_context(
130    items: Vec<TopLevel>,
131    tc_result: &TypeCheckResult,
132    entry_analysis: Option<&crate::ir::AnalysisResult>,
133    memo_fns: HashSet<String>,
134    project_name: String,
135    modules: Vec<ModuleInfo>,
136) -> CodegenContext {
137    let type_defs: Vec<TypeDef> = items
138        .iter()
139        .filter_map(|item| {
140            if let TopLevel::TypeDef(td) = item {
141                Some(td.clone())
142            } else {
143                None
144            }
145        })
146        .collect();
147
148    let fn_defs: Vec<FnDef> = items
149        .iter()
150        .filter_map(|item| {
151            if let TopLevel::FnDef(fd) = item {
152                Some(fd.clone())
153            } else {
154                None
155            }
156        })
157        .collect();
158
159    let module_prefixes: HashSet<String> = modules.iter().map(|m| m.prefix.clone()).collect();
160
161    // Mutual-TCO membership unions per-module sets from the analyze stage
162    // (entry's `entry_analysis` + each dep module's `module.analysis`).
163    // Aver's module DAG invariant guarantees SCCs never span modules, so
164    // a per-module union is the correct global view — see
165    // `project_aver_module_dag` memory and `src/ir/analyze.rs` doc.
166    //
167    // Falls back to ad-hoc `tailcall_scc_components` per module when the
168    // analysis isn't supplied (callers that haven't migrated to the
169    // pipeline). The fallback path will go away once every entry point
170    // runs the canonical pipeline.
171    let mut mutual_tco_members: HashSet<String> = HashSet::new();
172    match entry_analysis {
173        Some(a) => mutual_tco_members.extend(a.mutual_tco_members.iter().cloned()),
174        None => {
175            let entry_fns: Vec<&FnDef> = fn_defs.iter().filter(|fd| fd.name != "main").collect();
176            for group in crate::call_graph::tailcall_scc_components(&entry_fns) {
177                if group.len() < 2 {
178                    continue;
179                }
180                for fd in group {
181                    mutual_tco_members.insert(fd.name.clone());
182                }
183            }
184        }
185    }
186    for module in &modules {
187        match module.analysis.as_ref() {
188            Some(a) => mutual_tco_members.extend(a.mutual_tco_members.iter().cloned()),
189            None => {
190                let mod_fns: Vec<&FnDef> = module.fn_defs.iter().collect();
191                for group in crate::call_graph::tailcall_scc_components(&mod_fns) {
192                    if group.len() < 2 {
193                        continue;
194                    }
195                    for fd in group {
196                        mutual_tco_members.insert(fd.name.clone());
197                    }
198                }
199            }
200        }
201    }
202
203    // Per-fn analysis dictionary — union of entry's `fn_analyses` plus
204    // each dep module's. Codegen reads `allocates`, `thin_kind`, etc.
205    // from here instead of recomputing.
206    let mut fn_analyses: HashMap<String, crate::ir::FnAnalysis> = HashMap::new();
207    if let Some(a) = entry_analysis {
208        for (name, fa) in &a.fn_analyses {
209            fn_analyses.insert(name.clone(), fa.clone());
210        }
211    }
212    for module in &modules {
213        if let Some(a) = module.analysis.as_ref() {
214            for (name, fa) in &a.fn_analyses {
215                fn_analyses
216                    .entry(name.clone())
217                    .or_insert_with(|| fa.clone());
218            }
219        }
220    }
221
222    // `recursive_fns` follows the same shape as `mutual_tco_members` —
223    // per-module sets unioned (Aver's module DAG keeps cross-module
224    // recursion from existing). Falls back to ad-hoc `find_recursive_fns`
225    // when a module's analysis is missing.
226    let mut recursive_fns: HashSet<String> = HashSet::new();
227    match entry_analysis {
228        Some(a) => recursive_fns.extend(a.recursive_fns.iter().cloned()),
229        None => {
230            recursive_fns.extend(crate::call_graph::find_recursive_fns(&items));
231        }
232    }
233    for module in &modules {
234        match module.analysis.as_ref() {
235            Some(a) => recursive_fns.extend(a.recursive_fns.iter().cloned()),
236            None => {
237                let mod_items: Vec<TopLevel> = module
238                    .fn_defs
239                    .iter()
240                    .map(|fd| TopLevel::FnDef(fd.clone()))
241                    .collect();
242                recursive_fns.extend(crate::call_graph::find_recursive_fns(&mod_items));
243            }
244        }
245    }
246
247    // Start with checker's fn_sigs (exposed API), then add signatures for
248    // ALL module functions (including private helpers) via SymbolRegistry.
249    // Codegen emits full module implementations, so it needs signatures for
250    // intra-module calls that the checker intentionally omits.
251    let mut fn_sigs = tc_result.fn_sigs.clone();
252    {
253        let pairs: Vec<(String, Vec<TopLevel>)> = modules
254            .iter()
255            .map(|m| {
256                let items: Vec<TopLevel> = m
257                    .fn_defs
258                    .iter()
259                    .map(|fd| TopLevel::FnDef(fd.clone()))
260                    .chain(m.type_defs.iter().map(|td| TopLevel::TypeDef(td.clone())))
261                    .collect();
262                (m.prefix.clone(), items)
263            })
264            .collect();
265        let registry = crate::visibility::SymbolRegistry::from_modules_all(&pairs);
266        for entry in &registry.entries {
267            if fn_sigs.contains_key(&entry.canonical_name) {
268                continue;
269            }
270            if let crate::visibility::SymbolKind::Function {
271                params,
272                return_type,
273                effects,
274                ..
275            } = &entry.kind
276            {
277                let parsed_params: Vec<crate::types::Type> = params
278                    .iter()
279                    .map(|(_, ty_str)| crate::types::parse_type_str(ty_str))
280                    .collect();
281                let ret = crate::types::parse_type_str(return_type);
282                fn_sigs.insert(
283                    entry.canonical_name.clone(),
284                    (parsed_params, ret, effects.clone()),
285                );
286            }
287        }
288    }
289
290    // Detection layer for buffer-build sinks + fusion sites. The
291    // ACTUAL rewrite + synthesis must happen BEFORE the resolver
292    // pass (callers run it via `ir::run_buffer_build_pass` between
293    // TCO and resolver) — the detector matches on `Expr::Ident`
294    // shapes that resolver later rewrites to `Expr::Resolved`. We
295    // rerun detection here against the final items so the resulting
296    // ctx fields reflect what's actually in the AST. With pre-
297    // resolver pass having already run, sinks/sites should be the
298    // same set (sinks are fns, not call sites; fusion sites were
299    // rewritten away so the post-rewrite count is zero in normal flow).
300    let detect_fns: Vec<&FnDef> = fn_defs
301        .iter()
302        .chain(modules.iter().flat_map(|m| m.fn_defs.iter()))
303        .collect();
304    let buffer_build_sinks = crate::ir::compute_buffer_build_sinks(&detect_fns);
305    let buffer_fusion_sites = crate::ir::find_fusion_sites(&detect_fns, &buffer_build_sinks);
306    // The synthesizer already ran in the pre-resolver compile pass
307    // (`ir::run_buffer_build_pass`); the resulting `<fn>__buffered`
308    // variants live in `items` (or in dep `module.fn_defs`) directly,
309    // so we just collect references for the ctx field instead of
310    // re-synthesizing — re-running here would duplicate every fn
311    // and confuse the WASM emitter's fn_indices table.
312    let synthesized_buffered_fns: Vec<FnDef> = fn_defs
313        .iter()
314        .chain(modules.iter().flat_map(|m| m.fn_defs.iter()))
315        .filter(|fd| fd.name.ends_with("__buffered"))
316        .cloned()
317        .collect();
318    // 0.15 Traversal — register signatures for the four buffer-build
319    // internal intrinsics. Without these in fn_sigs, downstream
320    // `infer_aver_type` on `__buf_append(...)` etc. returns None and
321    // `expr_is_heap_ptr` falls through to false — meaning TCO
322    // compaction doesn't retain the buffer pointer across GC, the
323    // buffer object gets relocated by collect_end, and the next
324    // iteration reads through the stale pointer producing
325    // `memory access out of bounds` traps. Buffer parses to
326    // Type::Named("Buffer") which is_heap_type accepts.
327    {
328        let buffer_ty = || crate::types::Type::Named("Buffer".to_string());
329        let str_ty = || crate::types::Type::Str;
330        let int_ty = || crate::types::Type::Int;
331        let intrinsic_sigs: &[(&str, Vec<crate::types::Type>, crate::types::Type)] = &[
332            ("__buf_new", vec![int_ty()], buffer_ty()),
333            ("__buf_append", vec![buffer_ty(), str_ty()], buffer_ty()),
334            (
335                "__buf_append_sep_unless_first",
336                vec![buffer_ty(), str_ty()],
337                buffer_ty(),
338            ),
339            ("__buf_finalize", vec![buffer_ty()], str_ty()),
340        ];
341        for (name, params, ret) in intrinsic_sigs {
342            fn_sigs.insert(name.to_string(), (params.clone(), ret.clone(), vec![]));
343        }
344    }
345
346    // Inject signatures for synthesized variants into fn_sigs so the
347    // WASM emitter's type-section pass produces correct param/return
348    // wasm types (the fallback path emits `all-i64` which breaks
349    // validation when a body calls intrinsics with i32 buffer ptrs).
350    for fd in synthesized_buffered_fns.iter() {
351        if fn_sigs.contains_key(&fd.name) {
352            continue;
353        }
354        let param_types: Vec<crate::types::Type> = fd
355            .params
356            .iter()
357            .map(|(_, ty_str)| crate::types::parse_type_str(ty_str))
358            .collect();
359        let ret = crate::types::parse_type_str(&fd.return_type);
360        fn_sigs.insert(
361            fd.name.clone(),
362            (
363                param_types,
364                ret,
365                fd.effects.iter().map(|e| e.node.clone()).collect(),
366            ),
367        );
368    }
369
370    CodegenContext {
371        items,
372        fn_sigs,
373        memo_fns,
374        memo_safe_types: tc_result.memo_safe_types.clone(),
375        type_defs,
376        fn_defs,
377        project_name,
378        modules,
379        module_prefixes,
380        #[cfg(feature = "runtime")]
381        policy: None,
382        emit_replay_runtime: false,
383        runtime_policy_from_env: false,
384        guest_entry: None,
385        emit_self_host_support: false,
386        extra_fn_defs: Vec::new(),
387        mutual_tco_members,
388        recursive_fns,
389        fn_analyses,
390        buffer_build_sinks,
391        buffer_fusion_sites,
392        synthesized_buffered_fns,
393    }
394}
395
396impl CodegenContext {
397    /// Recompute `mutual_tco_members` and `recursive_fns` from current
398    /// `items` + `modules`. Used by test helpers that build the context
399    /// piecewise (push items in-place, bypass `build_context`) so the
400    /// derived sets stay in sync. Idempotent — production callers go
401    /// through `build_context`, where these are already populated from
402    /// the analyze stage; calling `refresh_facts` again is a no-op for
403    /// them (computes the same answer).
404    pub fn refresh_facts(&mut self) {
405        let entry_fn_refs: Vec<&FnDef> =
406            self.fn_defs.iter().filter(|fd| fd.name != "main").collect();
407
408        let mut mutual_tco_members: HashSet<String> = HashSet::new();
409        for group in crate::call_graph::tailcall_scc_components(&entry_fn_refs) {
410            if group.len() < 2 {
411                continue;
412            }
413            for fd in group {
414                mutual_tco_members.insert(fd.name.clone());
415            }
416        }
417        for module in &self.modules {
418            let mod_fns: Vec<&FnDef> = module.fn_defs.iter().collect();
419            for group in crate::call_graph::tailcall_scc_components(&mod_fns) {
420                if group.len() < 2 {
421                    continue;
422                }
423                for fd in group {
424                    mutual_tco_members.insert(fd.name.clone());
425                }
426            }
427        }
428        self.mutual_tco_members = mutual_tco_members;
429
430        let mut recursive_fns: HashSet<String> = crate::call_graph::find_recursive_fns(&self.items);
431        for module in &self.modules {
432            let mod_items: Vec<TopLevel> = module
433                .fn_defs
434                .iter()
435                .map(|fd| TopLevel::FnDef(fd.clone()))
436                .collect();
437            recursive_fns.extend(crate::call_graph::find_recursive_fns(&mod_items));
438        }
439        self.recursive_fns = recursive_fns;
440    }
441}