aver/codegen/mod.rs
//! Aver → target language transpilation.
//!
//! The codegen module transforms a type-checked Aver AST into source code
//! for a target language. Current backends: Rust deployment, Lean and Dafny
//! proof export, and (behind `wasm-compile`) WASM compilation.
5pub(crate) mod builtin_helpers;
6pub(crate) mod builtin_records;
7pub(crate) mod builtins;
8pub mod common;
9#[cfg(feature = "runtime")]
10pub mod dafny;
11#[cfg(feature = "runtime")]
12pub mod lean;
13#[cfg(feature = "runtime")]
14pub mod recursion;
15#[cfg(feature = "runtime")]
16pub mod rust;
17#[cfg(feature = "wasm-compile")]
18pub mod wasm;
19
20use std::collections::{HashMap, HashSet};
21
22use crate::ast::{FnDef, TopLevel, TypeDef};
23use crate::types::checker::TypeCheckResult;
24
/// Information about a dependent module loaded for codegen.
///
/// One `ModuleInfo` is built per `depends` entry of the entry module;
/// `build_context` unions the per-module facts (analysis sets, fn
/// signatures) into the global `CodegenContext` view.
pub struct ModuleInfo {
    /// Qualified module path, e.g. "Models.User".
    pub prefix: String,
    /// Direct `depends [...]` entries from the source module.
    pub depends: Vec<String>,
    /// Type definitions from the module.
    pub type_defs: Vec<TypeDef>,
    /// Function definitions from the module (excluding `main`).
    pub fn_defs: Vec<FnDef>,
    /// IR-level analysis facts produced by the dep module's pipeline run
    /// (`analyze` stage). `None` for modules loaded via paths that skip
    /// the analyze stage (none in production today; left optional for
    /// future ad-hoc loaders). Aver's module DAG invariant makes per-module
    /// analysis sufficient — see `project_aver_module_dag` memory and
    /// `src/ir/analyze.rs` for why cross-module SCCs are impossible.
    pub analysis: Option<crate::ir::AnalysisResult>,
}
43
/// Collected context from the Aver program, shared across all backends.
///
/// Built once by `build_context` and handed to each backend (Rust, Lean,
/// Dafny, WASM). Fields are a mix of raw AST views (`items`, `fn_defs`,
/// `type_defs`), derived analysis facts (`mutual_tco_members`,
/// `recursive_fns`, `fn_analyses`, buffer-build fields), and backend
/// configuration flags (`policy`, `emit_replay_runtime`, ...).
pub struct CodegenContext {
    /// All top-level items (post-TCO transform, post-typecheck).
    pub items: Vec<TopLevel>,
    /// Function signatures: name → (param_types, return_type, effects).
    /// Seeded from the checker's exposed API, then extended with every
    /// module-private fn and the buffer-build intrinsics (see `build_context`).
    pub fn_sigs: HashMap<String, (Vec<crate::types::Type>, crate::types::Type, Vec<String>)>,
    /// Functions eligible for auto-memoization.
    pub memo_fns: HashSet<String>,
    /// Set of type names whose values are memo-safe.
    pub memo_safe_types: HashSet<String>,
    /// User-defined type definitions (for struct/enum generation).
    pub type_defs: Vec<TypeDef>,
    /// User-defined function definitions.
    pub fn_defs: Vec<FnDef>,
    /// Project/binary name.
    pub project_name: String,
    /// Dependent modules loaded for inlining.
    pub modules: Vec<ModuleInfo>,
    /// Set of module prefixes for qualified name resolution (e.g. "Models.User").
    pub module_prefixes: HashSet<String>,
    /// Embedded runtime policy from `aver.toml` for generated code.
    #[cfg(feature = "runtime")]
    pub policy: Option<crate::config::ProjectConfig>,
    /// Emit generated scoped runtime support (replay and/or runtime-loaded policy).
    pub emit_replay_runtime: bool,
    /// Load runtime policy from the active module root instead of embedding it.
    pub runtime_policy_from_env: bool,
    /// Explicit guest entry boundary for scoped replay/policy.
    pub guest_entry: Option<String>,
    /// Emit extra generated helpers needed only by the cached self-host helper.
    pub emit_self_host_support: bool,
    /// Extra fn_defs visible during current module emission (not in `fn_defs` or `modules`).
    /// Set temporarily by the Rust backend when emitting a dependent module so that
    /// `find_fn_def_by_name` can resolve same-module calls.
    pub extra_fn_defs: Vec<FnDef>,
    /// Functions that are part of a mutual-TCO SCC group (emitted as trampoline + wrappers).
    /// Functions NOT in this set but with TailCalls are emitted as plain self-TCO loops.
    pub mutual_tco_members: HashSet<String>,
    /// Functions that call themselves directly or transitively. Set-form
    /// union of `entry_analysis.recursive_fns` plus each module's
    /// `analysis.recursive_fns`. Used by codegen sites that previously
    /// called `call_graph::find_recursive_fns` ad-hoc (Lean recursion
    /// planning, type checker flow, etc.).
    pub recursive_fns: HashSet<String>,
    /// Per-fn analysis facts unioned from entry + every dep module's
    /// `AnalysisResult.fn_analyses`. WASM emitter / VM compiler /
    /// future inliner read `allocates`, `thin_kind`, `body_shape`,
    /// `local_count`, etc. from here instead of recomputing.
    pub fn_analyses: HashMap<String, crate::ir::FnAnalysis>,
    /// Buffer-build sink fns (`List.prepend`/`reverse` builders consumed
    /// by `String.join`). The Rust backend emits a `<fn>__buffered`
    /// variant alongside each entry; the WASM backend rewrites bodies
    /// to call `rt_buffer_*` helpers. Detection lives in `ir::buffer_build`.
    pub buffer_build_sinks: HashMap<String, crate::ir::BufferBuildShape>,
    /// Fusion sites detected for `String.join(<sink>(...), sep)` calls.
    /// Each entry pairs an enclosing fn + line + sink fn name; the
    /// emitter rewrites these call expressions to use buffered variants
    /// in place of the producer + consumer chain.
    pub buffer_fusion_sites: Vec<crate::ir::FusionSite>,
    /// Synthesized `<fn>__buffered` variants for every buffer-build
    /// sink, produced by `ir::synthesize_buffered_variants`. These are
    /// real `FnDef`s with proper body AST; backends iterate over them
    /// alongside `fn_defs` so they reach codegen through the same
    /// pipeline (TCO / no-alloc / mutual-recursion all apply
    /// identically). Empty when no sinks are detected.
    pub synthesized_buffered_fns: Vec<FnDef>,
}
111
/// Output files from a codegen backend.
///
/// Backends return this value rather than performing I/O themselves;
/// presumably the caller decides the output root — TODO confirm at call sites.
pub struct ProjectOutput {
    /// Files to write: (relative_path, content) pairs.
    pub files: Vec<(String, String)>,
}
117
118/// Build a CodegenContext from parsed + type-checked items.
119///
120/// `entry_analysis` is the `analyze` stage output for `items` (entry
121/// module). When provided, codegen reads `mutual_tco_members`,
122/// `recursive_fns`, and per-fn `FnAnalysis` from it instead of recomputing.
123/// Each `ModuleInfo` in `modules` carries its own per-module analysis;
124/// codegen unions the per-module sets to build a global view (sound
125/// under Aver's module DAG invariant — no cross-module SCCs possible,
126/// see `src/ir/analyze.rs` doc).
127pub fn build_context(
128 items: Vec<TopLevel>,
129 tc_result: &TypeCheckResult,
130 entry_analysis: Option<&crate::ir::AnalysisResult>,
131 memo_fns: HashSet<String>,
132 project_name: String,
133 modules: Vec<ModuleInfo>,
134) -> CodegenContext {
135 let type_defs: Vec<TypeDef> = items
136 .iter()
137 .filter_map(|item| {
138 if let TopLevel::TypeDef(td) = item {
139 Some(td.clone())
140 } else {
141 None
142 }
143 })
144 .collect();
145
146 let fn_defs: Vec<FnDef> = items
147 .iter()
148 .filter_map(|item| {
149 if let TopLevel::FnDef(fd) = item {
150 Some(fd.clone())
151 } else {
152 None
153 }
154 })
155 .collect();
156
157 let module_prefixes: HashSet<String> = modules.iter().map(|m| m.prefix.clone()).collect();
158
159 // Mutual-TCO membership unions per-module sets from the analyze stage
160 // (entry's `entry_analysis` + each dep module's `module.analysis`).
161 // Aver's module DAG invariant guarantees SCCs never span modules, so
162 // a per-module union is the correct global view — see
163 // `project_aver_module_dag` memory and `src/ir/analyze.rs` doc.
164 //
165 // Falls back to ad-hoc `tailcall_scc_components` per module when the
166 // analysis isn't supplied (callers that haven't migrated to the
167 // pipeline). The fallback path will go away once every entry point
168 // runs the canonical pipeline.
169 let mut mutual_tco_members: HashSet<String> = HashSet::new();
170 match entry_analysis {
171 Some(a) => mutual_tco_members.extend(a.mutual_tco_members.iter().cloned()),
172 None => {
173 let entry_fns: Vec<&FnDef> = fn_defs.iter().filter(|fd| fd.name != "main").collect();
174 for group in crate::call_graph::tailcall_scc_components(&entry_fns) {
175 if group.len() < 2 {
176 continue;
177 }
178 for fd in group {
179 mutual_tco_members.insert(fd.name.clone());
180 }
181 }
182 }
183 }
184 for module in &modules {
185 match module.analysis.as_ref() {
186 Some(a) => mutual_tco_members.extend(a.mutual_tco_members.iter().cloned()),
187 None => {
188 let mod_fns: Vec<&FnDef> = module.fn_defs.iter().collect();
189 for group in crate::call_graph::tailcall_scc_components(&mod_fns) {
190 if group.len() < 2 {
191 continue;
192 }
193 for fd in group {
194 mutual_tco_members.insert(fd.name.clone());
195 }
196 }
197 }
198 }
199 }
200
201 // Per-fn analysis dictionary — union of entry's `fn_analyses` plus
202 // each dep module's. Codegen reads `allocates`, `thin_kind`, etc.
203 // from here instead of recomputing.
204 let mut fn_analyses: HashMap<String, crate::ir::FnAnalysis> = HashMap::new();
205 if let Some(a) = entry_analysis {
206 for (name, fa) in &a.fn_analyses {
207 fn_analyses.insert(name.clone(), fa.clone());
208 }
209 }
210 for module in &modules {
211 if let Some(a) = module.analysis.as_ref() {
212 for (name, fa) in &a.fn_analyses {
213 fn_analyses
214 .entry(name.clone())
215 .or_insert_with(|| fa.clone());
216 }
217 }
218 }
219
220 // `recursive_fns` follows the same shape as `mutual_tco_members` —
221 // per-module sets unioned (Aver's module DAG keeps cross-module
222 // recursion from existing). Falls back to ad-hoc `find_recursive_fns`
223 // when a module's analysis is missing.
224 let mut recursive_fns: HashSet<String> = HashSet::new();
225 match entry_analysis {
226 Some(a) => recursive_fns.extend(a.recursive_fns.iter().cloned()),
227 None => {
228 recursive_fns.extend(crate::call_graph::find_recursive_fns(&items));
229 }
230 }
231 for module in &modules {
232 match module.analysis.as_ref() {
233 Some(a) => recursive_fns.extend(a.recursive_fns.iter().cloned()),
234 None => {
235 let mod_items: Vec<TopLevel> = module
236 .fn_defs
237 .iter()
238 .map(|fd| TopLevel::FnDef(fd.clone()))
239 .collect();
240 recursive_fns.extend(crate::call_graph::find_recursive_fns(&mod_items));
241 }
242 }
243 }
244
245 // Start with checker's fn_sigs (exposed API), then add signatures for
246 // ALL module functions (including private helpers) via SymbolRegistry.
247 // Codegen emits full module implementations, so it needs signatures for
248 // intra-module calls that the checker intentionally omits.
249 let mut fn_sigs = tc_result.fn_sigs.clone();
250 {
251 let pairs: Vec<(String, Vec<TopLevel>)> = modules
252 .iter()
253 .map(|m| {
254 let items: Vec<TopLevel> = m
255 .fn_defs
256 .iter()
257 .map(|fd| TopLevel::FnDef(fd.clone()))
258 .chain(m.type_defs.iter().map(|td| TopLevel::TypeDef(td.clone())))
259 .collect();
260 (m.prefix.clone(), items)
261 })
262 .collect();
263 let registry = crate::visibility::SymbolRegistry::from_modules_all(&pairs);
264 for entry in ®istry.entries {
265 if fn_sigs.contains_key(&entry.canonical_name) {
266 continue;
267 }
268 if let crate::visibility::SymbolKind::Function {
269 params,
270 return_type,
271 effects,
272 ..
273 } = &entry.kind
274 {
275 let parsed_params: Vec<crate::types::Type> = params
276 .iter()
277 .map(|(_, ty_str)| crate::types::parse_type_str(ty_str))
278 .collect();
279 let ret = crate::types::parse_type_str(return_type);
280 fn_sigs.insert(
281 entry.canonical_name.clone(),
282 (parsed_params, ret, effects.clone()),
283 );
284 }
285 }
286 }
287
288 // Detection layer for buffer-build sinks + fusion sites. The
289 // ACTUAL rewrite + synthesis must happen BEFORE the resolver
290 // pass (callers run it via `ir::run_buffer_build_pass` between
291 // TCO and resolver) — the detector matches on `Expr::Ident`
292 // shapes that resolver later rewrites to `Expr::Resolved`. We
293 // rerun detection here against the final items so the resulting
294 // ctx fields reflect what's actually in the AST. With pre-
295 // resolver pass having already run, sinks/sites should be the
296 // same set (sinks are fns, not call sites; fusion sites were
297 // rewritten away so the post-rewrite count is zero in normal flow).
298 let detect_fns: Vec<&FnDef> = fn_defs
299 .iter()
300 .chain(modules.iter().flat_map(|m| m.fn_defs.iter()))
301 .collect();
302 let buffer_build_sinks = crate::ir::compute_buffer_build_sinks(&detect_fns);
303 let buffer_fusion_sites = crate::ir::find_fusion_sites(&detect_fns, &buffer_build_sinks);
304 // The synthesizer already ran in the pre-resolver compile pass
305 // (`ir::run_buffer_build_pass`); the resulting `<fn>__buffered`
306 // variants live in `items` (or in dep `module.fn_defs`) directly,
307 // so we just collect references for the ctx field instead of
308 // re-synthesizing — re-running here would duplicate every fn
309 // and confuse the WASM emitter's fn_indices table.
310 let synthesized_buffered_fns: Vec<FnDef> = fn_defs
311 .iter()
312 .chain(modules.iter().flat_map(|m| m.fn_defs.iter()))
313 .filter(|fd| fd.name.ends_with("__buffered"))
314 .cloned()
315 .collect();
316 // 0.15 Traversal — register signatures for the four buffer-build
317 // internal intrinsics. Without these in fn_sigs, downstream
318 // `infer_aver_type` on `__buf_append(...)` etc. returns None and
319 // `expr_is_heap_ptr` falls through to false — meaning TCO
320 // compaction doesn't retain the buffer pointer across GC, the
321 // buffer object gets relocated by collect_end, and the next
322 // iteration reads through the stale pointer producing
323 // `memory access out of bounds` traps. Buffer parses to
324 // Type::Named("Buffer") which is_heap_type accepts.
325 {
326 let buffer_ty = || crate::types::Type::Named("Buffer".to_string());
327 let str_ty = || crate::types::Type::Str;
328 let int_ty = || crate::types::Type::Int;
329 let intrinsic_sigs: &[(&str, Vec<crate::types::Type>, crate::types::Type)] = &[
330 ("__buf_new", vec![int_ty()], buffer_ty()),
331 ("__buf_append", vec![buffer_ty(), str_ty()], buffer_ty()),
332 (
333 "__buf_append_sep_unless_first",
334 vec![buffer_ty(), str_ty()],
335 buffer_ty(),
336 ),
337 ("__buf_finalize", vec![buffer_ty()], str_ty()),
338 ];
339 for (name, params, ret) in intrinsic_sigs {
340 fn_sigs.insert(name.to_string(), (params.clone(), ret.clone(), vec![]));
341 }
342 }
343
344 // Inject signatures for synthesized variants into fn_sigs so the
345 // WASM emitter's type-section pass produces correct param/return
346 // wasm types (the fallback path emits `all-i64` which breaks
347 // validation when a body calls intrinsics with i32 buffer ptrs).
348 for fd in synthesized_buffered_fns.iter() {
349 if fn_sigs.contains_key(&fd.name) {
350 continue;
351 }
352 let param_types: Vec<crate::types::Type> = fd
353 .params
354 .iter()
355 .map(|(_, ty_str)| crate::types::parse_type_str(ty_str))
356 .collect();
357 let ret = crate::types::parse_type_str(&fd.return_type);
358 fn_sigs.insert(
359 fd.name.clone(),
360 (
361 param_types,
362 ret,
363 fd.effects.iter().map(|e| e.node.clone()).collect(),
364 ),
365 );
366 }
367
368 CodegenContext {
369 items,
370 fn_sigs,
371 memo_fns,
372 memo_safe_types: tc_result.memo_safe_types.clone(),
373 type_defs,
374 fn_defs,
375 project_name,
376 modules,
377 module_prefixes,
378 #[cfg(feature = "runtime")]
379 policy: None,
380 emit_replay_runtime: false,
381 runtime_policy_from_env: false,
382 guest_entry: None,
383 emit_self_host_support: false,
384 extra_fn_defs: Vec::new(),
385 mutual_tco_members,
386 recursive_fns,
387 fn_analyses,
388 buffer_build_sinks,
389 buffer_fusion_sites,
390 synthesized_buffered_fns,
391 }
392}
393
394impl CodegenContext {
395 /// Recompute `mutual_tco_members` and `recursive_fns` from current
396 /// `items` + `modules`. Used by test helpers that build the context
397 /// piecewise (push items in-place, bypass `build_context`) so the
398 /// derived sets stay in sync. Idempotent — production callers go
399 /// through `build_context`, where these are already populated from
400 /// the analyze stage; calling `refresh_facts` again is a no-op for
401 /// them (computes the same answer).
402 pub fn refresh_facts(&mut self) {
403 let entry_fn_refs: Vec<&FnDef> =
404 self.fn_defs.iter().filter(|fd| fd.name != "main").collect();
405
406 let mut mutual_tco_members: HashSet<String> = HashSet::new();
407 for group in crate::call_graph::tailcall_scc_components(&entry_fn_refs) {
408 if group.len() < 2 {
409 continue;
410 }
411 for fd in group {
412 mutual_tco_members.insert(fd.name.clone());
413 }
414 }
415 for module in &self.modules {
416 let mod_fns: Vec<&FnDef> = module.fn_defs.iter().collect();
417 for group in crate::call_graph::tailcall_scc_components(&mod_fns) {
418 if group.len() < 2 {
419 continue;
420 }
421 for fd in group {
422 mutual_tco_members.insert(fd.name.clone());
423 }
424 }
425 }
426 self.mutual_tco_members = mutual_tco_members;
427
428 let mut recursive_fns: HashSet<String> = crate::call_graph::find_recursive_fns(&self.items);
429 for module in &self.modules {
430 let mod_items: Vec<TopLevel> = module
431 .fn_defs
432 .iter()
433 .map(|fd| TopLevel::FnDef(fd.clone()))
434 .collect();
435 recursive_fns.extend(crate::call_graph::find_recursive_fns(&mod_items));
436 }
437 self.recursive_fns = recursive_fns;
438 }
439}