aver/codegen/mod.rs
//! Aver → target language transpilation.
//!
//! The codegen module transforms a type-checked Aver AST into source code
//! for a target language. Backends: Rust deployment and Lean proof export,
//! plus the `dafny` and `wasm` submodules (each backend is feature-gated).
5pub(crate) mod builtin_helpers;
6pub(crate) mod builtin_records;
7pub(crate) mod builtins;
8pub mod common;
9#[cfg(feature = "runtime")]
10pub mod dafny;
11#[cfg(feature = "runtime")]
12pub mod lean;
13#[cfg(feature = "runtime")]
14pub mod recursion;
15#[cfg(feature = "runtime")]
16pub mod rust;
17#[cfg(feature = "wasm-compile")]
18pub mod wasm;
19
20use std::collections::{HashMap, HashSet};
21
22use crate::ast::{FnDef, TopLevel, TypeDef};
23use crate::types::checker::TypeCheckResult;
24
25/// Information about a dependent module loaded for codegen.
26pub struct ModuleInfo {
27 /// Qualified module path, e.g. "Models.User".
28 pub prefix: String,
29 /// Direct `depends [...]` entries from the source module.
30 pub depends: Vec<String>,
31 /// Type definitions from the module.
32 pub type_defs: Vec<TypeDef>,
33 /// Function definitions from the module (excluding `main`).
34 pub fn_defs: Vec<FnDef>,
35}
36
37/// Collected context from the Aver program, shared across all backends.
38pub struct CodegenContext {
39 /// All top-level items (post-TCO transform, post-typecheck).
40 pub items: Vec<TopLevel>,
41 /// Function signatures: name → (param_types, return_type, effects).
42 pub fn_sigs: HashMap<String, (Vec<crate::types::Type>, crate::types::Type, Vec<String>)>,
43 /// Functions eligible for auto-memoization.
44 pub memo_fns: HashSet<String>,
45 /// Set of type names whose values are memo-safe.
46 pub memo_safe_types: HashSet<String>,
47 /// User-defined type definitions (for struct/enum generation).
48 pub type_defs: Vec<TypeDef>,
49 /// User-defined function definitions.
50 pub fn_defs: Vec<FnDef>,
51 /// Project/binary name.
52 pub project_name: String,
53 /// Dependent modules loaded for inlining.
54 pub modules: Vec<ModuleInfo>,
55 /// Set of module prefixes for qualified name resolution (e.g. "Models.User").
56 pub module_prefixes: HashSet<String>,
57 /// Embedded runtime policy from `aver.toml` for generated code.
58 #[cfg(feature = "runtime")]
59 pub policy: Option<crate::config::ProjectConfig>,
60 /// Emit generated scoped runtime support (replay and/or runtime-loaded policy).
61 pub emit_replay_runtime: bool,
62 /// Load runtime policy from the active module root instead of embedding it.
63 pub runtime_policy_from_env: bool,
64 /// Explicit guest entry boundary for scoped replay/policy.
65 pub guest_entry: Option<String>,
66 /// Emit extra generated helpers needed only by the cached self-host helper.
67 pub emit_self_host_support: bool,
68 /// Extra fn_defs visible during current module emission (not in `fn_defs` or `modules`).
69 /// Set temporarily by the Rust backend when emitting a dependent module so that
70 /// `find_fn_def_by_name` can resolve same-module calls.
71 pub extra_fn_defs: Vec<FnDef>,
72 /// Functions that are part of a mutual-TCO SCC group (emitted as trampoline + wrappers).
73 /// Functions NOT in this set but with TailCalls are emitted as plain self-TCO loops.
74 pub mutual_tco_members: HashSet<String>,
75 /// Buffer-build sink fns (`List.prepend`/`reverse` builders consumed
76 /// by `String.join`). The Rust backend emits a `<fn>__buffered`
77 /// variant alongside each entry; the WASM backend rewrites bodies
78 /// to call `rt_buffer_*` helpers. Detection lives in `ir::buffer_build`.
79 pub buffer_build_sinks: HashMap<String, crate::ir::BufferBuildShape>,
80 /// Fusion sites detected for `String.join(<sink>(...), sep)` calls.
81 /// Each entry pairs an enclosing fn + line + sink fn name; the
82 /// emitter rewrites these call expressions to use buffered variants
83 /// in place of the producer + consumer chain.
84 pub buffer_fusion_sites: Vec<crate::ir::FusionSite>,
85 /// Synthesized `<fn>__buffered` variants for every buffer-build
86 /// sink, produced by `ir::synthesize_buffered_variants`. These are
87 /// real `FnDef`s with proper body AST; backends iterate over them
88 /// alongside `fn_defs` so they reach codegen through the same
89 /// pipeline (TCO / no-alloc / mutual-recursion all apply
90 /// identically). Empty when no sinks are detected.
91 pub synthesized_buffered_fns: Vec<FnDef>,
92}
93
/// Output files from a codegen backend.
pub struct ProjectOutput {
    /// Files to write, as `(relative_path, content)` pairs.
    pub files: Vec<(String, String)>,
}
99
100/// Build a CodegenContext from parsed + type-checked items.
101pub fn build_context(
102 items: Vec<TopLevel>,
103 tc_result: &TypeCheckResult,
104 memo_fns: HashSet<String>,
105 project_name: String,
106 modules: Vec<ModuleInfo>,
107) -> CodegenContext {
108 let type_defs: Vec<TypeDef> = items
109 .iter()
110 .filter_map(|item| {
111 if let TopLevel::TypeDef(td) = item {
112 Some(td.clone())
113 } else {
114 None
115 }
116 })
117 .collect();
118
119 let fn_defs: Vec<FnDef> = items
120 .iter()
121 .filter_map(|item| {
122 if let TopLevel::FnDef(fd) = item {
123 Some(fd.clone())
124 } else {
125 None
126 }
127 })
128 .collect();
129
130 let module_prefixes: HashSet<String> = modules.iter().map(|m| m.prefix.clone()).collect();
131
132 // Compute which functions are in mutual-TCO SCC groups (emitted as trampoline + wrappers).
133 let mut mutual_tco_members = HashSet::new();
134 {
135 // Entry module (non-main)
136 let entry_fns: Vec<&FnDef> = fn_defs.iter().filter(|fd| fd.name != "main").collect();
137 for group in crate::call_graph::tailcall_scc_components(&entry_fns) {
138 for fd in &group {
139 mutual_tco_members.insert(fd.name.clone());
140 }
141 }
142 // Dependent modules
143 for module in &modules {
144 let mod_fns: Vec<&FnDef> = module.fn_defs.iter().collect();
145 for group in crate::call_graph::tailcall_scc_components(&mod_fns) {
146 for fd in &group {
147 mutual_tco_members.insert(fd.name.clone());
148 }
149 }
150 }
151 }
152
153 // Start with checker's fn_sigs (exposed API), then add signatures for
154 // ALL module functions (including private helpers) via SymbolRegistry.
155 // Codegen emits full module implementations, so it needs signatures for
156 // intra-module calls that the checker intentionally omits.
157 let mut fn_sigs = tc_result.fn_sigs.clone();
158 {
159 let pairs: Vec<(String, Vec<TopLevel>)> = modules
160 .iter()
161 .map(|m| {
162 let items: Vec<TopLevel> = m
163 .fn_defs
164 .iter()
165 .map(|fd| TopLevel::FnDef(fd.clone()))
166 .chain(m.type_defs.iter().map(|td| TopLevel::TypeDef(td.clone())))
167 .collect();
168 (m.prefix.clone(), items)
169 })
170 .collect();
171 let registry = crate::visibility::SymbolRegistry::from_modules_all(&pairs);
172 for entry in ®istry.entries {
173 if fn_sigs.contains_key(&entry.canonical_name) {
174 continue;
175 }
176 if let crate::visibility::SymbolKind::Function {
177 params,
178 return_type,
179 effects,
180 ..
181 } = &entry.kind
182 {
183 let parsed_params: Vec<crate::types::Type> = params
184 .iter()
185 .map(|(_, ty_str)| crate::types::parse_type_str(ty_str))
186 .collect();
187 let ret = crate::types::parse_type_str(return_type);
188 fn_sigs.insert(
189 entry.canonical_name.clone(),
190 (parsed_params, ret, effects.clone()),
191 );
192 }
193 }
194 }
195
196 // Detection layer for buffer-build sinks + fusion sites. The
197 // ACTUAL rewrite + synthesis must happen BEFORE the resolver
198 // pass (callers run it via `ir::run_buffer_build_pass` between
199 // TCO and resolver) — the detector matches on `Expr::Ident`
200 // shapes that resolver later rewrites to `Expr::Resolved`. We
201 // rerun detection here against the final items so the resulting
202 // ctx fields reflect what's actually in the AST. With pre-
203 // resolver pass having already run, sinks/sites should be the
204 // same set (sinks are fns, not call sites; fusion sites were
205 // rewritten away so the post-rewrite count is zero in normal flow).
206 let detect_fns: Vec<&FnDef> = fn_defs
207 .iter()
208 .chain(modules.iter().flat_map(|m| m.fn_defs.iter()))
209 .collect();
210 let buffer_build_sinks = crate::ir::compute_buffer_build_sinks(&detect_fns);
211 let buffer_fusion_sites = crate::ir::find_fusion_sites(&detect_fns, &buffer_build_sinks);
212 // The synthesizer already ran in the pre-resolver compile pass
213 // (`ir::run_buffer_build_pass`); the resulting `<fn>__buffered`
214 // variants live in `items` (or in dep `module.fn_defs`) directly,
215 // so we just collect references for the ctx field instead of
216 // re-synthesizing — re-running here would duplicate every fn
217 // and confuse the WASM emitter's fn_indices table.
218 let synthesized_buffered_fns: Vec<FnDef> = fn_defs
219 .iter()
220 .chain(modules.iter().flat_map(|m| m.fn_defs.iter()))
221 .filter(|fd| fd.name.ends_with("__buffered"))
222 .cloned()
223 .collect();
224 // 0.15 Traversal — register signatures for the four buffer-build
225 // internal intrinsics. Without these in fn_sigs, downstream
226 // `infer_aver_type` on `__buf_append(...)` etc. returns None and
227 // `expr_is_heap_ptr` falls through to false — meaning TCO
228 // compaction doesn't retain the buffer pointer across GC, the
229 // buffer object gets relocated by collect_end, and the next
230 // iteration reads through the stale pointer producing
231 // `memory access out of bounds` traps. Buffer parses to
232 // Type::Named("Buffer") which is_heap_type accepts.
233 {
234 let buffer_ty = || crate::types::Type::Named("Buffer".to_string());
235 let str_ty = || crate::types::Type::Str;
236 let int_ty = || crate::types::Type::Int;
237 let intrinsic_sigs: &[(&str, Vec<crate::types::Type>, crate::types::Type)] = &[
238 ("__buf_new", vec![int_ty()], buffer_ty()),
239 ("__buf_append", vec![buffer_ty(), str_ty()], buffer_ty()),
240 (
241 "__buf_append_sep_unless_first",
242 vec![buffer_ty(), str_ty()],
243 buffer_ty(),
244 ),
245 ("__buf_finalize", vec![buffer_ty()], str_ty()),
246 ];
247 for (name, params, ret) in intrinsic_sigs {
248 fn_sigs.insert(name.to_string(), (params.clone(), ret.clone(), vec![]));
249 }
250 }
251
252 // Inject signatures for synthesized variants into fn_sigs so the
253 // WASM emitter's type-section pass produces correct param/return
254 // wasm types (the fallback path emits `all-i64` which breaks
255 // validation when a body calls intrinsics with i32 buffer ptrs).
256 for fd in synthesized_buffered_fns.iter() {
257 if fn_sigs.contains_key(&fd.name) {
258 continue;
259 }
260 let param_types: Vec<crate::types::Type> = fd
261 .params
262 .iter()
263 .map(|(_, ty_str)| crate::types::parse_type_str(ty_str))
264 .collect();
265 let ret = crate::types::parse_type_str(&fd.return_type);
266 fn_sigs.insert(
267 fd.name.clone(),
268 (
269 param_types,
270 ret,
271 fd.effects.iter().map(|e| e.node.clone()).collect(),
272 ),
273 );
274 }
275
276 CodegenContext {
277 items,
278 fn_sigs,
279 memo_fns,
280 memo_safe_types: tc_result.memo_safe_types.clone(),
281 type_defs,
282 fn_defs,
283 project_name,
284 modules,
285 module_prefixes,
286 #[cfg(feature = "runtime")]
287 policy: None,
288 emit_replay_runtime: false,
289 runtime_policy_from_env: false,
290 guest_entry: None,
291 emit_self_host_support: false,
292 extra_fn_defs: Vec::new(),
293 mutual_tco_members,
294 buffer_build_sinks,
295 buffer_fusion_sites,
296 synthesized_buffered_fns,
297 }
298}