1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
/// Aver → target language transpilation.
///
/// The codegen module transforms a type-checked Aver AST into source code
/// for a target language. Backend modules declared here: `rust` (deployment),
/// `lean` (proof export), `dafny`, and `wasm` (behind the `wasm-compile` feature).
pub(crate) mod builtin_helpers;
pub(crate) mod builtin_records;
pub(crate) mod builtins;
pub mod common;
#[cfg(feature = "runtime")]
pub mod dafny;
#[cfg(feature = "runtime")]
pub mod lean;
#[cfg(feature = "runtime")]
pub mod recursion;
#[cfg(feature = "runtime")]
pub mod rust;
#[cfg(feature = "wasm-compile")]
pub mod wasm;
use std::collections::{HashMap, HashSet};
use crate::ast::{FnDef, TopLevel, TypeDef};
use crate::types::checker::TypeCheckResult;
/// A dependent module that has been loaded so codegen can emit it.
pub struct ModuleInfo {
    /// Fully qualified path of the module, for example "Models.User".
    pub prefix: String,
    /// The module's own direct `depends [...]` entries, as written in its source.
    pub depends: Vec<String>,
    /// Every type definition declared by the module.
    pub type_defs: Vec<TypeDef>,
    /// Every function definition declared by the module, with `main` left out.
    pub fn_defs: Vec<FnDef>,
}
/// Backend-independent view of an Aver program, gathered once and then
/// shared by every codegen backend.
pub struct CodegenContext {
    /// Every top-level item of the program (after the TCO transform and
    /// after type checking).
    pub items: Vec<TopLevel>,
    /// Signature table keyed by function name; each value is
    /// `(param_types, return_type, effects)`.
    pub fn_sigs: HashMap<String, (Vec<crate::types::Type>, crate::types::Type, Vec<String>)>,
    /// Names of the functions eligible for auto-memoization.
    pub memo_fns: HashSet<String>,
    /// Names of the types whose values are memo-safe.
    pub memo_safe_types: HashSet<String>,
    /// User-defined type definitions, used for struct/enum generation.
    pub type_defs: Vec<TypeDef>,
    /// User-defined function definitions.
    pub fn_defs: Vec<FnDef>,
    /// Name of the project / output binary.
    pub project_name: String,
    /// Dependent modules that were loaded for inlining.
    pub modules: Vec<ModuleInfo>,
    /// Module prefixes (e.g. "Models.User") consulted when resolving
    /// qualified names.
    pub module_prefixes: HashSet<String>,
    /// Runtime policy taken from `aver.toml`, to be embedded in generated code.
    #[cfg(feature = "runtime")]
    pub policy: Option<crate::config::ProjectConfig>,
    /// When set, generated scoped runtime support (replay and/or
    /// runtime-loaded policy) is emitted.
    pub emit_replay_runtime: bool,
    /// When set, the runtime policy is loaded from the active module root
    /// rather than being embedded.
    pub runtime_policy_from_env: bool,
    /// Explicit guest entry boundary used for scoped replay/policy.
    pub guest_entry: Option<String>,
    /// When set, extra generated helpers that only the cached self-host
    /// helper needs are emitted.
    pub emit_self_host_support: bool,
    /// Additional fn_defs visible while the current module is being emitted
    /// (they are in neither `fn_defs` nor `modules`). The Rust backend sets
    /// this temporarily while emitting a dependent module so that
    /// `find_fn_def_by_name` can resolve same-module calls.
    pub extra_fn_defs: Vec<FnDef>,
    /// Members of a mutual-TCO SCC group; these are emitted as a trampoline
    /// plus wrappers. A function with TailCalls that is NOT in this set is
    /// emitted as a plain self-TCO loop.
    pub mutual_tco_members: HashSet<String>,
    /// Buffer-build sink fns (`List.prepend`/`reverse` builders consumed by
    /// `String.join`). For each entry the Rust backend emits a
    /// `<fn>__buffered` variant, while the WASM backend rewrites bodies to
    /// call `rt_buffer_*` helpers. Detection lives in `ir::buffer_build`.
    pub buffer_build_sinks: HashMap<String, crate::ir::BufferBuildShape>,
    /// Fusion sites found for `String.join(<sink>(...), sep)` calls. Every
    /// entry pairs an enclosing fn + line + sink fn name; the emitter
    /// rewrites those call expressions to use the buffered variants instead
    /// of the producer + consumer chain.
    pub buffer_fusion_sites: Vec<crate::ir::FusionSite>,
    /// The `<fn>__buffered` variants synthesized for every buffer-build sink
    /// by `ir::synthesize_buffered_variants`. They are real `FnDef`s with a
    /// proper body AST, so backends iterate over them together with
    /// `fn_defs` and they go through the same codegen pipeline (TCO /
    /// no-alloc / mutual-recursion all apply identically). Empty when no
    /// sinks were detected.
    pub synthesized_buffered_fns: Vec<FnDef>,
}
/// The set of files produced by a codegen backend.
pub struct ProjectOutput {
    /// One `(relative_path, content)` pair per file to write.
    pub files: Vec<(String, String)>,
}
/// Build a CodegenContext from parsed + type-checked items.
pub fn build_context(
items: Vec<TopLevel>,
tc_result: &TypeCheckResult,
memo_fns: HashSet<String>,
project_name: String,
modules: Vec<ModuleInfo>,
) -> CodegenContext {
let type_defs: Vec<TypeDef> = items
.iter()
.filter_map(|item| {
if let TopLevel::TypeDef(td) = item {
Some(td.clone())
} else {
None
}
})
.collect();
let fn_defs: Vec<FnDef> = items
.iter()
.filter_map(|item| {
if let TopLevel::FnDef(fd) = item {
Some(fd.clone())
} else {
None
}
})
.collect();
let module_prefixes: HashSet<String> = modules.iter().map(|m| m.prefix.clone()).collect();
// Compute which functions are in mutual-TCO SCC groups (emitted as trampoline + wrappers).
let mut mutual_tco_members = HashSet::new();
{
// Entry module (non-main)
let entry_fns: Vec<&FnDef> = fn_defs.iter().filter(|fd| fd.name != "main").collect();
for group in crate::call_graph::tailcall_scc_components(&entry_fns) {
for fd in &group {
mutual_tco_members.insert(fd.name.clone());
}
}
// Dependent modules
for module in &modules {
let mod_fns: Vec<&FnDef> = module.fn_defs.iter().collect();
for group in crate::call_graph::tailcall_scc_components(&mod_fns) {
for fd in &group {
mutual_tco_members.insert(fd.name.clone());
}
}
}
}
// Start with checker's fn_sigs (exposed API), then add signatures for
// ALL module functions (including private helpers) via SymbolRegistry.
// Codegen emits full module implementations, so it needs signatures for
// intra-module calls that the checker intentionally omits.
let mut fn_sigs = tc_result.fn_sigs.clone();
{
let pairs: Vec<(String, Vec<TopLevel>)> = modules
.iter()
.map(|m| {
let items: Vec<TopLevel> = m
.fn_defs
.iter()
.map(|fd| TopLevel::FnDef(fd.clone()))
.chain(m.type_defs.iter().map(|td| TopLevel::TypeDef(td.clone())))
.collect();
(m.prefix.clone(), items)
})
.collect();
let registry = crate::visibility::SymbolRegistry::from_modules_all(&pairs);
for entry in ®istry.entries {
if fn_sigs.contains_key(&entry.canonical_name) {
continue;
}
if let crate::visibility::SymbolKind::Function {
params,
return_type,
effects,
..
} = &entry.kind
{
let parsed_params: Vec<crate::types::Type> = params
.iter()
.map(|(_, ty_str)| crate::types::parse_type_str(ty_str))
.collect();
let ret = crate::types::parse_type_str(return_type);
fn_sigs.insert(
entry.canonical_name.clone(),
(parsed_params, ret, effects.clone()),
);
}
}
}
// Detection layer for buffer-build sinks + fusion sites. The
// ACTUAL rewrite + synthesis must happen BEFORE the resolver
// pass (callers run it via `ir::run_buffer_build_pass` between
// TCO and resolver) — the detector matches on `Expr::Ident`
// shapes that resolver later rewrites to `Expr::Resolved`. We
// rerun detection here against the final items so the resulting
// ctx fields reflect what's actually in the AST. With pre-
// resolver pass having already run, sinks/sites should be the
// same set (sinks are fns, not call sites; fusion sites were
// rewritten away so the post-rewrite count is zero in normal flow).
let detect_fns: Vec<&FnDef> = fn_defs
.iter()
.chain(modules.iter().flat_map(|m| m.fn_defs.iter()))
.collect();
let buffer_build_sinks = crate::ir::compute_buffer_build_sinks(&detect_fns);
let buffer_fusion_sites = crate::ir::find_fusion_sites(&detect_fns, &buffer_build_sinks);
// The synthesizer already ran in the pre-resolver compile pass
// (`ir::run_buffer_build_pass`); the resulting `<fn>__buffered`
// variants live in `items` (or in dep `module.fn_defs`) directly,
// so we just collect references for the ctx field instead of
// re-synthesizing — re-running here would duplicate every fn
// and confuse the WASM emitter's fn_indices table.
let synthesized_buffered_fns: Vec<FnDef> = fn_defs
.iter()
.chain(modules.iter().flat_map(|m| m.fn_defs.iter()))
.filter(|fd| fd.name.ends_with("__buffered"))
.cloned()
.collect();
// 0.15 Traversal — register signatures for the four buffer-build
// internal intrinsics. Without these in fn_sigs, downstream
// `infer_aver_type` on `__buf_append(...)` etc. returns None and
// `expr_is_heap_ptr` falls through to false — meaning TCO
// compaction doesn't retain the buffer pointer across GC, the
// buffer object gets relocated by collect_end, and the next
// iteration reads through the stale pointer producing
// `memory access out of bounds` traps. Buffer parses to
// Type::Named("Buffer") which is_heap_type accepts.
{
let buffer_ty = || crate::types::Type::Named("Buffer".to_string());
let str_ty = || crate::types::Type::Str;
let int_ty = || crate::types::Type::Int;
let intrinsic_sigs: &[(&str, Vec<crate::types::Type>, crate::types::Type)] = &[
("__buf_new", vec![int_ty()], buffer_ty()),
("__buf_append", vec![buffer_ty(), str_ty()], buffer_ty()),
(
"__buf_append_sep_unless_first",
vec![buffer_ty(), str_ty()],
buffer_ty(),
),
("__buf_finalize", vec![buffer_ty()], str_ty()),
];
for (name, params, ret) in intrinsic_sigs {
fn_sigs.insert(name.to_string(), (params.clone(), ret.clone(), vec![]));
}
}
// Inject signatures for synthesized variants into fn_sigs so the
// WASM emitter's type-section pass produces correct param/return
// wasm types (the fallback path emits `all-i64` which breaks
// validation when a body calls intrinsics with i32 buffer ptrs).
for fd in synthesized_buffered_fns.iter() {
if fn_sigs.contains_key(&fd.name) {
continue;
}
let param_types: Vec<crate::types::Type> = fd
.params
.iter()
.map(|(_, ty_str)| crate::types::parse_type_str(ty_str))
.collect();
let ret = crate::types::parse_type_str(&fd.return_type);
fn_sigs.insert(
fd.name.clone(),
(
param_types,
ret,
fd.effects.iter().map(|e| e.node.clone()).collect(),
),
);
}
CodegenContext {
items,
fn_sigs,
memo_fns,
memo_safe_types: tc_result.memo_safe_types.clone(),
type_defs,
fn_defs,
project_name,
modules,
module_prefixes,
#[cfg(feature = "runtime")]
policy: None,
emit_replay_runtime: false,
runtime_policy_from_env: false,
guest_entry: None,
emit_self_host_support: false,
extra_fn_defs: Vec::new(),
mutual_tco_members,
buffer_build_sinks,
buffer_fusion_sites,
synthesized_buffered_fns,
}
}