aver/ir/analyze.rs
//! IR-level analysis pass — derives policy-independent (body classification,
//! locals count) and policy-parametrized (alloc info) facts about each
//! `FnDef`. Runs as the last stage of the canonical pipeline so consumers
//! (codegen, dump, future inliner) can read derived metadata from one
//! place instead of recomputing it.
//!
//! Why a unified analysis stage instead of ad-hoc calls scattered through
//! VM compilation and WASM emission: every backend that wanted "is this
//! fn no-alloc" or "what's its body shape" had its own call into
//! `compute_alloc_info` / `classify_thin_fn_def`. Same input, same
//! computation, repeated. Centralising means: one walk per program, one
//! place to extend with new analyses, and `aver compile --emit-ir` /
//! `--explain-passes` see the same numbers the codegen does.
//!
//! Backend-specific bits stay backend-specific: `AllocPolicy` is provided
//! by the caller (`VmAllocPolicy`, `WasmAllocPolicy`, or the dump's
//! conservative `NeutralAllocPolicy` below). Policy-independent facts
//! (`body_shape`, `thin_kind`, `local_count`) are computed unconditionally.
//!
//! ## Scope: per-module by design (Aver module DAG invariant)
//!
//! `analyze` runs on one module at a time — the entry items, or a single
//! dep module loaded from disk. It does NOT cross module boundaries to
//! discover, say, a mutual-TCO SCC that spans two modules. This is correct
//! by Aver's module-graph invariant, not a limitation:
//!
//! - `module M depends [A, B, ...]` — depends is a static list of names,
//!   no dynamic resolution.
//! - Module loading (`load_module_recursive`) enforces a DAG via the
//!   `loaded: HashSet` cycle guard — a re-entered module name returns
//!   early, so the dep graph is acyclic by construction.
//! - In a DAG, all calls flow from dependents to dependencies: module A
//!   calls into module B if A depends on B (transitively); B never calls
//!   back into A. Mutually-recursive call chains require a cycle in the
//!   call graph, and any such cycle either stays within a single module
//!   (self-recursion or intra-module mutual recursion) or crosses module
//!   boundaries, which would require a cycle in the module DAG, and that
//!   is forbidden.
//!
//! Consequence: every SCC of mutually-recursive functions lives entirely
//! within a single module. Per-module analysis covers every real case;
//! "cross-module mutual TCO" is mathematically impossible in Aver.
//! Backends that want a global `mutual_tco_members` set just union the
//! per-module sets they get from each module's `AnalysisResult`.
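//!
//! A minimal sketch of that union, assuming some driver-side module list
//! (`loaded_modules`, `module.items`, and `module.ctx` are illustrative
//! names, not this crate's API):
//!
//! ```ignore
//! use std::collections::HashSet;
//!
//! let mut global_mutual_tco: HashSet<String> = HashSet::new();
//! for module in &loaded_modules {
//!     // One `analyze` per module; a plain union is enough because every
//!     // SCC is module-local (see the DAG argument above).
//!     let result = analyze(&module.items, None, &module.ctx);
//!     global_mutual_tco.extend(result.mutual_tco_members);
//! }
//! ```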
//!
//! Same logic for `recursive_fns`, `recursive_call_count`, and the
//! tail-call SCC computation that drives trampoline emission. None of
//! these need to see cross-module data.

use std::collections::{HashMap, HashSet};

use crate::ast::{FnBody, FnDef, TopLevel};
use crate::call_graph::{find_recursive_fns, tailcall_scc_components};
use crate::ir::{
    AllocPolicy, BodyExprPlan, BodyPlan, CallLowerCtx, ThinKind, classify_thin_fn_def,
};

/// Backend-neutral allocation policy useful for diagnostic dumps. Mirrors
/// the VM/WASM "pure non-alloc builtins" whitelist exactly so the
/// `[no_alloc]` annotation reads the same on both runtime backends.
/// Constructors with payloads are treated as allocating; nullary
/// constructors (e.g. `Option.None`) are not.
///
/// Codegen pipelines should use the backend-specific policy
/// (`VmAllocPolicy`, `WasmAllocPolicy`); this type is for `--emit-ir`,
/// `aver bench`, and other tools that want a sensible answer without
/// committing to a target.
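///
/// A small illustration of the policy's behaviour (marked `ignore`; the
/// builtin names are just examples, and any name not on the whitelist
/// below is reported as allocating):
///
/// ```ignore
/// let policy = NeutralAllocPolicy;
/// assert!(!policy.builtin_allocates("Int.abs")); // whitelisted, no alloc
/// assert!(policy.builtin_allocates("String.concat")); // not whitelisted
/// assert!(policy.constructor_allocates("Option.Some", true)); // payload
/// assert!(!policy.constructor_allocates("Option.None", false)); // nullary
/// ```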
pub struct NeutralAllocPolicy;

impl AllocPolicy for NeutralAllocPolicy {
    fn builtin_allocates(&self, name: &str) -> bool {
        !matches!(
            name,
            "Int.abs"
                | "Int.min"
                | "Int.max"
                | "Float.fromInt"
                | "Float.abs"
                | "Float.floor"
                | "Float.ceil"
                | "Float.round"
                | "Float.min"
                | "Float.max"
                | "Float.sin"
                | "Float.cos"
                | "Float.sqrt"
                | "Float.pow"
                | "Float.atan2"
                | "Float.pi"
                | "Char.toCode"
                | "String.len"
                | "String.byteLength"
                | "String.startsWith"
                | "String.endsWith"
                | "String.contains"
                | "List.len"
                | "List.contains"
                | "Vector.len"
                | "Map.size"
                | "Map.contains"
                | "Set.size"
                | "Set.contains"
                | "Bool.and"
                | "Bool.or"
                | "Bool.not"
        )
    }
    fn constructor_allocates(&self, _name: &str, has_payload: bool) -> bool {
        has_payload
    }
}

#[derive(Debug, Clone, Default)]
pub struct AnalysisResult {
    pub fn_analyses: HashMap<String, FnAnalysis>,
    /// Names of fns that participate in a multi-member tail-call SCC
    /// (mutual recursion). Backends emit a trampoline + thin wrappers
    /// for each member; singleton self-recursive fns go through plain
    /// TCO and aren't in this set.
    pub mutual_tco_members: HashSet<String>,
    /// Names of fns that call themselves directly or transitively.
    /// Used by the type checker's flow analysis, the proof exporters,
    /// and the memo-eligibility check.
    pub recursive_fns: HashSet<String>,
}

#[derive(Debug, Clone)]
pub struct FnAnalysis {
    /// `Some(true)` = proven to allocate under the supplied `AllocPolicy`;
    /// `Some(false)` = proven not to; `None` = no policy was configured.
    pub allocates: Option<bool>,
    pub thin_kind: Option<ThinKind>,
    pub body_shape: BodyShape,
    pub local_count: Option<u16>,
    /// `true` if this fn participates in a mutual-TCO SCC (not a singleton
    /// self-recursive fn — those are plain TCO and don't need trampoline
    /// codegen). Backends use this to gate trampoline emission.
    pub mutual_tco_member: bool,
    /// `true` if the fn calls itself directly or transitively. Used by
    /// the type checker's flow analysis, the proof exporters, and the
    /// memo-eligibility check.
    pub recursive: bool,
    /// Number of recursive call sites in the fn body. `0` for non-recursive
    /// fns. Memo eligibility requires `>= 2` (single-call recursion is
    /// linear and the memo overhead would dominate).
    pub recursive_call_count: usize,
}

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BodyShape {
    /// `BodyPlan::SingleExpr` whose head is a leaf op (literal, ident,
    /// resolved local, etc.) — the fastest possible body shape.
    LeafExpr,
    /// `BodyPlan::SingleExpr` of any other kind (call, match, ctor, …).
    SingleExpr,
    /// `BodyPlan::Block { stmts: N, .. }` — a multi-stmt block (let
    /// bindings + tail expr).
    Block(usize),
    /// Body didn't match any of the classifier's recognised shapes;
    /// usually means the body is a `match` or some other top-level
    /// expression the thin-body classifier doesn't model.
    Unclassified(usize),
}

/// Run the analysis on all `TopLevel::FnDef` items in `items`. The
/// `alloc_policy` is optional — when `None`, `FnAnalysis::allocates`
/// stays `None` for every fn (every other field is still computed).
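///
/// Sketch of a typical call site (marked `ignore`: building a real
/// `CallLowerCtx` is backend-specific, and `module_items` / `lower_ctx`
/// are placeholder names):
///
/// ```ignore
/// let result = analyze(&module_items, Some(&NeutralAllocPolicy), &lower_ctx);
/// for (name, fa) in &result.fn_analyses {
///     let needs_trampoline = fa.mutual_tco_member;
///     let memo_eligible = fa.recursive && fa.recursive_call_count >= 2;
///     // ... feed into codegen / dump output for `name` ...
/// }
/// ```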
pub fn analyze(
    items: &[TopLevel],
    alloc_policy: Option<&dyn AllocPolicy>,
    ctx: &impl CallLowerCtx,
) -> AnalysisResult {
    let fn_defs: Vec<&FnDef> = items
        .iter()
        .filter_map(|i| match i {
            TopLevel::FnDef(fd) => Some(fd),
            _ => None,
        })
        .collect();

    let alloc_info = alloc_policy.map(|policy| {
        // The trait object can't be passed straight to the generic
        // `compute_alloc_info<P: AllocPolicy>`, so wrap it in a
        // monomorphic adapter that re-dispatches through `dyn`.
        struct PolicyRef<'a>(&'a dyn AllocPolicy);
        impl AllocPolicy for PolicyRef<'_> {
            fn builtin_allocates(&self, name: &str) -> bool {
                self.0.builtin_allocates(name)
            }
            fn constructor_allocates(&self, name: &str, has_payload: bool) -> bool {
                self.0.constructor_allocates(name, has_payload)
            }
        }
        crate::ir::compute_alloc_info(&fn_defs, &PolicyRef(policy))
    });

    // Mutual-TCO membership — split out from `find_tco_groups` because
    // backends want to know "is this fn in a multi-member SCC" specifically
    // (singletons go through plain TCO and don't need trampoline codegen).
    // Identical computation to what every codegen backend (`codegen::mod`,
    // `codegen::rust::mod`) was running on its own; centralising means
    // they read from `FnAnalysis` instead.
    let entry_fns_for_scc: Vec<&FnDef> = fn_defs
        .iter()
        .filter(|fd| fd.name != "main")
        .copied()
        .collect();
    let mut mutual_tco_set: HashSet<String> = HashSet::new();
    for group in tailcall_scc_components(&entry_fns_for_scc) {
        if group.len() < 2 {
            continue; // singleton self-recursive fn — handled by plain TCO
        }
        for fd in group {
            mutual_tco_set.insert(fd.name.clone());
        }
    }

    let recursive_set = find_recursive_fns(items);
    let recursive_call_counts = crate::call_graph::recursive_callsite_counts(items);

    let mut fn_analyses: HashMap<String, FnAnalysis> = HashMap::with_capacity(fn_defs.len());
    for fd in &fn_defs {
        let plan = classify_thin_fn_def(fd, ctx);
        let body_shape = match &plan {
            Some(p) => match &p.body {
                BodyPlan::SingleExpr(BodyExprPlan::Leaf(_)) => BodyShape::LeafExpr,
                BodyPlan::SingleExpr(_) => BodyShape::SingleExpr,
                BodyPlan::Block { stmts, .. } => BodyShape::Block(stmts.len()),
            },
            None => {
                // `FnBody` has a single `Block` variant, so this `let`
                // pattern is irrefutable.
                let FnBody::Block(stmts) = fd.body.as_ref();
                BodyShape::Unclassified(stmts.len())
            }
        };
        let analysis = FnAnalysis {
            allocates: alloc_info.as_ref().and_then(|m| m.get(&fd.name).copied()),
            thin_kind: plan.as_ref().map(|p| p.kind),
            body_shape,
            local_count: fd.resolution.as_ref().map(|r| r.local_count),
            mutual_tco_member: mutual_tco_set.contains(&fd.name),
            recursive: recursive_set.contains(&fd.name),
            recursive_call_count: recursive_call_counts.get(&fd.name).copied().unwrap_or(0),
        };
        fn_analyses.insert(fd.name.clone(), analysis);
    }

    AnalysisResult {
        fn_analyses,
        mutual_tco_members: mutual_tco_set,
        recursive_fns: recursive_set,
    }
}