// aver/ir/analyze.rs
//! IR-level analysis pass — derives policy-independent (body classification,
//! locals count) and policy-parametrized (alloc info) facts about each
//! `FnDef`. Runs as the last stage of the canonical pipeline so consumers
//! (codegen, dump, future inliner) can read derived metadata from one
//! place instead of recomputing it.
//!
//! Why a unified analysis stage instead of ad-hoc calls scattered through
//! VM compilation and WASM emission: every backend that wanted "is this
//! fn no-alloc" or "what's its body shape" had its own call into
//! `compute_alloc_info` / `classify_thin_fn_def`. Same input, same
//! computation, repeated. Centralising means: one walk per program, one
//! place to extend with new analyses, and `aver compile --emit-ir` /
//! `--explain-passes` see the same numbers the codegen does.
//!
//! Backend-specific bits stay backend-specific: `AllocPolicy` is provided
//! by the caller (`VmAllocPolicy`, `WasmAllocPolicy`, or the dump's
//! conservative `DumpAllocPolicy`). Policy-independent facts (`body_shape`,
//! `thin_kind`, `local_count`) are computed unconditionally.
//!
//! ## Scope: per-module by design (Aver module DAG invariant)
//!
//! `analyze` runs on one module at a time — the entry items, or a single
//! dep module loaded from disk. It does NOT cross module boundaries to
//! discover, say, a mutual-TCO SCC that spans two modules. This is correct
//! by Aver's module-graph invariant, not a limitation:
//!
//! - `module M depends [A, B, ...]` — depends is a static list of names,
//!   no dynamic resolution.
//! - Module loading (`load_module_recursive`) enforces a DAG via the
//!   `loaded: HashSet` cycle guard — a re-entered module name returns
//!   early, so the dep graph is acyclic by construction.
//! - In a DAG, all calls flow from dependents to dependencies: module A
//!   calls into module B if A depends on B (transitively); B never calls
//!   back into A. Mutually-recursive call chains require a cycle in the
//!   call graph; a cycle requires either (i) a self-call within one
//!   module, or (ii) a cycle in the module DAG — and the latter is
//!   forbidden.
//!
//! Consequence: every SCC of mutually-recursive functions lives entirely
//! within a single module. Per-module analysis covers every real case;
//! "cross-module mutual TCO" is mathematically impossible in Aver.
//! Backends that want a global `mutual_tco_members` set just union the
//! per-module sets they get from each module's `AnalysisResult`.
//!
//! Same logic for `recursive_fns`, `recursive_call_count`, and the
//! tail-call SCC computation that drives trampoline emission. None of
//! these need to see cross-module data.

use std::collections::{HashMap, HashSet};

use crate::ast::{FnBody, FnDef, TopLevel};
use crate::call_graph::{find_recursive_fns, tailcall_scc_components};
use crate::ir::{
    AllocPolicy, BodyExprPlan, BodyPlan, CallLowerCtx, ThinKind, classify_thin_fn_def,
};

/// Backend-neutral allocation policy useful for diagnostic dumps. Mirrors
/// the VM/WASM "pure non-alloc builtins" whitelist exactly so the
/// `[no_alloc]` annotation reads the same on both runtime backends.
/// Constructors with payloads are treated as allocating; nullary
/// constructors (`Option.None`, `Result.Ok` with primitive seed) are not.
///
/// Codegen pipelines should use the backend-specific policy
/// (`VmAllocPolicy`, `WasmAllocPolicy`); this type is for `--emit-ir`,
/// `aver bench`, and other tools that want a sensible answer without
/// committing to a target.
///
/// Zero-sized marker type; all behavior lives in its `AllocPolicy` impl.
pub struct NeutralAllocPolicy;

69impl AllocPolicy for NeutralAllocPolicy {
70    fn builtin_allocates(&self, name: &str) -> bool {
71        !matches!(
72            name,
73            "Int.abs"
74                | "Int.min"
75                | "Int.max"
76                | "Float.fromInt"
77                | "Float.abs"
78                | "Float.floor"
79                | "Float.ceil"
80                | "Float.round"
81                | "Float.min"
82                | "Float.max"
83                | "Float.sin"
84                | "Float.cos"
85                | "Float.sqrt"
86                | "Float.pow"
87                | "Float.atan2"
88                | "Float.pi"
89                | "Char.toCode"
90                | "String.len"
91                | "String.byteLength"
92                | "String.startsWith"
93                | "String.endsWith"
94                | "String.contains"
95                | "List.len"
96                | "List.contains"
97                | "Vector.len"
98                | "Map.size"
99                | "Map.contains"
100                | "Set.size"
101                | "Set.contains"
102                | "Bool.and"
103                | "Bool.or"
104                | "Bool.not"
105        )
106    }
107    fn constructor_allocates(&self, _name: &str, has_payload: bool) -> bool {
108        has_payload
109    }
110}
111
/// Output of [`analyze`] for one module: per-fn derived facts plus the
/// module-wide recursion/TCO sets.
#[derive(Debug, Clone, Default)]
pub struct AnalysisResult {
    /// Per-fn derived metadata, keyed by fn name — one entry for each
    /// `TopLevel::FnDef` the pass saw.
    pub fn_analyses: HashMap<String, FnAnalysis>,
    /// Names of fns that participate in a multi-member tail-call SCC
    /// (mutual recursion). Backends emit a trampoline + thin wrappers
    /// for each member; singleton self-recursive fns go through plain
    /// TCO and aren't in this set.
    pub mutual_tco_members: HashSet<String>,
    /// Names of fns that call themselves directly or transitively.
    /// Used by the type checker's flow analysis, the proof exporters,
    /// and the memo-eligibility check.
    pub recursive_fns: HashSet<String>,
}

/// Derived facts for a single fn, as computed by [`analyze`].
#[derive(Debug, Clone)]
pub struct FnAnalysis {
    /// `Some(true)` = proven to allocate under the supplied `AllocPolicy`;
    /// `Some(false)` = proven not to; `None` = no policy was configured.
    pub allocates: Option<bool>,
    /// Thin-body classification from `classify_thin_fn_def`; `None` when
    /// the classifier didn't recognise the body shape.
    pub thin_kind: Option<ThinKind>,
    /// Policy-independent structural shape of the body (always computed,
    /// even when `thin_kind` is `None`).
    pub body_shape: BodyShape,
    /// Local-slot count from name resolution; `None` when the fn carries
    /// no resolution data.
    pub local_count: Option<u16>,
    /// `true` if this fn participates in a mutual-TCO SCC (not a singleton
    /// self-recursive fn — those are plain TCO and don't need trampoline
    /// codegen). Backends use this to gate trampoline emission.
    pub mutual_tco_member: bool,
    /// `true` if the fn calls itself directly or transitively. Used by
    /// the type checker's flow analysis, the proof exporters, and the
    /// memo-eligibility check.
    pub recursive: bool,
    /// Number of recursive call sites in the fn body. `0` for non-recursive
    /// fns. Memo eligibility requires `>= 2` (single-call recursion is
    /// linear and the memo overhead would dominate).
    pub recursive_call_count: usize,
}

/// Policy-independent classification of a fn body's structure, read off
/// the thin-body plan (or its absence).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BodyShape {
    /// `BodyPlan::SingleExpr` whose head is a leaf op (literal, ident,
    /// resolved local, etc.) — the fastest possible body shape.
    LeafExpr,
    /// `BodyPlan::SingleExpr` of any other kind (call, match, ctor, …).
    SingleExpr,
    /// `BodyPlan::Block { stmts: N, .. }` — a multi-stmt block (let
    /// bindings + tail expr).
    Block(usize),
    /// Body didn't match any of the classifier's recognised shapes;
    /// usually means the body is a `match` or some other top-level
    /// expression the thin-body classifier doesn't model. Carries the
    /// raw stmt count of the block body.
    Unclassified(usize),
}

164/// Run the analysis on all `TopLevel::FnDef` items in `items`. The
165/// `alloc_policy` is optional — when `None`, `FnAnalysis::allocates`
166/// stays `None` for every fn (every other field is still computed).
167pub fn analyze(
168    items: &[TopLevel],
169    alloc_policy: Option<&dyn AllocPolicy>,
170    ctx: &impl CallLowerCtx,
171) -> AnalysisResult {
172    let fn_defs: Vec<&FnDef> = items
173        .iter()
174        .filter_map(|i| match i {
175            TopLevel::FnDef(fd) => Some(fd),
176            _ => None,
177        })
178        .collect();
179
180    let alloc_info = alloc_policy.map(|policy| {
181        // The trait object can't be passed straight to the generic
182        // `compute_alloc_info<P: AllocPolicy>`, so wrap it in a
183        // monomorphic adapter that re-dispatches through `dyn`.
184        struct PolicyRef<'a>(&'a dyn AllocPolicy);
185        impl AllocPolicy for PolicyRef<'_> {
186            fn builtin_allocates(&self, name: &str) -> bool {
187                self.0.builtin_allocates(name)
188            }
189            fn constructor_allocates(&self, name: &str, has_payload: bool) -> bool {
190                self.0.constructor_allocates(name, has_payload)
191            }
192        }
193        crate::ir::compute_alloc_info(&fn_defs, &PolicyRef(policy))
194    });
195
196    // Mutual-TCO membership — split out from `find_tco_groups` because
197    // backends want to know "is this fn in a multi-member SCC" specifically
198    // (singletons go through plain TCO and don't need trampoline codegen).
199    // Identical computation to what every codegen backend (`codegen::mod`,
200    // `codegen::rust::mod`) was running on its own; centralising means
201    // they read from `FnAnalysis` instead.
202    let entry_fns_for_scc: Vec<&FnDef> = fn_defs
203        .iter()
204        .filter(|fd| fd.name != "main")
205        .copied()
206        .collect();
207    let mut mutual_tco_set: HashSet<String> = HashSet::new();
208    for group in tailcall_scc_components(&entry_fns_for_scc) {
209        if group.len() < 2 {
210            continue; // singleton self-recursive fn — handled by plain TCO
211        }
212        for fd in group {
213            mutual_tco_set.insert(fd.name.clone());
214        }
215    }
216
217    let recursive_set = find_recursive_fns(items);
218    let recursive_call_counts = crate::call_graph::recursive_callsite_counts(items);
219
220    let mut fn_analyses: HashMap<String, FnAnalysis> = HashMap::with_capacity(fn_defs.len());
221    for fd in &fn_defs {
222        let plan = classify_thin_fn_def(fd, ctx);
223        let body_shape = match &plan {
224            Some(p) => match &p.body {
225                BodyPlan::SingleExpr(BodyExprPlan::Leaf(_)) => BodyShape::LeafExpr,
226                BodyPlan::SingleExpr(_) => BodyShape::SingleExpr,
227                BodyPlan::Block { stmts, .. } => BodyShape::Block(stmts.len()),
228            },
229            None => {
230                let FnBody::Block(stmts) = fd.body.as_ref();
231                BodyShape::Unclassified(stmts.len())
232            }
233        };
234        let analysis = FnAnalysis {
235            allocates: alloc_info.as_ref().and_then(|m| m.get(&fd.name).copied()),
236            thin_kind: plan.as_ref().map(|p| p.kind),
237            body_shape,
238            local_count: fd.resolution.as_ref().map(|r| r.local_count),
239            mutual_tco_member: mutual_tco_set.contains(&fd.name),
240            recursive: recursive_set.contains(&fd.name),
241            recursive_call_count: recursive_call_counts.get(&fd.name).copied().unwrap_or(0),
242        };
243        fn_analyses.insert(fd.name.clone(), analysis);
244    }
245
246    AnalysisResult {
247        fn_analyses,
248        mutual_tco_members: mutual_tco_set,
249        recursive_fns: recursive_set,
250    }
251}