nyx_scanner/pointer/domain.rs
1//! Abstract domain for field-sensitive Steensgaard points-to.
2//!
3//! Locations are interned to compact `LocId(u32)` handles so the
4//! union-find resolver can operate on dense integer keys. Field
//! locations are keyed structurally by `(parent_loc_id, field_id)`;
//! interning a `Field(parent, f)` always returns the same `LocId` no
//! matter how many times the same `(parent, f)` pair is requested.
8
9use crate::cfg::BodyId;
10use crate::ssa::ir::FieldId;
11use smallvec::SmallVec;
12use std::collections::HashMap;
13
/// Maximum nesting depth for `Field(...)` chains before folding to `Top`.
///
/// Bounds the per-body work for pathological recursive walks like
/// `a.next.next.next.…` and matches the bound called out in the
/// pointer-analysis prompt. Enforced by `LocInterner::intern_field`.
pub const MAX_FIELD_DEPTH: u8 = 3;

/// Maximum members per [`PointsToSet`] before we collapse the set to
/// the over-approximation `{Top}`. Keeps both the set and downstream
/// constraint propagation bounded; mirrors the spirit of
/// [`crate::ssa::heap::effective_max_pointsto`] without sharing the
/// exact value (this analysis runs flow-insensitively across the body
/// so its sets are typically smaller).
pub const MAX_POINTSTO_MEMBERS: usize = 16;
28
/// Compact handle for an interned [`AbsLoc`].
///
/// All abstract locations referenced by a single body share one
/// [`LocInterner`]; `LocId`s are only meaningful relative to that
/// interner. IDs are assigned densely from 0 and are stable for the
/// lifetime of the interner so the union-find can index parent / rank
/// arrays directly.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct LocId(pub u32);

/// Sentinel "anywhere" location. Always `LocId(0)`; the interner
/// reserves the first slot at construction so callers can compare
/// against it cheaply (a single integer compare, no resolve needed).
pub const LOC_TOP: LocId = LocId(0);
43
/// Abstract heap location in the points-to lattice.
///
/// A pointer-targets-this kind of fact. Cyclic field chains (e.g.
/// `a.next.next.…`) are bounded by [`MAX_FIELD_DEPTH`]; once the cap
/// is exceeded the chain folds to [`AbsLoc::Top`].
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub enum AbsLoc {
    /// "Anywhere", the over-approximation used when precision is
    /// unrecoverable (e.g. a value sourced from outside the analysed
    /// body, or a points-to set that exceeded the cap).
    Top,
    /// Allocation site within a body, identified by the SSA value of
    /// the defining instruction. SSA guarantees a single definition
    /// per value, so the SSA value uniquely names the allocation site.
    ///
    /// `body` disambiguates allocations across bodies in the same
    /// file. The interned `u32` is the `SsaValue.0` of the call /
    /// constructor instruction.
    Alloc(BodyId, u32),
    /// Function parameter, the abstract identity of the value
    /// supplied by the caller for parameter `index`. The receiver
    /// (`self` / `this`) uses [`AbsLoc::SelfParam`] instead.
    Param(BodyId, usize),
    /// Implicit method receiver (`self` / `this`). Distinct from
    /// `Param(_, _)` so callers don't have to encode an "is the
    /// receiver" sentinel index.
    SelfParam(BodyId),
    /// Heap field of a parent location: `parent.f`. `parent` is
    /// itself a [`LocId`]; chains of field accesses produce nested
    /// `Field` locations. Depth is bounded by [`MAX_FIELD_DEPTH`].
    Field { parent: LocId, field: FieldId },
}
76
/// Per-body interner mapping [`AbsLoc`] → dense [`LocId`].
///
/// Owns the canonical store: callers only hold [`LocId`]s and resolve
/// them through the interner. The first slot ([`LOC_TOP`]) is always
/// `Top`, so the union-find resolver can short-circuit "is this Top?"
/// queries with a single integer compare.
#[derive(Clone, Debug)]
pub struct LocInterner {
    /// Locations indexed by `LocId.0`. Parallel to `depths`.
    locs: Vec<AbsLoc>,
    /// Reverse lookup: `(BodyId, alloc-ssa-value)` → `LocId`.
    alloc_lookup: HashMap<(BodyId, u32), LocId>,
    /// Reverse lookup: `(BodyId, param-index)` → `LocId`.
    param_lookup: HashMap<(BodyId, usize), LocId>,
    /// Reverse lookup for `SelfParam`.
    self_param_lookup: HashMap<BodyId, LocId>,
    /// Reverse lookup for `Field { parent, field }`.
    field_lookup: HashMap<(LocId, FieldId), LocId>,
    /// Interned depth of each location (0 for non-Field). Used to
    /// fold deeply-nested `Field` chains to [`AbsLoc::Top`].
    /// Parallel to `locs`; `push` keeps the two in lock-step.
    depths: Vec<u8>,
}
99
100impl Default for LocInterner {
101 fn default() -> Self {
102 Self::new()
103 }
104}
105
106impl LocInterner {
107 /// Create a fresh interner with [`LOC_TOP`] pre-installed.
108 pub fn new() -> Self {
109 Self {
110 locs: vec![AbsLoc::Top],
111 alloc_lookup: HashMap::new(),
112 param_lookup: HashMap::new(),
113 self_param_lookup: HashMap::new(),
114 field_lookup: HashMap::new(),
115 depths: vec![0],
116 }
117 }
118
119 /// Total number of interned locations (including the reserved
120 /// [`LOC_TOP`] slot).
121 #[inline]
122 pub fn len(&self) -> usize {
123 self.locs.len()
124 }
125
126 /// Whether the interner only holds the reserved [`LOC_TOP`] slot.
127 #[inline]
128 pub fn is_empty(&self) -> bool {
129 self.locs.len() <= 1
130 }
131
132 /// Resolve a [`LocId`] back to its [`AbsLoc`]. Panics on out-of-
133 /// range ids, only ids the interner produced are valid.
134 #[inline]
135 pub fn resolve(&self, id: LocId) -> &AbsLoc {
136 &self.locs[id.0 as usize]
137 }
138
139 /// Depth of an interned location. `0` for non-`Field` locations;
140 /// `1 + depth(parent)` for `Field { parent, .. }`.
141 #[inline]
142 pub fn depth(&self, id: LocId) -> u8 {
143 self.depths[id.0 as usize]
144 }
145
146 /// Intern an `Alloc` location.
147 pub fn intern_alloc(&mut self, body: BodyId, ssa_value: u32) -> LocId {
148 if let Some(&id) = self.alloc_lookup.get(&(body, ssa_value)) {
149 return id;
150 }
151 let id = self.push(AbsLoc::Alloc(body, ssa_value), 0);
152 self.alloc_lookup.insert((body, ssa_value), id);
153 id
154 }
155
156 /// Intern a positional `Param` location.
157 pub fn intern_param(&mut self, body: BodyId, index: usize) -> LocId {
158 if let Some(&id) = self.param_lookup.get(&(body, index)) {
159 return id;
160 }
161 let id = self.push(AbsLoc::Param(body, index), 0);
162 self.param_lookup.insert((body, index), id);
163 id
164 }
165
166 /// Intern a `SelfParam` location for the given body.
167 pub fn intern_self_param(&mut self, body: BodyId) -> LocId {
168 if let Some(&id) = self.self_param_lookup.get(&body) {
169 return id;
170 }
171 let id = self.push(AbsLoc::SelfParam(body), 0);
172 self.self_param_lookup.insert(body, id);
173 id
174 }
175
176 /// Intern a `Field { parent, field }` location. Returns
177 /// [`LOC_TOP`] when `parent` is `Top` or when the resulting depth
178 /// would exceed [`MAX_FIELD_DEPTH`].
179 pub fn intern_field(&mut self, parent: LocId, field: FieldId) -> LocId {
180 if parent == LOC_TOP {
181 return LOC_TOP;
182 }
183 let parent_depth = self.depth(parent);
184 if parent_depth >= MAX_FIELD_DEPTH {
185 return LOC_TOP;
186 }
187 let key = (parent, field);
188 if let Some(&id) = self.field_lookup.get(&key) {
189 return id;
190 }
191 let id = self.push(AbsLoc::Field { parent, field }, parent_depth + 1);
192 self.field_lookup.insert(key, id);
193 id
194 }
195
196 fn push(&mut self, loc: AbsLoc, depth: u8) -> LocId {
197 let id = LocId(self.locs.len() as u32);
198 self.locs.push(loc);
199 self.depths.push(depth);
200 id
201 }
202}
203
/// Coarse classification of a value's points-to set, used by consumers
/// (Hierarchy: resource lifecycle) that don't need full set membership but
/// do need to know "is this value's heap identity a *field* of some
/// other value, or does it stand on its own?".
///
/// The classifier is intentionally narrow: only [`PtrProxyHint::FieldOnly`]
/// is interesting to today's consumers; every other shape (empty, root,
/// `Top`, mixed) collapses to [`PtrProxyHint::Other`] so the consumer
/// keeps its existing behaviour.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum PtrProxyHint {
    /// Every member of the points-to set is an [`AbsLoc::Field`]. The
    /// value is a sub-object alias, e.g. `m` in `m := c.mu`.
    FieldOnly,
    /// Anything else: the set is empty, contains a root location
    /// ([`AbsLoc::SelfParam`] / [`AbsLoc::Param`] / [`AbsLoc::Alloc`]),
    /// contains [`AbsLoc::Top`], or mixes fields with roots. Consumers
    /// fall back to their default behaviour.
    Other,
}
224
/// Bounded points-to set: a small sorted vector of [`LocId`]s.
///
/// "Bounded" means the set silently collapses to `{Top}` on overflow;
/// downstream consumers treat `Top`-containing sets as
/// over-approximations exactly the same way [`AbsLoc::Top`] is treated
/// at the singleton level.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct PointsToSet {
    /// Sorted, deduped list of locations. When the cap
    /// ([`MAX_POINTSTO_MEMBERS`]) is exceeded the set is replaced by
    /// `[LOC_TOP]`. Inline storage for ≤4 members avoids the heap in
    /// the common case.
    ids: SmallVec<[LocId; 4]>,
}
237
238impl Default for PointsToSet {
239 fn default() -> Self {
240 Self::empty()
241 }
242}
243
244impl PointsToSet {
245 /// Empty set, the value points to nothing tracked by the
246 /// analysis (e.g. a scalar constant).
247 pub fn empty() -> Self {
248 Self {
249 ids: SmallVec::new(),
250 }
251 }
252
253 /// Singleton set wrapping `id`.
254 pub fn singleton(id: LocId) -> Self {
255 let mut ids = SmallVec::new();
256 ids.push(id);
257 Self { ids }
258 }
259
260 /// `{Top}`, the universal over-approximation.
261 pub fn top() -> Self {
262 Self::singleton(LOC_TOP)
263 }
264
265 /// True when the set contains [`LOC_TOP`] (i.e. has saturated to
266 /// the over-approximation).
267 pub fn is_top(&self) -> bool {
268 self.ids.contains(&LOC_TOP)
269 }
270
271 pub fn is_empty(&self) -> bool {
272 self.ids.is_empty()
273 }
274
275 pub fn len(&self) -> usize {
276 self.ids.len()
277 }
278
279 /// Iterate over members in sorted order.
280 pub fn iter(&self) -> impl Iterator<Item = LocId> + '_ {
281 self.ids.iter().copied()
282 }
283
284 /// Whether `id` is one of the set members (or the set is `Top`).
285 pub fn contains(&self, id: LocId) -> bool {
286 if self.is_top() {
287 return true;
288 }
289 self.ids.binary_search(&id).is_ok()
290 }
291
292 /// Insert `id`, maintaining sort/dedup. Saturates to `{Top}`
293 /// when the set would exceed [`MAX_POINTSTO_MEMBERS`].
294 pub fn insert(&mut self, id: LocId) {
295 if self.is_top() {
296 return;
297 }
298 if id == LOC_TOP {
299 self.ids.clear();
300 self.ids.push(LOC_TOP);
301 return;
302 }
303 match self.ids.binary_search(&id) {
304 Ok(_) => {}
305 Err(pos) => {
306 if self.ids.len() >= MAX_POINTSTO_MEMBERS {
307 self.ids.clear();
308 self.ids.push(LOC_TOP);
309 } else {
310 self.ids.insert(pos, id);
311 }
312 }
313 }
314 }
315
316 /// Set-union, in place. Returns `true` when `self` changed ,
317 /// the constraint solver uses the bit to decide whether the
318 /// containing equivalence class needs another pass.
319 pub fn union_in_place(&mut self, other: &PointsToSet) -> bool {
320 if self.is_top() {
321 return false;
322 }
323 if other.is_top() {
324 let was_top = self.is_top();
325 self.ids.clear();
326 self.ids.push(LOC_TOP);
327 return !was_top;
328 }
329 let mut changed = false;
330 for id in other.iter() {
331 if id == LOC_TOP {
332 let was_top = self.is_top();
333 self.ids.clear();
334 self.ids.push(LOC_TOP);
335 return !was_top;
336 }
337 match self.ids.binary_search(&id) {
338 Ok(_) => {}
339 Err(pos) => {
340 if self.ids.len() >= MAX_POINTSTO_MEMBERS {
341 self.ids.clear();
342 self.ids.push(LOC_TOP);
343 return true;
344 }
345 self.ids.insert(pos, id);
346 changed = true;
347 }
348 }
349 }
350 changed
351 }
352}
353
#[cfg(test)]
mod tests {
    use super::*;

    /// All tests operate within a single body; the id is arbitrary.
    fn body() -> BodyId {
        BodyId(0)
    }

    #[test]
    fn loc_top_is_zero() {
        let fresh = LocInterner::new();
        assert_eq!(fresh.len(), 1);
        assert_eq!(fresh.resolve(LOC_TOP), &AbsLoc::Top);
    }

    #[test]
    fn alloc_intern_dedupes() {
        let mut it = LocInterner::new();
        let first = it.intern_alloc(body(), 7);
        let repeat = it.intern_alloc(body(), 7);
        let other = it.intern_alloc(body(), 8);
        assert_eq!(first, repeat);
        assert_ne!(first, other);
    }

    #[test]
    fn param_intern_dedupes_by_index() {
        let mut it = LocInterner::new();
        let first = it.intern_param(body(), 0);
        let second = it.intern_param(body(), 1);
        assert_ne!(first, second);
        assert_eq!(first, it.intern_param(body(), 0));
    }

    #[test]
    fn field_intern_dedupes_structurally() {
        let mut it = LocInterner::new();
        let owner = it.intern_self_param(body());
        let fid = FieldId(7);
        assert_eq!(
            it.intern_field(owner, fid),
            it.intern_field(owner, fid),
            "same parent + same field id ⇒ same loc id"
        );
    }

    #[test]
    fn field_chain_depth_bounded() {
        let mut it = LocInterner::new();
        let fid = FieldId(1);
        let mut chain = it.intern_self_param(body());
        for _ in 0..MAX_FIELD_DEPTH {
            chain = it.intern_field(chain, fid);
            assert_ne!(chain, LOC_TOP, "depth ≤ MAX should not fold");
        }
        assert_eq!(
            it.intern_field(chain, fid),
            LOC_TOP,
            "exceeding MAX_FIELD_DEPTH folds to Top"
        );
    }

    #[test]
    fn field_of_top_is_top() {
        let mut it = LocInterner::new();
        assert_eq!(it.intern_field(LOC_TOP, FieldId(0)), LOC_TOP);
    }

    #[test]
    fn pointsto_set_empty_singleton_top() {
        assert!(PointsToSet::empty().is_empty());
        assert!(PointsToSet::top().is_top());
        let mut it = LocInterner::new();
        let recv = it.intern_self_param(body());
        let set = PointsToSet::singleton(recv);
        assert!(set.contains(recv));
        assert!(!set.is_top());
    }

    #[test]
    fn pointsto_set_insert_and_union() {
        let mut it = LocInterner::new();
        let p0 = it.intern_param(body(), 0);
        let p1 = it.intern_param(body(), 1);
        let mut lhs = PointsToSet::singleton(p0);
        let rhs = PointsToSet::singleton(p1);
        assert!(lhs.union_in_place(&rhs), "first union adds p1");
        assert_eq!(lhs.len(), 2);
        assert!(lhs.contains(p0) && lhs.contains(p1));
        // A second union with the same operand must be a no-op.
        assert!(!lhs.union_in_place(&rhs));
    }

    #[test]
    fn pointsto_set_saturates_to_top_on_overflow() {
        let mut it = LocInterner::new();
        let mut set = PointsToSet::empty();
        for ssa in 0..(MAX_POINTSTO_MEMBERS as u32 + 4) {
            set.insert(it.intern_alloc(body(), ssa));
        }
        assert!(set.is_top(), "set should collapse to {{Top}} on overflow");
    }

    #[test]
    fn pointsto_set_union_with_top_is_top() {
        let mut it = LocInterner::new();
        let mut set = PointsToSet::singleton(it.intern_param(body(), 0));
        assert!(set.union_in_place(&PointsToSet::top()));
        assert!(set.is_top());
    }
}
466}