// nyx_scanner/pointer/domain.rs
1//! Abstract domain for field-sensitive Steensgaard points-to.
2//!
3//! Locations are interned to compact `LocId(u32)` handles so the
4//! union-find resolver can operate on dense integer keys.  Field
//! locations are keyed structurally by `(parent_loc_id, field_id)`:
//! interning a `Field(parent, f)` always returns the same `LocId` no
//! matter how many times the same `(parent, f)` pair is requested.
8
9use crate::cfg::BodyId;
10use crate::ssa::ir::FieldId;
11use smallvec::SmallVec;
12use std::collections::HashMap;
13
/// Maximum nesting depth for `Field(...)` chains before folding to `Top`.
///
/// Bounds the per-body work for pathological recursive walks like
/// `a.next.next.next.…` and matches the bound called out in the
/// pointer-analysis prompt.  Enforced by [`LocInterner::intern_field`],
/// which returns [`LOC_TOP`] once a chain would exceed this depth.
pub const MAX_FIELD_DEPTH: u8 = 3;
20
/// Maximum members per [`PointsToSet`] before we collapse the set to
/// the over-approximation `{Top}`.  Keeps both the set and downstream
/// constraint propagation bounded; mirrors the spirit of
/// [`crate::ssa::heap::effective_max_pointsto`] without sharing the
/// exact value (this analysis runs flow-insensitively across the body
/// so its sets are typically smaller).  Enforced by
/// [`PointsToSet::insert`] and [`PointsToSet::union_in_place`].
pub const MAX_POINTSTO_MEMBERS: usize = 16;
28
/// Compact handle for an interned [`AbsLoc`].
///
/// All abstract locations referenced by a single body share one
/// [`LocInterner`]; `LocId`s are only meaningful relative to that
/// interner.  IDs are assigned densely from 0 and are stable for the
/// lifetime of the interner so the union-find can index parent / rank
/// arrays directly.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct LocId(pub u32);

/// Sentinel "anywhere" location.  Always `LocId(0)`: the interner
/// reserves the first slot at construction so callers can compare
/// against it cheaply.
pub const LOC_TOP: LocId = LocId(0);
43
/// Abstract heap location in the points-to lattice.
///
/// Each variant names a distinct "a pointer may target this" fact.
/// Cyclic field chains (e.g. `a.next.next.…`) are bounded by
/// [`MAX_FIELD_DEPTH`]; once the cap is exceeded the chain folds to
/// [`AbsLoc::Top`].
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub enum AbsLoc {
    /// "Anywhere": the over-approximation used when precision is
    /// unrecoverable (e.g. a value sourced from outside the analysed
    /// body, or a points-to set that exceeded the cap).
    Top,
    /// Allocation site within a body, identified by the SSA value of
    /// the defining instruction.  SSA guarantees a single definition
    /// per value, so the SSA value uniquely names the allocation site.
    ///
    /// `body` disambiguates allocations across bodies in the same
    /// file.  The interned `u32` is the `SsaValue.0` of the call /
    /// constructor instruction.
    Alloc(BodyId, u32),
    /// Function parameter: the abstract identity of the value
    /// supplied by the caller for parameter `index`.  The receiver
    /// (`self` / `this`) uses [`AbsLoc::SelfParam`] instead.
    Param(BodyId, usize),
    /// Implicit method receiver (`self` / `this`).  Distinct from
    /// `Param(_, _)` so callers don't have to encode an "is the
    /// receiver" sentinel index.
    SelfParam(BodyId),
    /// Heap field of a parent location: `parent.f`.  `parent` is
    /// itself a [`LocId`], so chains of field accesses produce nested
    /// `Field` locations.  Depth is bounded by [`MAX_FIELD_DEPTH`].
    Field { parent: LocId, field: FieldId },
}
76
/// Per-body interner mapping [`AbsLoc`] → dense [`LocId`].
///
/// Owns the canonical store: callers only hold [`LocId`]s and resolve
/// them through the interner.  The first slot ([`LOC_TOP`]) is always
/// `Top`, so the union-find resolver can short-circuit "is this Top?"
/// queries with a single integer compare.
#[derive(Clone, Debug)]
pub struct LocInterner {
    /// Locations indexed by `LocId.0`.
    locs: Vec<AbsLoc>,
    /// Reverse lookup: `(BodyId, alloc-ssa-value)` → `LocId`.
    alloc_lookup: HashMap<(BodyId, u32), LocId>,
    /// Reverse lookup: `(BodyId, param-index)` → `LocId`.
    param_lookup: HashMap<(BodyId, usize), LocId>,
    /// Reverse lookup for `SelfParam`.
    self_param_lookup: HashMap<BodyId, LocId>,
    /// Reverse lookup for `Field { parent, field }`.
    field_lookup: HashMap<(LocId, FieldId), LocId>,
    /// Interned depth of each location (0 for non-Field), indexed by
    /// `LocId.0` in lockstep with `locs`.  Used to fold deeply-nested
    /// `Field` chains to [`AbsLoc::Top`].
    depths: Vec<u8>,
}
99
100impl Default for LocInterner {
101    fn default() -> Self {
102        Self::new()
103    }
104}
105
106impl LocInterner {
107    /// Create a fresh interner with [`LOC_TOP`] pre-installed.
108    pub fn new() -> Self {
109        Self {
110            locs: vec![AbsLoc::Top],
111            alloc_lookup: HashMap::new(),
112            param_lookup: HashMap::new(),
113            self_param_lookup: HashMap::new(),
114            field_lookup: HashMap::new(),
115            depths: vec![0],
116        }
117    }
118
119    /// Total number of interned locations (including the reserved
120    /// [`LOC_TOP`] slot).
121    #[inline]
122    pub fn len(&self) -> usize {
123        self.locs.len()
124    }
125
126    /// Whether the interner only holds the reserved [`LOC_TOP`] slot.
127    #[inline]
128    pub fn is_empty(&self) -> bool {
129        self.locs.len() <= 1
130    }
131
132    /// Resolve a [`LocId`] back to its [`AbsLoc`].  Panics on out-of-
133    /// range ids, only ids the interner produced are valid.
134    #[inline]
135    pub fn resolve(&self, id: LocId) -> &AbsLoc {
136        &self.locs[id.0 as usize]
137    }
138
139    /// Depth of an interned location.  `0` for non-`Field` locations;
140    /// `1 + depth(parent)` for `Field { parent, .. }`.
141    #[inline]
142    pub fn depth(&self, id: LocId) -> u8 {
143        self.depths[id.0 as usize]
144    }
145
146    /// Intern an `Alloc` location.
147    pub fn intern_alloc(&mut self, body: BodyId, ssa_value: u32) -> LocId {
148        if let Some(&id) = self.alloc_lookup.get(&(body, ssa_value)) {
149            return id;
150        }
151        let id = self.push(AbsLoc::Alloc(body, ssa_value), 0);
152        self.alloc_lookup.insert((body, ssa_value), id);
153        id
154    }
155
156    /// Intern a positional `Param` location.
157    pub fn intern_param(&mut self, body: BodyId, index: usize) -> LocId {
158        if let Some(&id) = self.param_lookup.get(&(body, index)) {
159            return id;
160        }
161        let id = self.push(AbsLoc::Param(body, index), 0);
162        self.param_lookup.insert((body, index), id);
163        id
164    }
165
166    /// Intern a `SelfParam` location for the given body.
167    pub fn intern_self_param(&mut self, body: BodyId) -> LocId {
168        if let Some(&id) = self.self_param_lookup.get(&body) {
169            return id;
170        }
171        let id = self.push(AbsLoc::SelfParam(body), 0);
172        self.self_param_lookup.insert(body, id);
173        id
174    }
175
176    /// Intern a `Field { parent, field }` location.  Returns
177    /// [`LOC_TOP`] when `parent` is `Top` or when the resulting depth
178    /// would exceed [`MAX_FIELD_DEPTH`].
179    pub fn intern_field(&mut self, parent: LocId, field: FieldId) -> LocId {
180        if parent == LOC_TOP {
181            return LOC_TOP;
182        }
183        let parent_depth = self.depth(parent);
184        if parent_depth >= MAX_FIELD_DEPTH {
185            return LOC_TOP;
186        }
187        let key = (parent, field);
188        if let Some(&id) = self.field_lookup.get(&key) {
189            return id;
190        }
191        let id = self.push(AbsLoc::Field { parent, field }, parent_depth + 1);
192        self.field_lookup.insert(key, id);
193        id
194    }
195
196    fn push(&mut self, loc: AbsLoc, depth: u8) -> LocId {
197        let id = LocId(self.locs.len() as u32);
198        self.locs.push(loc);
199        self.depths.push(depth);
200        id
201    }
202}
203
/// Coarse classification of a value's points-to set, used by consumers
/// (Hierarchy: resource lifecycle) that don't need full set membership but
/// do need to know "is this value's heap identity a *field* of some
/// other value, or does it stand on its own?".
///
/// The classifier is intentionally narrow: only [`PtrProxyHint::FieldOnly`]
/// is interesting to today's consumers; every other shape (empty, root,
/// `Top`, mixed) collapses to [`PtrProxyHint::Other`] so the consumer
/// keeps its existing behaviour.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum PtrProxyHint {
    /// Every member of the points-to set is an [`AbsLoc::Field`].  The
    /// value is a sub-object alias, e.g. `m` in `m := c.mu`.
    FieldOnly,
    /// Anything else: the set is empty, contains a root location
    /// ([`AbsLoc::SelfParam`] / [`AbsLoc::Param`] / [`AbsLoc::Alloc`]),
    /// contains [`AbsLoc::Top`], or mixes fields with roots.  Consumers
    /// fall back to their default behaviour.
    Other,
}
224
/// Bounded points-to set: a small sorted vector of [`LocId`]s.
///
/// "Bounded" means the set silently collapses to `{Top}` on overflow;
/// downstream consumers treat `Top`-containing sets as
/// over-approximations exactly the same way [`AbsLoc::Top`] is treated
/// at the singleton level.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct PointsToSet {
    /// Sorted, deduped list of locations (invariant maintained by
    /// `insert` / `union_in_place`).  When the cap is exceeded the set
    /// is replaced by `[LOC_TOP]`.
    ids: SmallVec<[LocId; 4]>,
}
237
238impl Default for PointsToSet {
239    fn default() -> Self {
240        Self::empty()
241    }
242}
243
244impl PointsToSet {
245    /// Empty set, the value points to nothing tracked by the
246    /// analysis (e.g. a scalar constant).
247    pub fn empty() -> Self {
248        Self {
249            ids: SmallVec::new(),
250        }
251    }
252
253    /// Singleton set wrapping `id`.
254    pub fn singleton(id: LocId) -> Self {
255        let mut ids = SmallVec::new();
256        ids.push(id);
257        Self { ids }
258    }
259
260    /// `{Top}`, the universal over-approximation.
261    pub fn top() -> Self {
262        Self::singleton(LOC_TOP)
263    }
264
265    /// True when the set contains [`LOC_TOP`] (i.e. has saturated to
266    /// the over-approximation).
267    pub fn is_top(&self) -> bool {
268        self.ids.contains(&LOC_TOP)
269    }
270
271    pub fn is_empty(&self) -> bool {
272        self.ids.is_empty()
273    }
274
275    pub fn len(&self) -> usize {
276        self.ids.len()
277    }
278
279    /// Iterate over members in sorted order.
280    pub fn iter(&self) -> impl Iterator<Item = LocId> + '_ {
281        self.ids.iter().copied()
282    }
283
284    /// Whether `id` is one of the set members (or the set is `Top`).
285    pub fn contains(&self, id: LocId) -> bool {
286        if self.is_top() {
287            return true;
288        }
289        self.ids.binary_search(&id).is_ok()
290    }
291
292    /// Insert `id`, maintaining sort/dedup.  Saturates to `{Top}`
293    /// when the set would exceed [`MAX_POINTSTO_MEMBERS`].
294    pub fn insert(&mut self, id: LocId) {
295        if self.is_top() {
296            return;
297        }
298        if id == LOC_TOP {
299            self.ids.clear();
300            self.ids.push(LOC_TOP);
301            return;
302        }
303        match self.ids.binary_search(&id) {
304            Ok(_) => {}
305            Err(pos) => {
306                if self.ids.len() >= MAX_POINTSTO_MEMBERS {
307                    self.ids.clear();
308                    self.ids.push(LOC_TOP);
309                } else {
310                    self.ids.insert(pos, id);
311                }
312            }
313        }
314    }
315
316    /// Set-union, in place.  Returns `true` when `self` changed ,
317    /// the constraint solver uses the bit to decide whether the
318    /// containing equivalence class needs another pass.
319    pub fn union_in_place(&mut self, other: &PointsToSet) -> bool {
320        if self.is_top() {
321            return false;
322        }
323        if other.is_top() {
324            let was_top = self.is_top();
325            self.ids.clear();
326            self.ids.push(LOC_TOP);
327            return !was_top;
328        }
329        let mut changed = false;
330        for id in other.iter() {
331            if id == LOC_TOP {
332                let was_top = self.is_top();
333                self.ids.clear();
334                self.ids.push(LOC_TOP);
335                return !was_top;
336            }
337            match self.ids.binary_search(&id) {
338                Ok(_) => {}
339                Err(pos) => {
340                    if self.ids.len() >= MAX_POINTSTO_MEMBERS {
341                        self.ids.clear();
342                        self.ids.push(LOC_TOP);
343                        return true;
344                    }
345                    self.ids.insert(pos, id);
346                    changed = true;
347                }
348            }
349        }
350        changed
351    }
352}
353
#[cfg(test)]
mod tests {
    use super::*;

    /// All tests share one arbitrary body id; the interner keys on
    /// `BodyId`, so `BodyId(0)` is as good as any.
    fn body() -> BodyId {
        BodyId(0)
    }

    // The reserved sentinel slot exists from construction and resolves
    // back to `AbsLoc::Top`.
    #[test]
    fn loc_top_is_zero() {
        let interner = LocInterner::new();
        assert_eq!(interner.len(), 1);
        assert_eq!(interner.resolve(LOC_TOP), &AbsLoc::Top);
    }

    // Same (body, ssa-value) pair interns to the same id; a different
    // ssa-value gets a fresh id.
    #[test]
    fn alloc_intern_dedupes() {
        let mut interner = LocInterner::new();
        let a = interner.intern_alloc(body(), 7);
        let b = interner.intern_alloc(body(), 7);
        let c = interner.intern_alloc(body(), 8);
        assert_eq!(a, b);
        assert_ne!(a, c);
    }

    // Params dedupe by (body, index), and distinct indices stay distinct.
    #[test]
    fn param_intern_dedupes_by_index() {
        let mut interner = LocInterner::new();
        let p0 = interner.intern_param(body(), 0);
        let p1 = interner.intern_param(body(), 1);
        let p0_again = interner.intern_param(body(), 0);
        assert_eq!(p0, p0_again);
        assert_ne!(p0, p1);
    }

    // Field interning is structural: the (parent, field) key alone
    // determines the id.
    #[test]
    fn field_intern_dedupes_structurally() {
        let mut interner = LocInterner::new();
        let parent = interner.intern_self_param(body());
        let f = FieldId(7);
        let a = interner.intern_field(parent, f);
        let b = interner.intern_field(parent, f);
        assert_eq!(a, b, "same parent + same field id ⇒ same loc id");
    }

    // Chains of exactly MAX_FIELD_DEPTH fields survive; one more folds
    // to LOC_TOP.
    #[test]
    fn field_chain_depth_bounded() {
        let mut interner = LocInterner::new();
        let mut cur = interner.intern_self_param(body());
        let f = FieldId(1);
        for _ in 0..MAX_FIELD_DEPTH {
            cur = interner.intern_field(cur, f);
            assert_ne!(cur, LOC_TOP, "depth ≤ MAX should not fold");
        }
        let folded = interner.intern_field(cur, f);
        assert_eq!(folded, LOC_TOP, "exceeding MAX_FIELD_DEPTH folds to Top");
    }

    // A field of the "anywhere" location is itself "anywhere".
    #[test]
    fn field_of_top_is_top() {
        let mut interner = LocInterner::new();
        let folded = interner.intern_field(LOC_TOP, FieldId(0));
        assert_eq!(folded, LOC_TOP);
    }

    // Basic constructors: empty is empty, top is top, a singleton
    // contains exactly its member and is not top.
    #[test]
    fn pointsto_set_empty_singleton_top() {
        assert!(PointsToSet::empty().is_empty());
        assert!(PointsToSet::top().is_top());
        let mut interner = LocInterner::new();
        let p = interner.intern_self_param(body());
        let s = PointsToSet::singleton(p);
        assert!(s.contains(p));
        assert!(!s.is_top());
    }

    // union_in_place reports a change on first union and none on the
    // idempotent re-union.
    #[test]
    fn pointsto_set_insert_and_union() {
        let mut interner = LocInterner::new();
        let p0 = interner.intern_param(body(), 0);
        let p1 = interner.intern_param(body(), 1);
        let mut a = PointsToSet::singleton(p0);
        let b = PointsToSet::singleton(p1);
        let changed = a.union_in_place(&b);
        assert!(changed);
        assert_eq!(a.len(), 2);
        assert!(a.contains(p0));
        assert!(a.contains(p1));
        // Re-union is idempotent.
        let changed2 = a.union_in_place(&b);
        assert!(!changed2);
    }

    // Inserting more than MAX_POINTSTO_MEMBERS distinct locations
    // collapses the set to {Top}.
    #[test]
    fn pointsto_set_saturates_to_top_on_overflow() {
        let mut interner = LocInterner::new();
        let mut s = PointsToSet::empty();
        for i in 0..(MAX_POINTSTO_MEMBERS as u32 + 4) {
            s.insert(interner.intern_alloc(body(), i));
        }
        assert!(s.is_top(), "set should collapse to {{Top}} on overflow");
    }

    // Union with {Top} saturates and reports a change.
    #[test]
    fn pointsto_set_union_with_top_is_top() {
        let mut interner = LocInterner::new();
        let p = interner.intern_param(body(), 0);
        let mut a = PointsToSet::singleton(p);
        let changed = a.union_in_place(&PointsToSet::top());
        assert!(changed);
        assert!(a.is_top());
    }
}
466}