Skip to main content

nyx_scanner/pointer/
analysis.rs

1//! Field-sensitive Steensgaard points-to analysis driver.
2//!
3//! Flow-insensitive union-find over SSA values; field sensitivity comes
4//! from representing each `obj.f` access as a structural
5//! [`AbsLoc::Field`] keyed by `(parent_loc, field)`.
6
7use std::collections::HashMap;
8
9use crate::cfg::BodyId;
10use crate::ssa::ir::{FieldId, SsaBody, SsaInst, SsaOp, SsaValue};
11
12use super::domain::{AbsLoc, LOC_TOP, LocId, LocInterner, PointsToSet, PtrProxyHint};
13
14/// Maximum constraint-solver iterations before bailing.  Each pass
15/// walks every instruction once; in practice the analysis converges
16/// in a small number of passes for any well-formed body.
17const MAX_FIXPOINT_ITERS: usize = 8;
18
19/// Container-read callees that pull a single element out of a
20/// collection without a key.  Cross-language; non-listed callees still
21/// get fresh-alloc behaviour, so the list is conservative.
22fn is_container_read_callee(callee: &str) -> bool {
23    let bare = match callee.rsplit_once('.') {
24        Some((_, m)) => m,
25        None => callee,
26    };
27    matches!(
28        bare,
29        "shift"
30            | "pop"
31            | "peek"
32            | "front"
33            | "back"
34            | "first"
35            | "last"
36            | "head"
37            | "tail"
38            | "dequeue"
39            | "remove"
40            | "popleft"
41            // synthetic callee for subscript reads (`arr[i]`, `map[k]`)
42            | "__index_get__"
43    )
44}
45
46/// Container-write callees, mirror of `is_container_read_callee`.
47pub fn is_container_write_callee(callee: &str) -> bool {
48    let bare = match callee.rsplit_once('.') {
49        Some((_, m)) => m,
50        None => callee,
51    };
52    matches!(
53        bare,
54        "push"
55            | "pushback"
56            | "push_back"
57            | "pushfront"
58            | "push_front"
59            | "append"
60            | "add"
61            | "insert"
62            | "enqueue"
63            | "unshift"
64            // synthetic callee for subscript writes (`arr[i] = v`, `map[k] = v`)
65            | "__index_set__"
66    )
67}
68
69/// Public re-export of `is_container_read_callee` for the taint engine.
70pub fn is_container_read_callee_pub(callee: &str) -> bool {
71    is_container_read_callee(callee)
72}
73
74/// Derive a [`crate::summary::points_to::FieldPointsToSummary`] from
75/// per-body points-to facts.
76///
77/// Records two channels:
78///
79/// 1. **Reads**, walks every [`SsaOp::FieldProj`] in the body; for
80///    each `loc ∈ pt(receiver)` that resolves to a parameter
81///    location ([`AbsLoc::Param`] / [`AbsLoc::SelfParam`]), records
82///    the projected field name into the summary's
83///    `param_field_reads`.
84/// 2. **Writes**, walks the body's [`SsaBody::field_writes`] side-
85///    table (populated by SSA lowering's W1 synth-Assign hook) and
86///    records each `(receiver, FieldId)` pair against the receiver's
87///    pt set the same way reads are recorded.
88///
89/// Field name resolution goes through the body's
90/// [`SsaBody::field_interner`] because [`crate::ssa::ir::FieldId`]
91/// is body-local, names are the only stable cross-file identity.
92///
93/// Receiver (`SelfParam`) reads/writes are recorded under the
94/// [`u32::MAX`] sentinel parameter index, mirroring the convention in
95/// `SsaFuncSummary::receiver_to_*` fields.
96///
97/// The container-element sentinel field [`FieldId::ELEM`] is recorded
98/// under the special name `"<elem>"` so callers can recognise the
99/// abstract-element flow without leaking the implementation detail
100/// of the sentinel `u32::MAX` value across the wire.
101pub fn extract_field_points_to(
102    body: &SsaBody,
103    facts: &PointsToFacts,
104) -> crate::summary::points_to::FieldPointsToSummary {
105    use crate::summary::points_to::FieldPointsToSummary;
106    let mut out = FieldPointsToSummary::empty();
107    if body.field_interner.is_empty() && body.field_writes.is_empty() {
108        return out;
109    }
110    // Resolve a body-local FieldId to its cross-wire-stable name.
111    // Returns `None` when the id is out of range (deserialised body
112    // with a fresh interner) or doesn't correspond to a real field.
113    let field_name = |field: FieldId| -> Option<String> {
114        if field == FieldId::ELEM {
115            Some("<elem>".to_string())
116        } else if (field.0 as usize) < body.field_interner.len() {
117            Some(body.field_interner.resolve(field).to_string())
118        } else {
119            None
120        }
121    };
122    // Apply a single read or write to the summary, dispatching on
123    // the abstract location's parameter / receiver shape.
124    let record =
125        |loc: LocId, name: &str, out: &mut FieldPointsToSummary, is_write: bool| match facts
126            .interner
127            .resolve(loc)
128        {
129            crate::pointer::AbsLoc::Param(_, idx) => {
130                if is_write {
131                    out.add_write(*idx as u32, name);
132                } else {
133                    out.add_read(*idx as u32, name);
134                }
135            }
136            crate::pointer::AbsLoc::SelfParam(_) => {
137                if is_write {
138                    out.add_write(u32::MAX, name);
139                } else {
140                    out.add_read(u32::MAX, name);
141                }
142            }
143            _ => {}
144        };
145
146    // Channel 1: reads from FieldProj.
147    for block in &body.blocks {
148        for inst in block.body.iter() {
149            if let SsaOp::FieldProj {
150                receiver, field, ..
151            } = &inst.op
152            {
153                let pt = facts.pt(*receiver);
154                if pt.is_empty() || pt.is_top() {
155                    continue;
156                }
157                let Some(name) = field_name(*field) else {
158                    continue;
159                };
160                for loc in pt.iter() {
161                    record(loc, &name, &mut out, /* is_write */ false);
162                }
163            }
164        }
165    }
166
167    // Channel 2: writes from the synth-Assign side-table.  Each
168    // entry maps the synthetic Assign's defined value → (receiver
169    // SsaValue, FieldId).  The receiver's pt set determines which
170    // parameter index the write attributes to.
171    for (receiver, field) in body.field_writes.values() {
172        let pt = facts.pt(*receiver);
173        if pt.is_empty() || pt.is_top() {
174            continue;
175        }
176        let Some(name) = field_name(*field) else {
177            continue;
178        };
179        for loc in pt.iter() {
180            record(loc, &name, &mut out, /* is_write */ true);
181        }
182    }
183
184    out
185}
186
187/// Per-body points-to result.
188///
189/// Owns the body-local [`LocInterner`] and a flat `SsaValue → PointsToSet`
190/// table.  The table is dense, one slot per SSA value, so lookups
191/// are O(1).
192#[derive(Clone, Debug)]
193pub struct PointsToFacts {
194    /// Body the facts were computed for; used as the disambiguator
195    /// inside [`crate::pointer::AbsLoc::Param`] / `Alloc` / `SelfParam`.
196    pub body: BodyId,
197    /// Interner for the [`super::domain::AbsLoc`] referenced by the
198    /// per-value points-to sets.
199    pub interner: LocInterner,
200    /// `pt(v)` for every SSA value in the body.  Unreachable / unused
201    /// slots are `PointsToSet::empty()`.
202    by_value: Vec<PointsToSet>,
203}
204
205impl PointsToFacts {
206    /// Empty result, every value points to nothing.  Used by callers
207    /// that need a "no facts" placeholder when the analysis is
208    /// disabled or the body could not be analysed.
209    pub fn empty(body: BodyId) -> Self {
210        Self {
211            body,
212            interner: LocInterner::new(),
213            by_value: Vec::new(),
214        }
215    }
216
217    /// Borrow the points-to set for `v`.  Returns an empty set when
218    /// `v` is out of range (e.g. a value defined by an instruction
219    /// the analysis didn't visit).
220    pub fn pt(&self, v: SsaValue) -> &PointsToSet {
221        let idx = v.0 as usize;
222        static EMPTY: once_cell::sync::Lazy<PointsToSet> =
223            once_cell::sync::Lazy::new(PointsToSet::empty);
224        self.by_value.get(idx).unwrap_or(&EMPTY)
225    }
226
227    /// True when every value has an empty points-to set.  Used as a
228    /// fast-path skip in callers that only care about non-trivial
229    /// aliasing.
230    pub fn is_trivial(&self) -> bool {
231        self.by_value.iter().all(|s| s.is_empty())
232    }
233
234    /// Number of SSA values covered by the facts table.
235    pub fn len(&self) -> usize {
236        self.by_value.len()
237    }
238
239    /// True when no SSA values are covered by the facts table.
240    pub fn is_empty(&self) -> bool {
241        self.by_value.is_empty()
242    }
243
244    /// Classify a value's points-to set into a [`PtrProxyHint`] for
245    /// consumers that only care about the "is this a sub-field alias"
246    /// distinction.  Returns [`PtrProxyHint::Other`] for empty sets,
247    /// `Top`, and any set containing a root location ([`AbsLoc::SelfParam`]
248    /// / [`AbsLoc::Param`] / [`AbsLoc::Alloc`]).  Returns
249    /// [`PtrProxyHint::FieldOnly`] iff every member is an
250    /// [`AbsLoc::Field`].
251    ///
252    pub fn proxy_hint(&self, v: SsaValue) -> PtrProxyHint {
253        let set = self.pt(v);
254        if set.is_empty() || set.is_top() {
255            return PtrProxyHint::Other;
256        }
257        for id in set.iter() {
258            match self.interner.resolve(id) {
259                AbsLoc::Field { .. } => {}
260                _ => return PtrProxyHint::Other,
261            }
262        }
263        PtrProxyHint::FieldOnly
264    }
265
266    /// Build a `var_name → PtrProxyHint` map by scanning the body's
267    /// value defs for the latest definition of each named variable.
268    /// Names that resolve to no variable, or whose latest definition is
269    /// `Other`, are omitted, only `FieldOnly` entries appear.
270    ///
271    /// Iterates over [`SsaBody::value_defs`] in *reverse* order so the
272    /// last (post-renaming) SSA definition for each name wins.  Used by
273    /// the resource-lifecycle pass to look up `pt(receiver_text)` in
274    /// `apply_call` without re-walking the SSA body.
275    pub fn name_proxy_hints(
276        &self,
277        body: &SsaBody,
278    ) -> std::collections::HashMap<String, PtrProxyHint> {
279        let mut out = std::collections::HashMap::new();
280        for (idx, def) in body.value_defs.iter().enumerate().rev() {
281            let Some(name) = def.var_name.as_ref() else {
282                continue;
283            };
284            if out.contains_key(name) {
285                continue;
286            }
287            let hint = self.proxy_hint(SsaValue(idx as u32));
288            if hint == PtrProxyHint::FieldOnly {
289                out.insert(name.clone(), hint);
290            }
291        }
292        out
293    }
294}
295
296/// Analyse a single body and return its [`PointsToFacts`].
297///
298/// `body_id` is used as the disambiguator inside the abstract
299/// locations, supplying a stable id (e.g. the file's
300/// `BodyMeta.id`) lets callers compare facts emitted by different
301/// bodies in the same file.
302pub fn analyse_body(body: &SsaBody, body_id: BodyId) -> PointsToFacts {
303    let mut state = AnalysisState::new(body_id, body.num_values());
304
305    // Pass 1, emit constraints from ops that don't depend on
306    // representative resolution (Param, SelfParam, Call result,
307    // etc.).  These produce the "leaf" points-to sets.
308    for block in &body.blocks {
309        for inst in block.phis.iter().chain(block.body.iter()) {
310            state.transfer_inst(body_id, inst);
311        }
312    }
313
314    // Pass 2+, propagate through field projections, phis, and
315    // assignments until a fixpoint.  Field projections need iteration
316    // because a `FieldProj` whose receiver's representative changes
317    // (via a later unification) must re-emit its constraint with the
318    // new representative.
319    for _ in 0..MAX_FIXPOINT_ITERS {
320        let mut changed = false;
321        for block in &body.blocks {
322            for inst in block.phis.iter().chain(block.body.iter()) {
323                changed |= state.propagate_inst(inst);
324            }
325        }
326        if !changed {
327            break;
328        }
329    }
330
331    state.into_facts()
332}
333
334// ── Constraint solver internals ────────────────────────────────────
335
336/// Mutable analysis state, the interner, points-to table, and
337/// union-find arrays.  Lives inside `analyse_body` only.
338struct AnalysisState {
339    /// Body-id forwarded to [`PointsToFacts::body`] when the analysis
340    /// completes.  Recorded here so `into_facts` can preserve the
341    /// caller-supplied id instead of defaulting to `BodyId(0)`.
342    body_id: BodyId,
343    interner: LocInterner,
344    pt: Vec<PointsToSet>,
345    parent: Vec<u32>,
346    rank: Vec<u8>,
347}
348
349impl AnalysisState {
350    fn new(body_id: BodyId, num_values: usize) -> Self {
351        Self {
352            body_id,
353            interner: LocInterner::new(),
354            pt: vec![PointsToSet::empty(); num_values],
355            parent: (0..num_values as u32).collect(),
356            rank: vec![0; num_values],
357        }
358    }
359
360    /// Union-find find with path compression.
361    fn find(&mut self, mut v: u32) -> u32 {
362        if v as usize >= self.parent.len() {
363            return v;
364        }
365        // Walk to root.
366        let mut root = v;
367        while self.parent[root as usize] != root {
368            root = self.parent[root as usize];
369        }
370        // Compress.
371        while self.parent[v as usize] != root {
372            let next = self.parent[v as usize];
373            self.parent[v as usize] = root;
374            v = next;
375        }
376        root
377    }
378
379    /// Union `a` and `b` by rank.  Returns the new representative.
380    /// Merges the points-to sets of the two classes into the new
381    /// representative's slot.
382    fn union(&mut self, a: u32, b: u32) -> u32 {
383        let ra = self.find(a);
384        let rb = self.find(b);
385        if ra == rb {
386            return ra;
387        }
388        let (winner, loser) = match self.rank[ra as usize].cmp(&self.rank[rb as usize]) {
389            std::cmp::Ordering::Less => (rb, ra),
390            std::cmp::Ordering::Greater => (ra, rb),
391            std::cmp::Ordering::Equal => {
392                self.rank[ra as usize] += 1;
393                (ra, rb)
394            }
395        };
396        self.parent[loser as usize] = winner;
397        // Move the loser's points-to set into the winner's slot.
398        let loser_pt = std::mem::take(&mut self.pt[loser as usize]);
399        let _ = self.pt[winner as usize].union_in_place(&loser_pt);
400        winner
401    }
402
403    /// `pt(rep) ∪= {loc}`.
404    fn add_loc(&mut self, ssa: u32, loc: LocId) -> bool {
405        let rep = self.find(ssa) as usize;
406        let mut delta = PointsToSet::singleton(loc);
407        // Inline insert via union to keep the saturation logic in one place.
408        let changed = self.pt[rep].union_in_place(&delta);
409        // `delta` no longer needed.
410        let _ = &mut delta;
411        changed
412    }
413
414    /// `pt(rep_a) ∪= pt(rep_b)`.  Caller is responsible for passing
415    /// already-resolved representatives if it wants Steensgaard
416    /// unification, see `union` for that.
417    fn copy_pt(&mut self, dst: u32, src: u32) -> bool {
418        let dr = self.find(dst);
419        let sr = self.find(src);
420        if dr == sr {
421            return false;
422        }
423        // Take a clone of the source set so we can mutate dst.
424        let src_pt = self.pt[sr as usize].clone();
425        self.pt[dr as usize].union_in_place(&src_pt)
426    }
427
428    /// First-pass transfer for an instruction.  Emits constraints
429    /// that don't depend on representative-stable resolution.
430    fn transfer_inst(&mut self, body_id: BodyId, inst: &SsaInst) {
431        let v = inst.value.0;
432        if (v as usize) >= self.pt.len() {
433            return;
434        }
435        match &inst.op {
436            SsaOp::Param { index } => {
437                let loc = self.interner.intern_param(body_id, *index);
438                self.add_loc(v, loc);
439            }
440            SsaOp::SelfParam => {
441                let loc = self.interner.intern_self_param(body_id);
442                self.add_loc(v, loc);
443            }
444            SsaOp::CatchParam => {
445                // Exception bindings come from the runtime, model as
446                // an opaque allocation-site keyed by the SSA value.
447                let loc = self.interner.intern_alloc(body_id, v);
448                self.add_loc(v, loc);
449            }
450            SsaOp::Call {
451                callee, receiver, ..
452            } => {
453                // container element retrieval ops
454                // (`shift`, `pop`, `peek`, `front`, …) project through
455                // the abstract `Field(pt(receiver), ELEM)` cell so
456                // per-element taint flows independently of the SSA
457                // value referencing the container.  The receiver's
458                // points-to set may not be fully resolved on this
459                // pass, so we *also* add a fresh allocation site as a
460                // fallback, the fixpoint pass below absorbs the
461                // proper Field projection once the receiver's set
462                // converges.
463                let loc = self.interner.intern_alloc(body_id, v);
464                self.add_loc(v, loc);
465                if let Some(rcv) = receiver
466                    && is_container_read_callee(callee)
467                    && (rcv.0 as usize) < self.parent.len()
468                {
469                    let rcv_rep = self.find(rcv.0) as usize;
470                    let rcv_pt = self.pt[rcv_rep].clone();
471                    if !rcv_pt.is_empty() && !rcv_pt.is_top() {
472                        for parent_loc in rcv_pt.iter() {
473                            let proj = self.interner.intern_field(parent_loc, FieldId::ELEM);
474                            self.add_loc(v, proj);
475                        }
476                    }
477                }
478            }
479            SsaOp::Assign(uses) => {
480                // Steensgaard unification: rep(v) ∪= rep(u_i).  We
481                // unify here and then re-propagate during the
482                // fixpoint pass to absorb later field projections.
483                for &u in uses {
484                    if (u.0 as usize) < self.parent.len() {
485                        self.union(v, u.0);
486                    }
487                }
488            }
489            SsaOp::Phi(operands) => {
490                for (_, u) in operands {
491                    if (u.0 as usize) < self.parent.len() {
492                        self.union(v, u.0);
493                    }
494                }
495            }
496            SsaOp::FieldProj { .. } => {
497                // Resolved during the fixpoint pass, see
498                // `propagate_inst`.
499            }
500            SsaOp::Source | SsaOp::Const(_) | SsaOp::Nop | SsaOp::Undef => {
501                // Scalars / no-ops: empty points-to set.
502            }
503        }
504    }
505
506    /// Fixpoint-pass transfer.  Re-runs constraints whose result
507    /// depends on the current set of representatives, i.e. field
508    /// projections, phis, and assignments may need to absorb new
509    /// members emitted after the first pass.  Returns `true` when
510    /// any points-to set changed.
511    fn propagate_inst(&mut self, inst: &SsaInst) -> bool {
512        let v = inst.value.0;
513        if (v as usize) >= self.pt.len() {
514            return false;
515        }
516        match &inst.op {
517            SsaOp::FieldProj {
518                receiver, field, ..
519            } => {
520                if (receiver.0 as usize) >= self.parent.len() {
521                    return false;
522                }
523                let rcv_rep = self.find(receiver.0) as usize;
524                let mut new_pt = PointsToSet::empty();
525                let rcv_pt = self.pt[rcv_rep].clone();
526                if rcv_pt.is_top() {
527                    new_pt.insert(LOC_TOP);
528                } else if rcv_pt.is_empty() {
529                    // Nothing to project from yet.
530                    return false;
531                } else {
532                    for parent_loc in rcv_pt.iter() {
533                        let proj = self.interner.intern_field(parent_loc, *field);
534                        new_pt.insert(proj);
535                    }
536                }
537                let v_rep = self.find(v) as usize;
538                self.pt[v_rep].union_in_place(&new_pt)
539            }
540            SsaOp::Assign(uses) => {
541                let mut changed = false;
542                for &u in uses {
543                    if (u.0 as usize) < self.parent.len() {
544                        // Steensgaard unification already happened in
545                        // pass 1; re-copying the points-to set
546                        // absorbs any members added since.
547                        changed |= self.copy_pt(v, u.0);
548                    }
549                }
550                changed
551            }
552            SsaOp::Phi(operands) => {
553                let mut changed = false;
554                for (_, u) in operands {
555                    if (u.0 as usize) < self.parent.len() {
556                        changed |= self.copy_pt(v, u.0);
557                    }
558                }
559                changed
560            }
561            // No re-propagation needed for leaf ops.
562            _ => false,
563        }
564    }
565
566    /// Materialise the dense `SsaValue → PointsToSet` table.  Each
567    /// value's set is the set of its representative, values in the
568    /// same Steensgaard class share the same set.
569    fn into_facts(mut self) -> PointsToFacts {
570        let mut by_value = Vec::with_capacity(self.pt.len());
571        // Resolve every value through the union-find before returning
572        // so consumers see the unified set without having to re-find.
573        let mut rep_cache: HashMap<u32, PointsToSet> = HashMap::new();
574        let n = self.pt.len();
575        for v in 0..n as u32 {
576            let rep = self.find(v);
577            let set = rep_cache
578                .entry(rep)
579                .or_insert_with(|| self.pt[rep as usize].clone())
580                .clone();
581            by_value.push(set);
582        }
583        PointsToFacts {
584            body: self.body_id,
585            interner: self.interner,
586            by_value,
587        }
588    }
589}
590
591#[cfg(test)]
592mod tests {
593    use super::*;
594
595    use crate::cfg::Cfg;
596    use crate::ssa::ir::{
597        BlockId, FieldId, FieldInterner, SsaBlock, SsaBody, SsaInst, SsaOp, SsaValue, Terminator,
598        ValueDef,
599    };
600    use petgraph::graph::NodeIndex;
601    use smallvec::{SmallVec, smallvec};
602    use std::collections::HashMap;
603
604    fn body_id() -> BodyId {
605        BodyId(0)
606    }
607
608    /// Helpers for building synthetic SSA bodies in tests.  We
609    /// fabricate bodies directly rather than running the full lowering
610    /// pipeline so the tests stay focused on the points-to behaviour.
611    struct BodyBuilder {
612        defs: Vec<ValueDef>,
613        body_insts: Vec<SsaInst>,
614        next_value: u32,
615        field_interner: FieldInterner,
616    }
617
618    impl BodyBuilder {
619        fn new() -> Self {
620            Self {
621                defs: Vec::new(),
622                body_insts: Vec::new(),
623                next_value: 0,
624                field_interner: FieldInterner::new(),
625            }
626        }
627
628        fn fresh(&mut self, name: Option<&str>) -> SsaValue {
629            let v = SsaValue(self.next_value);
630            self.next_value += 1;
631            self.defs.push(ValueDef {
632                var_name: name.map(|s| s.to_string()),
633                cfg_node: NodeIndex::new(0),
634                block: BlockId(0),
635            });
636            v
637        }
638
639        fn emit(&mut self, value: SsaValue, op: SsaOp, name: Option<&str>) {
640            self.body_insts.push(SsaInst {
641                value,
642                op,
643                cfg_node: NodeIndex::new(0),
644                var_name: name.map(|s| s.to_string()),
645                span: (0, 0),
646            });
647        }
648
649        fn intern_field(&mut self, name: &str) -> FieldId {
650            self.field_interner.intern(name)
651        }
652
653        fn build(self) -> SsaBody {
654            SsaBody {
655                blocks: vec![SsaBlock {
656                    id: BlockId(0),
657                    phis: vec![],
658                    body: self.body_insts,
659                    terminator: Terminator::Return(None),
660                    preds: SmallVec::new(),
661                    succs: SmallVec::new(),
662                }],
663                entry: BlockId(0),
664                value_defs: self.defs,
665                cfg_node_map: HashMap::new(),
666                exception_edges: vec![],
667                field_interner: self.field_interner,
668                field_writes: std::collections::HashMap::new(),
669
670                synthetic_externals: std::collections::HashSet::new(),
671            }
672        }
673    }
674
675    /// `let c = self; let m = c.mu;` , pt(m) must be `{Field(SelfParam, mu)}`,
676    /// distinct from pt(c) = `{SelfParam}`.
677    #[test]
678    fn field_subobject_distinct_from_receiver() {
679        let mut b = BodyBuilder::new();
680        let c = b.fresh(Some("c"));
681        b.emit(c, SsaOp::SelfParam, Some("c"));
682
683        let mu_field = b.intern_field("mu");
684        let m = b.fresh(Some("c.mu"));
685        b.emit(
686            m,
687            SsaOp::FieldProj {
688                receiver: c,
689                field: mu_field,
690                projected_type: None,
691            },
692            Some("c.mu"),
693        );
694
695        let body = b.build();
696        let facts = analyse_body(&body, body_id());
697
698        let pt_c = facts.pt(c);
699        let pt_m = facts.pt(m);
700
701        assert_eq!(pt_c.len(), 1, "pt(c) should be a singleton SelfParam");
702        assert_eq!(pt_m.len(), 1, "pt(c.mu) should be a singleton Field");
703        assert!(!pt_m.is_top());
704
705        // The two sets must not overlap.
706        for c_loc in pt_c.iter() {
707            for m_loc in pt_m.iter() {
708                assert_ne!(c_loc, m_loc, "field and receiver share a location");
709            }
710        }
711
712        // And the field's parent must be the receiver's location.
713        let m_loc = pt_m.iter().next().unwrap();
714        match facts.interner.resolve(m_loc) {
715            crate::pointer::AbsLoc::Field { parent, field } => {
716                assert_eq!(*field, mu_field);
717                assert_eq!(*parent, pt_c.iter().next().unwrap());
718            }
719            other => panic!("expected Field, got {other:?}"),
720        }
721    }
722
723    /// `let y = x;` , y and x share the same points-to set.
724    #[test]
725    fn copy_propagation_unifies() {
726        let mut b = BodyBuilder::new();
727        let x = b.fresh(Some("x"));
728        b.emit(x, SsaOp::Param { index: 0 }, Some("x"));
729
730        let y = b.fresh(Some("y"));
731        b.emit(y, SsaOp::Assign(smallvec![x]), Some("y"));
732
733        let body = b.build();
734        let facts = analyse_body(&body, body_id());
735
736        assert_eq!(
737            facts.pt(x),
738            facts.pt(y),
739            "Steensgaard unifies pt(y) with pt(x) via the copy"
740        );
741        assert!(!facts.pt(y).is_empty());
742    }
743
744    /// `if (cond) z = a; else z = b;` , phi at the merge unifies
745    /// `pt(z)` with both `pt(a)` and `pt(b)`.
746    #[test]
747    fn phi_unifies_branches() {
748        let mut b = BodyBuilder::new();
749        let a = b.fresh(Some("a"));
750        b.emit(a, SsaOp::Param { index: 0 }, Some("a"));
751        let b_v = b.fresh(Some("b"));
752        b.emit(b_v, SsaOp::Param { index: 1 }, Some("b"));
753
754        // Phi(0: a, 0: b), predecessor block ids are placeholders.
755        let z = b.fresh(Some("z"));
756        b.emit(
757            z,
758            SsaOp::Phi(smallvec![(BlockId(0), a), (BlockId(0), b_v)]),
759            Some("z"),
760        );
761
762        let body = b.build();
763        let facts = analyse_body(&body, body_id());
764
765        let pt_z = facts.pt(z);
766        // Steensgaard unifies the three classes; pt(z) == pt(a) == pt(b)
767        // and contains both Param locations.
768        assert_eq!(pt_z, facts.pt(a));
769        assert_eq!(pt_z, facts.pt(b_v));
770        assert_eq!(pt_z.len(), 2);
771    }
772
773    /// `node = node.next;`, the `FieldProj` self-cycle must
774    /// terminate via the union-find / depth bound, not loop.
775    #[test]
776    fn self_referential_field_chain_terminates() {
777        let mut b = BodyBuilder::new();
778        let node = b.fresh(Some("node"));
779        b.emit(node, SsaOp::Param { index: 0 }, Some("node"));
780
781        let next_field = b.intern_field("next");
782        // Repeated pattern: `node = node.next` modeled as
783        // fp = FieldProj(node, next); node' = Assign([fp])
784        for _ in 0..6 {
785            let fp = b.fresh(Some("node.next"));
786            b.emit(
787                fp,
788                SsaOp::FieldProj {
789                    receiver: node,
790                    field: next_field,
791                    projected_type: None,
792                },
793                Some("node.next"),
794            );
795            let new_node = b.fresh(Some("node"));
796            b.emit(new_node, SsaOp::Assign(smallvec![fp]), Some("node"));
797            // The original `node` and the new one are unified by Assign,
798            // creating the self-cycle.  We don't update `node` here so
799            // every iteration emits a fresh FieldProj on the original.
800        }
801
802        let body = b.build();
803        // The bounded `MAX_FIELD_DEPTH` + union-find termination guarantees
804        // analysis returns; this test would hang or panic on regression.
805        let facts = analyse_body(&body, body_id());
806        let pt_node = facts.pt(node);
807        // Either we converge to a non-empty set including a Field chain,
808        // or we saturate to Top, either is a valid termination outcome.
809        assert!(!pt_node.is_empty());
810    }
811
812    /// `Source` introduces no points-to facts (taint is a separate
813    /// lattice; points-to only models heap reach).
814    #[test]
815    fn source_op_has_empty_pt() {
816        let mut b = BodyBuilder::new();
817        let s = b.fresh(Some("s"));
818        b.emit(s, SsaOp::Source, Some("s"));
819
820        let body = b.build();
821        let facts = analyse_body(&body, body_id());
822        assert!(facts.pt(s).is_empty());
823    }
824
825    /// `Call` produces a fresh allocation-site location for its result ,
826    /// distinct from its arguments.
827    #[test]
828    fn call_result_is_fresh_alloc() {
829        let mut b = BodyBuilder::new();
830        let arg = b.fresh(Some("x"));
831        b.emit(arg, SsaOp::Param { index: 0 }, Some("x"));
832
833        let result = b.fresh(Some("r"));
834        b.emit(
835            result,
836            SsaOp::Call {
837                callee: "make_thing".into(),
838                callee_text: None,
839                args: vec![smallvec![arg]],
840                receiver: None,
841            },
842            Some("r"),
843        );
844
845        let body = b.build();
846        let facts = analyse_body(&body, body_id());
847
848        let pt_arg = facts.pt(arg);
849        let pt_result = facts.pt(result);
850        assert!(!pt_result.is_empty());
851        assert!(!pt_arg.is_empty());
852        // No member shared between the two sets.
853        for ra in pt_arg.iter() {
854            for rr in pt_result.iter() {
855                assert_ne!(ra, rr);
856            }
857        }
858    }
859
860    /// Driver smoke-test: the analysis runs on an SsaBody produced by
861    /// the real lowering pipeline without panicking.  This pins the
862    /// "no behaviour change" gate, analysis runs to completion on
863    /// representative input.
864    #[test]
865    fn smoke_runs_on_lowered_body() {
866        // We don't exercise the real lowering here (that needs a full
867        // CFG fixture); the synthetic builder above covers the IR
868        // surface area.  Just confirm the entry point is callable
869        // with an empty body.
870        let body = SsaBody {
871            blocks: vec![SsaBlock {
872                id: BlockId(0),
873                phis: vec![],
874                body: vec![],
875                terminator: Terminator::Return(None),
876                preds: SmallVec::new(),
877                succs: SmallVec::new(),
878            }],
879            entry: BlockId(0),
880            value_defs: vec![],
881            cfg_node_map: HashMap::new(),
882            exception_edges: vec![],
883            field_interner: FieldInterner::new(),
884            field_writes: std::collections::HashMap::new(),
885
886            synthetic_externals: std::collections::HashSet::new(),
887        };
888        let facts = analyse_body(&body, body_id());
889        assert!(facts.is_trivial());
890        assert_eq!(facts.len(), 0);
891
892        let _ = std::marker::PhantomData::<Cfg>;
893    }
894
895    /// Contract pin: a value defined by a `FieldProj`
896    /// classifies as [`PtrProxyHint::FieldOnly`].  Consumed by the
897    /// resource-lifecycle pass to recognise field-aliased locals.
898    #[test]
899    fn proxy_hint_field_only_for_field_proj_value() {
900        let mut b = BodyBuilder::new();
901        let c = b.fresh(Some("c"));
902        b.emit(c, SsaOp::SelfParam, Some("c"));
903        let mu = b.intern_field("mu");
904        let m = b.fresh(Some("m"));
905        b.emit(
906            m,
907            SsaOp::FieldProj {
908                receiver: c,
909                field: mu,
910                projected_type: None,
911            },
912            Some("m"),
913        );
914
915        let body = b.build();
916        let facts = analyse_body(&body, BodyId(7));
917        assert_eq!(
918            facts.body,
919            BodyId(7),
920            "PointsToFacts must preserve caller-supplied BodyId"
921        );
922        assert_eq!(facts.proxy_hint(m), crate::pointer::PtrProxyHint::FieldOnly);
923        assert_eq!(facts.proxy_hint(c), crate::pointer::PtrProxyHint::Other);
924    }
925
926    /// container-read callee classifier covers a
927    /// representative sample across nyx's languages.  Pinned because
928    /// the taint engine relies on the same classifier.
929    #[test]
930    fn container_read_callee_classifier_covers_common_methods() {
931        for c in [
932            "shift",
933            "pop",
934            "peek",
935            "front",
936            "back",
937            "queue.shift",
938            "list.pop",
939            "deque.popleft",
940            "stack.peek",
941            "vec.first",
942        ] {
943            assert!(is_container_read_callee(c), "expected container read: {c}");
944        }
945        for c in ["push", "append", "insert", "myMethod", "process"] {
946            assert!(
947                !is_container_read_callee(c),
948                "non-read should classify false: {c}"
949            );
950        }
951    }
952
953    /// container-write classifier (mirror).
954    #[test]
955    fn container_write_callee_classifier() {
956        for c in [
957            "push",
958            "pushback",
959            "push_back",
960            "append",
961            "insert",
962            "enqueue",
963            "list.append",
964        ] {
965            assert!(is_container_write_callee(c), "expected write: {c}");
966        }
967        for c in ["pop", "shift", "process", "lookup"] {
968            assert!(
969                !is_container_write_callee(c),
970                "non-write should classify false: {c}"
971            );
972        }
973    }
974
975    /// a `Call("shift", receiver=container)` projects
976    /// `Field(pt(container), ELEM)` into the result, alongside the
977    /// fresh allocation site that fall-back paths still emit.
978    #[test]
979    fn container_read_call_projects_through_elem_field() {
980        let mut b = BodyBuilder::new();
981        // `arr` is the parameter container.
982        let arr = b.fresh(Some("arr"));
983        b.emit(arr, SsaOp::Param { index: 0 }, Some("arr"));
984        // `e := arr.shift()`, container read.
985        let e = b.fresh(Some("e"));
986        b.emit(
987            e,
988            SsaOp::Call {
989                callee: "shift".into(),
990                callee_text: None,
991                args: vec![],
992                receiver: Some(arr),
993            },
994            Some("e"),
995        );
996
997        let body = b.build();
998        let facts = analyse_body(&body, BodyId(0));
999        let pt_e = facts.pt(e);
1000        // The result must include at least one Field(_, ELEM) member.
1001        let mut saw_elem = false;
1002        for loc in pt_e.iter() {
1003            if let crate::pointer::AbsLoc::Field { field, .. } = facts.interner.resolve(loc)
1004                && *field == FieldId::ELEM
1005            {
1006                saw_elem = true;
1007                break;
1008            }
1009        }
1010        assert!(
1011            saw_elem,
1012            "container read result should include Field(_, ELEM); got {pt_e:?}"
1013        );
1014    }
1015
1016    /// `extract_field_points_to` records a field
1017    /// READ on the parameter index when a `FieldProj` traces back to
1018    /// an `AbsLoc::Param`.
1019    #[test]
1020    fn extract_field_points_to_records_param_reads() {
1021        let mut b = BodyBuilder::new();
1022        // `obj` is parameter 0.
1023        let obj = b.fresh(Some("obj"));
1024        b.emit(obj, SsaOp::Param { index: 0 }, Some("obj"));
1025        // `let n = obj.name;`, field projection from a param.
1026        let name_field = b.intern_field("name");
1027        let n = b.fresh(Some("n"));
1028        b.emit(
1029            n,
1030            SsaOp::FieldProj {
1031                receiver: obj,
1032                field: name_field,
1033                projected_type: None,
1034            },
1035            Some("n"),
1036        );
1037
1038        let body = b.build();
1039        let facts = analyse_body(&body, BodyId(0));
1040        let summary = extract_field_points_to(&body, &facts);
1041        let entry = summary
1042            .param_field_reads
1043            .iter()
1044            .find(|(p, _)| *p == 0)
1045            .expect("param 0 read recorded");
1046        assert!(entry.1.iter().any(|s| s == "name"));
1047    }
1048
1049    /// `extract_field_points_to` records field
1050    /// WRITES from the body's `field_writes` side-table populated by
1051    /// SSA lowering.  A synth Assign whose receiver traces back to
1052    /// `AbsLoc::Param` produces a `param_field_writes` entry.
1053    #[test]
1054    fn extract_field_points_to_records_param_writes() {
1055        let mut b = BodyBuilder::new();
1056        // `obj` is parameter 0.
1057        let obj = b.fresh(Some("obj"));
1058        b.emit(obj, SsaOp::Param { index: 0 }, Some("obj"));
1059        // Synth Assign mimicking `obj.cache = rhs`: define `cache`
1060        // field id and a synthetic value whose op is Assign.  The
1061        // side-table maps `synth_v -> (obj, cache_id)`.
1062        let cache_id = b.intern_field("cache");
1063        let rhs = b.fresh(Some("rhs"));
1064        b.emit(rhs, SsaOp::Source, Some("rhs"));
1065        let synth = b.fresh(Some("obj"));
1066        b.emit(synth, SsaOp::Assign(smallvec![rhs]), Some("obj"));
1067
1068        let mut body = b.build();
1069        body.field_writes.insert(synth, (obj, cache_id));
1070
1071        let facts = analyse_body(&body, BodyId(0));
1072        let summary = extract_field_points_to(&body, &facts);
1073        let entry = summary
1074            .param_field_writes
1075            .iter()
1076            .find(|(p, _)| *p == 0)
1077            .expect("param 0 write must be recorded from field_writes");
1078        assert!(
1079            entry.1.iter().any(|s| s == "cache"),
1080            "expected 'cache' in writes; got {:?}",
1081            entry.1,
1082        );
1083    }
1084
1085    /// writes through the receiver (`this.f =
1086    /// rhs`) are recorded under the same `u32::MAX` sentinel as
1087    /// reads.
1088    #[test]
1089    fn extract_field_points_to_records_self_writes_under_sentinel() {
1090        let mut b = BodyBuilder::new();
1091        let this = b.fresh(Some("this"));
1092        b.emit(this, SsaOp::SelfParam, Some("this"));
1093        let cache_id = b.intern_field("cache");
1094        let rhs = b.fresh(Some("rhs"));
1095        b.emit(rhs, SsaOp::Source, Some("rhs"));
1096        let synth = b.fresh(Some("this"));
1097        b.emit(synth, SsaOp::Assign(smallvec![rhs]), Some("this"));
1098
1099        let mut body = b.build();
1100        body.field_writes.insert(synth, (this, cache_id));
1101
1102        let facts = analyse_body(&body, BodyId(0));
1103        let summary = extract_field_points_to(&body, &facts);
1104        let entry = summary
1105            .param_field_writes
1106            .iter()
1107            .find(|(p, _)| *p == u32::MAX)
1108            .expect("receiver write recorded under u32::MAX sentinel");
1109        assert!(entry.1.iter().any(|s| s == "cache"));
1110    }
1111
1112    /// container-element writes (`<elem>`
1113    /// marker) flow through the same channel as named-field writes
1114    /// when the synth Assign carries `FieldId::ELEM`.
1115    #[test]
1116    fn extract_field_points_to_records_elem_writes() {
1117        let mut b = BodyBuilder::new();
1118        let arr = b.fresh(Some("arr"));
1119        b.emit(arr, SsaOp::Param { index: 0 }, Some("arr"));
1120        let rhs = b.fresh(Some("rhs"));
1121        b.emit(rhs, SsaOp::Source, Some("rhs"));
1122        let synth = b.fresh(Some("arr"));
1123        b.emit(synth, SsaOp::Assign(smallvec![rhs]), Some("arr"));
1124
1125        let mut body = b.build();
1126        body.field_writes.insert(synth, (arr, FieldId::ELEM));
1127
1128        let facts = analyse_body(&body, BodyId(0));
1129        let summary = extract_field_points_to(&body, &facts);
1130        let entry = summary
1131            .param_field_writes
1132            .iter()
1133            .find(|(p, _)| *p == 0)
1134            .expect("ELEM write on param 0 recorded");
1135        assert!(
1136            entry.1.iter().any(|s| s == "<elem>"),
1137            "ELEM marker '<elem>' must surface unchanged across the wire",
1138        );
1139    }
1140
1141    /// receiver projections are recorded under the
1142    /// `u32::MAX` sentinel parameter index (mirror of
1143    /// `SsaFuncSummary::receiver_to_*`).
1144    #[test]
1145    fn extract_field_points_to_records_self_reads_under_sentinel() {
1146        let mut b = BodyBuilder::new();
1147        let this = b.fresh(Some("this"));
1148        b.emit(this, SsaOp::SelfParam, Some("this"));
1149        let cache = b.intern_field("cache");
1150        let c = b.fresh(Some("c"));
1151        b.emit(
1152            c,
1153            SsaOp::FieldProj {
1154                receiver: this,
1155                field: cache,
1156                projected_type: None,
1157            },
1158            Some("c"),
1159        );
1160
1161        let body = b.build();
1162        let facts = analyse_body(&body, BodyId(0));
1163        let summary = extract_field_points_to(&body, &facts);
1164        let entry = summary
1165            .param_field_reads
1166            .iter()
1167            .find(|(p, _)| *p == u32::MAX)
1168            .expect("receiver read recorded under u32::MAX sentinel");
1169        assert!(entry.1.iter().any(|s| s == "cache"));
1170    }
1171
1172    /// `name_proxy_hints` returns one entry per source-level variable
1173    /// whose latest SSA def has [`PtrProxyHint::FieldOnly`].  Names that
1174    /// don't qualify are omitted entirely so the consumer's lookup
1175    /// stays cheap.
1176    /// W5: subscript-read synthetic callee `__index_get__` must be
1177    /// recognised by the public container-read predicate so the W2/W4
1178    /// taint hooks fire on subscript reads (`arr[i]`, `cmds[0]`).
1179    #[test]
1180    fn subscript_get_classifies_as_container_read() {
1181        assert!(is_container_read_callee_pub("__index_get__"));
1182        assert!(is_container_read_callee_pub("arr.__index_get__"));
1183    }
1184
1185    /// W5: subscript-write synthetic callee `__index_set__` must be
1186    /// recognised by the public container-write predicate so the W2
1187    /// taint hook fires on subscript writes (`arr[i] = v`).
1188    #[test]
1189    fn subscript_set_classifies_as_container_write() {
1190        assert!(is_container_write_callee("__index_set__"));
1191        assert!(is_container_write_callee("arr.__index_set__"));
1192    }
1193
1194    /// W5: regression guard, neither synth name should match the
1195    /// opposite predicate, otherwise the W2 read/write hooks would
1196    /// double-fire on the same call.
1197    #[test]
1198    fn subscript_synth_callees_do_not_cross_classify() {
1199        assert!(!is_container_read_callee_pub("__index_set__"));
1200        assert!(!is_container_write_callee("__index_get__"));
1201    }
1202
1203    #[test]
1204    fn name_proxy_hints_collects_field_only_locals() {
1205        let mut b = BodyBuilder::new();
1206        // `c` is the receiver, root location, hint=Other.
1207        let c = b.fresh(Some("c"));
1208        b.emit(c, SsaOp::SelfParam, Some("c"));
1209        // `m := c.mu`, field projection, hint=FieldOnly.
1210        let mu = b.intern_field("mu");
1211        let m = b.fresh(Some("m"));
1212        b.emit(
1213            m,
1214            SsaOp::FieldProj {
1215                receiver: c,
1216                field: mu,
1217                projected_type: None,
1218            },
1219            Some("m"),
1220        );
1221
1222        let body = b.build();
1223        let facts = analyse_body(&body, BodyId(0));
1224        let hints = facts.name_proxy_hints(&body);
1225        assert_eq!(
1226            hints.get("m"),
1227            Some(&crate::pointer::PtrProxyHint::FieldOnly)
1228        );
1229        assert!(
1230            !hints.contains_key("c"),
1231            "root receiver must not appear in the FieldOnly map"
1232        );
1233    }
1234}