#![allow(clippy::collapsible_if, clippy::new_without_default)]
use std::collections::{HashMap, HashSet};
use crate::ssa::const_prop::ConstLattice;
use crate::ssa::heap::{HeapObjectId, PointsToResult};
use crate::ssa::ir::{SsaBody, SsaValue};
use super::value::SymbolicValue;
/// Global cap on tracked (object, field) slots across the whole heap;
/// stores that would create a new slot beyond this are dropped.
const MAX_HEAP_ENTRIES: usize = 64;
/// Cap on non-index slots (named fields plus the `Elements` summary)
/// tracked per object; excess new fields are dropped.
const MAX_FIELDS_PER_OBJECT: usize = 8;
/// Cap on individually tracked `FieldSlot::Index` entries per object;
/// exceeding it collapses all of that object's indices into `Elements`.
pub const MAX_TRACKED_INDICES: usize = 16;
/// Addresses one abstract heap slot: a single field of a single abstract object.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct HeapKey {
    /// The abstract object the slot belongs to.
    pub object: HeapObjectId,
    /// Which field/element of the object is addressed.
    pub field: FieldSlot,
}
/// Identifies a field within an abstract object.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub enum FieldSlot {
    /// A named attribute, e.g. the `name` in `obj.name`.
    Named(String),
    /// Summary slot standing for all element indices of the object that are
    /// not tracked individually.
    Elements,
    /// One concrete element index tracked individually.
    Index(u64),
}
impl PartialOrd for FieldSlot {
    // A total order exists (see the `Ord` impl), so delegate unconditionally.
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(self.cmp(other))
    }
}
impl Ord for FieldSlot {
    /// Total order: `Elements` first, then `Index` (by index), then `Named`
    /// (lexicographically).
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        // Rank of each variant in the overall ordering.
        fn rank(slot: &FieldSlot) -> u8 {
            match slot {
                FieldSlot::Elements => 0,
                FieldSlot::Index(_) => 1,
                FieldSlot::Named(_) => 2,
            }
        }
        rank(self).cmp(&rank(other)).then_with(|| match (self, other) {
            // Same variant: compare payloads.
            (FieldSlot::Index(a), FieldSlot::Index(b)) => a.cmp(b),
            (FieldSlot::Named(a), FieldSlot::Named(b)) => a.cmp(b),
            // Different variants were already decided by `rank`.
            _ => std::cmp::Ordering::Equal,
        })
    }
}
/// One observed `object.field` access, kept for later reporting/diagnostics.
#[derive(Clone, Debug)]
pub struct FieldAccessRecord {
    /// Source-level name of the receiver object.
    pub object_name: String,
    /// Source-level name of the accessed field.
    pub field_name: String,
    /// SSA value produced by (or involved in) the access.
    pub ssa_value: SsaValue,
}
/// Bounded abstract heap mapping (object, field) slots to symbolic values,
/// with a taint mark per slot and a log of observed field accesses.
#[derive(Clone, Debug)]
pub struct SymbolicHeap {
    // Tracked slots; size is bounded by MAX_HEAP_ENTRIES and the per-object caps.
    fields: HashMap<HeapKey, SymbolicValue>,
    // Slots currently considered tainted.
    tainted_keys: HashSet<HeapKey>,
    // Append-only log of field accesses recorded via `record_access`.
    field_accesses: Vec<FieldAccessRecord>,
}
impl SymbolicHeap {
    /// Creates an empty heap with no tracked slots, taint, or access records.
    pub fn new() -> Self {
        SymbolicHeap {
            fields: HashMap::new(),
            tainted_keys: HashSet::new(),
            field_accesses: Vec::new(),
        }
    }

    /// Stores `value` at `key`, setting or clearing that slot's taint mark.
    ///
    /// Budget enforcement:
    /// - A new `Index` slot that would exceed `MAX_TRACKED_INDICES` for its
    ///   object first collapses all of that object's indices into the
    ///   `Elements` summary (keeping taint from the collapsed slots) and then
    ///   stores `value` into the summary instead.
    /// - Otherwise, a store that would create a new slot beyond
    ///   `MAX_HEAP_ENTRIES` overall, or a new non-index slot beyond
    ///   `MAX_FIELDS_PER_OBJECT` for its object, is silently dropped.
    pub fn store(&mut self, key: HeapKey, value: SymbolicValue, tainted: bool) {
        if let FieldSlot::Index(_) = &key.field {
            if !self.fields.contains_key(&key)
                && self.count_indices_for(key.object) >= MAX_TRACKED_INDICES
            {
                // Too many per-index entries: summarize them, then land the
                // incoming value in the summary slot. Taint from the collapse
                // is deliberately NOT cleared even when `tainted` is false.
                self.collapse_indices_to_elements(key.object);
                let elem_key = HeapKey {
                    object: key.object,
                    field: FieldSlot::Elements,
                };
                self.fields.insert(elem_key.clone(), value);
                if tainted {
                    self.tainted_keys.insert(elem_key);
                }
                return;
            }
        }
        if !self.fields.contains_key(&key) {
            // Caps apply only when the store would create a new slot;
            // overwriting an existing slot is always allowed.
            if self.fields.len() >= MAX_HEAP_ENTRIES {
                return;
            }
            if !matches!(key.field, FieldSlot::Index(_))
                && self.fields_for_object(key.object) >= MAX_FIELDS_PER_OBJECT
            {
                return;
            }
        }
        self.fields.insert(key.clone(), value);
        if tainted {
            self.tainted_keys.insert(key);
        } else {
            self.tainted_keys.remove(&key);
        }
    }

    /// Reads the value at `key`; missing slots read as `Unknown`.
    ///
    /// An `Index` slot with no per-index entry falls back to the object's
    /// `Elements` summary slot before giving up.
    pub fn load(&self, key: &HeapKey) -> SymbolicValue {
        if let FieldSlot::Index(_) = &key.field {
            if let Some(val) = self.fields.get(key) {
                return val.clone();
            }
            let elem_key = HeapKey {
                object: key.object,
                field: FieldSlot::Elements,
            };
            return self
                .fields
                .get(&elem_key)
                .cloned()
                .unwrap_or(SymbolicValue::Unknown);
        }
        self.fields
            .get(key)
            .cloned()
            .unwrap_or(SymbolicValue::Unknown)
    }

    /// Reports whether `key` is tainted. An `Index` slot is also considered
    /// tainted when its object's `Elements` summary slot is tainted.
    pub fn is_tainted(&self, key: &HeapKey) -> bool {
        if self.tainted_keys.contains(key) {
            return true;
        }
        if let FieldSlot::Index(_) = &key.field {
            let elem_key = HeapKey {
                object: key.object,
                field: FieldSlot::Elements,
            };
            return self.tainted_keys.contains(&elem_key);
        }
        false
    }

    /// Iterates over all tracked (key, value) slots in unspecified order.
    pub fn entries(&self) -> impl Iterator<Item = (&HeapKey, &SymbolicValue)> {
        self.fields.iter()
    }

    /// Appends one field-access record to the log.
    pub fn record_access(&mut self, record: FieldAccessRecord) {
        self.field_accesses.push(record);
    }

    /// All field accesses recorded so far, in insertion order.
    pub fn field_accesses(&self) -> &[FieldAccessRecord] {
        &self.field_accesses
    }

    /// Iteration-order-independent structural hash of the heap, used to
    /// detect state changes across analysis iterations. Mixes in the object
    /// id, the field slot (including named-field bytes), a value tag, and the
    /// taint bit. The empty heap fingerprints to 0.
    pub fn fingerprint(&self) -> u64 {
        if self.fields.is_empty() {
            return 0;
        }
        // Sort keys so HashMap iteration order cannot affect the hash.
        let mut keys: Vec<&HeapKey> = self.fields.keys().collect();
        keys.sort_by(|a, b| {
            let obj_a = (a.object.0).0;
            let obj_b = (b.object.0).0;
            obj_a.cmp(&obj_b).then_with(|| a.field.cmp(&b.field))
        });
        let mut h: u64 = 0;
        for key in keys {
            let val = &self.fields[key];
            let tainted: u64 = u64::from(self.tainted_keys.contains(key));
            let val_tag: u64 = match val {
                SymbolicValue::Concrete(n) => (*n as u64).wrapping_mul(31),
                SymbolicValue::ConcreteStr(s) => {
                    // Hash the first 8 bytes; longer strings share a tag,
                    // which is acceptable for a fingerprint.
                    let mut sh: u64 = 0;
                    for b in s.bytes().take(8) {
                        sh = sh.wrapping_mul(31).wrapping_add(b as u64);
                    }
                    sh
                }
                SymbolicValue::Unknown => 0xFF,
                _ => 0xFE,
            };
            // BUGFIX: the previous hash ignored both the object id and the
            // identity of `Named` fields, so e.g. `{obj0.a: v}` and
            // `{obj1.b: v}` fingerprinted identically. Fold both in: named
            // fields hash their byte prefix (same scheme as ConcreteStr),
            // and the object id is mixed into each entry's contribution.
            let field_tag: u64 = match &key.field {
                FieldSlot::Elements => 0,
                FieldSlot::Index(n) => 1u64.wrapping_add(*n),
                FieldSlot::Named(name) => {
                    let mut nh: u64 = 2;
                    for b in name.bytes().take(8) {
                        nh = nh.wrapping_mul(31).wrapping_add(b as u64);
                    }
                    nh
                }
            };
            let obj_tag = u64::from((key.object.0).0);
            h = h
                .wrapping_mul(67)
                .wrapping_add(val_tag)
                .wrapping_add(obj_tag.wrapping_mul(0x9E37_79B9))
                .wrapping_add(tainted << 32)
                .wrapping_add(field_tag.wrapping_mul(0x0001_0001));
        }
        h
    }

    /// Widens the heap (e.g. at a loop head): collapses every object's
    /// per-index entries into its `Elements` summary (preserving taint) and
    /// degrades every stored value to `Unknown`. Taint marks are kept.
    pub fn widen(&mut self) {
        let objects_with_indices: HashSet<HeapObjectId> = self
            .fields
            .keys()
            .filter(|k| matches!(k.field, FieldSlot::Index(_)))
            .map(|k| k.object)
            .collect();
        for obj in objects_with_indices {
            self.collapse_indices_to_elements(obj);
        }
        for value in self.fields.values_mut() {
            *value = SymbolicValue::Unknown;
        }
    }

    /// Number of non-index slots (named fields plus `Elements`) tracked for `object`.
    fn fields_for_object(&self, object: HeapObjectId) -> usize {
        self.fields
            .keys()
            .filter(|k| k.object == object && !matches!(k.field, FieldSlot::Index(_)))
            .count()
    }

    /// Number of per-index slots tracked for `object`.
    fn count_indices_for(&self, object: HeapObjectId) -> usize {
        self.fields
            .keys()
            .filter(|k| k.object == object && matches!(k.field, FieldSlot::Index(_)))
            .count()
    }

    /// Removes all per-index slots of `object` and replaces them with a single
    /// `Elements` summary holding `Unknown`. The summary is marked tainted if
    /// any collapsed slot was tainted.
    fn collapse_indices_to_elements(&mut self, object: HeapObjectId) {
        let index_keys: Vec<HeapKey> = self
            .fields
            .keys()
            .filter(|k| k.object == object && matches!(k.field, FieldSlot::Index(_)))
            .cloned()
            .collect();
        let any_tainted = index_keys.iter().any(|k| self.tainted_keys.contains(k));
        for k in &index_keys {
            self.fields.remove(k);
            self.tainted_keys.remove(k);
        }
        let elem_key = HeapKey {
            object,
            field: FieldSlot::Elements,
        };
        if any_tainted {
            self.tainted_keys.insert(elem_key.clone());
        }
        self.fields.insert(elem_key, SymbolicValue::Unknown);
    }
}
/// Maps an SSA index value to a field slot: a known, small, non-negative
/// constant index gets its own `Index` slot; everything else is summarized
/// as `Elements`.
pub fn resolve_index_slot(
    index_val: SsaValue,
    const_values: &HashMap<SsaValue, ConstLattice>,
) -> FieldSlot {
    match const_values.get(&index_val) {
        // Only indices in [0, MAX_TRACKED_INDICES) are tracked individually.
        Some(ConstLattice::Int(n)) if (0..MAX_TRACKED_INDICES as i64).contains(n) => {
            FieldSlot::Index(*n as u64)
        }
        _ => FieldSlot::Elements,
    }
}
/// Splits `"receiver.field"` at the LAST dot into `(receiver, field)`.
/// Returns `None` when there is no dot, or when either side would be empty.
pub fn split_field_access(dotted: &str) -> Option<(&str, &str)> {
    let (object, field) = dotted.rsplit_once('.')?;
    if object.is_empty() || field.is_empty() {
        return None;
    }
    Some((object, field))
}
/// Finds the most recent SSA definition of variable `receiver_name` that was
/// made strictly before `current_value`, searching newest-first. Returns
/// `None` when no earlier definition carries that name.
pub fn resolve_receiver_ssa(
    receiver_name: &str,
    ssa: &SsaBody,
    current_value: SsaValue,
) -> Option<SsaValue> {
    // Only definitions preceding `current_value` are candidates.
    let limit = (current_value.0 as usize).min(ssa.value_defs.len());
    ssa.value_defs[..limit]
        .iter()
        .rposition(|def| def.var_name.as_deref() == Some(receiver_name))
        .map(|idx| SsaValue(idx as u32))
}
/// Returns the unique heap object `ssa_val` may point to, or `None` when the
/// points-to set is absent, empty, or not a singleton.
pub fn resolve_singleton_object(
    ssa_val: SsaValue,
    points_to: &PointsToResult,
) -> Option<HeapObjectId> {
    let pts = points_to.get(ssa_val)?;
    match pts.len() {
        // Exactly one target: resolution is unambiguous.
        1 => pts.iter().next().copied(),
        _ => None,
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    // Helper: build a HeapObjectId from a raw SSA value number.
    fn obj(n: u32) -> HeapObjectId {
        HeapObjectId(SsaValue(n))
    }

    // Helper: key for a named field of object `obj_id`.
    fn named_key(obj_id: u32, field: &str) -> HeapKey {
        HeapKey {
            object: obj(obj_id),
            field: FieldSlot::Named(field.to_string()),
        }
    }

    // Helper: key for the Elements summary slot of object `obj_id`.
    fn elements_key(obj_id: u32) -> HeapKey {
        HeapKey {
            object: obj(obj_id),
            field: FieldSlot::Elements,
        }
    }

    // A stored value is read back unchanged.
    #[test]
    fn store_load_roundtrip() {
        let mut heap = SymbolicHeap::new();
        let key = named_key(0, "name");
        let val = SymbolicValue::ConcreteStr("alice".to_string());
        heap.store(key.clone(), val.clone(), false);
        assert_eq!(heap.load(&key), val);
    }

    // Loading a slot that was never stored yields Unknown.
    #[test]
    fn load_missing_returns_unknown() {
        let heap = SymbolicHeap::new();
        let key = named_key(0, "name");
        assert_eq!(heap.load(&key), SymbolicValue::Unknown);
    }

    // Storing with tainted=true marks the slot; a later untainted store clears it.
    #[test]
    fn taint_propagation_through_store_load() {
        let mut heap = SymbolicHeap::new();
        let key = named_key(0, "name");
        heap.store(key.clone(), SymbolicValue::Symbol(SsaValue(10)), true);
        assert!(heap.is_tainted(&key));
        heap.store(key.clone(), SymbolicValue::Concrete(42), false);
        assert!(!heap.is_tainted(&key));
    }

    // Once MAX_HEAP_ENTRIES slots exist, a store creating a new slot is dropped.
    #[test]
    fn max_heap_entries_eviction() {
        let mut heap = SymbolicHeap::new();
        for i in 0..MAX_HEAP_ENTRIES as u32 {
            let key = named_key(i, "f");
            heap.store(key, SymbolicValue::Concrete(i as i64), false);
        }
        assert_eq!(heap.fields.len(), MAX_HEAP_ENTRIES);
        let overflow_key = named_key(999, "overflow");
        heap.store(overflow_key.clone(), SymbolicValue::Concrete(999), false);
        assert_eq!(heap.load(&overflow_key), SymbolicValue::Unknown);
        assert_eq!(heap.fields.len(), MAX_HEAP_ENTRIES);
    }

    // The per-object field cap drops new named fields for that object only;
    // other objects are unaffected.
    #[test]
    fn max_fields_per_object_eviction() {
        let mut heap = SymbolicHeap::new();
        for i in 0..MAX_FIELDS_PER_OBJECT {
            let key = named_key(0, &format!("field_{i}"));
            heap.store(key, SymbolicValue::Concrete(i as i64), false);
        }
        assert_eq!(heap.fields_for_object(obj(0)), MAX_FIELDS_PER_OBJECT);
        let overflow_key = named_key(0, "overflow");
        heap.store(overflow_key.clone(), SymbolicValue::Concrete(99), false);
        assert_eq!(heap.load(&overflow_key), SymbolicValue::Unknown);
        assert_eq!(heap.fields_for_object(obj(0)), MAX_FIELDS_PER_OBJECT);
        let other_key = named_key(1, "ok");
        heap.store(other_key.clone(), SymbolicValue::Concrete(1), false);
        assert_eq!(heap.load(&other_key), SymbolicValue::Concrete(1));
    }

    // widen() degrades values to Unknown but keeps taint marks.
    #[test]
    fn widen_preserves_taint_clears_values() {
        let mut heap = SymbolicHeap::new();
        let key = named_key(0, "name");
        heap.store(
            key.clone(),
            SymbolicValue::ConcreteStr("alice".to_string()),
            true,
        );
        heap.widen();
        assert_eq!(heap.load(&key), SymbolicValue::Unknown);
        assert!(heap.is_tainted(&key));
    }

    // split_field_access splits on the LAST dot and rejects empty halves.
    #[test]
    fn split_field_access_cases() {
        assert_eq!(split_field_access("obj.field"), Some(("obj", "field")));
        assert_eq!(split_field_access("a.b.c"), Some(("a.b", "c")));
        assert_eq!(split_field_access("noDot"), None);
        assert_eq!(split_field_access(".field"), None);
        assert_eq!(split_field_access("obj."), None);
        assert_eq!(split_field_access(""), None);
        assert_eq!(split_field_access("."), None);
    }

    // An empty points-to result resolves nothing.
    #[test]
    fn resolve_singleton_returns_none_for_absent() {
        let pts = PointsToResult::empty();
        assert_eq!(resolve_singleton_object(SsaValue(0), &pts), None);
        assert_eq!(resolve_singleton_object(SsaValue(99), &pts), None);
    }

    // Named("items") and Elements are independent slots with independent taint.
    #[test]
    fn field_slot_named_vs_elements_distinct() {
        let mut heap = SymbolicHeap::new();
        let named = named_key(0, "items");
        let elements = elements_key(0);
        heap.store(named.clone(), SymbolicValue::Concrete(1), false);
        heap.store(elements.clone(), SymbolicValue::Concrete(2), true);
        assert_eq!(heap.load(&named), SymbolicValue::Concrete(1));
        assert_eq!(heap.load(&elements), SymbolicValue::Concrete(2));
        assert!(!heap.is_tainted(&named));
        assert!(heap.is_tainted(&elements));
    }

    // record_access appends to the log in order.
    #[test]
    fn field_access_recording() {
        let mut heap = SymbolicHeap::new();
        assert!(heap.field_accesses().is_empty());
        heap.record_access(FieldAccessRecord {
            object_name: "user".to_string(),
            field_name: "name".to_string(),
            ssa_value: SsaValue(5),
        });
        assert_eq!(heap.field_accesses().len(), 1);
        assert_eq!(heap.field_accesses()[0].object_name, "user");
        assert_eq!(heap.field_accesses()[0].field_name, "name");
    }

    // Helper: key for a tracked index slot of object `obj_id`.
    fn index_key(obj_id: u32, idx: u64) -> HeapKey {
        HeapKey {
            object: obj(obj_id),
            field: FieldSlot::Index(idx),
        }
    }

    // Per-index slots are independent; with no Elements summary, other
    // indices read as Unknown.
    #[test]
    fn per_index_store_load() {
        let mut heap = SymbolicHeap::new();
        heap.store(index_key(0, 0), SymbolicValue::Concrete(10), false);
        assert_eq!(heap.load(&index_key(0, 0)), SymbolicValue::Concrete(10));
        assert_eq!(heap.load(&index_key(0, 1)), SymbolicValue::Unknown);
        assert_eq!(heap.load(&elements_key(0)), SymbolicValue::Unknown);
    }

    // An index with no per-index entry falls back to the Elements summary.
    #[test]
    fn index_load_falls_back_to_elements() {
        let mut heap = SymbolicHeap::new();
        heap.store(elements_key(0), SymbolicValue::Concrete(99), false);
        assert_eq!(heap.load(&index_key(0, 0)), SymbolicValue::Concrete(99));
        assert_eq!(heap.load(&index_key(0, 5)), SymbolicValue::Concrete(99));
    }

    // Taint on Elements makes every index of that object (and only that
    // object) tainted.
    #[test]
    fn index_taint_includes_elements_taint() {
        let mut heap = SymbolicHeap::new();
        heap.store(elements_key(0), SymbolicValue::Unknown, true);
        assert!(heap.is_tainted(&index_key(0, 0)));
        assert!(heap.is_tainted(&index_key(0, 7)));
        assert!(!heap.is_tainted(&index_key(1, 0)));
    }

    // Per-index entries shadow the Elements summary for loads, while taint on
    // Elements still covers them.
    #[test]
    fn index_and_elements_coexist() {
        let mut heap = SymbolicHeap::new();
        heap.store(index_key(0, 0), SymbolicValue::Concrete(10), false);
        heap.store(elements_key(0), SymbolicValue::Concrete(99), true);
        assert_eq!(heap.load(&index_key(0, 0)), SymbolicValue::Concrete(10));
        assert_eq!(heap.load(&index_key(0, 1)), SymbolicValue::Concrete(99));
        assert!(heap.is_tainted(&index_key(0, 0)));
    }

    // Writing Elements after an index keeps the per-index value but the
    // Elements taint still applies to the index.
    #[test]
    fn elements_store_after_index_preserves_value() {
        let mut heap = SymbolicHeap::new();
        heap.store(
            index_key(0, 1),
            SymbolicValue::ConcreteStr("safe".to_string()),
            false,
        );
        heap.store(elements_key(0), SymbolicValue::Unknown, true);
        assert_eq!(
            heap.load(&index_key(0, 1)),
            SymbolicValue::ConcreteStr("safe".to_string())
        );
        assert!(heap.is_tainted(&index_key(0, 1)));
    }

    // Exceeding MAX_TRACKED_INDICES collapses indices into Elements, merging
    // taint and storing the new value in the summary.
    #[test]
    fn index_overflow_collapses() {
        let mut heap = SymbolicHeap::new();
        for i in 0..MAX_TRACKED_INDICES as u64 {
            let tainted = i == (MAX_TRACKED_INDICES as u64 - 1);
            heap.store(index_key(0, i), SymbolicValue::Concrete(i as i64), tainted);
        }
        assert_eq!(heap.count_indices_for(obj(0)), MAX_TRACKED_INDICES);
        heap.store(
            index_key(0, MAX_TRACKED_INDICES as u64),
            SymbolicValue::Concrete(999),
            false,
        );
        assert_eq!(heap.count_indices_for(obj(0)), 0);
        assert!(heap.is_tainted(&elements_key(0)));
        assert_eq!(heap.load(&elements_key(0)), SymbolicValue::Concrete(999));
    }

    // widen() also collapses per-index entries, preserving their taint.
    #[test]
    fn widen_collapses_indices() {
        let mut heap = SymbolicHeap::new();
        heap.store(index_key(0, 0), SymbolicValue::Concrete(10), true);
        heap.store(index_key(0, 1), SymbolicValue::Concrete(20), false);
        heap.widen();
        assert_eq!(heap.count_indices_for(obj(0)), 0);
        assert_eq!(heap.load(&elements_key(0)), SymbolicValue::Unknown);
        assert!(heap.is_tainted(&elements_key(0)));
    }

    // Heaps differing only in which index holds a value must fingerprint
    // differently.
    #[test]
    fn fingerprint_distinguishes_indices() {
        let mut h1 = SymbolicHeap::new();
        h1.store(index_key(0, 0), SymbolicValue::Concrete(42), false);
        let mut h2 = SymbolicHeap::new();
        h2.store(index_key(0, 1), SymbolicValue::Concrete(42), false);
        assert_ne!(h1.fingerprint(), h2.fingerprint());
    }

    // Only small non-negative constant ints become Index slots; negatives,
    // out-of-range, non-int constants, and unknown values fall back to
    // Elements.
    #[test]
    fn resolve_index_slot_cases() {
        let mut cv = HashMap::new();
        cv.insert(SsaValue(0), ConstLattice::Int(3));
        cv.insert(SsaValue(1), ConstLattice::Int(-1));
        cv.insert(SsaValue(2), ConstLattice::Int(MAX_TRACKED_INDICES as i64));
        cv.insert(SsaValue(3), ConstLattice::Str("hello".into()));
        assert_eq!(resolve_index_slot(SsaValue(0), &cv), FieldSlot::Index(3));
        assert_eq!(resolve_index_slot(SsaValue(1), &cv), FieldSlot::Elements);
        assert_eq!(resolve_index_slot(SsaValue(2), &cv), FieldSlot::Elements);
        assert_eq!(resolve_index_slot(SsaValue(3), &cv), FieldSlot::Elements);
        assert_eq!(resolve_index_slot(SsaValue(99), &cv), FieldSlot::Elements);
    }

    // Ord on FieldSlot sorts Elements first, then indices by value, then
    // named fields lexicographically.
    #[test]
    fn field_slot_ordering() {
        let slots = vec![
            FieldSlot::Named("b".to_string()),
            FieldSlot::Index(1),
            FieldSlot::Elements,
            FieldSlot::Named("a".to_string()),
            FieldSlot::Index(0),
        ];
        let mut sorted = slots.clone();
        sorted.sort();
        assert_eq!(
            sorted,
            vec![
                FieldSlot::Elements,
                FieldSlot::Index(0),
                FieldSlot::Index(1),
                FieldSlot::Named("a".to_string()),
                FieldSlot::Named("b".to_string()),
            ]
        );
    }
}