// yaxpeax_core/analyses/memory_layout.rs

1use yaxpeax_arch::Arch;
2use std::collections::HashMap;
3use analyses::DFG;
4use data::ValueLocations;
5use analyses::static_single_assignment::{DFGRef, SSA, SSAValues};
6use std::cell::RefCell;
7use std::rc::Rc;
8use std::fmt;
9
/// some kind of a description of memory usage in a dfg
#[derive(Debug)]
pub struct MemoryRegion<A: Arch + ValueLocations + SSAValues> where A::Data: Eq + fmt::Display {
    /// accesses is a collection of memory accesses, read or write, to delineate this region. it is
    /// both the offset into this region where the access occurs, and the size of the region.
    ///
    /// perhaps one day this will map to values such that a unified data flow graph can exist
    /// describing aliasable (memory) and non-aliasable (register) regions.
    ///
    /// clearly, registers in some machines may alias; the complicating distinction is that memory
    /// has unbounded aliasing and indirection, where registers in most architectures are not
    /// indirectly accessed and have a well-defined set of aliasing rules (f.ex x86's
    /// rcx->ecx->cx->{cl,ch})
//    accesses: Vec<usize>,
    // revised: offset -> size -> read/write
    // this is to support cases where an offset is accessed by two different sizes
    // as might be the case for unions or truncation.
    // TODO: these could be keyed by A::Address
    /// outer key: the addend expression for an access (the offset relative to this region's
    /// base, often an immediate). inner key: the access size as recorded from
    /// `ValueIndex::size` -- presumably bytes, TODO confirm units at the callers.
    /// value: whether that (offset, size) pair has been read, written, or both.
    pub accesses: HashMap<Rc<Item<ValueOrImmediate<A>>>, HashMap<u64, UsePattern>>,
    // TODO: how to avoid recording memcpy-like accesses that disregard internal structure? does
    // that get resolved here, or by MemoryLayout? something else entirely? hm...
}
32
33impl<A: Arch + ValueLocations + SSAValues> Clone for MemoryRegion<A> where A::Data: Eq + fmt::Display {
34    fn clone(&self) -> Self {
35        Self {
36            accesses: self.accesses.clone(),
37        }
38    }
39}
40
/// a compact record of how some (offset, size) within a region has been accessed:
/// bit 0 marks "read at least once", bit 1 marks "written at least once".
// #[repr(transparent)]
#[derive(Copy, Clone)]
pub struct UsePattern(pub u8);

impl fmt::Debug for UsePattern {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let label = match (self.is_read(), self.is_write()) {
            (true, true) => "{ read, write }",
            (true, false) => "{ read }",
            (false, true) => "{ write }",
            (false, false) => "(no access)",
        };
        f.write_str(label)
    }
}

#[allow(dead_code)]
impl UsePattern {
    /// flag bit set by `mark_read`.
    const READ_BIT: u8 = 1;
    /// flag bit set by `mark_write`.
    const WRITE_BIT: u8 = 2;

    /// a pattern with no accesses recorded yet.
    fn none() -> Self {
        UsePattern(0)
    }
    /// a pattern recording exactly a read.
    fn read() -> Self {
        UsePattern(Self::READ_BIT)
    }
    /// a pattern recording exactly a write.
    fn write() -> Self {
        UsePattern(Self::WRITE_BIT)
    }
    /// the union of accesses recorded in `self` and `other`.
    fn merge(&self, other: &UsePattern) -> Self {
        UsePattern(self.0 | other.0)
    }
    /// builder-style variant of `mark_read`.
    fn with_read(self) -> Self {
        UsePattern(self.0 | Self::READ_BIT)
    }
    /// builder-style variant of `mark_write`.
    fn with_write(self) -> Self {
        UsePattern(self.0 | Self::WRITE_BIT)
    }
    /// record that this offset/size has been read.
    fn mark_read(&mut self) {
        self.0 |= Self::READ_BIT;
    }
    /// record that this offset/size has been written.
    fn mark_write(&mut self) {
        self.0 |= Self::WRITE_BIT;
    }
    pub fn is_read(&self) -> bool {
        self.0 & Self::READ_BIT != 0
    }
    pub fn is_write(&self) -> bool {
        self.0 & Self::WRITE_BIT != 0
    }
}
91
92impl<A: Arch + ValueLocations + SSAValues> MemoryRegion<A> where A::Data: Eq + fmt::Display {
93    fn new() -> Self {
94        Self {
95            accesses: HashMap::new(),
96        }
97    }
98}
99
/// the indirect (memory) side of a single dfg location access: handles onto the use-side
/// and def-side region maps for the ssa values involved at one program point.
#[derive(Debug)]
pub struct IndirectLayout<A: Arch + ValueLocations + SSAValues> where A::Data: Eq + fmt::Display {
    /// a mapping to distinct regions in this indirect area from the base values inferred for their
    /// accesses.
    ///
    /// the expectation^Whope is that analyses can canonicalize effective addresses so the
    /// least-bound value can be used as a base, and bound addends can restrict the aliasing set so
    /// a write to some value does not necessarily cause all referents through `A::Location` to be
    /// clobbered.
    regions_uses: Option<Rc<RefCell<HashMap<ValueOrImmediate<A>, MemoryRegion<A>>>>>,
    /// the def-side counterpart of `regions_uses`: regions reachable from the ssa value
    /// defined (written) at this access, if any.
    regions_defs: Option<Rc<RefCell<HashMap<ValueOrImmediate<A>, MemoryRegion<A>>>>>,
    /// the ssa value read at this access, if any. currently stored but not consulted by the
    /// `IndirectQuery` impl in this file.
    ssa_use: Option<DFGRef<A>>,
    /// the ssa value defined at this access, if any. likewise currently only stored.
    ssa_def: Option<DFGRef<A>>,
}
114
/// memory layout inference over an ssa-form dfg: for each segment (keyed by the dealiased
/// value an indirect access goes through), records which bases and offsets are accessed at
/// which sizes. serves as a `DFG` implementation (see the impl later in this file).
pub struct MemoryLayout<'ssa, A: Arch + ValueLocations + SSAValues> where A::Data: Eq + fmt::Display {
    /// the ssa numbering for the code under analysis; consulted for defs/uses of locations.
    pub ssa: &'ssa SSA<A>,
    /// map SSA values at some `A::Location` to their referent layout.
    /// key is likely versions of an architecture's Location::Memory.
    pub segments: RefCell<HashMap<ValueOrImmediate<A>, Rc<RefCell<HashMap<ValueOrImmediate<A>, MemoryRegion<A>>>>>>,
}
121
122/*
123#[derive(Copy, Clone)]
124pub enum LocationAccess<A: Arch> {
125    // access, and its size
126    Access(A::Address),
127}
128*/
129
/// infer a `(base, addend)` decomposition for a value used as a memory address.
///
/// in most cases, `Base` and `Addend` should both be `Self`.
pub trait MemoryAccessBaseInference {
    /// type of the base address for some access. as a common example, the value of a machine's
    /// stack pointer will often be of this type.
    type Base;
    /// type of the offsets for some access. this is defined with respect to `Self::Base`;
    /// it's possible to simply say `Self::Addend` is always zero, but memory layout inference will
    /// be better behaved if `Self::Addend` can be selected so that a common `Self::Base` is
    /// reported. as a common example, offsets for stack-local storage probably should be
    /// represented by `Addend`. additionally, value sets in registers would make for good
    /// `Self::Addend` candidates.
    type Addend;

    /// given some `Value`, `self`, try to interpret `self` as a base plus addend-style memory
    /// access. the base should be a constant or a fully-unbounded variable, where `addend` may
    /// be a constant, bounded variable, or value set. returns `None` when no such
    /// decomposition is apparent.
    fn infer_base_and_addend(&self) -> Option<(Self::Base, Self::Addend)>;
}
146
/// allow `Self` to have simple aliases for other values of the same type.
/// for example, an x86_64 `Data` can be `Data::Alias(DFGRef<x86_64>)`. for all purposes other
/// than display reasoning, these values should be operated on as if they were the underlying
/// aliased location.
pub trait Underlying {
    type Arch: yaxpeax_arch::Arch + SSAValues;

    /// the dfg value this data aliases, if it is an alias; `None` otherwise.
    fn underlying(&self) -> Option<DFGRef<Self::Arch>>;

    /// the expression this data describes, if it is expression-shaped; `None` otherwise.
    fn expression(&self) -> Option<Rc<Item<ValueOrImmediate<Self::Arch>>>> where <<Self as Underlying>::Arch as SSAValues>::Data: Eq + fmt::Display;
}
158
impl<A: SSAValues> MemoryAccessBaseInference for Rc<Item<ValueOrImmediate<A>>> where A::Data: Underlying<Arch=A> + Eq + fmt::Display {
    type Base = Self;
    type Addend = Self;

    /// reduce this (dealiased) expression to a `(base, addend)` pair where possible. handled
    /// shapes: a bare value (base = the value, addend = 0, recursing through any expression
    /// its dfg data carries), `x + imm`, and `x - imm` -- immediate addends of nested results
    /// are folded together. everything else yields `None`.
    fn infer_base_and_addend(&self) -> Option<(Self::Base, Self::Addend)> {
//        println!("impl infer_base_and_addend: {:?}", self);
        let res = match &self.value.dealiased() {
            Expression::Unknown => None,
            Expression::Value(ValueOrImmediate::Value(v)) => {
                // if the value's dfg data aliases an expression, decompose that instead;
                // otherwise the value is its own base with addend 0.
                if let Some(expr) = v.borrow().data.as_ref().and_then(|x| x.expression()) {
                    expr.infer_base_and_addend()
                } else {
                    Some((Item::value(ValueOrImmediate::Value(Rc::clone(v))), Item::untyped(Expression::Value(ValueOrImmediate::Immediate(0)))))
                }
            }
            // a bare immediate is not eligible as a region base here.
            Expression::Value(ValueOrImmediate::Immediate(_i)) => None,
            Expression::Add { left, right } => {
                // only `x + imm` is handled; an immediate on the left is not canonicalized.
                if let Expression::Value(ValueOrImmediate::Immediate(i)) = right.value {
                    if let Some((inner_base, inner_addend)) = left.infer_base_and_addend() {
                        if let Expression::Value(ValueOrImmediate::Immediate(j)) = inner_addend.value {
                            // (base + j) + i  =>  (base, i + j)
                            Some((inner_base.dealiased(), Item::untyped(Expression::Value(ValueOrImmediate::Immediate(i.wrapping_add(j))))))
                        } else {
                            Some((left.dealiased(), right.dealiased()))
                        }
                    } else {
                        Some((left.dealiased(), right.dealiased()))
                    }
                } else {
                    None
                }
            }
            Expression::Sub { left, right } => {
                if let Expression::Value(ValueOrImmediate::Immediate(i)) = right.value {
                    if let Some((inner_base, inner_addend)) = left.infer_base_and_addend() {
                        if let Expression::Value(ValueOrImmediate::Immediate(j)) = inner_addend.value {
                            // (base + j) - i  =>  (base, j - i)
                            Some((inner_base.dealiased(), Item::untyped(Expression::Value(ValueOrImmediate::Immediate(j.wrapping_sub(i))))))
                        } else {
                            // fall back to `left` as the base with a negated addend.
                            // NOTE(review): `-i` presumes the immediate type supports unary
                            // negation (i.e. is signed) -- confirm against `ValueOrImmediate`.
                            let negated_right = Item {
                                ty: right.ty.clone(),
                                value: Expression::Value(ValueOrImmediate::Immediate(-i))
                            };
                            Some((left.dealiased(), Rc::new(negated_right)))
                        }
                    } else {
                        let negated_right = Item {
                            ty: right.ty.clone(),
                            value: Expression::Value(ValueOrImmediate::Immediate(-i))
                        };
                        Some((left.dealiased(), Rc::new(negated_right)))
                    }
                } else {
                    None
                }
            }
            // multiplies, loads, and other composite shapes are not decomposed.
            _ => None,
        };
//        println!("  inferred base and addend: {:?}", res);
        res
    }
}
219
220/*
221use yaxpeax_arch::AddressDiff;
222use analyses::Value;
223impl<A: Arch + SSAValues> Value for LocationAccess<A> {
224    fn unknown() -> Self {
225        panic!("LocationAccess::unknown");
226    }
227
228    fn from_set(_xs: &[Self]) -> Self { Self::unknown() }
229
230    fn from_const(c: u64) -> Self {
231        panic!("LocationAccess::from_const");
232    }
233
234    fn to_const(&self) -> Option<u64> {
235        None
236    }
237}
238
239impl From<AddressDiff<u64>> for LocationAccess<amd64> {
240    fn from(diff: AddressDiff<u64>) -> Self {
241        LocationAccess::Const { diff: <amd64 as Arch>::Address::zero().wrapping_offset(diff).to_linear() as u64 }
242    }
243}
244*/
245
246use analyses::{IndirectQuery, ValueIndex};
247
impl<A: Arch + ValueLocations + SSAValues> IndirectQuery<Rc<Item<ValueOrImmediate<A>>>> for IndirectLayout<A> where A::Data: Underlying<Arch=A> + Eq + fmt::Display {
    /// if a read of `index`'s (base, addend, size) has already been recorded in the use-side
    /// regions, return a placeholder value. the layout tracks access *shapes*, not stored
    /// values, so the best available answer on a hit is `Expression::Unknown`.
    ///
    /// panics if this access was constructed without an ssa use (see the `expect`).
    fn try_get_load(&self, index: ValueIndex<Rc<Item<ValueOrImmediate<A>>>>) -> Option<Rc<Item<ValueOrImmediate<A>>>> {
        if let Some((base, addend)) = index.base.infer_base_and_addend() {
            // base must be a Value otherwise it's some complex composite, OR unknown, and not
            // eligible for a base of a memory region
            if let Expression::Value(v) = &base.as_ref().value {
                let regions = self.regions_uses.as_ref().expect("indirect load has a corresponding ssa use").borrow();
                let v: ValueOrImmediate<A> = v.clone();
                return regions.get(&v)
                    .and_then(|region| region.accesses.get(&addend))
                    .and_then(|offset| offset.get(&(index.size as u64)))
                    .and_then(|pat| if pat.is_read() {
                        Some(Item::untyped(Expression::Unknown))
                    } else {
                        None
                    });
            }
        }
        None
    }
    /// report whether a write of exactly this (base, addend, size) has been recorded in the
    /// def-side regions. panics if this access was constructed without an ssa def.
    fn try_get_store(&self, index: ValueIndex<Rc<Item<ValueOrImmediate<A>>>>) -> Option<()> {
        if let Some((base, addend)) = index.base.infer_base_and_addend() {
            // base must be a Value otherwise it's some complex composite, OR unknown, and not
            // eligible for a base of a memory region
            if let Expression::Value(v) = &base.as_ref().value {
                let regions = self.regions_defs.as_ref().expect("indirect store has a corresponding ssa def").borrow();
                let v: ValueOrImmediate<A> = v.clone();
                return regions.get(&v)
                    .and_then(|region| region.accesses.get(&addend))
                    .and_then(|offset| offset.get(&(index.size as u64)))
                    .and_then(|pat| if pat.is_write() {
                        Some(())
                    } else {
                        None
                    });
            }
        }
        None
    }
    /// record a read of `index` in the use-side regions (creating the region/offset entries
    /// as needed) and return `Expression::Unknown` -- stored values are not tracked yet.
    /// bases that cannot be decomposed are logged and skipped rather than recorded.
    fn load(&self, index: ValueIndex<Rc<Item<ValueOrImmediate<A>>>>) -> Rc<Item<ValueOrImmediate<A>>> {
        if let Some((base, addend)) = index.base.infer_base_and_addend() {
            // base must be a Value otherwise it's some complex composite, OR unknown, and not
            // eligible for a base of a memory region
            if let Expression::Value(v) = &base.as_ref().value {
                let mut regions = self.regions_uses.as_ref().expect("indirect load has a corresponding ssa use").borrow_mut();
                let v: ValueOrImmediate<A> = v.clone();
                let region = regions.entry(v).or_insert_with(MemoryRegion::new);
                // TODO: figure out when addends would read other offsets
                let offset = region.accesses.entry(addend).or_insert_with(HashMap::new);
                let usage = offset.entry(index.size as u64).or_insert_with(UsePattern::none);
                usage.mark_read();
            } else {
                tracing::error!("base of an access ({:?}) is not a Value, but is {:?}", index.base, base);
            }
        } else {
            tracing::error!("unable to infer base/addend of {:?}", index.base);
        }
        Item::untyped(Expression::Unknown)
    }
    /// record a write of `index`. first, every use-side region is merged into the def-side
    /// regions so accesses observed before this store are carried forward; then this store's
    /// own (base, addend, size) is marked written. `_value` is currently discarded (see the
    /// TODO about spills/reloads below).
    fn store(&self, index: ValueIndex<Rc<Item<ValueOrImmediate<A>>>>, _value: &Rc<Item<ValueOrImmediate<A>>>) {
//        eprintln!("store: {:?}", index);
//        eprintln!("old: {:?}", self.regions_uses);
//        eprintln!("new: {:?}", self.regions_defs);
        // copy old stores to this memory
        if let (Some(uses), Some(defs)) = (self.regions_uses.as_ref(), self.regions_defs.as_ref()) {
            let mut defs_mut = defs.borrow_mut();
            for (base, region) in &*uses.borrow() {
                if defs_mut.contains_key(base) {
                    // merge
                    let new_defs = &mut defs_mut.get_mut(base).unwrap().accesses;
                    for (old_offset, accesses) in &region.accesses {
                        if new_defs.contains_key(old_offset.as_ref()) {
                            let new_accesses = new_defs.get_mut(old_offset.as_ref()).unwrap();
                            // again, merge...
                            for (sz, pattern) in accesses {
                                if new_accesses.contains_key(&sz) {
                                    // merge, somehow
                                    let new_pattern = &new_accesses[&sz];
                                    let new_pattern = new_pattern.merge(pattern);
                                    new_accesses.insert(*sz, new_pattern);
                                } else {
                                    new_accesses.insert(*sz, pattern.clone());
                                }
                            }
                        } else {
                            // offset not yet present on the def side: copy it wholesale.
                            new_defs.insert(old_offset.clone(), accesses.clone());
                        }
                    }
                } else {
                    // base not yet present on the def side: copy the whole region.
                    defs_mut.insert(base.clone(), region.to_owned());
                }
            }
        }

        // TODO: use (store?) `value`, which should get memory analysis in a place where it could
        // try tracking spills/reloads.
        if let Some((base, addend)) = index.base.infer_base_and_addend() {
            // base must be a Value otherwise it's some complex composite, OR unknown, and not
            // eligible for a base of a memory region
            if let Expression::Value(v) = &base.as_ref().value {
                let mut regions = self.regions_defs.as_ref().expect("indirect store has a corresponding ssa def").borrow_mut();
                let v: ValueOrImmediate<A> = v.clone();
                let region = regions.entry(v).or_insert_with(MemoryRegion::new);
                // TODO: figure out when addends would write over other offsets
                let offset = region.accesses.entry(addend).or_insert_with(HashMap::new);
                let usage = offset.entry(index.size as u64).or_insert_with(UsePattern::none);
                usage.mark_write();
            } else {
                tracing::error!("base of an access ({:?}) is not a Value, but is {:?}", index.base, base);
            }
        } else {
            tracing::error!("unable to infer base/addend of {:?}", index.base);
        }
    }
}
363
impl<'ssa, A: Arch + ValueLocations + SSAValues> MemoryLayout<'ssa, A> where A::Data: Underlying<Arch=A> + Eq + fmt::Display {
    /// find (or create) the segment map for `indirection_value`, keyed by its underlying
    /// (dealiased) ssa value so that aliases of the same value share one segment.
    fn get_segment(&self, indirection_value: DFGRef<A>) -> Rc<RefCell<HashMap<ValueOrImmediate<A>, MemoryRegion<A>>>> {
        let mut underlying = Rc::clone(&indirection_value);
        // collapse alias-style data (see `Underlying`) to a canonical value.
        if let Some(inner) = indirection_value.borrow().data.as_ref().and_then(|x| x.underlying()) {
            underlying = inner;
        }

        let segment_base = ValueOrImmediate::Value(underlying);
        Rc::clone(self.segments.borrow_mut().entry(segment_base)
            .or_insert_with(|| Rc::new(RefCell::new(HashMap::new()))))
    }

    /// pretty-print the inferred layout to stdout. regions are sorted by their display
    /// string; within a region's address, offset expressions are ordered immediates first
    /// (ascending), then values, then unknowns, and each (offset, size) line shows its
    /// read/write pattern.
    pub fn render(&self) {
        let segments = self.segments.borrow();
        let mut regions: Vec<&ValueOrImmediate<A>> = segments.keys().collect();
        // stable, human-friendly region order: compare rendered names.
        regions.sort_by(|l, r| {
            l.to_string().cmp(&r.to_string())
        });
        for region in regions {
            let segment = segments.get(region).unwrap();
            println!("at region {}:", region);
            for (base, segment) in segment.borrow().iter() {
                println!("          --- address <{}> ---", base.name());
                let mut exprs: Vec<Rc<Item<ValueOrImmediate<A>>>> = segment.accesses.keys().cloned().collect();
                use std::cmp::Ordering;
                // offset ordering: immediates (numeric ascending) < values < unknowns;
                // ties among values/composites are left in hash order (Ordering::Equal).
                exprs.sort_by(|l, r| {
                    let lv = &l.value;
                    let rv = &r.value;
                    if lv == rv {
                        Ordering::Equal
                    } else {
                        match (lv, rv) {
                            (Expression::Unknown, _) => {
                                Ordering::Greater
                            }
                            (_, Expression::Unknown) => {
                                Ordering::Less
                            },
                            (Expression::Value(lv), Expression::Value(rv)) => {
                                use analyses::ValueOrImmediate::*;
                                match (lv, rv) {
                                    (Immediate(l), Immediate(r)) => {
                                        l.cmp(r)
                                    }
                                    (Immediate(_), _) => {
                                        Ordering::Less
                                    }
                                    (_, Immediate(_)) => {
                                        Ordering::Greater
                                    }
                                    _ => {
                                        Ordering::Equal
                                    }
                                }
                            }
                            (Expression::Value(_), _) => {
                                Ordering::Less
                            }
                            (_, Expression::Value(_)) => {
                                Ordering::Greater
                            }
                            _ => {
                                Ordering::Equal
                            }
                        }
                    }
                });
                for expr in exprs {
                    let accesses = segment.accesses.get(&expr).unwrap();
                    // a recorded offset always has at least one size entry (see load/store).
                    assert!(accesses.len() > 0);

                    // right-align the offset label to at least 6 characters.
                    let mut base_str = expr.to_string();
                    while base_str.len() < 6 {
                        base_str = format!(" {}", base_str);
                    }

                    let mut access_sizes: Vec<&u64> = accesses.keys().collect();
                    access_sizes.sort();
                    for size in access_sizes.iter() {
                        let access = accesses.get(size).unwrap();
                        // "rw-"-style flags; the trailing slot is reserved/unused.
                        let access_str = format!("{}{}-",
                            if access.is_read() { "r" } else { "-" },
                            if access.is_write() { "w" } else { "-" },
                        );
                        println!("  {}  | {:#06x}:         {}     |", base_str, size, access_str);
                    }
                }
                println!("          ---------------------------");
            }
        }
    }
}
456
457use yaxpeax_x86::long_mode::{Arch as amd64};
458use analyses::Expression;
459use analyses::Item;
460use analyses::ValueOrImmediate;
461impl<'ssa> DFG<Rc<Item<ValueOrImmediate<amd64>>>, amd64, <amd64 as Arch>::Address> for MemoryLayout<'ssa, amd64> {
462    type Indirect = IndirectLayout<amd64>;
463
464    fn indirect_loc(&self, when: <amd64 as Arch>::Address, loc: <amd64 as ValueLocations>::Location) -> IndirectLayout<amd64> {
465        let ssa_def = self.ssa.try_get_def(when, loc.clone());
466        let ssa_use = self.ssa.try_get_use(when, loc.clone());
467        if ssa_def.is_none() {
468//            println!("no ssa def for {} at {}", loc, when);
469        }
470        if ssa_use.is_none() {
471//            println!("no ssa use for {} at {}", loc, when);
472        }
473        let regions_defs = ssa_def.as_ref().map(|value| {
474            self.get_segment(Rc::clone(value))
475        });
476        let regions_uses = ssa_use.as_ref().map(|value| {
477            self.get_segment(Rc::clone(value))
478        });
479        IndirectLayout {
480            regions_defs,
481            regions_uses,
482            ssa_def,
483            ssa_use,
484        }
485    }
486    fn read_loc(&self, when: <amd64 as Arch>::Address, loc: <amd64 as ValueLocations>::Location) -> Rc<Item<ValueOrImmediate<amd64>>> {
487        // TODO: HACK: ignore weird rip semantics for now
488        if loc != crate::arch::x86_64::analyses::data_flow::Location::RIP {
489            // TODO: return a value?
490            let dfg_ref = self.ssa.get_use(when, loc).as_rc();
491            return Item::untyped(Expression::value(ValueOrImmediate::Value(dfg_ref)));
492        }
493        // LocationAccess::Unknown
494        Item::untyped(Expression::Unknown)
495    }
496    fn write_loc(&mut self, when: <amd64 as Arch>::Address, loc: <amd64 as ValueLocations>::Location, value: Rc<Item<ValueOrImmediate<amd64>>>) {
497        // TODO: HACK: ignore weird rip semantics for now
498        if loc != crate::arch::x86_64::analyses::data_flow::Location::RIP {
499            let dest = self.ssa.get_def(when, loc);
500            if dest.value.borrow().data.is_none() {
501                dest.update(crate::arch::x86_64::analyses::data_flow::Data::Expression(value));
502            }
503        }
504    }
505}
506/*
507impl<'ssa, A: Arch + ValueLocations + SSAValues> DFG<LocationAccess, A, A::Address> for MemoryLayout<'ssa, A> {
508    fn read_loc(&self, when: A::Address, loc: A::Location) -> LocationAccess {
509        if let Some(when) = self.per_address_layout.get(&when) {
510            if let Some(accesses) = when.get(&loc) {
511                return LocationAccess::Memory { size: 0, offset: 0 };
512            }
513        }
514
515        LocationAccess::Unknown
516    }
517    fn write_loc(&mut self, when: A::Address, loc: A::Location, value: LocationAccess) {
518        if let crate::arch::x86_64::analyses::data_flow::Location::Memory(region) = loc {
519            let accesses = self.per_address_layout
520                .entry(when)
521                .or_insert_with(|| HashMap::new())
522                .entry(loc)
523                .or_insert_with(|| Vec::new());
524            // TODO: sizes should be regions starting at 0 of the kinds of accesses that have been
525            // seen..
526            accesses.push((Use::Write, region));
527        }
528    }
529}
530*/