yaxpeax_core/analyses/static_single_assignment/
data.rs

1use std::rc::Rc;
2use std::fmt::Debug;
3use std::fmt;
4use std::cell::RefCell;
5use std::hash::Hash;
6use std::collections::HashMap;
7
8use yaxpeax_arch::Arch;
9use yaxpeax_arch::AddressDisplay;
10
11use data::types::Typed;
12use data::modifier;
13use data::ValueLocations;
14use data::Direction;
15
16
17use num_traits::Zero;
18
19pub type DFGRef<A> = Rc<RefCell<Value<A>>>;
20pub type RWMap<A> = HashMap<(<A as ValueLocations>::Location, Direction), DFGRef<A>>;
21#[derive(Clone, Debug)]
22pub struct PhiOp<A: SSAValues> { pub out: DFGRef<A>, pub ins: Vec<DFGRef<A>> }
23pub type PhiLocations<A> = HashMap<<A as ValueLocations>::Location, PhiOp<A>>;
24
25#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq)]
26pub enum DefSource<A> {
27    /// The defined value comes from an instruction in the underlying binary
28    Instruction,
29    /// The defined value comes from a phi pseudo-op
30    Phi,
31    /// The defined value is some custom definition - possibly automatically added or manually
32    /// declared.
33    Modifier(modifier::Precedence),
34    /// Defined on the edge between two basic blocks - due to some value modifier, likely from
35    /// conditionally defining a value after a conditional branch
36    Between(A),
37    /// Definition comes from some site before this DFG, such as the case for function arguments.
38    External,
39}
40
41impl <A: yaxpeax_arch::AddressDisplay> fmt::Display for DefSource<A> {
42    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
43        match self {
44            DefSource::Instruction => write!(f, "instruction"),
45            DefSource::Phi => write!(f, "phi"),
46            DefSource::Modifier(modifier::Precedence::Before) => write!(f, "modifier (before)"),
47            DefSource::Modifier(modifier::Precedence::After) => write!(f, "modifier (after)"),
48            DefSource::Between(addr) => write!(f, "between ({})", addr.show()),
49            DefSource::External => write!(f, "external")
50        }
51    }
52}
53
54unsafe impl<A: Arch + SSAValues> Send for SSA<A> {}
55// look. just rewrite this as a graph (one day). vertices are DFGRef, edges are data
56// dependences. secondary graph with vertices (DFGRef | Address) where edges are Address -> DFGRef
57// (define) -> Address (use of DFGRef)
58//
59// in the mean time, DFGRef are growing an (Address, Location) field lol
60#[derive(Debug)]
61pub struct SSA<A: Arch + SSAValues> where A::Location: Hash + Eq, A::Address: Hash + Eq {
62    // TODO: fairly sure these Rc<RefCell<...>> could all just be raw pointers
63    // these aren't individually freed so Rc shouldn't be necessary?
64    pub instruction_values: HashMap<A::Address, RWMap<A>>,
65    pub modifier_values: HashMap<(A::Address, modifier::Precedence), RWMap<A>>,
66    pub control_dependent_values: HashMap<A::Address, HashMap<A::Address, RWMap<A>>>,
67    pub defs: HashMap<HashedValue<DFGRef<A>>, (A::Address, DefSource<A::Address>)>,
68    pub phi: HashMap<A::Address, PhiLocations<A>>,
69    pub indirect_values: HashMap<A::Address, HashMap<A::Location, HashMap<(A::Data, Direction), DFGRef<A>>>>,
70    // invariant:
71    // for (k, v) in external_defs {
72    //   assert_eq!(k, v.location);
73    //   assert!(v.version.is_none());
74    // }
75    pub external_defs: HashMap<A::Location, DFGRef<A>>,
76}
77
78pub struct SSAQuery<'a, A: Arch + SSAValues> where A::Location: Hash + Eq, A::Address: Hash + Eq {
79    ssa: &'a SSA<A>,
80    addr: A::Address
81}
82
83pub struct NoValueDescriptions;
84
85impl<Location> ValueDescriptionQuery<Location> for NoValueDescriptions {
86    fn modifier_name(&self, _loc: Location, _dir: Direction, _precedence: modifier::Precedence) -> Option<String> { None }
87    fn modifier_value(&self, _loc: Location, _dir: Direction, _precedence: modifier::Precedence) -> Option<String> { None }
88}
89
90pub trait ValueDescriptionQuery<Location> {
91    fn modifier_name(&self, loc: Location, dir: Direction, precedence: modifier::Precedence) -> Option<String>;
92    fn modifier_value(&self, loc: Location, dir: Direction, precedence: modifier::Precedence) -> Option<String>;
93}
94
95impl<'a, A: Arch + SSAValues> ValueDescriptionQuery<A::Location> for SSAQuery<'a, A> where A::Location: Hash + Eq, A::Address: Hash + Eq {
96    fn modifier_name(&self, loc: A::Location, dir: Direction, precedence: modifier::Precedence) -> Option<String> {
97        if let Some(rwmap) = self.ssa.modifier_values.get(&(self.addr, precedence)) {
98            if let Some(entry) = rwmap.get(&(loc.clone(), dir)) {
99                entry.borrow().name.as_ref().map(|x| x.clone()).or_else(|| {
100                    let entry = entry.borrow();
101                    if let Some(version) = entry.version() {
102                        Some(format!("{:?}_{}", entry.location, version))
103                    } else {
104                        Some(format!("{:?}_input", entry.location))
105                    }
106                })
107            } else {
108                Some(format!("modifier values, but no entry for ({:?}, {:?})", loc.clone(), dir))
109            }
110        } else {
111            Some(format!("no modifier values at ({}, {:?})", self.addr.show(), precedence))
112        }
113    }
114
115    fn modifier_value(&self, loc: A::Location, dir: Direction, precedence: modifier::Precedence) -> Option<String> {
116        if let Some(rwmap) = self.ssa.modifier_values.get(&(self.addr, precedence)) {
117            if let Some(entry) = rwmap.get(&(loc.clone(), dir)) {
118                entry.borrow().name.as_ref().map(|x| x.clone()).or_else(|| {
119                    let entry = entry.borrow();
120                    if let Some(data) = entry.data.as_ref() {
121                        Some(format!("{:?})", data))
122                    } else {
123                        None
124                    }
125                })
126            } else {
127                Some(format!("modifier values, but no entry for ({:?}, {:?})", loc.clone(), dir))
128            }
129        } else {
130            Some(format!("no modifier values at ({}, {:?})", self.addr.show(), precedence))
131        }
132    }
133}
134
135pub struct Value<A: SSAValues> where A::Data: Typed {
136    pub name: Option<String>,
137    pub location: A::Location,
138    // None indicates "not written anywhere in this dfg", which indicates this value can
139    // be considered an input from some enclosing control flow
140    pub version: Option<u32>,
141    pub data: Option<A::Data>,
142    pub used: bool,
143}
144
145impl <A: SSAValues> fmt::Debug for Value<A> {
146    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
147        write!(f, "Value {{ ")?;
148        write!(f, "{:?}", self.location)?;
149        if let Some(v) = self.version {
150            write!(f, "_{}", v)?;
151        } else {
152            write!(f, "_input")?;
153        }
154        if let Some(name) = self.name.as_ref() {
155            write!(f, ", \"{}\"", name)?;
156        }
157        if let Some(data) = self.data.as_ref() {
158            write!(f, ", data: {:?}", data)?;
159        } else {
160            write!(f, ", data: None")?;
161        }
162        write!(f, " }}")
163    }
164}
165
166impl <A: SSAValues> fmt::Display for Value<A> {
167    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
168        write!(f, "{{ {:?}", self.location)?;
169        if let Some(v) = self.version {
170            write!(f, "_{}", v)?;
171        } else {
172            write!(f, "_input")?;
173        }
174        if let Some(name) = self.name.as_ref() {
175            write!(f, ", \"{}\"", name)?;
176        }
177        if let Some(data) = self.data.as_ref() {
178            write!(f, ": {}", data.display(false, None))?;
179            // {}", data)?;
180//        } else {
181//            write!(f, "")?;
182        }
183        write!(f, " }}")
184    }
185}
186
187impl <A: SSAValues> Hash for Value<A> where A::Location: Hash, A::Data: Hash {
188    fn hash<H: Hasher>(&self, state: &mut H) {
189        self.location.hash(state);
190        self.version.hash(state);
191        self.data.hash(state);
192    }
193}
194
195pub struct DFGLValue<A: SSAValues> {
196    pub value: DFGRef<A>
197}
198
199use std::cell::Ref;
200impl <A: SSAValues> DFGLValue<A> {
201    pub fn update(&self, new_data: A::Data) {
202        // TODO: check to see if the new value conflicts with what we're setting?
203        self.value.borrow_mut().data.replace(new_data);
204    }
205    pub fn replace(&self, new_data: Option<A::Data>) {
206        // TODO: check to see if the new value conflicts with what we're setting?
207        std::mem::drop(std::mem::replace(&mut self.value.borrow_mut().data, new_data));
208    }
209    pub fn clear(&self) {
210        self.value.borrow_mut().data.take();
211    }
212    pub fn get_data(&self) -> Ref<Option<A::Data>> {
213        Ref::map(self.value.borrow(), |v| &v.data)
214    }
215    pub fn as_rc(self) -> DFGRef<A> {
216        self.value
217    }
218}
219
220#[derive(Debug, Clone)]
221pub struct HashedValue<A: Clone> {
222    pub value: A
223}
224
225use std::hash::Hasher;
226impl <A: SSAValues> Hash for HashedValue<DFGRef<A>> where Value<A>: Hash {
227    fn hash<H: Hasher>(&self, state: &mut H) {
228//        let v: &RefCell<Value<A>> = &*self.value;
229        (self.value.as_ptr()).hash(state);
230    }
231}
232
233impl <A: SSAValues> Eq for HashedValue<DFGRef<A>> { }
234
235impl <A: SSAValues> PartialEq for HashedValue<DFGRef<A>> {
236    fn eq(&self, other: &HashedValue<DFGRef<A>>) -> bool {
237        Rc::ptr_eq(&self.value, &other.value)
238    }
239}
240
241impl <A: SSAValues> PartialEq for Value<A> {
242    fn eq(&self, rhs: &Value<A>) -> bool {
243        self as *const Value<A> == rhs as *const Value<A>
244    }
245}
246impl <A: SSAValues> Eq for Value<A> {}
247
248pub struct ValueDisplay<'data, 'colors, A: SSAValues> {
249    value: &'data Value<A>,
250    show_location: bool,
251    detailed: bool,
252    colors: Option<&'colors crate::ColorSettings>,
253}
254
255impl <'data, 'colors, A: SSAValues> ValueDisplay<'data, 'colors, A> {
256    pub fn with_location(self) -> Self {
257        ValueDisplay {
258            show_location: true,
259            ..self
260        }
261    }
262}
263
264impl <'data, 'colors, A: SSAValues> fmt::Display for ValueDisplay<'data, 'colors, A> {
265    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
266        if !self.detailed {
267            if let Some(name) = self.value.name.as_ref() {
268                return write!(f, "{}", name);
269            } else if let Some(data) = self.value.data.as_ref() {
270                return fmt::Display::fmt(&data.display(self.detailed, self.colors), f);
271            } else {
272                if let Some(v) = self.value.version {
273                    return write!(f, "{:?}_{}", self.value.location, v);
274                } else {
275                    return write!(f, "{:?}_input", self.value.location);
276                }
277            }
278        }
279
280        if self.show_location {
281            if let Some(name) = self.value.name.as_ref() {
282                write!(f, "{}", name)?;
283                if let Some(v) = self.value.version {
284                    write!(f, " [at {:?}_{}]", self.value.location, v)?;
285                } else {
286                    write!(f, " [at {:?}_input]", self.value.location)?;
287                }
288            } else {
289                if let Some(v) = self.value.version {
290                    write!(f, " {:?}_{}", self.value.location, v)?;
291                } else {
292                    write!(f, " {:?}_input", self.value.location)?;
293                }
294            }
295        } else {
296            if let Some(v) = self.value.version {
297                write!(f, "{:?}_{}", self.value.location, v)?;
298            } else {
299                write!(f, "{:?}_input", self.value.location)?;
300            }
301        }
302
303        if let Some(data) = self.value.data.as_ref() {
304            write!(f, " (= {:?})", data)?;
305        }
306
307        Ok(())
308    }
309}
310
311impl <A> Value<A> where A: SSAValues {
312    pub fn version(&self) -> Option<u32> {
313        self.version
314    }
315}
316
317impl <'data, 'colors, A: 'data + SSAValues> DataDisplay<'data, 'colors> for Value<A> {
318    type Displayer = ValueDisplay<'data, 'colors, A>;
319    fn display(&'data self, detailed: bool, colors: Option<&'colors crate::ColorSettings>) -> Self::Displayer {
320        ValueDisplay {
321            value: self,
322            detailed,
323            show_location: false,
324            colors,
325        }
326    }
327}
328
329impl <A: SSAValues + Arch> Value<A> {
330    pub fn new(location: A::Location, version: Option<u32>) -> Value<A> {
331        Value {
332            location: location,
333            version: version,
334            data: None,
335            name: None,
336            used: false,
337        }
338    }
339}
340
341pub trait DataDisplay<'data, 'colors> {
342    type Displayer: fmt::Display;
343    fn display(&'data self, detailed: bool, colors: Option<&'colors crate::ColorSettings>) -> Self::Displayer;
344}
345
346pub trait SSAValues where Self: Arch + ValueLocations {
347    type Data: for<'data, 'colors> DataDisplay<'data, 'colors> + Debug + Hash + Clone + Typed;
348}
349
350pub trait DFGRebase<A: SSAValues + Arch> {
351    fn rebase_references(&self, old_dfg: &SSA<A>, new_dfg: &SSA<A>) -> Self;
352}
353
354impl <A: SSAValues> SSA<A> where A::Address: Hash + Eq, A::Location: Hash + Eq {
355    pub fn log_contents(&self) {
356        println!("instruction_values:");
357        // pub instruction_values: HashMap<A::Address, RWMap<A>>,
358        for (addr, rwmap) in self.instruction_values.iter() {
359            println!("  {:?} -> {{", addr);
360            for ((loc, dir), value) in rwmap.iter() {
361                println!("    {:?} {:?} value={:?}", dir, loc, value);
362            }
363            println!("  }}");
364        }
365        println!("modifier_values:");
366        // pub modifier_values: HashMap<(A::Address, modifier::Precedence), RWMap<A>>,
367        for ((addr, precedence), rwmap) in self.modifier_values.iter() {
368            println!("  {:?}, {:?} -> {:?}", addr, precedence, rwmap);
369        }
370        // pub control_dependent_values: HashMap<A::Address, HashMap<A::Address, RWMap<A>>>,
371        // pub defs: HashMap<HashedValue<DFGRef<A>>, (A::Address, DefSource<A::Address>)>,
372        // pub phi: HashMap<A::Address, PhiLocations<A>>,
373        // pub indirect_values: HashMap<A::Address, HashMap<A::Location, HashMap<(A::Data, Direction), DFGRef<A>>>>,
374        println!("external_defs:");
375        // pub external_defs: HashMap<A::Location, DFGRef<A>>,
376        for (loc, value) in self.external_defs.iter() {
377            println!("  {:?} -> {:?}", loc, value);
378        }
379        println!("indirect_values:");
380        for (addr, values) in self.indirect_values.iter() {
381            println!("  {:?} -> {{", addr);
382            for (loc, value) in values.iter() {
383                println!("  {:?} = {:?}", loc, value);
384            }
385            println!("  }}");
386        }
387    }
388
389    /// collect all locations that have uses of undefined data
390    pub fn get_undefineds(&self) -> Vec<A::Location> {
391        let mut undefineds = Vec::new();
392
393        for map in self.instruction_values.values() {
394            for dfgref in map.values() {
395                let dfgref = dfgref.borrow();
396                if dfgref.version == None && dfgref.used {
397                    undefineds.push(dfgref.location.clone());
398                }
399            }
400        }
401
402        for map in self.modifier_values.values() {
403            for dfgref in map.values() {
404                let dfgref = dfgref.borrow();
405                if dfgref.version == None && dfgref.used {
406                    undefineds.push(dfgref.location.clone());
407                }
408            }
409        }
410
411        for map in self.control_dependent_values.values() {
412            for innermap in map.values() {
413                for dfgref in innermap.values() {
414                    let dfgref = dfgref.borrow();
415                    if dfgref.version == None && dfgref.used {
416                        undefineds.push(dfgref.location.clone());
417                    }
418                }
419            }
420        }
421
422        for map in self.phi.values() {
423            for (loc, phiop) in map.iter() {
424                if !phiop.out.borrow().used {
425                    continue;
426                }
427                for inref in phiop.ins.iter() {
428                    if inref.borrow().version == None {
429                        undefineds.push(loc.clone());
430                    }
431                }
432            }
433        }
434
435        undefineds
436    }
437
438    pub fn query_at<'a>(&'a self, addr: A::Address) -> SSAQuery<'a, A> {
439        SSAQuery {
440            ssa: self,
441            addr
442        }
443    }
444    pub fn get_value(&self, addr: A::Address, loc: A::Location, dir: Direction) -> Option<DFGRef<A>> {
445        self.instruction_values.get(&addr)
446            .and_then(|addr_values| addr_values.get(&(loc, dir)))
447            .map(|x| Rc::clone(x))
448    }
449    pub fn get_transient_value(&self, from: A::Address, to: A::Address, loc: A::Location, dir: Direction) -> Option<DFGRef<A>> {
450        self.control_dependent_values.get(&from)
451            .and_then(|dests| dests.get(&to))
452            .and_then(|values| values.get(&(loc, dir)))
453            .map(|x| Rc::clone(x))
454    }
455    pub fn try_get_def(&self, addr: A::Address, loc: A::Location) -> Option<DFGRef<A>> {
456        self.get_value(addr, loc, Direction::Write)
457    }
458    pub fn try_get_use(&self, addr: A::Address, loc: A::Location) -> Option<DFGRef<A>> {
459        self.get_value(addr, loc, Direction::Read)
460    }
461    // TODO: These should have a #[cfg()] flag to use after heavy fuzzing that does
462    // unreachable_unchecked!() for the None case here.
463    //
464    // that flag should also remove the try_get_* variants
465    pub fn get_def(&self, addr: A::Address, loc: A::Location) -> DFGLValue<A> {
466        DFGLValue {
467            value: self.get_value(addr, loc.clone(), Direction::Write)
468                .unwrap_or_else(|| {
469                    panic!("Failed to get def of {:?} at {}", loc.clone(), addr.show())
470                })
471        }
472    }
473    pub fn get_use(&self, addr: A::Address, loc: A::Location) -> DFGLValue<A> {
474        DFGLValue {
475            value: self.get_value(addr, loc.clone(), Direction::Read)
476                .unwrap_or_else(|| {
477                    panic!("Failed to get use of {:?} at {}", loc.clone(), addr.show())
478                })
479        }
480    }
481
482    pub fn get_transient_def(&self, from: A::Address, to: A::Address, loc: A::Location) -> DFGLValue<A> {
483        DFGLValue { value: self.get_transient_value(from, to, loc, Direction::Write).unwrap() }
484    }
485    pub fn get_transient_use(&self, from: A::Address, to: A::Address, loc: A::Location) -> DFGLValue<A> {
486        DFGLValue { value: self.get_transient_value(from, to, loc, Direction::Read).unwrap() }
487    }
488
489    pub fn try_get_def_site(&self, value: DFGRef<A>) -> Option<&(A::Address, DefSource<A::Address>)> {
490        self.defs.get(&HashedValue { value: Rc::clone(&value) })
491    }
492
493    pub fn get_def_site(&self, value: DFGRef<A>) -> (A::Address, DefSource<A::Address>) {
494        match self.defs.get(&HashedValue { value: Rc::clone(&value) }) {
495            Some(site) => *site,
496            None => {
497                // This is a rather serious bug - we have a value but it was never defined.
498                // well. it should be a bug. this is probably reachable if the def is actually
499                // external to the current control flow graph (for example, function arguments)
500                // this should be backed by a set of fake defs at function entry
501                //
502                // for now, return an `External` def source. the use of address zero is kind of a
503                // lazy hack around this function returning a concrete address. a better address to
504                // return might be the entrypoint of this DFG?
505                (A::Address::zero(), DefSource::External)
506            }
507        }
508    }
509}