yaxpeax_core/data/
mod.rs

1pub mod modifier;
2pub mod types;
3
4use yaxpeax_arch::Arch;
5
6use arch::{AbiDefaults, FunctionImpl, FunctionQuery};
7
8use serde::{Deserialize, Serialize};
9use std::hash::Hash;
10use std::fmt::Debug;
11
12#[derive(Debug, Hash, PartialEq, Eq, Copy, Clone, Serialize, Deserialize)]
13pub enum Direction {
14    Read,
15    Write
16}
17
/// `AliasInfo` describes the aliasing rules between the `Location`s of an instruction set.
pub trait AliasInfo: Sized {
    /// list every location that `self` aliases, excluding `self` itself.
    ///
    /// as an x86_64 example, the `al` register aliases `ax`, `eax`, and `rax`, but *not* `ah`.
    /// the relationship holds in both directions: `rax` aliases `eax`, `ax`, `al`, and `ah`.
    ///
    /// TODO: extend this for memory locations known through a disambiguator.
    /// `any[rsp_input + 0x1234, 8]` aliases `any[rsp_input + 0x1238, 4]`, and this needs to be
    /// reported somehow for dfg construction.
    ///
    /// `aliases_of` (or some variant) should take a disambiguator and report all partially- or
    /// fully-overlapping aliases with `Self`.
    fn aliases_of(&self) -> Vec<Self>;

    /// find the widest alias of `self`.
    ///
    /// TODO: this interface is probably incorrect, because a single widest alias may not exist.
    /// in memory analyses we may find a situation with values like:
    /// ```text
    /// Value A: | 0x0000   0x0001 |
    /// Value B:          | 0x0001   0x0002 |
    /// ```
    /// here value A and value B share the word at address `0x0001`, so the byte at `0x0001` may
    /// not have one alias that aliases all the others. often "all of memory" can serve as a
    /// widest alias, but there may be locations in some instruction set for which no such
    /// concept is appropriate.
    fn maximal_alias_of(&self) -> Self;
}
46
47/// `ValueLocations` allows decomposition of an instruction into a series of locations and an
48/// indication of them being read or written. this defines the data flow relation between
49/// instructions and all locations in programs.
50///
51/// NOTE: **`ValueLocations` is deprecated in favor of `LocIterator`.**
52///
53/// implementation guidance: for correctness, `decompose` must express the most conservative
54/// locations. as an example, x86_64 "push" should not be defined to use a stack-specific location
55/// - `ValueLocations::decompose` should simply specify memory access, and allow a `Disambiguator`
56/// with appropriate assumptions to refine the memory access into something appropriate for
57/// analysis.
58pub trait ValueLocations: Arch {
59    type Location: Debug + Hash + Eq + Serialize + for<'de> Deserialize<'de> + Clone + AliasInfo;
60
61    fn decompose(op: &Self::Instruction) -> Vec<(Option<Self::Location>, Direction)>;
62}
63
64/// Disambiguator is used with `LocIterator` to allow customizable refinement of locations in an
65/// instruction. While in most architectures, registers are unambiguous, memory locations are often
66/// much more complex. Disambiguation must be flexible because the actual scheme may vary program
67/// to program, compiler to compiler, and binary to binary, all for the same architecture.
68///
69/// As an example for x86_64 variance, a maximally pessimistic analysis may assume all memory
70/// accesses alias. This significantly complicates stack analysis, and for most programs it may be
71/// acceptable to assume that stack-relative accesses do not alias, for example, heap accesses or
72/// static data accesses. Because stack accesses in a single function often are all constant
73/// offsets from a known pointer, a disambiguation to move all stack accesses into a stack-only
74/// region we assume is not aliased by other memory means we can insert variables for stack memory
75/// with a naive analysis. This same claim holds for globals and program accesses, with arbitrary
76/// heap accesses still possibly requiring more intensive analysis.
77pub trait Disambiguator<A: ValueLocations, LocSpec> {
78    fn disambiguate(&self, instr: &A::Instruction, loc: (Option<A::Location>, Direction), spec: LocSpec) -> Option<A::Location>;
79}
80
81/// `LocationAliasDescriptions` is the rules describing how all locations in some data-flow graph
82/// may overlap. it is a logical error for, as an example, for a `LocationAliasDescriptions` to be
83/// used on a graph including locations from a `Disambiguator::disambiguate` that are not in
84/// `Self`.
85pub trait LocationAliasDescriptions<A: ValueLocations> {
86    /// primarily for memory locations; returns `true` if `left` and `right` may refer to any of
87    /// the same state, `false` if `left` and `right` are totally disjoint.
88    /// ```text
89    /// Disambiguator::may_alias(rcx, ch)
90    /// ```
91    /// should always be true.
92    /// ```text
93    /// Disambiguator::may_alias(any[rsp_input + 4, 4], any[rsp_input + 8, 4])
94    /// ```
95    /// should always be false.
96    /// ```text
97    /// Disambiguator::may_alias(any[rcx_input + 4, 4], any[rsp_input + 4, 4])
98    /// ```
99    /// should be true if `rcx` or `rsp` are unknown, or are known to potentially alias.
100    fn may_alias(&self, left: &A::Location, right: &A::Location) -> bool;
101
102    /// what other locations may `loc` overlap with?
103    /// returns the set of locations known to `Self`, which should be all locations in a given
104    /// function, that can overlap with `loc`.
105    /// TODO: some kind of iterator built on `&self` to avoid the vec alloc/collect...
106    fn aliases_for(&self, loc: &A::Location) -> Vec<A::Location>;
107}
108
109pub trait LocIterator<'disambiguator, 'fns, A: ValueLocations, Location: 'static + AbiDefaults, D: Disambiguator<A, Self::LocSpec>, F: FunctionQuery<A::Address, Function=FunctionImpl<Location>>> {
110    type Item;
111    type LocSpec;
112    type Iter: Iterator<Item = Self::Item>;
113    // TODO:
114    // this probably needs to grow to know about a table of functions and a mechanism to pick which
115    // one(s? plural?) is called. plural, because `call [rbp]` may have a known finite set of
116    // targets, and a perfectly fine analysis would consider the union of all reads and writes
117    fn iter_locs(self, loc: A::Address, _: &'disambiguator D, functions: &'fns F) -> Self::Iter;
118}
119
120/*
121#[allow(unused)]
122macro_rules! impl_loc_iterator_transition {
123    ($arch:ty) => {
124        impl <'a> LocIterator<$arch::Location> for &'a $arch::Instruction {
125            type Item = (Option<$arch::Location>, Direction);
126            type Iter = String;
127            fn iter_locs<D: Disambiguator<$arch::Location>>(self, _: &mut D) -> Self::Iter {
128                $arch::decompose(self).iter_locs()
129            }
130        }
131    }
132}
133*/