yaxpeax_core/analyses/control_flow/
mod.rs

1// some of the VW-specific types added here are prefixed like `VW_*` and don't match the CamelCase
2// style rustc would like to see
3#![allow(non_camel_case_types)]
4
5use arch::x86_64::MergedContextTable;
6use yaxpeax_x86::x86_64;
7use std::collections::{BTreeMap, HashMap, HashSet, VecDeque};
8use std::hash::Hash;
9use std::ops::Bound::Included;
10use std::fmt::{self, Debug};
11
12use smallvec::SmallVec;
13
14use petgraph;
15use petgraph::graphmap::{GraphMap, Nodes};
16
17use yaxpeax_arch::{Address, AddressBase, AddressDiff, AddressDisplay, Arch, LengthedInstruction};
18
19use arch::DecodeFrom;
20
21use num_traits::Zero;
22
23use ContextRead;
24use ContextWrite;
25
26use memory::MemoryRange;
27
28pub mod deserialize;
29
30use serialize::GraphSerializer;
31
32use serde::Serialize;
33use yaxpeax_x86::long_mode::Opcode;
34
35use serde::ser::SerializeStruct;
36#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
37pub struct Effect<Addr: Address + Debug> {
38    pub(crate) stop_after: bool,
39    // the AddressDiffAmount trait fools `Deserialize`'s proc macro, so we have to explicitly write
40    // the bound serde should use.
41    #[serde(bound(deserialize = "Addr: Address"))]
42    pub dest: Option<Target<Addr>>
43}
44
45impl <Addr: Address + Debug> Effect<Addr> {
46    pub fn is_stop(&self) -> bool {
47        self.stop_after
48    }
49
50    pub fn stop() -> Effect<Addr> {
51        Effect {
52            stop_after: true,
53            dest: None
54        }
55    }
56    pub fn stop_and(dest: Target<Addr>) -> Effect<Addr> {
57        Effect {
58            stop_after: true,
59            dest: Some(dest)
60        }
61    }
62    pub fn cont() -> Effect<Addr> {
63        Effect {
64            stop_after: false,
65            dest: None
66        }
67    }
68    pub fn cont_and(dest: Target<Addr>) -> Effect<Addr> {
69        Effect {
70            stop_after: false,
71            dest: Some(dest)
72        }
73    }
74}
75
76#[derive(Serialize, Deserialize, Clone, PartialEq)]
77pub enum Target<Addr: Address + Debug> {
78    // the AddressDiffAmount trait fools `Deserialize`'s proc macro, so we have to explicitly write
79    // the bound serde should use.
80    #[serde(bound(deserialize = "Addr: Address"))]
81    Relative(AddressDiff<Addr>),
82    #[serde(bound(deserialize = "Addr: Address"))]
83    Absolute(Addr),
84    #[serde(bound(deserialize = "Addr: Address"))]
85    Multiple(Vec<Target<Addr>>), // TODO: ?? jump tables?
86    Indeterminate       // Unknowns? rets? idk
87}
88
89impl<Addr: Address + Debug> Debug for Target<Addr> {
90    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
91        match self {
92            Target::Relative(diff) => {
93                write!(f, "Target::Relative($+{:#02x})", Addr::zero().wrapping_offset(*diff).to_linear())
94            }
95            Target::Absolute(addr) => {
96                write!(f, "Target::Absolute({:#02x})", addr.to_linear())
97            }
98            Target::Multiple(targets) => {
99                write!(f, "Target::Multiple(")?;
100                let mut first = true;
101                for target in targets {
102                    if !first {
103                        write!(f, ", ")?;
104                    }
105                    first = false;
106                    write!(f, "{:?}", target)?;
107                }
108                write!(f, ")")
109            }
110            Target::Indeterminate => {
111                write!(f, "Target::Indeterminate")
112            }
113        }
114    }
115}
116
117pub trait Determinant<T, Addr: Address + Debug> {
118    fn control_flow(&self, _ctx: Option<&T>) -> Effect<Addr>;
119}
120
121#[derive(Debug, Copy, Clone, Serialize, Deserialize)]
122pub struct BasicBlock<Addr> where Addr: Copy + Clone {
123    pub start: Addr,
124    pub end: Addr // inclusive!!
125}
126
127impl<Addr: Copy + Clone> BasicBlock<Addr> {
128    pub fn new(start_addr: Addr, end_addr: Addr) -> BasicBlock<Addr> {
129        BasicBlock {
130            start: start_addr,
131            end: end_addr
132        }
133    }
134}
135
136#[derive(Default)]
137pub struct ControlFlowGraph<A> where A: Address {
138    pub entrypoint: A,
139    pub blocks: BTreeMap<A, BasicBlock<A>>,
140    pub graph: GraphMap<A, (), petgraph::Directed>
141}
142
143impl <A: Address + Hash> Serialize for ControlFlowGraph<A> {
144    fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
145        let mut struc = serializer.serialize_struct("CFG<A>", 3)?;
146        struc.serialize_field("entrypoint", &self.entrypoint)?;
147        struc.serialize_field("blocks", &self.blocks)?;
148        struc.serialize_field("graph", &GraphSerializer::from(&self.graph))?;
149        struc.end()
150    }
151}
152
153#[test]
154fn control_flow_graph_construction() {
155    let mut cfg: ControlFlowGraph<u32> = ControlFlowGraph::new();
156    let nexts = cfg.with_effect(4000 - 1, 4000, &Effect::stop());
157    println!("nexts0: {:?}", nexts);
158    assert!(nexts.len() == 0);
159    let nexts = cfg.with_effect(4008, 4008, &Effect::stop_and(
160        Target::Relative(AddressDiff::from_const(3))));
161    println!("nexts1: {:?}", nexts);
162    assert!(nexts.len() == 1);
163    assert!(nexts[0] == 4012);
164    let nexts = cfg.with_effect(4030, 4031, &Effect::cont_and(
165        Target::Relative(AddressDiff::from_const(-13i32 as u32))));
166    println!("nexts2: {:?}", nexts);
167    assert!(nexts.len() == 2);
168    assert!(nexts.contains(&4018));
169    println!("graph: {:?}", cfg.graph);
170    assert!(nexts.contains(&4031));
171    let expected_blocks: [(u32, u32, Vec<u32>); 4] = [
172        (4000, 4008, vec![4012]),
173//        (0x4009, 0x4011),
174        (4012, 4017, vec![4018]),
175        (4018, 4030, vec![4031, 4018]),
176        (4031, 0xffffffff, vec![])
177    ];
178
179    for (start, end, nexts) in expected_blocks.iter() {
180        let block = cfg.get_block(*start);
181        println!("block at {}: ({}, {}). Expecting: ({}, {})", start, block.start, block.end, start, end);
182        assert!(block.start == *start);
183        assert!(block.end == *end);
184        println!("Expected neighbors {:?}", nexts);
185        let neighbors = cfg.graph.neighbors(*start).collect::<Vec<u32>>();
186        println!("Actual neighbors {:?}", neighbors);
187        for i in 0..nexts.len() {
188            assert!(nexts[i] == neighbors[i]);
189        }
190    }
191}
192
193#[test]
194fn control_flow_graph_construction_2() {
195    let mut cfg: ControlFlowGraph<u32> = ControlFlowGraph::new();
196    cfg.with_effect(1000 - 1, 1000, &Effect::stop());
197    cfg.with_effect(1009, 1010, &Effect::cont_and(
198        Target::Relative(AddressDiff::from_const(-10i32 as u32))));
199    cfg.with_effect(1019,  1020, &Effect::cont_and(
200        Target::Relative(AddressDiff::from_const(-11i32 as u32))));
201
202    // TODO
203//    println!("cfg:\n  graph: {:?}\n  blocks: {:?}", cfg.graph, cfg.blocks);
204//    assert!(false == true);
205}
206
207#[test]
208fn control_flow_graph_construction_3() {
209    /*
210     * OK. the issue here was something like having a block
211     * [0, 9], that leads into [10, 19],
212     * but addint a split that stops at 4 yielded
213     * [0, 4], [4, 9], [10, 19]
214     * where nothing was connected.
215     */
216    let mut cfg: ControlFlowGraph<u32> = ControlFlowGraph::new();
217    cfg.with_effect(0, 1, &Effect::stop_and(
218        Target::Absolute(10)
219    ));
220    /*
221     * So now we have [0, 0], [1, 9], [10, ..]
222     * with 0 -> 10, 1 -> 10
223     */
224    for n in [0, 1, 10].iter() {
225        assert!(cfg.graph.contains_node(*n));
226    }
227    for (start, dest) in [(0, 10), (1, 10)].iter() {
228        assert!(cfg.graph.contains_edge(*start, *dest));
229    }
230
231    cfg.with_effect(4, 5, &Effect::stop());
232    /*
233     * and now we should have [0, 0], [1, 4], [5, 9], [10, ..]
234     * with 0 -> 10, 5 -> 10
235     */
236    for n in [0, 1, 5, 10].iter() {
237        assert!(cfg.graph.contains_node(*n));
238    }
239    for (start, dest) in [(0, 10), (5, 10)].iter() {
240        assert!(cfg.graph.contains_edge(*start, *dest));
241    }
242}
243
244impl <A> ControlFlowGraph<A> where A: Address + Debug + petgraph::graphmap::NodeTrait {
245    pub fn new() -> ControlFlowGraph<A> {
246        let mut blocks = BTreeMap::new();
247        blocks.insert(A::min_value(), BasicBlock::new(A::min_value(), A::max_value()));
248        let mut cfg = ControlFlowGraph {
249            entrypoint: A::zero(),
250            blocks,
251            graph: GraphMap::new()
252        };
253        cfg.graph.add_node(A::min_value());
254        cfg
255    }
256
257    pub fn from(addr: A) -> ControlFlowGraph<A> {
258        let mut blocks = BTreeMap::new();
259        blocks.insert(A::min_value(), BasicBlock::new(A::min_value(), A::max_value()));
260        let mut cfg = ControlFlowGraph {
261            entrypoint: addr,
262            blocks,
263            graph: GraphMap::new()
264        };
265        cfg.graph.add_node(A::min_value());
266        cfg
267    }
268
269    pub fn blocks(&self) -> Nodes<A> {
270        self.graph.nodes()
271    }
272
273    pub fn destinations(&self, block: A) -> Vec<A> {
274        self.graph.neighbors_directed(block, petgraph::Direction::Outgoing).into_iter().collect()
275    }
276
277    pub fn sources(&self, block: A) -> Vec<A> {
278        self.graph.neighbors_directed(block, petgraph::Direction::Incoming).into_iter().collect()
279    }
280
281    /*
282     * U should be a function, function_table should be an oracle
283     * we can query to answer "does there exist a function at this place?"
284     *
285     * TODO: are there other reasons a basic block edge should be
286     * disincluded?
287     */
288    pub fn get_function<U>(&self, start: A, function_table: &HashMap<A, U>) -> ControlFlowGraph<A> {
289        tracing::trace!("get_function!");
290        let mut result: ControlFlowGraph<A> = ControlFlowGraph::from(start);
291        result.graph = GraphMap::new();
292        result.graph.add_node(start);
293        result.blocks = BTreeMap::new();
294
295        let mut bfs_deque = VecDeque::new();
296        bfs_deque.push_back(start);
297        let mut bfs_visited = HashSet::new();
298        bfs_visited.insert(start);
299
300        while let Some(next) = bfs_deque.pop_front() {
301            for outedge in self.graph.neighbors_directed(next, petgraph::Direction::Outgoing) {
302                if bfs_visited.insert(outedge) {
303                    // don't walk into another function - don't have to check `next == start`
304                    // because start was visited already and bfs_visited.insert() would never be
305                    // true anyway.
306                    if !function_table.contains_key(&outedge) {
307                        bfs_deque.push_back(outedge);
308                        tracing::trace!("add_edge 0: {:x} {:x}", next.to_linear(), outedge.to_linear());
309                        result.graph.add_edge(next, outedge, ());
310                    }
311                }
312            }
313
314            tracing::trace!("block insert 0: {:x} : {:x}", next.to_linear(), self.get_block(next).end.to_linear());
315            result.blocks.insert(next, *self.get_block(next));
316        }
317        return result;
318    }
319
320    pub fn get_block<'a>(&'a self, addr: A) -> &'a BasicBlock<A> {
321        let (_block_idx, block) = self.blocks.range((Included(&A::min_value()), Included(&addr))).rev().next().expect("there should be a basic block covering all addresses, this is a control flow data structure error");
322        block
323    }
324    /*
325     * Adjust control flow linkage as appropriate with effect `effect`
326     * applied at `at` with the next linear instruction at `next`.
327     *
328     * This takes `next` explicitly to avoid taking an instruction
329     * just to `instr.len()` it. Add at the call site.
330     *
331     * This may be best explained with an example:
332     *   0x1234: 54030400: jmp $+4 (imaginary ISA)
333     * the effect from here is a relative branch with no continuation
334     * to relative +4. the +4 is computed with respect to the start
335     * of the *next* instruction (eg where PC points *after* this
336     * instruction). To find the basic block this affects, we need
337     * the address of the instruction that causes control flow
338     * but also the address of the next instruction for relative branch
339     * destination computations.
340     *
341     * So at = 0x1234, next = 0x1238, effect = stop_and(Relative(4)).
342     */
343    pub fn with_effect(&mut self, at: A, next: A, effect: &Effect<A>) -> SmallVec<[A; 2]> {
344        // splits the basic block enclosing split_loc into two basic blocks,
345        // one ending directly before split_loc, and one starting at split_loc
346        // continuing to the end of the original basic block.
347        //
348        // If split_loc is the start of the enclosing basic block, no change
349        // is made (the former block would be nonsense spanning
350        //  [split_loc, split_loc - 1]
351        // and thus would not be added, yielding no net change.
352        fn add_split<A: Address + petgraph::graphmap::NodeTrait>(graph: &mut ControlFlowGraph<A>, split_loc: A, preserve_edges: bool) -> bool {
353            // find a block before `split_loc`. either there is one, or `split_loc` is the lowest
354            // value of `A`, and there is no possible start address before it.
355            let mut iter = graph.blocks.range_mut((Included(&A::min_value()), Included(&split_loc))).rev();
356            let last_block: &mut BasicBlock<A> = if let Some((_, prev)) = iter.next() {
357                prev
358            } else {
359                // no prior block, so split_loc should be the first address.
360                assert_eq!(split_loc.to_linear(), A::min_value().to_linear());
361                return false;
362            };
363
364            if last_block.start == split_loc {
365                return false;
366            }
367
368            // ok, we have a basic block that starts before split_loc, so resize it to end at
369            // `split_loc - 1`, with `[split_loc, block.end]` as the block we
370            // must insert after it
371            // println!("original last block: {:x} : {:x}", last_block.start.to_linear(),
372            // last_block.end.to_linear());
373            let split_loc_end = last_block.end;
374            let last_start = last_block.start;
375            last_block.end = split_loc - AddressDiff::one();
376            // println!("last block: {:x} : {:x}", last_start.to_linear(), last_block.end.to_linear());
377            tracing::debug!("block insert because splitting block: {:x} : {:x}", split_loc.to_linear(), split_loc_end.to_linear());
378            graph.blocks.insert(split_loc, BasicBlock::new(split_loc, split_loc_end));
379
380            let neighbors: Vec<A> = graph.graph.neighbors(last_start).into_iter().collect();
381            for next in neighbors.into_iter() {
382                tracing::debug!("Removing destination edge because of split: last_start = {:x} split_loc = {:x} {:x} {:x} -> {:x} {:x}",  last_start.to_linear(), split_loc.to_linear(), last_start.to_linear(), next.to_linear(), split_loc.to_linear(), next.to_linear());
383                graph.graph.remove_edge(last_start, next);
384                tracing::debug!("Adding edge to bottom block: {:x} {:x}", split_loc.to_linear(), next.to_linear());
385                graph.graph.add_edge(split_loc, next, ());
386            }
387            if preserve_edges {
388                tracing::debug!("add_edge from first part of start to last part: {:x} {:x}", split_loc.to_linear(), last_start.to_linear());
389                graph.graph.add_edge(last_start, split_loc, ());
390            } else {
391//                    graph.graph.add_node(new_block.start);
392            }
393            true
394        }
395
396        let mut result: SmallVec<[A; 2]> = SmallVec::new();
397
398        let enclosing_block_start: A = self.get_block(at).start;
399
400        tracing::debug!("with_effect: {:x} {:?} stop_after = {:?} dest = {:?}", enclosing_block_start.to_linear(), effect, effect.stop_after, effect.dest);
401
402        if effect.stop_after {
403            add_split(self, next, false);
404        } else {
405            // if this is cont, AND there is nothing to branch to, this
406            // does not really effect control flow.
407            //
408            // so if this is cont and there is an out-dest, this ends
409            // the basic block.
410            if effect.dest.is_some() {
411                let dest_addr = next;
412                if add_split(self, dest_addr, true) { // TODO: t.len());
413                }
414                result.push(dest_addr);
415                tracing::debug!("add_edge as part of branch: {:x} {:x}", enclosing_block_start.to_linear(), dest_addr.to_linear());
416                self.graph.add_edge(enclosing_block_start, dest_addr, ());
417            }
418        }
419
420//        let enclosing_block_start: A = self.get_block(at).start;
421
422        match &effect.dest {
423            // Ok, have to clip the containing basic block
424            // if this is not going to the start of an existing basic block
425            Some(Target::Relative(rel)) => {
426                let dest_addr = next.wrapping_offset(*rel);
427                add_split(self, dest_addr, true);
428                result.push(dest_addr);
429                let enclosing_block_start: A = self.get_block(at).start;
430                tracing::debug!("adding relative jump edge 2: {:x} {:x}", enclosing_block_start.to_linear(), dest_addr.to_linear());
431                self.graph.add_edge(enclosing_block_start, dest_addr, ());
432            },
433            Some(Target::Absolute(dest)) => {
434                let dest_addr = *dest;
435                add_split(self, dest_addr, true);
436                result.push(dest_addr);
437//              let enclosing_block_start: A = self.get_block(at).start;
438                tracing::debug!("adding absolute jump edge: {:x} {:x}", enclosing_block_start.to_linear(), dest_addr.to_linear());
439                self.graph.add_edge(enclosing_block_start, dest_addr, ());
440            }
441            Some(Target::Multiple(_targets)) => {},
442            _dest => {
443                // TODO: unhandled!
444            }
445        }
446        result
447    }
448}
449
450pub fn explore_all<'a, A, U, M, Contexts, Update, InstrCallback>(
451    data: &M,
452    contexts: &'a mut Contexts,
453    cfg: &mut ControlFlowGraph<A::Address>,
454    starts: Vec<A::Address>,
455    on_instruction_discovered: &InstrCallback
456) where
457    A: Arch + DecodeFrom<M>,
458    M: MemoryRange<A> + ?Sized,
459    Contexts: ContextRead<A, U> + ContextWrite<A, Update>,
460    A::Address: Hash + petgraph::graphmap::NodeTrait + num_traits::WrappingAdd,
461    A::Instruction: Debug + Determinant<U, A::Address>,
462    InstrCallback: Fn(&A::Instruction, A::Address, &Effect<A::Address>, &Contexts) -> Vec<(A::Address, Update)>,
463    // for<'x, 'y> crate::memory::repr::cursor::UnboundedReader<'x, 'y, A, M>: yaxpeax_arch::Reader<A::Address, A::Word>
464{
465    let mut to_explore: VecDeque<A::Address> = VecDeque::new();
466    let mut seen: HashSet<A::Address> = HashSet::new();
467
468    for addr in starts.iter() {
469        to_explore.push_back(*addr);
470        seen.insert(*addr);
471
472        if *addr > A::Address::zero() {
473            // we've been told by `starts` that control flow leads here
474            // so it must be the start of a basic block.
475            tracing::trace!("with_effect 1: {:x} {:x} effect::stop", (*addr - AddressDiff::one()).to_linear(), (*addr).to_linear());
476            cfg.with_effect(*addr - AddressDiff::one(), *addr, &Effect::stop());
477        }
478    }
479
480    while let Some(addr) = to_explore.pop_front() {
481        let dests = explore_control_flow(data, contexts, cfg, addr, on_instruction_discovered);
482        tracing::trace!("CFG Yield: {:x} -> {:?}", addr.to_linear(), dests);
483        // for b in cfg.blocks(){
484
485        //     let edges = cfg.destinations(b);
486        //     println!("{:x} -> {:?}", b.to_linear(), edges);
487        // }
488        // if addr.to_linear() == 0xfa357{
489            //println!("CFG Yield: {:x} -> {:?}", addr.to_linear(), dests);
490        // }
491
492        for next in dests.into_iter() {
493            if !seen.contains(&next) {
494                to_explore.push_back(next);
495                seen.insert(next);
496            }
497        }
498    }
499}
500
501pub struct AnalysisBuilder<
502    'memory,
503    'ctx,
504    A: Arch,
505    M: MemoryRange<A> + ?Sized,
506    U,
507    Update,
508    Contexts: ContextWrite<A, Update> + ContextRead<A, U>,
509> {
510    memory: &'memory M,
511    starts: Option<Vec<A::Address>>,
512    contexts: &'ctx mut Contexts,
513    on_instruction_discovered: fn(&A::Instruction, A::Address, &Effect<A::Address>, &Contexts) -> Vec<(A::Address, Update)>,
514    _u: std::marker::PhantomData<U>,
515}
516
517impl<'memory, 'ctx, A, M: MemoryRange<A> + ?Sized, U, Update, Contexts> AnalysisBuilder<'memory, 'ctx, A, M, U, Update, Contexts> where
518    A: Arch + DecodeFrom<M>,
519    Contexts: ContextWrite<A, Update> + ContextRead<A, U>,
520    A::Address: Hash + petgraph::graphmap::NodeTrait + num_traits::WrappingAdd,
521    A::Instruction: Debug + Determinant<U, A::Address>,
522{
523    pub fn new(memory: &'memory M, contexts: &'ctx mut Contexts) -> Self {
524        fn do_nothing<
525            A: Arch,
526            U,
527            Update,
528            Contexts: ContextWrite<A, Update> + ContextRead<A, U>
529        >(_inst: &A::Instruction, _addr: A::Address, _effect: &Effect<A::Address>, _ctx: &Contexts) -> Vec<(A::Address, Update)> {
530            Vec::new()
531        }
532        Self {
533            memory,
534            starts: None,
535            contexts,
536            on_instruction_discovered: do_nothing,
537            _u: std::marker::PhantomData,
538        }
539    }
540
541    pub fn with_entrypoints(mut self, starts: Vec<A::Address>) -> Self {
542        self.starts = Some(starts);
543        self
544    }
545
546    pub fn evaluate(self) -> ControlFlowGraph<A::Address> {
547        let mut cfg = ControlFlowGraph::new();
548        self.evaluate_into(&mut cfg);
549        cfg
550    }
551
552    pub fn evaluate_into(self, cfg: &mut ControlFlowGraph<A::Address>) {
553        let Self {
554            memory,
555            contexts,
556            starts,
557            on_instruction_discovered,
558            ..
559        } = self;
560        if let Some(starts) = starts {
561            explore_all(memory, contexts, cfg, starts, &on_instruction_discovered);
562        } else {
563            explore_all(memory, contexts, cfg, vec![A::Address::zero()], &on_instruction_discovered);
564        }
565    }
566}
567
568pub fn explore_control_flow<'a, A, U, M, Contexts, Update, InstrCallback>(
569    data: &M,
570    contexts: &'a mut Contexts,
571    cfg: &mut ControlFlowGraph<A::Address>,
572    start: A::Address,
573    on_instruction_discovered: &InstrCallback
574) -> SmallVec<[A::Address; 2]> where
575    A: Arch + DecodeFrom<M>,
576    M: MemoryRange<A> + ?Sized,
577    Contexts: ContextWrite<A, Update> + ContextRead<A, U>,
578    A::Address: Hash + petgraph::graphmap::NodeTrait + num_traits::WrappingAdd,
579    A::Instruction: Debug + Determinant<U, A::Address>,
580    InstrCallback: Fn(&A::Instruction, A::Address, &Effect<A::Address>, &Contexts) -> Vec<(A::Address, Update)> {
581    // we don't know if we've just observed some flow to start,
582    // or that start has already been explored,
583    // so for now just go through start to end like normal
584    //
585    // can't even assume if the block ends at the same end
586    // as we find that we've already seen this, because that
587    // would ambiguify single instruction basic blocks
588    let mut addr = start;
589    loop {
590        let range = match data.range_from(addr) {
591            Some(range) => range,
592            None => {
593                use petgraph::Direction;
594                println!("Reached {}, which is not a valid address - marking start ({}) as hazard.", addr.show(), start.show());
595                let problem_blocks = cfg.graph.neighbors_directed(start, Direction::Incoming).collect::<Vec<A::Address>>();
596                println!("Problem blocks: {:?}", problem_blocks);
597                for problem in problem_blocks.iter() {
598                    println!("Remove edge because it is a part of a problematic block: {:x} {:x}", problem.to_linear(), start.to_linear());
599                    cfg.graph.remove_edge(*problem, start);
600                }
601                return SmallVec::new();
602            }
603        };
604        match A::decode_from(&range) {
605            Ok(instr) => {
606                let effect = {
607                    let ctx = contexts.at(&addr);
608                    //println!("computing control flow at {}", addr.show());
609                    instr.control_flow(Some(&ctx))
610                };
611                let results = on_instruction_discovered(&instr, addr, &effect, contexts);
612                for (addr, update) in results.into_iter() {
613                    contexts.put(addr, update);
614                }
615                match effect {
616                    Effect { stop_after: false, dest: None } => {
617                        // we can continue!
618                        addr += instr.len();
619                    },
620                    // and for any other cases...
621                    effect @ _ => {
622                        println!("with_effect because we are at end of a block: {:x} {:x} {:?}", addr.to_linear(), addr.wrapping_offset(instr.len()).to_linear(), &effect );
623                        return cfg.with_effect(addr, addr.wrapping_offset(instr.len()), &effect);
624                    }
625                }
626            },
627            Err(_) => {
628                return SmallVec::new();
629            }
630        }
631    }
632}
633
634//                                        v-- (Addr, T). this probably will have to go in favor of Vec<u8> and T:
635//                                        Decodable?
636pub fn build_global_cfgs<'a, A: Arch, U, Update, UTable>(ts: &Vec<(A::Address, A::Instruction)>, contexts: &'a UTable, _start: u16) -> ControlFlowGraph<A::Address>
637    where
638        A::Instruction: Determinant<U, A::Address> + LengthedInstruction<Unit=AddressDiff<A::Address>>,
639        A::Address: petgraph::graphmap::NodeTrait,
640        UTable: ContextRead<A, U> + ContextWrite<A, Update>
641{
642    let mut cfg = ControlFlowGraph::new();
643
644    // splits the basic block enclosing split_loc into two basic blocks,
645    // one ending directly before split_loc, and one starting at split_loc
646    // continuing to the end of the original basic block.
647    //
648    // If split_loc is the start of the enclosing basic block, no change
649    // is made (the former block would be nonsense spanning
650    //  [split_loc, split_loc - 1]
651    // and thus would not be added, yielding no net change.
652    fn add_split<A>(blocks: &mut BTreeMap<A, BasicBlock<A>>, split_loc: A) where A: Address {
653        // find a block before `split_loc`. either there is one, or `split_loc` is the lowest
654        // value of `A`, and there is no possible start address before it.
655        let mut iter = blocks.range_mut((Included(&A::min_value()), Included(&split_loc))).rev();
656
657        let last_block: &mut BasicBlock<A> = if let Some((_, prev)) = iter.next() {
658            prev
659        } else {
660            // no prior block, so split_loc should be the first address.
661            assert_eq!(split_loc.to_linear(), A::min_value().to_linear());
662            return;
663        };
664
665        if last_block.start == split_loc {
666            return;
667        }
668
669        // ok, we have a basic block that starts before split_loc, so resize it to end at
670        // `split_loc - 1`, with `[split_loc, block.end]` as the block we must insert after it
671        let split_loc_end = last_block.end;
672        last_block.end = split_loc - AddressDiff::one();
673        blocks.insert(split_loc, BasicBlock::new(split_loc, split_loc_end));
674    }
675
676    for (addr, t) in ts {
677        let effect = t.control_flow(Some(&contexts.at(addr)));
678        if effect.stop_after {
679            add_split(&mut cfg.blocks, addr.wrapping_offset(t.len()));
680        }
681        match effect.dest {
682            // Ok, have to clip the containing basic block
683            // if this is not going to the start of an existing basic block
684            Some(Target::Relative(rel)) => {
685                add_split(&mut cfg.blocks, addr.wrapping_offset(t.len()).wrapping_offset(rel));
686            },
687            Some(Target::Absolute(dest)) => {
688                add_split(&mut cfg.blocks, dest);
689            }
690            Some(Target::Multiple(_targets)) => {
691                /*
692                for target in targets {
693                    match target {
694                        Target::Relative(rel) => {
695                            add_split(&mut cfg.blocks, addr.wrapping_offset(t.len()).wrapping_offset(rel));
696                        },
697                        Target::Absolute(dest) => {
698                            add_split(&mut cfg.blocks, dest);
699                        }
700                        _ => {
701                            // TODO: handle these.
702                            panic!("Unhandled");
703                        }
704                    }
705                }
706                */
707            },
708            _ => {
709                // TODO: unhandled!
710            }
711        }
712    }
713
714    // Add to digraph here.
715    // ssa on this?
716    // bfs to find reachable from a call?
717    // all this and more, next time..
718
719    {
720    let mut block_iter = cfg.blocks.values();
721
722    let mut curr_block = block_iter.next().expect("basic blocks should span all instructions.");
723    // addr should be increasing, so we *will* reach the end of this block eventually..
724    for (addr, t) in ts {
725        if addr == &curr_block.end {
726            let effect = t.control_flow(Some(&contexts.at(addr)));
727            if !effect.stop_after {
728                cfg.graph.add_edge(curr_block.start, addr.wrapping_offset(t.len()), ());
729            }
730            match effect.dest {
731                Some(Target::Relative(rel)) => {
732                    cfg.graph.add_edge(curr_block.start, addr.wrapping_offset(t.len()).wrapping_offset(rel), ());
733                },
734                Some(Target::Absolute(dest)) => {
735                    cfg.graph.add_edge(curr_block.start, dest, ());
736                },
737                Some(Target::Multiple(_targets)) => {
738                    /*
739                    for target in targets {
740                        match target {
741                            Target::Relative(rel) => {
742                                cfg.graph.add_edge(curr_block.start, addr.wrapping_offset(t.len()).wrapping_offset(rel), ());
743                            },
744                            Target::Absolute(dest) => {
745                                cfg.graph.add_edge(curr_block.start, dest, ());
746                            }
747                            _ => {
748                                // TODO: handle these.
749                                panic!("Unhandled");
750                            }
751                        }
752                    }
753                    */
754                },
755                _ => {
756                    // TODO: handle these cases too...
757                }
758            }
759            curr_block = block_iter.next().expect("basic blocks should span all instructions.");
760        }
761    }
762    }
763
764    cfg
765}
766
767use analyses::Value;
768use analyses::ValueRes;
769use analyses::control_flow;
770
771pub struct ControlFlowAnalysis<A: Address + Debug> {
772    pub(crate) effect: control_flow::Effect<A>,
773}
774
775impl <A: Address + Debug> ControlFlowAnalysis<A> {
776    pub fn new() -> Self {
777        Self {
778            effect: control_flow::Effect::cont(),
779        }
780    }
781
782    pub fn into_effect(self) -> control_flow::Effect<A> {
783        self.effect
784    }
785}
786
787impl <A: Address + Debug> From<AddressDiff<A>> for control_flow::Effect<A> {
788    fn from(item: AddressDiff<A>) -> Self {
789        control_flow::Effect::cont_and(
790            control_flow::Target::Relative(item)
791        )
792    }
793}
794
795pub trait ToAddrDiff: yaxpeax_arch::Address {
796    fn translate_offset(from: i64) -> AddressDiff<Self>;
797}
798
799impl ToAddrDiff for u64 {
800    fn translate_offset(from: i64) -> AddressDiff<Self> {
801        AddressDiff::from_const(from as u64)
802    }
803}
804
805impl ToAddrDiff for u32 {
806    fn translate_offset(from: i64) -> AddressDiff<Self> {
807        AddressDiff::from_const(from as u64 as u32)
808    }
809}
810
811impl <A: Address + ToAddrDiff + Debug> Value for control_flow::Effect<A> {
812    fn unknown() -> Self {
813        control_flow::Effect::stop()
814    }
815
816    fn from_const(c: i64) -> Self {
817        control_flow::Effect::stop_and(
818            control_flow::Target::Relative(A::translate_offset(c))
819        )
820    }
821
822    fn from_set(effects: &[Self]) -> Self {
823        debug_assert!(effects.len() != 0);
824        let mut stop_after = true;
825        let mut target: Option<Target<A>> = None;
826
827        for effect in effects {
828            stop_after &= effect.is_stop();
829
830            let merged_target = match (target, effect.dest.as_ref()) {
831                (None, None) => {
832                    None
833                }
834                (None, Some(o)) => {
835                    Some(o.clone())
836                }
837                (Some(o), None) => {
838                    Some(o)
839                }
840                // welllll this ought to be deduplicated...
841                (Some(Target::Multiple(ref l)), Some(Target::Multiple(r))) => {
842                    let mut vec = l.clone();
843                    vec.extend_from_slice(&r);
844                    Some(Target::Multiple(vec))
845                }
846                (Some(Target::Multiple(l)), Some(r)) => {
847                    let mut vec = l.clone();
848                    vec.push(r.clone());
849                    Some(Target::Multiple(vec))
850                }
851                (Some(ref l), Some(Target::Multiple(r))) => {
852                    let mut vec = r.clone();
853                    vec.push(l.clone());
854                    Some(Target::Multiple(vec))
855                }
856                (Some(l), Some(r)) => {
857                    if &l == r {
858                        Some(l)
859                    } else {
860                        Some(Target::Multiple(vec![l, r.clone()]))
861                    }
862                }
863            };
864            target = merged_target;
865        }
866
867        Effect {
868            stop_after,
869            dest: target,
870        }
871    }
872
873    fn to_const(&self) -> Option<i64> {
874        None
875    }
876
877    #[inline(always)]
878    fn add(&self, other: &Self) -> ValueRes<Self> {
879        if (self.stop_after == true && self.dest.is_none()) ||
880            (other.stop_after == true && other.dest.is_none()) {
881
882            return ValueRes::literal(Self::unknown());
883        }
884
885        match (self.dest.as_ref(), other.dest.as_ref()) {
886            (None, Some(control_flow::Target::Relative(rel))) |
887            (Some(control_flow::Target::Relative(rel)), None) => {
888                ValueRes::literal(control_flow::Effect {
889                    stop_after: self.stop_after || other.stop_after,
890                    dest: Some(control_flow::Target::Relative(*rel))
891                })
892            },
893            (Some(control_flow::Target::Relative(l)), Some(control_flow::Target::Relative(r))) => {
894                ValueRes::literal(control_flow::Effect {
895                    stop_after: self.stop_after || other.stop_after,
896                    dest: Some(control_flow::Target::Relative(
897                        A::zero().wrapping_offset(*l).wrapping_offset(*r).diff(&A::zero()).expect("can compute diff")
898                    ))
899                })
900            }
901            _ => {
902                panic!("bad add: {:?} + {:?}", self, other);
903            }
904        }
905    }
906}
907
908macro_rules! impl_control_flow {
909    /*
910    ($semantic:expr, $arch:ty, $inst_ty:ty, ) => {
911        impl_control_flow!($semantic, $arch, |inst| { None }, );
912    };
913    */
914    ($semantic:expr, $arch:ty, $inst_ty:ty, $fixup:expr, ) => {
915        impl <T> $crate::analyses::control_flow::Determinant<T, <$arch as yaxpeax_arch::Arch>::Address> for $inst_ty {
916            fn control_flow(&self, _ctx: Option<&T>) -> control_flow::Effect<<$arch as yaxpeax_arch::Arch>::Address> {
917                let fixup: fn(&$inst_ty) -> Option<control_flow::Effect<<$arch as yaxpeax_arch::Arch>::Address>> = $fixup;
918                if let Some(effect) = fixup(self) {
919                    return effect;
920                }
921                let mut instr_control_flow = $crate::analyses::control_flow::ControlFlowAnalysis::new();
922                $semantic((), self, &mut instr_control_flow);
923                instr_control_flow.into_effect()
924            }
925        }
926    };
927}
928
929pub fn check_cfg_integrity(blocks: &BTreeMap<u64, VW_Block>, graph: &GraphMap<u64, (), petgraph::Directed>){
930    for (addr,block) in blocks.iter(){
931        //1. check that key points to start of block
932        assert!(*addr == block.start);
933        //2. check that block is in the graph
934        assert!(graph.contains_node(*addr));
935        //3. check that there are no overlapping blocks
936        for (a2,b2) in blocks.iter(){
937            if addr == a2 {continue};
938            let before = block.end < b2.start;
939            let after = block.start > b2.end;
940            if !(before || after) {
941                println!("{:?} {:?}", block.as_str(), b2.as_str());
942                assert!(false);
943            }
944        }
945    }
946    //4. check that same number of blocks and graph nodes.
947    //   along with check 3, shows that block nodes and graph nodes are the
948    //   same
949    assert!(blocks.keys().len() == graph.node_count());
950}
951
952#[derive(Debug, Copy, Clone)]
953pub struct VW_Block {
954    pub start: u64,
955    pub end: u64, // inclusive!!
956}
957
958impl VW_Block {
959    pub fn new(start_addr: u64, end_addr: u64) -> VW_Block {
960        VW_Block {
961            start: start_addr,
962            end: end_addr
963        }
964    }
965
966    pub fn as_str(&self) -> std::string::String{
967        return format!("Block[0x{:x}:0x{:x}]", self.start, self.end);
968    }
969}
970
971#[derive(Default, Clone)]
972pub struct VW_CFG {
973    pub entrypoint: u64,
974    pub blocks: BTreeMap<u64, VW_Block>,
975    pub graph: GraphMap<u64, (), petgraph::Directed>
976}
977
978impl VW_CFG{
979
980    pub fn new(entrypoint: u64) -> VW_CFG {
981        VW_CFG {
982            entrypoint: entrypoint,
983            blocks: BTreeMap::new(),
984            graph: GraphMap::new()
985        }
986    }
987
988    pub fn prev_block(&self, addr: u64) -> Option<VW_Block>{
989        for (_block_start, b) in self.blocks.iter(){
990            if (addr > b.start) && (addr < b.end) {
991                return Some(*b);
992            }
993        }
994        None
995    }
996
997    pub fn get_block(&self, addr: u64) -> &VW_Block {
998        &self.blocks[&addr]
999    }
1000
1001    pub fn destinations(&self, addr: u64) -> Vec<u64> {
1002        self.graph.neighbors_directed(addr, petgraph::Direction::Outgoing).into_iter().collect()
1003    }
1004
1005    pub fn predecessors(&self, addr: u64) -> Vec<u64> {
1006        self.graph.neighbors_directed(addr, petgraph::Direction::Incoming).into_iter().collect()
1007    }
1008
1009    pub fn check_integrity(&self){
1010        check_cfg_integrity(&self.blocks, &self.graph);
1011        // for (addr,block) in self.blocks.iter(){
1012        //     //1. check that key points to start of block
1013        //     assert!(*addr == block.start);
1014        //     //2. check that block is in the graph
1015        //     assert!(self.graph.contains_node(*addr));
1016        //     //3. check that there are no overlapping blocks
1017        //     for (a2,b2) in self.blocks.iter(){
1018        //         if addr == a2 {continue};
1019        //         let before = block.end < b2.start;
1020        //         let after = block.start > b2.end;
1021        //         if !(before || after) {
1022        //             println!("{:?} {:?}", block.as_str(), b2.as_str());
1023        //             assert!(false);
1024        //         }
1025        //     }
1026        // }
1027        // //4. check that same number of blocks and graph nodes.
1028        // //   along with check 3, shows that block nodes and graph nodes are the
1029        // //   same
1030        // assert!(self.blocks.keys().len() == self.graph.node_count());
1031    }
1032}
1033
1034#[derive(Default)]
1035pub struct VW_CFG_Builder{
1036    pub cfg: VW_CFG,
1037    pub switch_targets: Option<HashMap<u64, std::vec::Vec<i64>>>,
1038    pub unresolved_jumps: u32,
1039}
1040
1041impl VW_CFG_Builder {
1042
1043    pub fn new(entrypoint : u64, switch_targets: Option<&HashMap<u64, std::vec::Vec<i64>>>) -> VW_CFG_Builder {
1044        VW_CFG_Builder {
1045            cfg: VW_CFG::new(entrypoint),
1046            switch_targets : switch_targets.map(|x| x.clone()),
1047            unresolved_jumps : 0,
1048        }
1049    }
1050
1051    //split block_to_split at split_addr
1052    fn apply_split(&mut self, block_to_split: &VW_Block, split_addr: u64){
1053        //update block index
1054        self.cfg.blocks.remove(&block_to_split.start);
1055        let b1 = VW_Block::new(block_to_split.start , split_addr - 1);
1056        let b2 = VW_Block::new(split_addr, block_to_split.end);
1057        self.cfg.blocks.insert(block_to_split.start, b1);
1058        self.cfg.blocks.insert(split_addr, b2);
1059
1060        //update graph
1061        self.cfg.graph.remove_node(block_to_split.start);
1062        self.cfg.graph.add_node(block_to_split.start);
1063        self.cfg.graph.add_node(split_addr);
1064
1065        let neighbors: Vec<u64> = self.cfg.graph.neighbors(block_to_split.start).into_iter().collect();
1066        for next in neighbors.into_iter() {
1067            self.cfg.graph.remove_edge(block_to_split.start, next);
1068            self.cfg.graph.add_edge(split_addr, next, ());
1069        }
1070        self.cfg.graph.add_edge(block_to_split.start, split_addr, ());
1071
1072    }
1073
1074    // Split if inside block (not first or last address)
1075    fn maybe_apply_split(&mut self, split_addr: u64){
1076        let block = self.cfg.prev_block(split_addr);
1077        if let Some(block_to_split) = block {
1078            self.apply_split(&block_to_split, split_addr)
1079        };
1080    }
1081
1082
1083    fn vw_decode<M>(&mut self, data: &M, contexts: &MergedContextTable, start: u64) -> Option<(Vec<u64>, VW_Block)> where M: MemoryRange<x86_64>{
1084        let mut addr = start;
1085        //show_instruction(data,contexts,addr,None);
1086        loop {
1087            //show_instruction(data,contexts,addr,None);
1088            let mut range = match data.range_from(addr) {
1089                Some(range) => range,
1090                None => { tracing::warn!("Decoding range error"); return None; }
1091            };
1092            match x86_64::decode_from(&mut range) {
1093                Ok(instr) => {
1094                    match get_effect(contexts, &instr, addr) {
1095                        Effect { stop_after: false, dest: None } => {
1096                            // we can continue!
1097                            addr += instr.len();
1098                            // if we hit an existing block, stop decoding
1099                            if self.cfg.blocks.contains_key(&addr){
1100                                let b = VW_Block::new(start, addr - 1);
1101                                //println!("Hit already existing block: 0x{:x}", addr);
1102                                return Some((vec![addr],b));
1103                            }
1104                        },
1105                        // and for any other cases...
1106                        effect @ _ => {
1107                            //println!("Maybe branch? effect = {:?} {:x}", &effect,  addr + instr.len());
1108                            let dsts = self.effect_to_destinations(&effect, addr + instr.len(), &instr, addr);
1109                            let b = VW_Block::new(start, addr + instr.len() - 1);
1110                            //println!("Hit branch: {:?}", dsts);
1111                            return Some((dsts,b))
1112                        }
1113                    }
1114                },
1115                Err(e) => {
1116                    tracing::warn!("Decode error: {:?} @ {:x}", e, addr);
1117                    panic!("Decode error!");
1118                    // return None;
1119                }
1120            }
1121        }
1122    }
1123
1124    fn process_job<M>(&mut self, data: &M, contexts: &MergedContextTable, start: u64) -> Vec<u64> where M: MemoryRange<x86_64>{
1125        //if addr is inside of a block, apply split and return no destinations
1126        self.maybe_apply_split(start);
1127
1128        let decode_result = self.vw_decode(data, contexts, start);
1129        //println!("Decode: {:x} -> {:?}", start, decode_result);
1130        if let None = decode_result{ 
1131            return vec![];
1132        }
1133        let (dsts,block) = decode_result.unwrap();
1134        self.cfg.blocks.insert(start, block);
1135        self.cfg.graph.add_node(start);
1136        for dst in dsts.clone() {
1137            self.cfg.graph.add_edge(start, dst, ());
1138            self.maybe_apply_split(dst);
1139        }
1140        dsts
1141    }
1142
1143    //get destinations at end of block
1144    //next = start of next instruction
1145    fn effect_to_destinations(&mut self, effect : &Effect<u64>, next: u64, instr: &yaxpeax_x86::long_mode::Instruction, addr: u64) -> Vec<u64>{
1146        let mut dsts : Vec<u64> = vec![];
1147        if !effect.stop_after{
1148            dsts.push(next)
1149        }
1150        match &effect.dest {
1151            Some(Target::Relative(rel)) => {
1152                let dest_addr = next.wrapping_offset(*rel);
1153                dsts.push(dest_addr);
1154                return dsts;
1155            },
1156            Some(Target::Absolute(dest)) => {
1157                dsts.push(*dest);
1158                return dsts;
1159            }
1160            Some(Target::Multiple(_targets)) =>  return vec![],
1161            Some(Target::Indeterminate) => return vec![],
1162            None => {
1163                if self.switch_targets.is_none(){
1164                    return vec![];
1165                }
1166                match instr.opcode() {
1167                    Opcode::JMP => (),
1168                    Opcode::RETURN | Opcode::UD2 => (),
1169                    _ => panic!("Unknown indirect control flow transfer {:?}", instr.opcode()),
1170                }
1171                if let Opcode::JMP = instr.opcode() {
1172                    if !self.switch_targets.as_ref().unwrap().contains_key(&addr){
1173                        self.unresolved_jumps += 1;
1174                        return vec![];
1175                        // panic!("No entry in switch_targets for the switch @ {:x}", addr);
1176                    }
1177                    let switch = self.switch_targets.as_ref().unwrap().get(&addr).unwrap();
1178                    let targets: Vec<u64> = switch.into_iter().map(|x| (*x) as u64).rev().collect();
1179                    // println!("Indirect jumps discovered");
1180                    // for target in &targets{
1181                    //     println!("{:x}", target);
1182                    // }
1183                    return targets;
1184                }
1185                return vec![];
1186            }
1187        }
1188    }
1189}
1190
1191fn get_effect(contexts: &MergedContextTable, instr: &yaxpeax_x86::long_mode::Instruction, addr: u64) -> Effect<u64>{
1192    let ctx = contexts.at(&addr);
1193    instr.control_flow(Some(&ctx))
1194}
1195
1196pub fn get_cfg<M>(
1197    data: &M,
1198    contexts: &MergedContextTable,
1199    entrypoint: u64,
1200    switch_targets: Option<&HashMap<u64, std::vec::Vec<i64>>>,
1201) -> (VW_CFG,u32) where M: MemoryRange<x86_64>,
1202{
1203    let mut cfg_builder = VW_CFG_Builder::new(entrypoint, switch_targets);
1204    let mut to_explore: VecDeque<u64> = VecDeque::new();
1205    let mut seen: HashSet<u64> = HashSet::new();
1206    to_explore.push_back(entrypoint);
1207
1208    while let Some(addr) = to_explore.pop_front() {
1209        let dests = cfg_builder.process_job(data, contexts, addr);
1210
1211        let out_addrs: Vec<std::string::String> = dests.clone().into_iter().map(|x| format!("{:x}", x)).rev().collect();
1212        tracing::trace!("Processed Job: {:x} -> {:?}", addr, out_addrs);
1213
1214        for next in dests.into_iter() {
1215            if !seen.contains(&next) {
1216                to_explore.push_back(next);
1217                seen.insert(next);
1218            }
1219        }
1220    }
1221    cfg_builder.cfg.check_integrity();
1222    (cfg_builder.cfg,cfg_builder.unresolved_jumps)
1223}
yaxpeax_core/analyses/control_flow/mod.rs

yaxpeax_core/analyses/control_flow/
mod.rs