1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
/*

if-decompiler - core library
===============================

Copyright (c) 2021 Dannii Willis
MIT licenced
https://github.com/curiousdannii/if-decompiler

*/

#![forbid(unsafe_code)]

use std::collections::BTreeMap;

use fnv::{FnvHashMap, FnvHashSet};
use petgraph::{graph, visit};

pub mod glulx;

// Function data from an Inform debug file
#[derive(Debug)]
pub struct DebugFunctionData {
    pub addr: u32,
    pub len: u32,
    pub name: String,
}

// Function safety refers to whether or not a function can be compiled and run without worrying about its locals and stack being part of the savestate
// Unsafe functions need to be compiled such that they can be serialised and restored
// UnsafeDynamicBranches functions have dynamic branches and need even more special care
// SafetyTBD functions have not yet been determined. At the end of decompilation any remaining SafetyTBD functions will be assumed safe.
#[derive(Copy, Clone, Debug, PartialEq)]
pub enum FunctionSafety {
    Unsafe,
    UnsafeDynamicBranches,
    SafetyTBD,
}

// Now a trait for generalising over our VMs
pub trait VirtualMachine {
    fn get_functions(&self) -> FnvHashMap<u32, FunctionSafety>;
    fn mark_function_as_unsafe(&mut self, addr: u32);

    fn mark_all_unsafe_functions(&mut self, edges: FnvHashSet<(u32, u32)>) {
        let mut graph: graph::Graph<u32, ()> = graph::Graph::new();

        // Add the graph nodes
        let functions = self.get_functions();
        let mut function_nodes = FnvHashMap::default();
        let mut unsafe_functions = Vec::new();
        for (addr, safety) in functions {
            let node = graph.add_node(addr);
            function_nodes.insert(addr, node);
            if safety != FunctionSafety::SafetyTBD {
                unsafe_functions.push(node);
            }
        }

        // Then add the graph edges
        graph.extend_with_edges(edges.iter().map(|(caller_addr, callee_addr)| {
            let caller_node = function_nodes[caller_addr];
            let callee_node = function_nodes[callee_addr];
            // The direction must be callee->caller, as we'll change the caller's safety if the callee is unsafe
            (callee_node, caller_node)
        }));

        // Now walk the function graph, marking each caller as Unsafe
        let mut dfs = visit::Dfs::empty(&graph);
        dfs.stack = unsafe_functions;
        while let Some(node_index) = dfs.next(&graph) {
            let addr = graph[node_index];
            self.mark_function_as_unsafe(addr);
        }
    }
}

// Generic instruction functions
pub trait VMInstruction {
    fn addr(&self) -> u32;
    fn does_halt(&self) -> bool;
}

// A generic basic block
pub struct BasicBlock<I> {
    pub label: u32,
    pub code: Vec<I>,
    pub branches: FnvHashSet<u32>,
}

// Calculate basic blocks
pub fn calculate_basic_blocks<I: VMInstruction>(instructions: Vec<I>, entry_points: FnvHashSet<u32>, exit_branches: FnvHashMap<u32, Vec<u32>>) -> BTreeMap<u32, BasicBlock<I>> {
    let mut blocks: BTreeMap<u32, BasicBlock<I>> = BTreeMap::new();
    let mut current_block_addr = 0;
    let mut last_instruction_halted = false;
    for instruction in instructions {
        let addr = instruction.addr();
        // If we're in the middle of a block, see if we should add to it
        if current_block_addr > 0 {
            let current_block = blocks.get_mut(&current_block_addr).unwrap();
            // Finish a previous block because this one starts a new one
            if entry_points.contains(&addr) {
                // Unless the last instruction halted, add this new instruction as a branch to the last block
                if !last_instruction_halted {
                    current_block.branches.insert(addr);
                }
                // Make a new block below
            }
            else {
                // If this instruction branches, finish up the block
                if let Some(branches) = exit_branches.get(&addr) {
                    for branch in branches {
                        current_block.branches.insert(*branch);
                    }
                    current_block_addr = 0;
                }
                // Add to the current block
                last_instruction_halted = instruction.does_halt();
                current_block.code.push(instruction);
                // Continue so we don't make a new block
                continue;
            }
        }
        // Make a new block
        current_block_addr = addr;
        last_instruction_halted = instruction.does_halt();
        let mut current_block = BasicBlock::<I> {
            label: addr,
            code: vec![instruction],
            branches: FnvHashSet::default(),
        };
        // Add branches if we have any
        if let Some(branches) = exit_branches.get(&addr) {
            for branch in branches {
                current_block.branches.insert(*branch);
            }
        }
        blocks.insert(addr, current_block);
    }
    blocks
}

#[derive(Copy, Clone, Debug, PartialEq)]
pub enum BranchTarget {
    Dynamic,
    Absolute(u32),
    Return(u32),
}