if_decompiler/glulx/
disassembler.rs

1/*
2
3Glulx Disassembler
4==================
5
6Copyright (c) 2021 Dannii Willis
7MIT licenced
8https://github.com/curiousdannii/if-decompiler
9
10*/
11
12use fnv::FnvHashSet;
13
14use super::*;
15
16impl GlulxState {
17    pub fn disassemble(&mut self, image: &[u8]) -> FnvHashSet<(u32, u32)> {
18        let decoding_table = self.parse_string_decoding_table(image);
19
20        let mut edges = FnvHashSet::default();
21
22        let ram_start = self.read_addr(image, 8) as u64;
23        self.ramstart = ram_start as u32;
24        let decoding_table_addr = self.read_addr(image, 28);
25        let root_node_addr = self.read_addr(image, decoding_table_addr + 8);
26
27        let mut cursor = Cursor::new(image);
28
29        // If we have debug file data, use it to disassemble all the functions
30        if let Some(functions) = &self.debug_function_data {
31            for (&addr, func) in functions {
32                cursor.set_position(addr as u64);
33                let function_type = cursor.get_u8();
34                self.functions.insert(func.addr, self.disassemble_function(&mut cursor, &mut edges, addr, Some(func.len), function_type));
35            }
36            return edges;
37        }
38
39        // Otherwise parse the file manually
40        // Skip past the header
41        cursor.set_position(60);
42
43        // Loop through the ROM until the end of RAM or we find a
44        while cursor.position() < ram_start {
45            let addr = cursor.position() as u32;
46            let object_type = cursor.get_u8();
47
48            match object_type {
49                // Padding
50                0 => {},
51
52                // Functions
53                0xC0 | 0xC1 => {
54                    self.functions.insert(addr, self.disassemble_function(&mut cursor, &mut edges, addr, None, object_type));
55                },
56
57                // Strings - just skip past them for now!
58                0xE0 => {
59                    if self.stop_on_string {
60                        break;
61                    }
62                    while cursor.get_u8() != 0 {}
63                },
64                0xE2 => {
65                    if self.stop_on_string {
66                        break;
67                    }
68                    cursor.get_u8();
69                    cursor.get_u8();
70                    cursor.get_u8();
71                    while cursor.get_u32() != 0 {}
72                },
73                // Compressed strings will take a bit more work...
74                0xE1 => {
75                    if self.stop_on_string {
76                        break;
77                    }
78
79                    fn get_node<'a>(table: &'a FnvHashMap<u32, DecodingNode>, addr: u32) -> &'a DecodingNode {
80                        table.get(&addr).unwrap()
81                    }
82                    fn get_node_branch_addresses(node: &DecodingNode) -> [u32; 2] {
83                        match node {
84                            DecodingNode::Branch(branch) => {
85                                [branch.left, branch.right]
86                            },
87                            _ => panic!("Decoding node is not a branch"),
88                        }
89                    }
90
91                    let root_node = get_node(&decoding_table, root_node_addr);
92                    let root_branches = get_node_branch_addresses(root_node);
93                    let mut left_node = root_branches[0];
94                    let mut right_node = root_branches[1];
95
96                    let mut byte = cursor.get_u8();
97                    let mut bits = 8;
98                    loop {
99                        let bit = byte & 0x01;
100                        bits -= 1;
101                        byte >>= 1;
102                        let node = get_node(&decoding_table, if bit == 0 {left_node} else {right_node});
103                        match node {
104                            DecodingNode::Terminator => {
105                                break;
106                            },
107                            DecodingNode::Leaf => {
108                                left_node = root_branches[0];
109                                right_node = root_branches[1];
110                            },
111                            DecodingNode::Branch(branch) => {
112                                left_node = branch.left;
113                                right_node = branch.right;
114                            },
115                        }
116                        if bits == 0 {
117                            bits = 8;
118                            byte = cursor.get_u8();
119                        }
120                    }
121                },
122
123                // Unknown
124                _ => {
125                    println!("Stopping on unknown object type {:?} at {:?}", object_type, addr);
126                    break;
127                },
128            }
129        };
130
131        // Return the list of edges
132        edges
133    }
134
135    // Parse the string decoding table, but only so that we can ignore compressed strings
136    pub fn parse_string_decoding_table(&self, image: &[u8]) -> FnvHashMap<u32, DecodingNode> {
137        let mut table = FnvHashMap::default();
138        let mut cursor = Cursor::new(image);
139
140        let decoding_table_addr = self.read_addr(image, 28);
141        let root_node_addr = self.read_addr(image, decoding_table_addr + 8);
142
143        // Keep a list of nodes to process and loop through
144        // I tried doing this recursively but couldn't make it work with the borrow checker
145        let mut nodes_to_process = vec![root_node_addr];
146        loop {
147            let addr = nodes_to_process.pop().unwrap();
148            cursor.set_position(addr as u64);
149            let node_type = cursor.get_u8();
150            let node = match node_type {
151                0x00 => {
152                    let left = cursor.get_u32();
153                    let right = cursor.get_u32();
154                    nodes_to_process.push(left);
155                    nodes_to_process.push(right);
156                    DecodingNode::Branch(DecodingNodeBranch {
157                        left,
158                        right,
159                    })
160                },
161                0x01 => DecodingNode::Terminator,
162                0x02 => {
163                    cursor.get_u8();
164                    DecodingNode::Leaf
165                },
166                0x03 => {
167                    while cursor.get_u8() != 0 {}
168                    DecodingNode::Leaf
169                },
170                0x04 | 0x08 | 0x09 => {
171                    cursor.get_u32();
172                    DecodingNode::Leaf
173                },
174                0x05 => {
175                    while cursor.get_u32() != 0 {}
176                    DecodingNode::Leaf
177                },
178                0x0A | 0x0B => {
179                    let _addr = cursor.get_u32();
180                    let count = cursor.get_u32();
181                    for _ in 0..count {
182                        cursor.get_u32();
183                    }
184                    DecodingNode::Leaf
185                }
186                _ => panic!("Invalid string decoding node at {}", addr),
187            };
188            table.insert(addr, node);
189            if nodes_to_process.len() == 0 {
190                break;
191            }
192        }
193
194        table
195    }
196
197    fn disassemble_function(&self, cursor: &mut Cursor<&[u8]>, edges: &mut FnvHashSet<(u32, u32)>, addr: u32, len: Option<u32>, function_mode: u8) -> Function {
198        let argument_mode = match function_mode {
199            0xC0 => FunctionArgumentMode::Stack,
200            0xC1 => FunctionArgumentMode::Locals,
201            _ => unreachable!(),
202        };
203
204        // Parse the locals formats
205        let mut locals = 0;
206        loop {
207            let local_type = cursor.get_u8();
208            let count = cursor.get_u8() as u32;
209            if local_type == 0 {
210                break
211            }
212            if local_type != 4 {
213                panic!("1 and 2 byte locals are not supported in function {}", addr);
214            }
215            locals += count;
216        }
217
218        // Basic blocks
219        let mut entry_points = FnvHashSet::default();
220        let mut exit_branches = FnvHashMap::default();
221
222        // Parse the instructions
223        let end_addr = len.map(|l| addr + l);
224        let mut instructions = Vec::new();
225        let mut instruction_addresses = FnvHashSet::default();
226        'parse_loop: loop {
227            let instruction = self.disassemble_instruction(cursor);
228            instruction_addresses.insert(instruction.addr);
229
230            // If this instruction branches, then update the entry and exit points
231            if let Some(target) = instruction.branch {
232                match instruction.opcode {
233                    opcodes::OP_JUMP | opcodes::OP_JUMPABS => {
234                        let mut branch_targets = Vec::new();
235                        if let BranchTarget::Absolute(addr) = target {
236                            entry_points.insert(addr);
237                            branch_targets.push(addr);
238                        }
239                        exit_branches.insert(instruction.addr, branch_targets);
240                    },
241                    _ => {
242                        // If the branch returns then don't end a basic block here
243                        // Except for @catch!
244                        let returns = match target {
245                            BranchTarget::Return(_) => true,
246                            _ => false,
247                        };
248                        if !returns || instruction.opcode == opcodes::OP_CATCH {
249                            entry_points.insert(instruction.next);
250                            let mut branch_targets = vec![instruction.next];
251                            if let BranchTarget::Absolute(addr) = target {
252                                entry_points.insert(addr);
253                                branch_targets.push(addr);
254                            }
255                            exit_branches.insert(instruction.addr, branch_targets);
256                        }
257                    },
258                };
259            }
260            let opcode = instruction.opcode;
261
262            // If this instruction calls, then add it to the edges list
263            if opcodes::instruction_calls(opcode) {
264                if let Operand::Constant(callee_addr) = instruction.operands[0] {
265                    edges.insert((addr, callee_addr));
266                }
267            }
268
269            // Add an entry point for instructions which may resume later
270            if opcodes::instruction_resumes(opcode) {
271                entry_points.insert(instruction.next);
272            }
273
274            instructions.push(instruction);
275
276            // If we have an end_addr (from a debug file) then use it to determine when to stop decoding
277            if let Some(end_addr) = end_addr {
278                if cursor.position() as u32 == end_addr {
279                    break;
280                }
281                continue;
282            }
283
284            if opcodes::instruction_halts(opcode) {
285                // Stop parsing instructions if we don't have any pending entry_points
286                // Short cut - check if the next address is an entry point
287                if !entry_points.contains(&(cursor.position() as u32)) {
288                    // Otherwise check if any entry points haven't already been parsed
289                    for _ in entry_points.difference(&instruction_addresses) {
290                        continue 'parse_loop;
291                    }
292
293                    // And check for an unreachable instruction
294                    let final_addr = cursor.position();
295                    let potential_opcode = decode_opcode(cursor);
296                    cursor.set_position(final_addr);
297                    // Check for 0 first, as it shouldn't be interpreted as a NOP
298                    if potential_opcode == 0 {
299                        break;
300                    }
301                    match opcodes::operands_count(potential_opcode) {
302                        Some(_) => {
303                            entry_points.insert(final_addr as u32);
304                            continue 'parse_loop;
305                        },
306                        None => break,
307                    };
308                }
309            }
310        }
311
312        let safety = self.function_safety(addr, &instructions);
313        let blocks = calculate_basic_blocks(instructions, entry_points, exit_branches);
314
315        Function {
316            addr,
317            argument_mode,
318            blocks,
319            locals,
320            safety,
321        }
322    }
323
324    fn disassemble_instruction(&self, cursor: &mut Cursor<&[u8]>) -> Instruction {
325        use Operand::*;
326
327        let addr = cursor.position() as u32;
328        let opcode = decode_opcode(cursor);
329
330        // Extract the operands
331        let mut operands = Vec::default();
332        let operands_count = opcodes::operands_count(opcode).expect(&format!("Unknown opcode {} at address {}", opcode, addr)) as usize;
333        let mut operand_types = Vec::default();
334        while operand_types.len() < operands_count {
335            let types = cursor.get_u8();
336            operand_types.push(types & 0x0F);
337            operand_types.push(types >> 4);
338        }
339        for i in 0..operands_count {
340            let operand = match operand_types[i] {
341                0 => Constant(0),
342                1 => Constant(cursor.get_i8() as i32 as u32),
343                2 => Constant(cursor.get_i16() as i32 as u32),
344                3 => Constant(cursor.get_u32()),
345                5 => Memory(cursor.get_u8() as u32),
346                6 => Memory(cursor.get_u16() as u32),
347                7 => Memory(cursor.get_u32()),
348                8 => Stack,
349                9 => Local(cursor.get_u8() as u32),
350                10 => Local(cursor.get_u16() as u32),
351                11 => Local(cursor.get_u32()),
352                13 => RAM(cursor.get_u8() as u32),
353                14 => RAM(cursor.get_u16() as u32),
354                15 => RAM(cursor.get_u32()),
355                x => panic!("Invalid operand mode {} in instruction {}", x, addr),
356            };
357            operands.push(operand);
358        }
359
360        // Calculate branch targets
361        use BranchTarget::*;
362        let calc_branch = || -> BranchTarget {
363            match *operands.last().unwrap() {
364                Constant(target) => {
365                    if opcode == opcodes::OP_JUMPABS {
366                        Absolute(target)
367                    }
368                    else {
369                        if target == 0 || target == 1 {
370                            Return(target)
371                        }
372                        else {
373                            Absolute((cursor.position() as i32 + target as i32 - 2) as u32)
374                        }
375                    }
376                },
377                _ => Dynamic,
378            }
379        };
380        let branch = match opcodes::instruction_branches(opcode) {
381            true => Some(calc_branch()),
382            false => None,
383        };
384
385        // Extract the storer(s) - in reverse order (makes it simpler for OP_FMOD)
386        use opcodes::StoreMode::*;
387        let (storer2, storer) = match opcodes::instruction_stores(opcode) {
388            DoesNotStore => (Operand::Constant(0), Operand::Constant(0)),
389            LastOperand => (Operand::Constant(0), operands.pop().unwrap()),
390            LastTwoOperands => (operands.pop().unwrap(), operands.pop().unwrap()),
391        };
392
393        Instruction {
394            addr,
395            opcode,
396            operands,
397            branch,
398            storer,
399            storer2,
400            next: cursor.position() as u32,
401        }
402    }
403
404    // Check the function safety overrides
405    fn function_safety(&self, addr: u32, instructions: &Vec<Instruction>) -> FunctionSafety {
406        if let Some(functions) = &self.safe_function_overides {
407            if functions.contains(&addr) {
408                return FunctionSafety::SafetyTBD;
409            }
410        }
411        if let Some(functions) = &self.unsafe_function_overides {
412            if functions.contains(&addr) {
413                return FunctionSafety::Unsafe;
414            }
415        }
416        opcodes::function_safety(instructions)
417    }
418}
419
420// Decode a variable length opcode
421fn decode_opcode(cursor: &mut Cursor<&[u8]>) -> u32 {
422    let opcode_byte = cursor.get_u8();
423    match opcode_byte {
424        0 ..= 0x7F => opcode_byte as u32,
425        0x80 ..= 0xBF => ((opcode_byte as u32 & 0x3F) << 8) | cursor.get_u8() as u32,
426        0xC0 ..= 0xFF => ((opcode_byte as u32 & 0x3F) << 24) | ((cursor.get_u8() as u32) << 16) | cursor.get_u16() as u32,
427    }
428}
429
430pub enum DecodingNode {
431    Branch(DecodingNodeBranch),
432    Leaf,
433    Terminator,
434}
435
436pub struct DecodingNodeBranch {
437    pub left: u32,
438    pub right: u32,
439}