1use fnv::FnvHashSet;
13
14use super::*;
15
16impl GlulxState {
17 pub fn disassemble(&mut self, image: &[u8]) -> FnvHashSet<(u32, u32)> {
18 let decoding_table = self.parse_string_decoding_table(image);
19
20 let mut edges = FnvHashSet::default();
21
22 let ram_start = self.read_addr(image, 8) as u64;
23 self.ramstart = ram_start as u32;
24 let decoding_table_addr = self.read_addr(image, 28);
25 let root_node_addr = self.read_addr(image, decoding_table_addr + 8);
26
27 let mut cursor = Cursor::new(image);
28
29 if let Some(functions) = &self.debug_function_data {
31 for (&addr, func) in functions {
32 cursor.set_position(addr as u64);
33 let function_type = cursor.get_u8();
34 self.functions.insert(func.addr, self.disassemble_function(&mut cursor, &mut edges, addr, Some(func.len), function_type));
35 }
36 return edges;
37 }
38
39 cursor.set_position(60);
42
43 while cursor.position() < ram_start {
45 let addr = cursor.position() as u32;
46 let object_type = cursor.get_u8();
47
48 match object_type {
49 0 => {},
51
52 0xC0 | 0xC1 => {
54 self.functions.insert(addr, self.disassemble_function(&mut cursor, &mut edges, addr, None, object_type));
55 },
56
57 0xE0 => {
59 if self.stop_on_string {
60 break;
61 }
62 while cursor.get_u8() != 0 {}
63 },
64 0xE2 => {
65 if self.stop_on_string {
66 break;
67 }
68 cursor.get_u8();
69 cursor.get_u8();
70 cursor.get_u8();
71 while cursor.get_u32() != 0 {}
72 },
73 0xE1 => {
75 if self.stop_on_string {
76 break;
77 }
78
79 fn get_node<'a>(table: &'a FnvHashMap<u32, DecodingNode>, addr: u32) -> &'a DecodingNode {
80 table.get(&addr).unwrap()
81 }
82 fn get_node_branch_addresses(node: &DecodingNode) -> [u32; 2] {
83 match node {
84 DecodingNode::Branch(branch) => {
85 [branch.left, branch.right]
86 },
87 _ => panic!("Decoding node is not a branch"),
88 }
89 }
90
91 let root_node = get_node(&decoding_table, root_node_addr);
92 let root_branches = get_node_branch_addresses(root_node);
93 let mut left_node = root_branches[0];
94 let mut right_node = root_branches[1];
95
96 let mut byte = cursor.get_u8();
97 let mut bits = 8;
98 loop {
99 let bit = byte & 0x01;
100 bits -= 1;
101 byte >>= 1;
102 let node = get_node(&decoding_table, if bit == 0 {left_node} else {right_node});
103 match node {
104 DecodingNode::Terminator => {
105 break;
106 },
107 DecodingNode::Leaf => {
108 left_node = root_branches[0];
109 right_node = root_branches[1];
110 },
111 DecodingNode::Branch(branch) => {
112 left_node = branch.left;
113 right_node = branch.right;
114 },
115 }
116 if bits == 0 {
117 bits = 8;
118 byte = cursor.get_u8();
119 }
120 }
121 },
122
123 _ => {
125 println!("Stopping on unknown object type {:?} at {:?}", object_type, addr);
126 break;
127 },
128 }
129 };
130
131 edges
133 }
134
135 pub fn parse_string_decoding_table(&self, image: &[u8]) -> FnvHashMap<u32, DecodingNode> {
137 let mut table = FnvHashMap::default();
138 let mut cursor = Cursor::new(image);
139
140 let decoding_table_addr = self.read_addr(image, 28);
141 let root_node_addr = self.read_addr(image, decoding_table_addr + 8);
142
143 let mut nodes_to_process = vec![root_node_addr];
146 loop {
147 let addr = nodes_to_process.pop().unwrap();
148 cursor.set_position(addr as u64);
149 let node_type = cursor.get_u8();
150 let node = match node_type {
151 0x00 => {
152 let left = cursor.get_u32();
153 let right = cursor.get_u32();
154 nodes_to_process.push(left);
155 nodes_to_process.push(right);
156 DecodingNode::Branch(DecodingNodeBranch {
157 left,
158 right,
159 })
160 },
161 0x01 => DecodingNode::Terminator,
162 0x02 => {
163 cursor.get_u8();
164 DecodingNode::Leaf
165 },
166 0x03 => {
167 while cursor.get_u8() != 0 {}
168 DecodingNode::Leaf
169 },
170 0x04 | 0x08 | 0x09 => {
171 cursor.get_u32();
172 DecodingNode::Leaf
173 },
174 0x05 => {
175 while cursor.get_u32() != 0 {}
176 DecodingNode::Leaf
177 },
178 0x0A | 0x0B => {
179 let _addr = cursor.get_u32();
180 let count = cursor.get_u32();
181 for _ in 0..count {
182 cursor.get_u32();
183 }
184 DecodingNode::Leaf
185 }
186 _ => panic!("Invalid string decoding node at {}", addr),
187 };
188 table.insert(addr, node);
189 if nodes_to_process.len() == 0 {
190 break;
191 }
192 }
193
194 table
195 }
196
197 fn disassemble_function(&self, cursor: &mut Cursor<&[u8]>, edges: &mut FnvHashSet<(u32, u32)>, addr: u32, len: Option<u32>, function_mode: u8) -> Function {
198 let argument_mode = match function_mode {
199 0xC0 => FunctionArgumentMode::Stack,
200 0xC1 => FunctionArgumentMode::Locals,
201 _ => unreachable!(),
202 };
203
204 let mut locals = 0;
206 loop {
207 let local_type = cursor.get_u8();
208 let count = cursor.get_u8() as u32;
209 if local_type == 0 {
210 break
211 }
212 if local_type != 4 {
213 panic!("1 and 2 byte locals are not supported in function {}", addr);
214 }
215 locals += count;
216 }
217
218 let mut entry_points = FnvHashSet::default();
220 let mut exit_branches = FnvHashMap::default();
221
222 let end_addr = len.map(|l| addr + l);
224 let mut instructions = Vec::new();
225 let mut instruction_addresses = FnvHashSet::default();
226 'parse_loop: loop {
227 let instruction = self.disassemble_instruction(cursor);
228 instruction_addresses.insert(instruction.addr);
229
230 if let Some(target) = instruction.branch {
232 match instruction.opcode {
233 opcodes::OP_JUMP | opcodes::OP_JUMPABS => {
234 let mut branch_targets = Vec::new();
235 if let BranchTarget::Absolute(addr) = target {
236 entry_points.insert(addr);
237 branch_targets.push(addr);
238 }
239 exit_branches.insert(instruction.addr, branch_targets);
240 },
241 _ => {
242 let returns = match target {
245 BranchTarget::Return(_) => true,
246 _ => false,
247 };
248 if !returns || instruction.opcode == opcodes::OP_CATCH {
249 entry_points.insert(instruction.next);
250 let mut branch_targets = vec![instruction.next];
251 if let BranchTarget::Absolute(addr) = target {
252 entry_points.insert(addr);
253 branch_targets.push(addr);
254 }
255 exit_branches.insert(instruction.addr, branch_targets);
256 }
257 },
258 };
259 }
260 let opcode = instruction.opcode;
261
262 if opcodes::instruction_calls(opcode) {
264 if let Operand::Constant(callee_addr) = instruction.operands[0] {
265 edges.insert((addr, callee_addr));
266 }
267 }
268
269 if opcodes::instruction_resumes(opcode) {
271 entry_points.insert(instruction.next);
272 }
273
274 instructions.push(instruction);
275
276 if let Some(end_addr) = end_addr {
278 if cursor.position() as u32 == end_addr {
279 break;
280 }
281 continue;
282 }
283
284 if opcodes::instruction_halts(opcode) {
285 if !entry_points.contains(&(cursor.position() as u32)) {
288 for _ in entry_points.difference(&instruction_addresses) {
290 continue 'parse_loop;
291 }
292
293 let final_addr = cursor.position();
295 let potential_opcode = decode_opcode(cursor);
296 cursor.set_position(final_addr);
297 if potential_opcode == 0 {
299 break;
300 }
301 match opcodes::operands_count(potential_opcode) {
302 Some(_) => {
303 entry_points.insert(final_addr as u32);
304 continue 'parse_loop;
305 },
306 None => break,
307 };
308 }
309 }
310 }
311
312 let safety = self.function_safety(addr, &instructions);
313 let blocks = calculate_basic_blocks(instructions, entry_points, exit_branches);
314
315 Function {
316 addr,
317 argument_mode,
318 blocks,
319 locals,
320 safety,
321 }
322 }
323
324 fn disassemble_instruction(&self, cursor: &mut Cursor<&[u8]>) -> Instruction {
325 use Operand::*;
326
327 let addr = cursor.position() as u32;
328 let opcode = decode_opcode(cursor);
329
330 let mut operands = Vec::default();
332 let operands_count = opcodes::operands_count(opcode).expect(&format!("Unknown opcode {} at address {}", opcode, addr)) as usize;
333 let mut operand_types = Vec::default();
334 while operand_types.len() < operands_count {
335 let types = cursor.get_u8();
336 operand_types.push(types & 0x0F);
337 operand_types.push(types >> 4);
338 }
339 for i in 0..operands_count {
340 let operand = match operand_types[i] {
341 0 => Constant(0),
342 1 => Constant(cursor.get_i8() as i32 as u32),
343 2 => Constant(cursor.get_i16() as i32 as u32),
344 3 => Constant(cursor.get_u32()),
345 5 => Memory(cursor.get_u8() as u32),
346 6 => Memory(cursor.get_u16() as u32),
347 7 => Memory(cursor.get_u32()),
348 8 => Stack,
349 9 => Local(cursor.get_u8() as u32),
350 10 => Local(cursor.get_u16() as u32),
351 11 => Local(cursor.get_u32()),
352 13 => RAM(cursor.get_u8() as u32),
353 14 => RAM(cursor.get_u16() as u32),
354 15 => RAM(cursor.get_u32()),
355 x => panic!("Invalid operand mode {} in instruction {}", x, addr),
356 };
357 operands.push(operand);
358 }
359
360 use BranchTarget::*;
362 let calc_branch = || -> BranchTarget {
363 match *operands.last().unwrap() {
364 Constant(target) => {
365 if opcode == opcodes::OP_JUMPABS {
366 Absolute(target)
367 }
368 else {
369 if target == 0 || target == 1 {
370 Return(target)
371 }
372 else {
373 Absolute((cursor.position() as i32 + target as i32 - 2) as u32)
374 }
375 }
376 },
377 _ => Dynamic,
378 }
379 };
380 let branch = match opcodes::instruction_branches(opcode) {
381 true => Some(calc_branch()),
382 false => None,
383 };
384
385 use opcodes::StoreMode::*;
387 let (storer2, storer) = match opcodes::instruction_stores(opcode) {
388 DoesNotStore => (Operand::Constant(0), Operand::Constant(0)),
389 LastOperand => (Operand::Constant(0), operands.pop().unwrap()),
390 LastTwoOperands => (operands.pop().unwrap(), operands.pop().unwrap()),
391 };
392
393 Instruction {
394 addr,
395 opcode,
396 operands,
397 branch,
398 storer,
399 storer2,
400 next: cursor.position() as u32,
401 }
402 }
403
404 fn function_safety(&self, addr: u32, instructions: &Vec<Instruction>) -> FunctionSafety {
406 if let Some(functions) = &self.safe_function_overides {
407 if functions.contains(&addr) {
408 return FunctionSafety::SafetyTBD;
409 }
410 }
411 if let Some(functions) = &self.unsafe_function_overides {
412 if functions.contains(&addr) {
413 return FunctionSafety::Unsafe;
414 }
415 }
416 opcodes::function_safety(instructions)
417 }
418}
419
420fn decode_opcode(cursor: &mut Cursor<&[u8]>) -> u32 {
422 let opcode_byte = cursor.get_u8();
423 match opcode_byte {
424 0 ..= 0x7F => opcode_byte as u32,
425 0x80 ..= 0xBF => ((opcode_byte as u32 & 0x3F) << 8) | cursor.get_u8() as u32,
426 0xC0 ..= 0xFF => ((opcode_byte as u32 & 0x3F) << 24) | ((cursor.get_u8() as u32) << 16) | cursor.get_u16() as u32,
427 }
428}
429
430pub enum DecodingNode {
431 Branch(DecodingNodeBranch),
432 Leaf,
433 Terminator,
434}
435
436pub struct DecodingNodeBranch {
437 pub left: u32,
438 pub right: u32,
439}