psdisasm/
lib.rs

1mod arguments;
2mod instruction;
3
4use arguments::ArgumentTypes;
5pub use instruction::Instruction;
6
/// Display names of the 32 CPU registers, indexed by register number.
///
/// Register 0 is rendered as a bare `0`; every other entry carries a `$` prefix.
const REGISTERS: [&str; 32] = [
    // 0: zero register, 1: assembler temporary
    "0", "$at",
    // 2-3: return values
    "$v0", "$v1",
    // 4-7: arguments
    "$a0", "$a1", "$a2", "$a3",
    // 8-15: temporaries
    "$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7",
    // 16-23: saved registers
    "$s0", "$s1", "$s2", "$s3", "$s4", "$s5", "$s6", "$s7",
    // 24-25: more temporaries
    "$t8", "$t9",
    // 26-27: kernel registers
    "$k0", "$k1",
    // 28: global pointer, 29: stack pointer
    "$gp", "$sp",
    // 30: frame pointer, 31: return address
    "$fp", "$ra",
];
19
/// Turns raw 32-bit instruction words into assembly text.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct Disassembler {
    /// When `true`, emit simplified, easier-to-read output instead of the raw
    /// operand form.
    ///
    /// Example transformations:
    /// - `sll $zero, $zero, 0` -> `nop`
    /// - `lui $t1, 1` -> `move $t1, 0x10000`
    /// - `add $t1, $t1, 2` -> `add $t1, 2`
    pub pretty: bool,
}
29
30impl Default for Disassembler {
31    fn default() -> Self {
32        Disassembler { pretty: true }
33    }
34}
35
36impl Disassembler {
37    /// Disassemble an instruction
38    pub fn disassemble(&self, ins: u32, pc: u32) -> String {
39        let ins = Instruction::new(ins);
40
41        match ins.opcode() {
42            0x00 => match ins.funct() {
43                0x00 => self.format_shamt(ins, "sll"),
44                0x02 => self.format_shamt(ins, "srl"),
45                0x03 => self.format_shamt(ins, "sra"),
46                0x04 => self.format_shift_r(ins, "sllv"),
47                0x06 => self.format_shift_r(ins, "srlv"),
48                0x07 => self.format_shift_r(ins, "srav"),
49                0x08 => self.format_instruction(ins, "jr", ArgumentTypes::S, pc),
50                0x09 => self.format_instruction(ins, "jalr", ArgumentTypes::S_D, pc),
51                0x0c => self.format_instruction(ins, "syscall", ArgumentTypes::None, 0),
52                0x0d => self.format_instruction(ins, "break", ArgumentTypes::None, 0),
53                0x10 => self.format_instruction(ins, "mfhi", ArgumentTypes::D, 0),
54                0x11 => self.format_instruction(ins, "mthi", ArgumentTypes::S, 0),
55                0x12 => self.format_instruction(ins, "mflo", ArgumentTypes::D, 0),
56                0x13 => self.format_instruction(ins, "mtlo", ArgumentTypes::S, 0),
57                0x18 => self.format_instruction(ins, "mult", ArgumentTypes::S_T, 0),
58                0x19 => self.format_instruction(ins, "multu", ArgumentTypes::S_T, 0),
59                0x1a => self.format_instruction(ins, "div", ArgumentTypes::S_T, 0),
60                0x1b => self.format_instruction(ins, "divu", ArgumentTypes::S_T, 0),
61                0x20 => self.format_arith_r(ins, "add"),
62                0x21 => self.format_arith_r(ins, "addu"),
63                0x22 => self.format_arith_r(ins, "sub"),
64                0x23 => self.format_arith_r(ins, "subu"),
65                0x24 => self.format_arith_r(ins, "and"),
66                0x25 => self.format_arith_r(ins, "or"),
67                0x26 => self.format_arith_r(ins, "xor"),
68                0x27 => self.format_instruction(ins, "nor", ArgumentTypes::D_S_T, 0),
69                0x2a => self.format_instruction(ins, "slt", ArgumentTypes::D_S_T, 0),
70                0x2b => self.format_instruction(ins, "sltu", ArgumentTypes::D_S_T, 0),
71                _ => format!("Invalid opcode 0x00 with funct {:x}", ins.funct()),
72            },
73            0x01 => {
74                // This format abuses the `rt` field for a sub-opcode
75                let link = ins.rt() & 0x10 == 0x10;
76                match (ins.rt() & 1, link) {
77                    (0, false) => self.format_instruction(ins, "bltz", ArgumentTypes::S_Jump, pc),
78                    (0, true) => self.format_instruction(ins, "bltzal", ArgumentTypes::S_Jump, pc),
79                    (1, false) => self.format_instruction(ins, "bgez", ArgumentTypes::S_Jump, pc),
80                    (1, true) => self.format_instruction(ins, "bgezal", ArgumentTypes::S_Jump, pc),
81                    _ => unreachable!(),
82                }
83            }
84            0x02 => self.format_instruction(ins, "j", ArgumentTypes::Jump, pc),
85            0x03 => self.format_instruction(ins, "jal", ArgumentTypes::Jump, pc),
86            0x04 => self.format_instruction(ins, "beq", ArgumentTypes::S_T_Jump, pc),
87            0x05 => self.format_instruction(ins, "bne", ArgumentTypes::S_T_Jump, pc),
88            0x06 => self.format_instruction(ins, "blez", ArgumentTypes::S_Jump, pc),
89            0x07 => self.format_instruction(ins, "bgtz", ArgumentTypes::S_Jump, pc),
90            0x08 => self.format_arith_imm_signed(ins, "addi"),
91            0x09 => self.format_arith_imm_signed(ins, "addiu"),
92            0x0a => self.format_instruction(ins, "slti", ArgumentTypes::T_S_SImm, 0),
93            0x0b => self.format_instruction(ins, "sltiu", ArgumentTypes::T_S_Imm, 0),
94            0x0c => self.format_arith_imm_unsigned(ins, "andi"),
95            0x0d => self.format_arith_imm_unsigned(ins, "ori"),
96            0x0e => self.format_arith_imm_unsigned(ins, "xori"),
97            0x0f => self.format_lui(ins),
98            0x10..=0x13 => self.format_coprocessor_instruction(ins),
99            0x20 => self.format_memory(ins, "lb"),
100            0x21 => self.format_memory(ins, "lh"),
101            0x22 => self.format_memory(ins, "lwl"),
102            0x23 => self.format_memory(ins, "lw"),
103            0x24 => self.format_memory(ins, "lbu"),
104            0x25 => self.format_memory(ins, "lhu"),
105            0x26 => self.format_memory(ins, "lwr"),
106            0x28 => self.format_memory(ins, "sb"),
107            0x29 => self.format_memory(ins, "sh"),
108            0x2a => self.format_memory(ins, "swl"),
109            0x2b => self.format_memory(ins, "sw"),
110            0x2e => self.format_memory(ins, "swr"),
111            0x32 => self.format_memory_cop2(ins, "lwc2"),
112            0x3a => self.format_memory_cop2(ins, "swc2"),
113            _ => format!("Unknown opcode: {:#x}", ins.opcode()),
114        }
115    }
116
117    /// Disassemble an instruction with additional context.
118    pub fn disassemble_with_context(&self, ins: u32, pc: u32, registers: &[u32; 32]) -> String {
119        let disassembled = self.disassemble(ins, pc);
120        let ins = Instruction::new(ins);
121
122        let mut kind = "";
123        let mut branch_taken = false;
124        let mut address = 0;
125
126        match ins.opcode() {
127            0x01 => {
128                // This format abuses the `rt` field for a sub-opcode
129                match ins.rt() & 1 {
130                    0 => {
131                        kind = "branch";
132                        branch_taken = (registers[ins.rs()] as i32) < 0
133                    }
134                    1 => {
135                        kind = "branch";
136                        branch_taken = (registers[ins.rs()] as i32) >= 0
137                    }
138                    _ => {}
139                }
140            }
141            0x04 => {
142                kind = "branch";
143                branch_taken = registers[ins.rs()] == registers[ins.rt()]
144            }
145            0x05 => {
146                kind = "branch";
147                branch_taken = registers[ins.rs()] != registers[ins.rt()]
148            }
149            0x06 => {
150                kind = "branch";
151                branch_taken = registers[ins.rs()] <= 0
152            }
153            0x07 => {
154                kind = "branch";
155                branch_taken = registers[ins.rs()] > 0
156            }
157            0x20..=0x26 | 0x28..=0x2e => {
158                // Memory access instructions
159                kind = "memory";
160                address = (registers[ins.rs()] as i32 + ins.simm16()) as u32;
161            }
162            0x32 | 0x3a => {
163                // Coprocessor memory access instructions
164                kind = "memory";
165                address = (registers[ins.rs()] as i32 + ins.simm16()) as u32;
166            }
167            _ => {}
168        }
169
170        match kind {
171            "branch" => {
172                if branch_taken {
173                    format!("{disassembled}\t(taken)")
174                } else {
175                    format!("{disassembled}\t(not taken)")
176                }
177            }
178            "memory" => {
179                format!("{disassembled}\t({address:x})")
180            }
181            _ => disassembled,
182        }
183    }
184
185    fn format_shamt(&self, ins: Instruction, name: &str) -> String {
186        if self.pretty {
187            if ins.rd() == 0 {
188                return "nop".to_string();
189            }
190
191            if ins.shamt() == 0 {
192                return format!("move {}, {}", REGISTERS[ins.rd()], REGISTERS[ins.rt()]);
193            }
194        }
195
196        self.format_instruction(ins, name, ArgumentTypes::D_T_Shift, 0)
197    }
198
199    fn format_shift_r(&self, ins: Instruction, name: &str) -> String {
200        if self.pretty {
201            if ins.rd() == 0 && ins.rt() == 0 {
202                return "nop".to_string();
203            }
204
205            if ins.rd() == ins.rt() {
206                return format!("{name} {}, {}", REGISTERS[ins.rd()], REGISTERS[ins.rs()]);
207            }
208        }
209
210        self.format_instruction(ins, name, ArgumentTypes::D_T_S, 0)
211    }
212
213    fn format_arith_r(&self, ins: Instruction, name: &str) -> String {
214        if self.pretty {
215            if ins.rd() == 0 {
216                return "nop".to_string();
217            }
218
219            if ins.rd() == ins.rs() {
220                return format!("{name} {}, {}", REGISTERS[ins.rd()], REGISTERS[ins.rt()]);
221            }
222
223            if ins.rs() == 0 && (name == "add" || name == "addu" || name == "or" || name == "xor") {
224                return format!("move {}, {}", REGISTERS[ins.rd()], REGISTERS[ins.rt()]);
225            }
226
227            if ins.rt() == 0 && (name == "add" || name == "addu" || name == "or" || name == "xor") {
228                return format!("move {}, {}", REGISTERS[ins.rd()], REGISTERS[ins.rs()]);
229            }
230        }
231
232        self.format_instruction(ins, name, ArgumentTypes::D_S_T, 0)
233    }
234
235    fn format_arith_imm_signed(&self, ins: Instruction, name: &str) -> String {
236        let mut immediate = ins.simm16();
237        let sign = if immediate < 0 { "-" } else { "" };
238        immediate = immediate.abs();
239
240        let imm_str = if immediate < 10 {
241            format!("{sign}{immediate}")
242        } else {
243            format!("{sign}{immediate:#x}")
244        };
245
246        if self.pretty {
247            if ins.rt() == 0 {
248                return "nop".to_string();
249            }
250
251            if ins.rt() == ins.rs() {
252                return format!("{name} {}, {imm_str}", REGISTERS[ins.rt()]);
253            }
254
255            if ins.rs() == 0 {
256                return format!("move {}, {imm_str}", REGISTERS[ins.rt()]);
257            }
258        }
259
260        self.format_instruction(ins, name, ArgumentTypes::T_S_SImm, 0)
261    }
262
263    fn format_arith_imm_unsigned(&self, ins: Instruction, name: &str) -> String {
264        let immediate = ins.imm16();
265
266        let imm_str = if immediate < 10 {
267            format!("{immediate}")
268        } else {
269            format!("{immediate:#x}")
270        };
271
272        if self.pretty {
273            if ins.rt() == 0 {
274                return "nop".to_string();
275            }
276
277            if ins.rt() == ins.rs() {
278                return format!("{name} {}, {imm_str}", REGISTERS[ins.rt()]);
279            }
280
281            if ins.rs() == 0 && name != "andi" {
282                return format!("move {}, {imm_str}", REGISTERS[ins.rt()]);
283            }
284        }
285
286        self.format_instruction(ins, name, ArgumentTypes::T_S_SImm, 0)
287    }
288
289    fn format_lui(&self, ins: Instruction) -> String {
290        format!(
291            "{} {}, {:#x}",
292            if self.pretty { "move" } else { "lui" },
293            REGISTERS[ins.rt()],
294            if self.pretty {
295                ins.imm16() << 16
296            } else {
297                ins.imm16()
298            }
299        )
300    }
301
302    fn format_memory(&self, ins: Instruction, name: &str) -> String {
303        let immediate = ins.simm16();
304        let sign = if immediate < 0 { "-" } else { "" };
305        let immediate = immediate.abs();
306
307        let imm_str = if immediate == 0 {
308            "".to_string()
309        } else if immediate < 64 {
310            format!("{sign}{immediate}")
311        } else {
312            format!("{sign}{immediate:#x}")
313        };
314
315        if self.pretty {
316            return format!(
317                "{name} {}, {imm_str}({})",
318                REGISTERS[ins.rt()],
319                REGISTERS[ins.rs()]
320            );
321        }
322
323        self.format_instruction(ins, name, ArgumentTypes::T_Mem, 0)
324    }
325
326    fn format_memory_cop2(&self, ins: Instruction, name: &str) -> String {
327        let immediate = ins.simm16();
328        let sign = if immediate < 0 { "-" } else { "" };
329        let immediate = immediate.abs();
330
331        let imm_str = if immediate == 0 {
332            "".to_string()
333        } else if immediate < 64 {
334            format!("{sign}{immediate}")
335        } else {
336            format!("{sign}{immediate:#x}")
337        };
338
339        if self.pretty {
340            return format!(
341                "{name} gte_r{}, {imm_str}({})",
342                ins.rt(),
343                REGISTERS[ins.rs()]
344            );
345        }
346
347        self.format_instruction(ins, name, ArgumentTypes::T_Mem, 0)
348    }
349
350    /// Format the instruction with its name and arguments
351    fn format_instruction(
352        &self,
353        ins: Instruction,
354        name: &str,
355        arg_types: ArgumentTypes,
356        pc: u32,
357    ) -> String {
358        let args = match arg_types {
359            ArgumentTypes::None => String::new(),
360            ArgumentTypes::D_S_T => format!(
361                "{}, {}, {}",
362                REGISTERS[ins.rd()],
363                REGISTERS[ins.rs()],
364                REGISTERS[ins.rt()]
365            ),
366            ArgumentTypes::D_T_S => format!(
367                "{}, {}, {}",
368                REGISTERS[ins.rd()],
369                REGISTERS[ins.rt()],
370                REGISTERS[ins.rs()]
371            ),
372            ArgumentTypes::D_T_Shift => format!(
373                "{}, {}, {}",
374                REGISTERS[ins.rd()],
375                REGISTERS[ins.rt()],
376                ins.shamt()
377            ),
378            ArgumentTypes::T_S_SImm => format!(
379                "{}, {}, {:#x}",
380                REGISTERS[ins.rt()],
381                REGISTERS[ins.rs()],
382                ins.simm16()
383            ),
384            ArgumentTypes::T_Imm => format!("{}, {:#x}", REGISTERS[ins.rt()], ins.imm16()),
385            ArgumentTypes::T_S_Imm => format!(
386                "{}, {}, {:#x}",
387                REGISTERS[ins.rt()],
388                REGISTERS[ins.rs()],
389                ins.imm16()
390            ),
391            ArgumentTypes::T_Mem => format!(
392                "{}, {}({})",
393                REGISTERS[ins.rt()],
394                ins.simm16(),
395                REGISTERS[ins.rs()]
396            ),
397            ArgumentTypes::S => format!("{}", REGISTERS[ins.rs()]),
398            ArgumentTypes::D => format!("{}", REGISTERS[ins.rd()]),
399            ArgumentTypes::S_D => format!("{}, {}", REGISTERS[ins.rs()], REGISTERS[ins.rd()]),
400            ArgumentTypes::S_T => format!("{}, {}", REGISTERS[ins.rs()], REGISTERS[ins.rt()]),
401            ArgumentTypes::S_T_Jump => {
402                let target = pc.wrapping_add((ins.simm16() << 2) as u32);
403
404                format!(
405                    "{}, {}, {:#x}",
406                    REGISTERS[ins.rs()],
407                    REGISTERS[ins.rt()],
408                    target
409                )
410            }
411            ArgumentTypes::S_Jump => {
412                let target = pc.wrapping_add((ins.simm16() << 2) as u32);
413                format!("{}, {:#x}", REGISTERS[ins.rs()], target)
414            }
415            ArgumentTypes::Jump => {
416                let target = (pc & 0xf000_0000) | (ins.jump_target() << 2);
417                format!("{:#x}", target)
418            }
419        };
420
421        format!("{name} {args}")
422    }
423
424    /// Coprocessor instruction have unique formatting rules. This function formats
425    /// them based on the opcode and the specific coprocessor instruction.
426    fn format_coprocessor_instruction(&self, ins: Instruction) -> String {
427        // Get the coprocessor number from the opcode
428        let cop = ins.opcode() & 3;
429
430        if ins.cop_execute() {
431            // Coprocessor specific opcode
432            match cop {
433                0 => self.format_cop0_instruction(ins),
434                1 => format!("cop{} execute: {:x}", cop, ins.cop_instruction()),
435                2 => self.format_gte_instruction(ins),
436                3 => format!("cop{} execute: {:x}", cop, ins.cop_instruction()),
437                _ => unreachable!(),
438            }
439        } else {
440            let gpr = REGISTERS[ins.rt()];
441            let cop_reg = ins.rd();
442
443            match ins.cop_funct() {
444                0 => format!("mfc{cop} {gpr}, cop{cop_reg}"),
445                2 => format!("cfc{cop} {gpr}, cop{}", cop_reg + 32),
446                4 => format!("mtc{cop} cop{cop_reg}, {gpr}"),
447                6 => format!("ctc{cop} cop{}, {gpr}", cop_reg + 32),
448                _ => format!("unknown cop{} funct: {:x}", cop, ins.cop_funct()),
449            }
450        }
451    }
452
453    fn format_cop0_instruction(&self, ins: Instruction) -> String {
454        if ins.cop_instruction() == 0x10 {
455            "rfe".to_string()
456        } else {
457            format!("cop0 execute: {:x}", ins.cop_instruction())
458        }
459    }
460
461    fn format_gte_instruction(&self, ins: Instruction) -> String {
462        let unsigned = if ins.cop_instruction() & (1 << 10) != 0 {
463            "U"
464        } else {
465            "u"
466        };
467        let fixed_point = if ins.cop_instruction() & (1 << 19) != 0 {
468            "F"
469        } else {
470            "f"
471        };
472
473        let mvmva_matrix = (ins.cop_instruction() >> 17) & 3;
474        let mvmva_vector = (ins.cop_instruction() >> 15) & 3;
475        let mvmva_tx_vector = (ins.cop_instruction() >> 13) & 3;
476
477        let base = match ins.cop_instruction() & 0x3f {
478            0x01 => "gte.rtps".to_string(),
479            0x06 => "gte.nclip".to_string(),
480            0x0c => "gte.op".to_string(),
481            0x10 => "gte.dpcs".to_string(),
482            0x11 => "gte.intpl".to_string(),
483            0x12 => "gte.mvmva".to_string(),
484            0x13 => "gte.ncds".to_string(),
485            0x14 => "gte.cdp".to_string(),
486            0x16 => "gte.ncdt".to_string(),
487            0x1b => "gte.nccs".to_string(),
488            0x1c => "gte.cc".to_string(),
489            0x1e => "gte.ncs".to_string(),
490            0x20 => "gte.nct".to_string(),
491            0x28 => "gte.sqr".to_string(),
492            0x29 => "gte.dcpl".to_string(),
493            0x2a => "gte.dpct".to_string(),
494            0x2d => "gte.avsz3".to_string(),
495            0x2e => "gte.avsz4".to_string(),
496            0x30 => "gte.rtpt".to_string(),
497            0x3d => "gte.gpf".to_string(),
498            0x3e => "gte.gpl".to_string(),
499            0x3f => "gte.ncct".to_string(),
500            _ => format!("gte execute: {:x}", ins.cop_instruction()),
501        };
502
503        let flags = unsigned.to_string() + fixed_point;
504        if base == "gte.mvmva" {
505            format!(
506                "{base} {flags}, midx: {}, vidx: {}, tvidx: {}",
507                mvmva_matrix, mvmva_vector, mvmva_tx_vector
508            )
509        } else {
510            format!("{base} {flags}")
511        }
512    }
513}