mosis/
lib.rs

1//! A simple disassembler for [`MOSIS`], a pen and paper instruction format for teaching and
2//! learning reverse engineering.
3//!
4//! [`MOSIS`]: https://github.com/JHUAPL/Beat-the-Machine
5
6/// `MOSIS` general purpose registers.
7#[derive(Debug, PartialEq, Eq, Copy, Clone)]
8#[repr(u8)]
9pub enum Register {
10    R0,
11    R1,
12    R2,
13    R3,
14    R4,
15    R5,
16    R6,
17    R7,
18    R8,
19    R9,
20    Ra,
21    Rb,
22    Rc,
23    Rd,
24    Re,
25    Rf,
26}
27
28impl From<Register> for u8 {
29    fn from(reg: Register) -> Self {
30        reg as u8
31    }
32}
33
34impl TryFrom<u8> for Register {
35    type Error = MOSISError;
36    fn try_from(value: u8) -> Result<Self, Self::Error> {
37        use Register::*;
38        Ok(match value {
39            0 => R0,
40            1 => R1,
41            2 => R2,
42            3 => R3,
43            4 => R4,
44            5 => R5,
45            6 => R6,
46            7 => R7,
47            8 => R8,
48            9 => R9,
49            10 => Ra,
50            11 => Rb,
51            12 => Rc,
52            13 => Rd,
53            14 => Re,
54            15 => Rf,
55            _ => return Err(MOSISError::InvalidRegister),
56        })
57    }
58}
59
60/// Type alias to `u16`
61///
62/// Addresses are used as [`Jmp`][`Instruction::Jmp`] and [`Call`][`Instruction::Call`] targets.
63/// These are 16-bit aligned, so addresses are effectively a target index to jump to, within an
64/// array of instructions.
65pub type Address = u16;
66
67/// `MOSIS` instruction type.
68///
69/// All instructions are 16-bits. The first 4 bits represent the opcode, and the remaining 12 bits
70/// are used in an instruction-dependent way. Instructions have between 0 and 3 parameters. Most of
71/// the instruction names are fixed, except the conditional jump instruction (Jcc).
72#[derive(Debug, PartialEq, Eq)]
73pub enum Instruction {
74    /// Copy the contents of the `src` register to the `dst` register.
75    Mov { dst: Register, src: Register },
76
77    /// Move a number encoded in the instruction (`imm`)into the `dst` register.
78    Movi { dst: Register, imm: u8 },
79
80    /// Add two registers `x` and `y`, and store the result in the `dst` register.
81    Add { dst: Register, x: Register, y: Register },
82
83    /// Subtract register `y` from register `x`, (`x - y`), and store the result in the `dst`
84    /// register.
85    Sub { dst: Register, x: Register, y: Register },
86
87    /// Compare two registers (using [`Sub`][`Instruction::Sub`]) and set the flags register. Used by
88    /// [`Jcc`][`Instruction::Jcc`].
89    Cmp { x: Register, y: Register },
90
91    /// Jump if a certain condition is true (based on the result of a [`Cmp`][`Instruction::Cmp`]).
92    ///
93    /// The conditional jump instruction’s name can be rendered differently depending on the
94    /// condition (e.g. it becomes `JLT` when checking for "less than" or `JNE` when checking for
95    /// "not equal").
96    ///
97    /// If condition is true, jump to the offset; otherwise, execute the next instruction. The
98    /// offset is an instruction count, positive or negative based on the sign bit. Offset is from
99    /// the _beginning_ of the `Jcc` instruction.
100    Jcc { cond: Condition, offset: i8 },
101
102    /// Jump to a fixed [address][`Address`].
103    Jmp { address: Address },
104
105    /// Call a function at a fixed [address][`Address`].
106    ///
107    /// More specifically, push the address of the following instruction onto the call stack, then
108    /// [`Jmp`][`Instruction::Jmp`] to the address specified.
109    Call { address: Address },
110
111    /// Return from a function.
112    ///
113    /// More specifically, pop the address off the call stack, and [`Jmp`][`Instruction::Jmp`]
114    /// there.
115    Ret,
116
117    /// No operation. Do nothing.
118    Nop,
119
120    /// Read from external memory or device and store in `dst` register.
121    In { dst: Register },
122
123    /// Write contents of `src` register to external memory or device.
124    Out { src: Register },
125
126    /// Multiply two registers `x` and `y` and store the result in the `dst` register.
127    Mul { dst: Register, x: Register, y: Register },
128}
129
130/// Condition codes for the [conditional jump][`Instruction::Jcc`] instruction.
131#[derive(Debug, PartialEq, Eq, Clone, Copy)]
132#[repr(u8)]
133pub enum Condition {
134    Equal,
135    LessThan,
136    GreaterThan,
137    LessThanOrEqual,
138    GreaterThanOrEqual,
139    NotEqual,
140}
141
142/// Possible errors that occur when disassembling `u16` to [`Instruction`].
143#[derive(Debug, PartialEq, Eq, thiserror::Error)]
144pub enum MOSISError {
145    #[error("invalid opcode")]
146    InvalidOpcode,
147    #[error("invalid register")]
148    InvalidRegister,
149    #[error("invalid condition")]
150    InvalidCondition,
151}
152
153mod opcode {
154    pub const MOV: u8 = 0;
155    pub const MOVI: u8 = 1;
156    pub const ADD: u8 = 2;
157    pub const SUB: u8 = 3;
158    pub const CMP: u8 = 4;
159    pub const JCC: u8 = 5;
160    pub const JMP: u8 = 6;
161    pub const CALL: u8 = 7;
162    pub const RET: u8 = 8;
163    pub const NOP: u8 = 9;
164    pub const IN: u8 = 10;
165    pub const OUT: u8 = 11;
166    pub const MUL: u8 = 12;
167}
168
169impl Instruction {
170    /// Assemble an instruction to `u16`.
171    pub fn assemble(&self) -> u16 {
172        use Instruction::*;
173        let (a, b, c, d) = match *self {
174            Mov { dst, src } => (opcode::MOV, 0u8, dst.into(), src.into()),
175            Movi { dst, imm } => (opcode::MOVI, dst.into(), 0, imm),
176            Add { dst, x, y } => (opcode::ADD, dst.into(), x.into(), y.into()),
177            Sub { dst, x, y } => (opcode::SUB, dst.into(), x.into(), y.into()),
178            Cmp { x, y } => (opcode::CMP, 0, x.into(), y.into()),
179            Jcc { cond, offset } => {
180                let sign = if offset.is_negative() { 1 } else { 0 };
181                let val = sign << 7 | offset.abs() as u8;
182                (opcode::JCC, cond as u8, 0, val)
183            }
184            Jmp { address } => (opcode::JMP, (address >> 8) as u8, 0, address as u8),
185            Call { address } => (opcode::CALL, (address >> 8) as u8, 0, address as u8),
186            Ret => (opcode::RET, 0, 0, 0),
187            Nop => (opcode::NOP, 0, 0, 0),
188            In { dst } => (opcode::IN, 0, 0, dst.into()),
189            Out { src } => (opcode::OUT, 0, 0, src.into()),
190            Mul { dst, x, y } => (opcode::MUL, dst.into(), x.into(), y.into()),
191        };
192
193        let hi = a << 4 | b;
194        let lo = c << 4 | d;
195        u16::from_be_bytes([hi, lo])
196    }
197
198    /// All MOSIS instructions are 16 bits, but endianness is not specified in the documentation,
199    /// so disassembly simply takes a `u16` to skirt around this ambiguity.
200    pub fn disassemble(inst: u16) -> Result<Self, MOSISError> {
201        // opcode is always first nibble
202        let opcode = ((inst & 0xf000) >> 12) as u8;
203
204        // next nibbles are often useful
205        let a = ((inst & 0x0f00) >> 8) as u8;
206        let b = ((inst & 0x00f0) >> 4) as u8;
207        let c = (inst & 0x000f) as u8;
208
209        // address mask
210        let address = inst & 0x0fff;
211
212        Ok(match opcode {
213            opcode::MOV => Self::Mov { dst: b.try_into()?, src: c.try_into()? },
214            0b0001 => Self::Movi {
215                dst: a.try_into()?,
216                imm: (inst & 0x00ff) as u8,
217            },
218            opcode::ADD => Self::Add {
219                dst: a.try_into()?,
220                x: b.try_into()?,
221                y: c.try_into()?,
222            },
223            opcode::SUB => Self::Sub {
224                dst: a.try_into()?,
225                x: b.try_into()?,
226                y: c.try_into()?,
227            },
228            opcode::CMP => Self::Cmp { x: b.try_into()?, y: c.try_into()? },
229            opcode::JCC => {
230                let mut offset = (inst & 0x007f) as u8 as i8;
231                if inst & 0x0080 != 0 {
232                    offset = -offset;
233                }
234                Self::Jcc {
235                    cond: match a {
236                        0b0000 => Condition::Equal,
237                        0b0001 => Condition::LessThan,
238                        0b0010 => Condition::GreaterThan,
239                        0b0011 => Condition::LessThanOrEqual,
240                        0b0100 => Condition::GreaterThanOrEqual,
241                        0b0101 => Condition::NotEqual,
242                        _ => return Err(MOSISError::InvalidCondition),
243                    },
244                    offset,
245                }
246            }
247            opcode::JMP => Self::Jmp { address },
248            opcode::CALL => Self::Call { address },
249            opcode::RET => Self::Ret,
250            opcode::NOP => Self::Nop,
251            opcode::IN => Self::In { dst: c.try_into()? },
252            opcode::OUT => Self::Out { src: c.try_into()? },
253            opcode::MUL => Self::Mul {
254                dst: a.try_into()?,
255                x: b.try_into()?,
256                y: c.try_into()?,
257            },
258            _ => return Err(MOSISError::InvalidOpcode),
259        })
260    }
261}
262
263/// Iterates over a slice of bytes, performing a linear sweep disassembly to MOSIS
264/// [`Instruction`]s.
265///
266/// Iterate over a byte slice, disassembling as big endian `u16`s, yielding [`Instruction`]s or
267/// [`MOSISError`]s when disassembly fails.
268///
269/// # Examples
270///
271/// ```
272/// use mosis::{linear_sweep, Instruction::*, Register::*};
273///
274/// let bytes = [0x2312, 0x2a34, 0x3a98, 0x3fed];
275/// let mut x = linear_sweep(&bytes);
276///
277/// assert_eq!(x.next(), Some(Ok(Add { dst: R3, x: R1, y: R2 })));
278/// assert_eq!(x.next(), Some(Ok(Add { dst: Ra, x: R3, y: R4 })));
279/// assert_eq!(x.next(), Some(Ok(Sub { dst: Ra, x: R9, y: R8 })));
280/// assert_eq!(x.next(), Some(Ok(Sub { dst: Rf, x: Re, y: Rd })));
281/// assert_eq!(x.next(), None);
282/// ```
283///
284/// ```
285/// use mosis::linear_sweep;
286///
287/// # let bytes = &[0x2312, 0x2a34, 0x3a98, 0x3fed];
288/// for instruction in linear_sweep(bytes) {
289///     println!("{}", instruction.unwrap());
290/// }
291/// ```
292pub fn linear_sweep(data: &impl AsRef<[u16]>) -> LinearSweep<'_> {
293    LinearSweep { data: data.as_ref() }
294}
295
296pub struct LinearSweep<'a> {
297    data: &'a [u16],
298}
299
300impl Iterator for LinearSweep<'_> {
301    type Item = Result<Instruction, MOSISError>;
302
303    fn next(&mut self) -> Option<Self::Item> {
304        // get the next instruction to decode
305        let inst = self.data.first()?;
306
307        // advance the data slice
308        self.data = &self.data[1..];
309
310        Some(Instruction::disassemble(*inst))
311    }
312}
313
314impl TryFrom<u16> for Instruction {
315    type Error = MOSISError;
316    fn try_from(value: u16) -> Result<Self, Self::Error> {
317        Instruction::disassemble(value)
318    }
319}
320
321impl std::fmt::Display for Register {
322    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
323        write!(f, "R{}", *self as u8)
324    }
325}
326
327impl std::fmt::Display for Instruction {
328    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
329        macro_rules! x {
330            ($op:tt) => {
331                write!(f, "{:<4}", stringify!($op))
332            };
333            ($op:tt,$a:expr) => {
334                write!(f, "{:<4} {}", stringify!($op), $a)
335            };
336            ($op:tt,$a:expr,$b:expr) => {{
337                let a = format!("{},", $a);
338                write!(f, "{:<4} {:<4} {}", stringify!($op), a, $b)
339            }};
340            ($op:tt,$a:expr,$b:expr,$c:expr) => {{
341                let a = format!("{},", $a);
342                let b = format!("{},", $b);
343                write!(f, "{:<4} {:<4} {:<4} {}", stringify!($op), a, b, $c)
344            }};
345        }
346
347        use Condition::*;
348        use Instruction::*;
349
350        match *self {
351            Mov { dst, src } => x!(MOV, dst, src),
352            Movi { dst, imm } => x!(MOVI, dst, imm),
353            Add { dst, x, y } => x!(ADD, dst, x, y),
354            Sub { dst, x, y } => x!(SUB, dst, x, y),
355            Cmp { x, y } => x!(CMP, x, y),
356            Jcc { cond, offset } => match cond {
357                Equal => x!(JEQ, offset),
358                LessThan => x!(JLT, offset),
359                GreaterThan => x!(JGT, offset),
360                LessThanOrEqual => x!(JLTE, offset),
361                GreaterThanOrEqual => x!(JGTE, offset),
362                NotEqual => x!(JNE, offset),
363            },
364            Jmp { address } => x!(JMP, address),
365            Call { address } => x!(CALL, address),
366            Ret => x!(RET),
367            Nop => x!(NOP),
368            In { dst } => x!(IN, dst),
369            Out { src } => x!(OUT, src),
370            Mul { dst, x, y } => x!(MUL, dst, x, y),
371        }
372    }
373}
374
375#[cfg(test)]
376mod tests {
377    use super::*;
378    use Condition::*;
379    use Instruction::*;
380    use Register::*;
381
382    #[test]
383    fn disassembly() -> Result<(), MOSISError> {
384        macro_rules! assert_dis {
385            ($d:expr, $i:expr) => {
386                assert_eq!(Instruction::disassemble($d)?, $i);
387            };
388        }
389
390        assert_dis!(0x0012, Mov { dst: R1, src: R2 });
391        assert_dis!(0x0021, Mov { dst: R2, src: R1 });
392        assert_dis!(0x1122, Movi { dst: R1, imm: 0x22 });
393        assert_dis!(0x1b01, Movi { dst: Rb, imm: 1 });
394        assert_dis!(0x2312, Add { dst: R3, x: R1, y: R2 });
395        assert_dis!(0x2a34, Add { dst: Ra, x: R3, y: R4 });
396        assert_dis!(0x3a98, Sub { dst: Ra, x: R9, y: R8 });
397        assert_dis!(0x3fed, Sub { dst: Rf, x: Re, y: Rd });
398        assert_dis!(0x4012, Cmp { x: R1, y: R2 });
399        assert_dis!(0x40bc, Cmp { x: Rb, y: Rc });
400        assert_dis!(0x5085, Jcc { cond: Equal, offset: -5 });
401        assert_dis!(0x40ab, Cmp { x: Ra, y: Rb });
402        assert_dis!(0x542a, Jcc { cond: GreaterThanOrEqual, offset: 42 });
403        assert_dis!(0x61a2, Jmp { address: 0x1a2 });
404        assert_dis!(0x6eab, Jmp { address: 0xeab });
405        assert_dis!(0x7fce, Call { address: 0xfce });
406        assert_dis!(0x7123, Call { address: 0x123 });
407        assert_dis!(0x8000, Ret);
408        assert_dis!(0x9000, Nop);
409        assert_dis!(0xa004, In { dst: R4 });
410        assert_dis!(0xa00c, In { dst: Rc });
411        assert_dis!(0xb005, Out { src: R5 });
412        assert_dis!(0xb00d, Out { src: Rd });
413        assert_dis!(0xca98, Mul { dst: Ra, x: R9, y: R8 });
414        assert_dis!(0xcfed, Mul { dst: Rf, x: Re, y: Rd });
415
416        Ok(())
417    }
418
419    fn roundtrip(mc: u16) -> bool {
420        let (inst, mc) = match Instruction::disassemble(mc) {
421            // special case: force 0 offset to have positive sign
422            Ok(i @ Jcc { offset: 0, .. }) => (i, mc & 0xff7f),
423
424            // mask out the unused bits to always be zero
425            Ok(i @ Nop) => (i, mc & 0xf000),
426            Ok(i @ Ret) => (i, mc & 0xf000),
427            Ok(i @ Mov { .. }) => (i, mc & 0xf0ff),
428            Ok(i @ Cmp { .. }) => (i, mc & 0xf0ff),
429            Ok(i @ In { .. }) => (i, mc & 0xf00f),
430            Ok(i @ Out { .. }) => (i, mc & 0xf00f),
431
432            // invalid instructions can't be re-assembled, so just pass
433            Err(_) => return true,
434            Ok(i) => (i, mc),
435        };
436        mc == inst.assemble()
437    }
438
439    #[test]
440    fn roundtrip_bruteforce() {
441        for x in 0..=u16::MAX {
442            assert!(roundtrip(x));
443        }
444    }
445
446    #[test]
447    fn print_test() -> Result<(), MOSISError> {
448        println!();
449        let bytes = &[0x23, 0x12, 0x2a, 0x34, 0x3a, 0x98, 0x3f, 0xed];
450        for instruction in linear_sweep(bytes) {
451            println!("{}", instruction?);
452        }
453        Ok(())
454    }
455}