sleigh/
lib.rs

1use std::{collections::HashMap, pin::Pin};
2
3use cxx::{let_cxx_string, UniquePtr};
4use sleigh_sys::{RustAssemblyEmit, RustLoadImage, RustPCodeEmit};
5
6pub mod sla;
7
8pub type Opcode = sleigh_sys::Opcode;
9pub type SpaceType = sleigh_sys::SpaceType;
10
11#[derive(Debug)]
12pub struct AddrSpace {
13    pub name: String,
14    pub ty: SpaceType,
15}
16
17#[derive(Debug)]
18pub struct VarnodeData {
19    pub space: AddrSpace,
20    pub offset: u64,
21    pub size: u32,
22}
23
24impl From<&sleigh_sys::ffi::VarnodeData> for VarnodeData {
25    fn from(var: &sleigh_sys::ffi::VarnodeData) -> Self {
26        let address = sleigh_sys::ffi::getVarnodeDataAddress(var);
27        let offset = address.getOffset();
28        let space = address.getSpace();
29        let space = unsafe {
30            let space = &*space;
31            let ty = sleigh_sys::ffi::getAddrSpaceType(space);
32            let ty = sleigh_sys::SpaceType::from_u32(ty).unwrap();
33            let name = space.getName().to_string();
34            AddrSpace { name, ty }
35        };
36        let size = sleigh_sys::ffi::getVarnodeSize(var);
37        Self {
38            space,
39            offset,
40            size,
41        }
42    }
43}
44
45#[derive(Debug)]
46pub struct PCode {
47    pub address: u64,
48    pub opcode: Opcode,
49    pub vars: Vec<VarnodeData>,
50    pub outvar: Option<VarnodeData>,
51}
52
/// One disassembled instruction as reported by the assembly emitter.
///
/// All fields are plain owned data, so the type can be cloned and compared.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Instruction {
    /// Offset of the address the instruction was decoded at.
    pub address: u64,
    /// Mnemonic text handed to the emitter.
    pub mnemonic: String,
    /// Remaining instruction text handed to the emitter (operands).
    pub body: String,
}
59
60struct AssemblyEmit {
61    insts: Vec<Instruction>,
62}
63
64impl sleigh_sys::AssemblyEmit for AssemblyEmit {
65    fn dump(&mut self, addr: &sleigh_sys::ffi::Address, mnem: &str, body: &str) {
66        let address = addr.getOffset();
67        let mnemonic = mnem.to_string();
68        let body = body.to_string();
69        self.insts.push(Instruction {
70            address,
71            mnemonic,
72            body,
73        });
74    }
75}
76
77struct PCodeEmit {
78    pcodes: Vec<PCode>,
79}
80
81impl sleigh_sys::PCodeEmit for PCodeEmit {
82    fn dump(
83        &mut self,
84        address: &sleigh_sys::ffi::Address,
85        opcode: sleigh_sys::Opcode,
86        outvar: Option<&sleigh_sys::ffi::VarnodeData>,
87        vars: &[sleigh_sys::ffi::VarnodeData],
88    ) {
89        let vars = vars.iter().map(VarnodeData::from).collect::<Vec<_>>();
90        let outvar = outvar.map(VarnodeData::from);
91        let address = address.getOffset();
92        let pcode = PCode {
93            address,
94            opcode,
95            vars,
96            outvar,
97        };
98        self.pcodes.push(pcode);
99    }
100}
101
/// Load image backed by a borrowed byte slice.
struct SliceLoader<'a> {
    /// Address that corresponds to `data[0]`.
    start: u64,
    /// Bytes of the image.
    data: &'a [u8],
}
106
107impl<'a> sleigh_sys::LoadImage for SliceLoader<'a> {
108    fn load_fill(&mut self, ptr: &mut [u8], addr: &sleigh_sys::ffi::Address) {
109        let addr = addr.getOffset();
110        let len = self.data.len() as u64;
111        let required = ptr.len() as u64;
112        ptr.fill(0);
113
114        if self.start <= addr {
115            let fill_len = required.min(len) as usize;
116            let offset = (addr - self.start) as usize;
117            ptr[..fill_len].copy_from_slice(&self.data[offset..offset + fill_len]);
118        }
119    }
120}
121
/// Load image that owns its bytes, so the image can be replaced between calls.
struct VectorLoader {
    /// Address that corresponds to `data[0]`.
    start: u64,
    /// Bytes of the image.
    data: Vec<u8>,
}
126
127impl sleigh_sys::LoadImage for VectorLoader {
128    fn load_fill(&mut self, ptr: &mut [u8], addr: &sleigh_sys::ffi::Address) {
129        let mut s = SliceLoader {
130            start: self.start,
131            data: &self.data,
132        };
133        s.load_fill(ptr, addr);
134    }
135}
136
/// Processor mode selector accepted by `DecompilerBuilder::x86`.
///
/// The builder uses the mode to pick the spec ("x86" or "x86-64") and the
/// default values of the `addrsize` and `opsize` context variables.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum X86Mode {
    /// "x86" spec with `addrsize` and `opsize` defaulting to 0.
    Mode16,
    /// "x86" spec with `addrsize` and `opsize` defaulting to 1.
    Mode32,
    /// "x86-64" spec with `addrsize` and `opsize` defaulting to 2.
    Mode64,
}
142
/// Mode selector mirroring [`X86Mode`].
///
/// NOTE(review): no builder method in the visible source takes this type;
/// 64-bit x86 is selected through `X86Mode::Mode64`. Confirm whether it is
/// still needed.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum X64Mode {
    Mode16,
    Mode32,
    Mode64,
}
148
/// Instruction-set mode accepted by `DecompilerBuilder::arm`.
///
/// The builder translates it into the default of the `TMode` context variable.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ArmMode {
    /// `TMode` defaults to 0.
    Arm,
    /// `TMode` defaults to 1.
    Thumb,
}
153
/// ARM architecture version accepted by `DecompilerBuilder::arm`.
///
/// The builder maps each variant to the version part of the spec name
/// (`"4"`, `"4t"`, `"5"`, `"5t"`, `"6"`, `"7"`, `"8"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ArmVersion {
    Arm4,
    Arm4t,
    Arm5,
    Arm5t,
    Arm6,
    Arm7,
    Arm8,
}
163
/// Byte order used by the builder methods to pick a spec variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Endian {
    /// Selects the little-endian spec (`"AARCH64"`, `"ARM<v>_le"`).
    LittleEndian,
    /// Selects the big-endian spec (`"AARCH64BE"`, `"ARM<v>_be"`).
    BigEndian,
}
168
/// A block of bytes together with the address it is based at.
///
/// NOTE(review): not referenced elsewhere in the visible source.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct Image {
    /// Base address of the image.
    pub base_addr: u64,
    /// Raw bytes of the image.
    pub data: Vec<u8>,
}
173
/// Builder state once an architecture has been selected.
#[derive(Debug, Clone)]
pub struct ArchState {
    /// Spec content returned by `sla::get_arch_sla` for the chosen architecture.
    spec: String,
    /// Context variable defaults applied to the decompiler when it is built,
    /// keyed by variable name.
    var: HashMap<String, u32>,
}
178
/// Staged builder for a `Decompiler`.
///
/// `T` tracks progress: `DecompilerBuilder<()>` has no architecture selected
/// yet and offers the architecture methods, while
/// `DecompilerBuilder<ArchState>` offers `build`.
#[derive(Debug, Clone)]
pub struct DecompilerBuilder<T> {
    /// Data accumulated so far for the current stage.
    state: T,
}
182
183impl DecompilerBuilder<()> {
184    pub fn x86(self, mode: X86Mode) -> DecompilerBuilder<ArchState> {
185        let mut var = HashMap::new();
186        let m = match mode {
187            X86Mode::Mode16 => 0,
188            X86Mode::Mode32 => 1,
189            X86Mode::Mode64 => 2,
190        };
191        var.insert("addrsize".to_string(), m);
192        var.insert("opsize".to_string(), m);
193        let spec = match mode {
194            X86Mode::Mode16 | X86Mode::Mode32 => sla::get_arch_sla("x86").unwrap(),
195            X86Mode::Mode64 => sla::get_arch_sla("x86-64").unwrap(),
196        };
197        DecompilerBuilder {
198            state: ArchState { spec, var },
199        }
200    }
201
202    pub fn aarch64(self, endian: Endian) -> DecompilerBuilder<ArchState> {
203        let e = match endian {
204            Endian::LittleEndian => "",
205            Endian::BigEndian => "BE",
206        };
207
208        let name = format!("AARCH64{}", e);
209        let spec = sla::get_arch_sla(&name).unwrap();
210
211        DecompilerBuilder {
212            state: ArchState {
213                spec,
214                var: HashMap::new(),
215            },
216        }
217    }
218
219    pub fn arm(
220        self,
221        version: ArmVersion,
222        endian: Endian,
223        mode: ArmMode,
224    ) -> DecompilerBuilder<ArchState> {
225        let v = match version {
226            ArmVersion::Arm4 => "4",
227            ArmVersion::Arm5 => "5",
228            ArmVersion::Arm6 => "6",
229            ArmVersion::Arm7 => "7",
230            ArmVersion::Arm8 => "8",
231            ArmVersion::Arm4t => "4t",
232            ArmVersion::Arm5t => "5t",
233        };
234        let e = match endian {
235            Endian::LittleEndian => "le",
236            Endian::BigEndian => "be",
237        };
238
239        let mut var = HashMap::new();
240        let t = if let ArmMode::Thumb = mode { 1 } else { 0 };
241        var.insert("TMode".to_string(), t);
242
243        let name = format!("ARM{}_{}", v, e);
244        let spec = sla::get_arch_sla(&name).unwrap();
245
246        DecompilerBuilder {
247            state: ArchState { spec, var },
248        }
249    }
250
251    pub fn dalvik(self) -> DecompilerBuilder<ArchState> {
252        DecompilerBuilder {
253            state: ArchState {
254                spec: sla::get_arch_sla("Dalvik").unwrap(),
255                var: HashMap::new(),
256            },
257        }
258    }
259
260    pub fn jvm(self) -> DecompilerBuilder<ArchState> {
261        DecompilerBuilder {
262            state: ArchState {
263                spec: sla::get_arch_sla("JVM").unwrap(),
264                var: HashMap::new(),
265            },
266        }
267    }
268}
269
270impl DecompilerBuilder<ArchState> {
271    pub fn build(self) -> Decompiler {
272        let_cxx_string!(spec = self.state.spec);
273        let doc = sleigh_sys::ffi::newDocumentStorage(&spec);
274        let loader = VectorLoader {
275            start: 0,
276            data: vec![],
277        };
278        let mut loader = Box::new(loader);
279
280        unsafe {
281            let rust_loader = Box::new(RustLoadImage::from_internal(std::mem::transmute::<
282                _,
283                &'static mut VectorLoader,
284            >(loader.as_mut())));
285            let rust_loader: *mut RustLoadImage<'static> = Box::leak(rust_loader);
286            let mut inner = sleigh_sys::ffi::newDecompiler(rust_loader, doc);
287
288            let ctx = inner.pin_mut().getContext();
289            for (k, v) in self.state.var.iter() {
290                let_cxx_string!(key = k);
291                let val = *v;
292                Pin::new_unchecked(&mut *ctx).setVariableDefault(&key, val)
293            }
294
295            Decompiler {
296                loader,
297                rust_loader,
298                inner,
299            }
300        }
301    }
302}
303
304pub struct Decompiler {
305    loader: Box<VectorLoader>,
306    rust_loader: *mut RustLoadImage<'static>,
307    inner: UniquePtr<sleigh_sys::ffi::Decompiler>,
308}
309
310impl Drop for Decompiler {
311    fn drop(&mut self) {
312        unsafe {
313            let _ = Box::from_raw(self.rust_loader);
314        }
315    }
316}
317
318impl Decompiler {
319    pub fn builder() -> DecompilerBuilder<()> {
320        DecompilerBuilder { state: () }
321    }
322
323    pub fn translate(&mut self, code: &[u8], addr: u64) -> (usize, Vec<PCode>) {
324        self.loader.data.clear();
325        self.loader.data.extend_from_slice(code);
326        self.loader.start = addr;
327        let mut emit = PCodeEmit { pcodes: vec![] };
328        unsafe {
329            let mut rust_emit = RustPCodeEmit::from_internal(&mut emit);
330            let n = self
331                .inner
332                .pin_mut()
333                .translate(&mut rust_emit as *mut _, addr);
334            (n as usize, emit.pcodes)
335        }
336    }
337
338    pub fn disassemble(&mut self, code: &[u8], addr: u64) -> (usize, Vec<Instruction>) {
339        self.loader.data.clear();
340        self.loader.data.extend_from_slice(code);
341        self.loader.start = addr;
342        let mut emit = AssemblyEmit { insts: vec![] };
343        unsafe {
344            let mut rust_emit = RustAssemblyEmit::from_internal(&mut emit);
345            let n = self.inner.pin_mut().disassemble(&mut rust_emit as _, addr);
346            (n as usize, emit.insts)
347        }
348    }
349}
350
#[cfg(test)]
mod tests {
    use super::*;

    /// Translates and then disassembles `code` at `addr`, printing both results.
    fn run(decompiler: &mut Decompiler, code: &[u8], addr: u64) {
        let (n, pcodes) = decompiler.translate(code, addr);
        println!("{} {:?}", n, pcodes);
        let (n, insts) = decompiler.disassemble(code, addr);
        println!("{} {:?}", n, insts);
    }

    /// Reuses one decompiler for many translate/disassemble rounds.
    #[test]
    fn test_loop() {
        let mut decompiler = Decompiler::builder()
            .arm(ArmVersion::Arm8, Endian::LittleEndian, ArmMode::Arm)
            .build();
        for _ in 0..100 {
            run(&mut decompiler, b"\x01\x00\x80\x00", 0x1000);
        }
    }

    /// Runs the x86 and ARM tests on two threads at the same time.
    #[test]
    fn test_concurrent() {
        let x86 = std::thread::spawn(test_x86);
        let arm = std::thread::spawn(test_arm);
        x86.join().unwrap();
        arm.join().unwrap();
    }

    #[test]
    fn test_x86() {
        let mut decompiler = Decompiler::builder().x86(X86Mode::Mode32).build();
        run(&mut decompiler, b"\x05\x00\x10\x00\x00", 0x1000);

        let mut decompiler = Decompiler::builder().x86(X86Mode::Mode64).build();
        run(&mut decompiler, b"\x48\x31\xd8", 0x100010001);
    }

    #[test]
    fn test_arm() {
        let mut decompiler = Decompiler::builder()
            .arm(ArmVersion::Arm8, Endian::LittleEndian, ArmMode::Arm)
            .build();
        run(&mut decompiler, b"\x01\x00\x80\x00", 0x1000);
    }

    #[test]
    fn test_arm_thumb() {
        let mut decompiler = Decompiler::builder()
            .arm(ArmVersion::Arm5t, Endian::LittleEndian, ArmMode::Thumb)
            .build();
        run(&mut decompiler, b"\x11\x44\x11\x44", 0x1000);
    }

    #[test]
    fn test_dalvik() {
        let mut decompiler = Decompiler::builder().dalvik().build();
        run(&mut decompiler, b"\x90\x00\x02\x03", 0x1000);
    }
}