1use std::{collections::HashMap, pin::Pin};
2
3use cxx::{let_cxx_string, UniquePtr};
4use sleigh_sys::{RustAssemblyEmit, RustLoadImage, RustPCodeEmit};
5
6pub mod sla;
7
/// Alias for [`sleigh_sys::Opcode`]; this is the type of [`PCode::opcode`].
pub type Opcode = sleigh_sys::Opcode;
/// Alias for [`sleigh_sys::SpaceType`]; this is the type of [`AddrSpace::ty`].
pub type SpaceType = sleigh_sys::SpaceType;
10
/// Owned description of an address space, copied out of the FFI layer so it
/// can outlive the underlying C++ object.
#[derive(Debug)]
pub struct AddrSpace {
    /// Name of the space, as returned by the FFI `getName()` call.
    pub name: String,
    /// Type of the space, decoded from the raw value returned by
    /// `getAddrSpaceType`.
    pub ty: SpaceType,
}
16
/// Owned copy of an FFI varnode: a location identified by an address space,
/// an offset inside that space, and a size.
#[derive(Debug)]
pub struct VarnodeData {
    /// Address space the varnode lives in.
    pub space: AddrSpace,
    /// Offset of the varnode inside `space` (the FFI address' `getOffset()`).
    pub offset: u64,
    /// Size reported by `getVarnodeSize` (presumably in bytes — TODO confirm).
    pub size: u32,
}
23
24impl From<&sleigh_sys::ffi::VarnodeData> for VarnodeData {
25 fn from(var: &sleigh_sys::ffi::VarnodeData) -> Self {
26 let address = sleigh_sys::ffi::getVarnodeDataAddress(var);
27 let offset = address.getOffset();
28 let space = address.getSpace();
29 let space = unsafe {
30 let space = &*space;
31 let ty = sleigh_sys::ffi::getAddrSpaceType(space);
32 let ty = sleigh_sys::SpaceType::from_u32(ty).unwrap();
33 let name = space.getName().to_string();
34 AddrSpace { name, ty }
35 };
36 let size = sleigh_sys::ffi::getVarnodeSize(var);
37 Self {
38 space,
39 offset,
40 size,
41 }
42 }
43}
44
/// One p-code operation collected while translating machine code.
#[derive(Debug)]
pub struct PCode {
    /// Offset of the address the operation was emitted for.
    pub address: u64,
    /// Operation code.
    pub opcode: Opcode,
    /// Varnodes passed alongside the opcode (presumably the operation's
    /// inputs — confirm against the sleigh-sys `PCodeEmit` contract).
    pub vars: Vec<VarnodeData>,
    /// Optional output varnode; `None` when the emitter supplied none.
    pub outvar: Option<VarnodeData>,
}
52
/// One disassembled instruction collected while disassembling machine code.
#[derive(Debug)]
pub struct Instruction {
    /// Offset of the address the instruction was emitted for.
    pub address: u64,
    /// Mnemonic text handed to the assembly emitter.
    pub mnemonic: String,
    /// Remaining instruction text handed to the assembly emitter (presumably
    /// the operand list).
    pub body: String,
}
59
/// Sink that accumulates every instruction reported through the
/// `sleigh_sys::AssemblyEmit` callback.
struct AssemblyEmit {
    /// Instructions in the order they were reported.
    insts: Vec<Instruction>,
}
63
64impl sleigh_sys::AssemblyEmit for AssemblyEmit {
65 fn dump(&mut self, addr: &sleigh_sys::ffi::Address, mnem: &str, body: &str) {
66 let address = addr.getOffset();
67 let mnemonic = mnem.to_string();
68 let body = body.to_string();
69 self.insts.push(Instruction {
70 address,
71 mnemonic,
72 body,
73 });
74 }
75}
76
/// Sink that accumulates every p-code operation reported through the
/// `sleigh_sys::PCodeEmit` callback.
struct PCodeEmit {
    /// Operations in the order they were reported.
    pcodes: Vec<PCode>,
}
80
81impl sleigh_sys::PCodeEmit for PCodeEmit {
82 fn dump(
83 &mut self,
84 address: &sleigh_sys::ffi::Address,
85 opcode: sleigh_sys::Opcode,
86 outvar: Option<&sleigh_sys::ffi::VarnodeData>,
87 vars: &[sleigh_sys::ffi::VarnodeData],
88 ) {
89 let vars = vars.iter().map(VarnodeData::from).collect::<Vec<_>>();
90 let outvar = outvar.map(VarnodeData::from);
91 let address = address.getOffset();
92 let pcode = PCode {
93 address,
94 opcode,
95 vars,
96 outvar,
97 };
98 self.pcodes.push(pcode);
99 }
100}
101
/// Load image backed by a borrowed byte slice.
struct SliceLoader<'a> {
    /// Address offset that corresponds to `data[0]`.
    start: u64,
    /// Bytes of the image.
    data: &'a [u8],
}
106
107impl<'a> sleigh_sys::LoadImage for SliceLoader<'a> {
108 fn load_fill(&mut self, ptr: &mut [u8], addr: &sleigh_sys::ffi::Address) {
109 let addr = addr.getOffset();
110 let len = self.data.len() as u64;
111 let required = ptr.len() as u64;
112 ptr.fill(0);
113
114 if self.start <= addr {
115 let fill_len = required.min(len) as usize;
116 let offset = (addr - self.start) as usize;
117 ptr[..fill_len].copy_from_slice(&self.data[offset..offset + fill_len]);
118 }
119 }
120}
121
/// Load image backed by an owned byte buffer.
struct VectorLoader {
    /// Address offset that corresponds to `data[0]`.
    start: u64,
    /// Bytes of the image.
    data: Vec<u8>,
}
126
127impl sleigh_sys::LoadImage for VectorLoader {
128 fn load_fill(&mut self, ptr: &mut [u8], addr: &sleigh_sys::ffi::Address) {
129 let mut s = SliceLoader {
130 start: self.start,
131 data: &self.data,
132 };
133 s.load_fill(ptr, addr);
134 }
135}
136
/// Execution mode selector for [`DecompilerBuilder::x86`].
///
/// The selected mode decides both the specification that is loaded and the
/// value stored in the `addrsize` / `opsize` context variables.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum X86Mode {
    /// Uses the `x86` specification with `addrsize` and `opsize` set to 0.
    Mode16,
    /// Uses the `x86` specification with `addrsize` and `opsize` set to 1.
    Mode32,
    /// Uses the `x86-64` specification with `addrsize` and `opsize` set to 2.
    Mode64,
}
142
/// Mode selector with the same variants as [`X86Mode`].
///
/// NOTE(review): nothing in the visible part of this file consumes this type;
/// it is kept because it is part of the public API.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum X64Mode {
    /// 16-bit mode.
    Mode16,
    /// 32-bit mode.
    Mode32,
    /// 64-bit mode.
    Mode64,
}
148
/// Instruction-set selector for [`DecompilerBuilder::arm`].
///
/// It controls the value stored in the `TMode` context variable.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ArmMode {
    /// Sets `TMode` to 0.
    Arm,
    /// Sets `TMode` to 1.
    Thumb,
}
153
/// Architecture version selector for [`DecompilerBuilder::arm`].
///
/// Each variant contributes the version part of the specification name
/// `ARM<version>_<endian>` that the builder looks up.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ArmVersion {
    /// Version tag `4`.
    Arm4,
    /// Version tag `4t`.
    Arm4t,
    /// Version tag `5`.
    Arm5,
    /// Version tag `5t`.
    Arm5t,
    /// Version tag `6`.
    Arm6,
    /// Version tag `7`.
    Arm7,
    /// Version tag `8`.
    Arm8,
}
163
/// Byte-order selector used by [`DecompilerBuilder::aarch64`] and
/// [`DecompilerBuilder::arm`] when choosing a specification name.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Endian {
    /// Little-endian variant of the specification.
    LittleEndian,
    /// Big-endian variant of the specification.
    BigEndian,
}
168
/// A block of bytes together with the address it is based at.
///
/// NOTE(review): nothing in the visible part of this file consumes this type;
/// it is kept because it is part of the public API.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct Image {
    /// Base address of the image (presumably the address of `data[0]`).
    pub base_addr: u64,
    /// Raw bytes of the image.
    pub data: Vec<u8>,
}
173
/// Builder state once an architecture has been chosen.
pub struct ArchState {
    /// Specification obtained from `sla::get_arch_sla`; `build` hands it to
    /// `newDocumentStorage`.
    spec: String,
    /// Context variable defaults (name -> value) that `build` applies through
    /// `setVariableDefault`.
    var: HashMap<String, u32>,
}
178
/// Typestate builder for [`Decompiler`].
///
/// `T` is `()` until an architecture is selected and [`ArchState`] afterwards;
/// `build` is only available in the latter state.
pub struct DecompilerBuilder<T> {
    /// Current builder state.
    state: T,
}
182
183impl DecompilerBuilder<()> {
184 pub fn x86(self, mode: X86Mode) -> DecompilerBuilder<ArchState> {
185 let mut var = HashMap::new();
186 let m = match mode {
187 X86Mode::Mode16 => 0,
188 X86Mode::Mode32 => 1,
189 X86Mode::Mode64 => 2,
190 };
191 var.insert("addrsize".to_string(), m);
192 var.insert("opsize".to_string(), m);
193 let spec = match mode {
194 X86Mode::Mode16 | X86Mode::Mode32 => sla::get_arch_sla("x86").unwrap(),
195 X86Mode::Mode64 => sla::get_arch_sla("x86-64").unwrap(),
196 };
197 DecompilerBuilder {
198 state: ArchState { spec, var },
199 }
200 }
201
202 pub fn aarch64(self, endian: Endian) -> DecompilerBuilder<ArchState> {
203 let e = match endian {
204 Endian::LittleEndian => "",
205 Endian::BigEndian => "BE",
206 };
207
208 let name = format!("AARCH64{}", e);
209 let spec = sla::get_arch_sla(&name).unwrap();
210
211 DecompilerBuilder {
212 state: ArchState {
213 spec,
214 var: HashMap::new(),
215 },
216 }
217 }
218
219 pub fn arm(
220 self,
221 version: ArmVersion,
222 endian: Endian,
223 mode: ArmMode,
224 ) -> DecompilerBuilder<ArchState> {
225 let v = match version {
226 ArmVersion::Arm4 => "4",
227 ArmVersion::Arm5 => "5",
228 ArmVersion::Arm6 => "6",
229 ArmVersion::Arm7 => "7",
230 ArmVersion::Arm8 => "8",
231 ArmVersion::Arm4t => "4t",
232 ArmVersion::Arm5t => "5t",
233 };
234 let e = match endian {
235 Endian::LittleEndian => "le",
236 Endian::BigEndian => "be",
237 };
238
239 let mut var = HashMap::new();
240 let t = if let ArmMode::Thumb = mode { 1 } else { 0 };
241 var.insert("TMode".to_string(), t);
242
243 let name = format!("ARM{}_{}", v, e);
244 let spec = sla::get_arch_sla(&name).unwrap();
245
246 DecompilerBuilder {
247 state: ArchState { spec, var },
248 }
249 }
250
251 pub fn dalvik(self) -> DecompilerBuilder<ArchState> {
252 DecompilerBuilder {
253 state: ArchState {
254 spec: sla::get_arch_sla("Dalvik").unwrap(),
255 var: HashMap::new(),
256 },
257 }
258 }
259
260 pub fn jvm(self) -> DecompilerBuilder<ArchState> {
261 DecompilerBuilder {
262 state: ArchState {
263 spec: sla::get_arch_sla("JVM").unwrap(),
264 var: HashMap::new(),
265 },
266 }
267 }
268}
269
impl DecompilerBuilder<ArchState> {
    /// Creates the [`Decompiler`] for the selected architecture.
    ///
    /// Loads the specification into a document storage, creates an empty
    /// heap-allocated `VectorLoader`, wraps it in a `RustLoadImage` whose raw
    /// pointer is handed to `newDecompiler`, and applies every stored context
    /// variable default.
    pub fn build(self) -> Decompiler {
        let_cxx_string!(spec = self.state.spec);
        let doc = sleigh_sys::ffi::newDocumentStorage(&spec);
        // The loader starts empty; `translate` / `disassemble` refill it on
        // every call.
        let loader = VectorLoader {
            start: 0,
            data: vec![],
        };
        // Boxed so the loader keeps a stable heap address when the box itself
        // is moved into the returned `Decompiler`.
        let mut loader = Box::new(loader);

        // SAFETY (as far as this file shows): the `'static` lifetime created
        // below is a fiction. The reference is only valid while `loader` is
        // alive, which the returned `Decompiler` ensures by owning `loader`
        // and by freeing `rust_loader` in its `Drop` impl.
        // NOTE(review): the `Box` is still accessed through `self.loader`
        // while this extended `&mut` exists — confirm this aliasing is
        // acceptable.
        unsafe {
            // Extend the borrow of the boxed loader to `'static` so the
            // wrapper can be stored behind a raw pointer.
            let rust_loader = Box::new(RustLoadImage::from_internal(std::mem::transmute::<
                _,
                &'static mut VectorLoader,
            >(loader.as_mut())));
            // Leak the wrapper into a raw pointer for the C++ side; it is
            // reclaimed with `Box::from_raw` in `Drop for Decompiler`.
            let rust_loader: *mut RustLoadImage<'static> = Box::leak(rust_loader);
            let mut inner = sleigh_sys::ffi::newDecompiler(rust_loader, doc);

            // Apply the architecture's context variable defaults.
            let ctx = inner.pin_mut().getContext();
            for (k, v) in self.state.var.iter() {
                let_cxx_string!(key = k);
                let val = *v;
                // `ctx` is re-pinned on every iteration because the pinned
                // reference is consumed by the call.
                Pin::new_unchecked(&mut *ctx).setVariableDefault(&key, val)
            }

            Decompiler {
                loader,
                rust_loader,
                inner,
            }
        }
    }
}
303
/// Translator / disassembler for one architecture, created through
/// [`Decompiler::builder`].
pub struct Decompiler {
    /// Buffer holding the bytes of the current request; refilled by
    /// `translate` and `disassemble`.
    loader: Box<VectorLoader>,
    /// Raw pointer to the leaked `RustLoadImage` wrapper around `loader`;
    /// freed in `Drop`.
    rust_loader: *mut RustLoadImage<'static>,
    /// The underlying FFI decompiler object.
    inner: UniquePtr<sleigh_sys::ffi::Decompiler>,
}
309
310impl Drop for Decompiler {
311 fn drop(&mut self) {
312 unsafe {
313 let _ = Box::from_raw(self.rust_loader);
314 }
315 }
316}
317
318impl Decompiler {
319 pub fn builder() -> DecompilerBuilder<()> {
320 DecompilerBuilder { state: () }
321 }
322
323 pub fn translate(&mut self, code: &[u8], addr: u64) -> (usize, Vec<PCode>) {
324 self.loader.data.clear();
325 self.loader.data.extend_from_slice(code);
326 self.loader.start = addr;
327 let mut emit = PCodeEmit { pcodes: vec![] };
328 unsafe {
329 let mut rust_emit = RustPCodeEmit::from_internal(&mut emit);
330 let n = self
331 .inner
332 .pin_mut()
333 .translate(&mut rust_emit as *mut _, addr);
334 (n as usize, emit.pcodes)
335 }
336 }
337
338 pub fn disassemble(&mut self, code: &[u8], addr: u64) -> (usize, Vec<Instruction>) {
339 self.loader.data.clear();
340 self.loader.data.extend_from_slice(code);
341 self.loader.start = addr;
342 let mut emit = AssemblyEmit { insts: vec![] };
343 unsafe {
344 let mut rust_emit = RustAssemblyEmit::from_internal(&mut emit);
345 let n = self.inner.pin_mut().disassemble(&mut rust_emit as _, addr);
346 (n as usize, emit.insts)
347 }
348 }
349}
350
#[cfg(test)]
mod tests {
    use super::*;

    /// ARMv8 little-endian sample instruction shared by the ARM tests.
    const ARM_SAMPLE: &[u8] = b"\x01\x00\x80\x00";

    /// Builds the ARMv8 little-endian, ARM-mode decompiler used by several tests.
    fn arm8_le() -> Decompiler {
        Decompiler::builder()
            .arm(ArmVersion::Arm8, Endian::LittleEndian, ArmMode::Arm)
            .build()
    }

    /// Translates and then disassembles `code`, printing both results.
    fn run(decompiler: &mut Decompiler, code: &[u8], addr: u64) {
        let (consumed, pcodes) = decompiler.translate(code, addr);
        println!("{} {:?}", consumed, pcodes);
        let (consumed, insts) = decompiler.disassemble(code, addr);
        println!("{} {:?}", consumed, insts);
    }

    /// Reuses one decompiler for many consecutive requests.
    #[test]
    fn test_loop() {
        let mut decompiler = arm8_le();
        (0..100).for_each(|_| run(&mut decompiler, ARM_SAMPLE, 0x1000));
    }

    /// Runs the x86 and ARM tests on two threads at once.
    #[test]
    fn test_concurrent() {
        let handles = vec![
            std::thread::spawn(test_x86),
            std::thread::spawn(test_arm),
        ];
        for handle in handles {
            handle.join().unwrap();
        }
    }

    /// Exercises 32-bit and 64-bit x86.
    #[test]
    fn test_x86() {
        let mut decompiler32 = Decompiler::builder().x86(X86Mode::Mode32).build();
        run(&mut decompiler32, b"\x05\x00\x10\x00\x00", 0x1000);

        let mut decompiler64 = Decompiler::builder().x86(X86Mode::Mode64).build();
        run(&mut decompiler64, b"\x48\x31\xd8", 0x100010001);
    }

    /// Exercises ARMv8 in ARM mode.
    #[test]
    fn test_arm() {
        let mut decompiler = arm8_le();
        run(&mut decompiler, ARM_SAMPLE, 0x1000);
    }

    /// Exercises ARMv5t in Thumb mode.
    #[test]
    fn test_arm_thumb() {
        let mut decompiler = Decompiler::builder()
            .arm(ArmVersion::Arm5t, Endian::LittleEndian, ArmMode::Thumb)
            .build();
        run(&mut decompiler, b"\x11\x44\x11\x44", 0x1000);
    }

    /// Exercises Dalvik bytecode.
    #[test]
    fn test_dalvik() {
        let mut decompiler = Decompiler::builder().dalvik().build();
        run(&mut decompiler, b"\x90\x00\x02\x03", 0x1000);
    }
}