1#![feature(get_mut_unchecked)]
2#![feature(map_first_last)]
3
4use crate::error::Error;
5use code_graph::CodeGraph;
6use partial_execution::ExecutionPath;
7use petgraph::stable_graph::NodeIndex;
8use pydis::opcode::py27::{self, Standard};
9use pydis::prelude::{Instruction, Opcode};
10use rayon::prelude::*;
11
12use py27_marshal::{Code, Obj};
13use rayon::Scope;
14use smallvm::InstructionTracker;
15use std::collections::{HashMap, HashSet};
16use std::fmt::Debug;
17use std::marker::PhantomData;
18use std::path::Path;
19use std::sync::atomic::{AtomicUsize, Ordering};
20use std::sync::{Arc, Mutex, RwLock};
21use strings::CodeObjString;
22
23pub mod code_graph;
25pub mod deob;
27pub mod error;
29pub mod partial_execution;
31pub mod smallvm;
33pub mod strings;
35
36pub struct Deobfuscator<'a, O: Opcode<Mnemonic = py27::Mnemonic> + PartialEq> {
37 input: &'a [u8],
39
40 enable_dotviz_graphs: bool,
42 files_processed: AtomicUsize,
43 graphviz_graphs: HashMap<String, String>,
44 on_graph_generated: Option<Box<dyn Fn(&str, &str) + Send + Sync>>,
45 on_store_to_named_var: Option<
46 Box<
47 dyn Fn(
48 &Code,
49 &HashSet<String>,
50 &RwLock<&mut CodeGraph<O>>,
51 &Instruction<O>,
52 &(Option<Obj>, InstructionTracker<(NodeIndex<u32>, usize)>),
53 ) + Send
54 + Sync,
55 >,
56 >,
57 _opcode_phantom: PhantomData<O>,
58}
59
60impl<'a, O: Opcode<Mnemonic = py27::Mnemonic> + PartialEq> Debug for Deobfuscator<'a, O> {
61 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
62 f.debug_struct("Deobfuscator")
63 .field("input", &self.input)
64 .field("enable_dotviz_graphs", &self.enable_dotviz_graphs)
65 .field("files_processed", &self.files_processed)
66 .field("graphviz_graphs", &self.graphviz_graphs)
67 .field(
68 "on_graph_generated",
69 if let Some(callback) = &self.on_graph_generated {
70 &"Some(callback)"
71 } else {
72 &"None"
73 },
74 )
75 .field(
76 "on_store_to_named_var",
77 if let Some(callback) = &self.on_store_to_named_var {
78 &"Some(callback)"
79 } else {
80 &"None"
81 },
82 )
83 .field("_opcode_phantom", &self._opcode_phantom)
84 .finish()
85 }
86}
87
88impl<'a, O: Opcode<Mnemonic = py27::Mnemonic> + PartialEq> Deobfuscator<'a, O> {
89 pub fn new(input: &'a [u8]) -> Deobfuscator<'a, O> {
91 Deobfuscator {
92 input,
93 enable_dotviz_graphs: false,
94 files_processed: AtomicUsize::new(0),
95 graphviz_graphs: HashMap::new(),
96 on_graph_generated: None,
97 on_store_to_named_var: None,
98 _opcode_phantom: Default::default(),
99 }
100 }
101
102 pub fn enable_graphs(mut self) -> Deobfuscator<'a, O> {
105 self.enable_dotviz_graphs = true;
106 self
107 }
108
109 pub fn on_graph_generated(
113 mut self,
114 callback: impl Fn(&str, &str) + 'static + Send + Sync,
115 ) -> Deobfuscator<'a, O> {
116 self.on_graph_generated = Some(Box::new(callback));
117 self
118 }
119
120 pub fn on_store_to_named_var(
123 mut self,
124 callback: impl Fn(
125 &Code,
126 &HashSet<String>,
127 &RwLock<&mut CodeGraph<O>>,
128 &Instruction<O>,
129 &(Option<Obj>, InstructionTracker<(NodeIndex<u32>, usize)>),
130 )
131 + 'static
132 + Send
133 + Sync,
134 ) -> Deobfuscator<'a, O> {
135 self.on_store_to_named_var = Some(Box::new(callback));
136 self
137 }
138
139 pub fn graphs(&self) -> &HashMap<String, String> {
142 &self.graphviz_graphs
143 }
144
145 pub fn deobfuscate(&self) -> Result<DeobfuscatedCodeObject, Error<O>> {
148 if let py27_marshal::Obj::Code(code) = py27_marshal::read::marshal_loads(&self.input)? {
149 let mut results = vec![];
151 let mut mapped_names = HashMap::new();
152 let mut graphs = HashMap::new();
153 let out_results = Arc::new(Mutex::new(vec![]));
154 rayon::scope(|scope| {
155 self.deobfuscate_nested_code_objects(
156 Arc::clone(&code),
157 scope,
158 Arc::clone(&out_results),
159 );
160 });
161
162 let out_results = Arc::try_unwrap(out_results)
163 .unwrap_or_else(|_| panic!("failed to unwrap mapped names"))
164 .into_inner()
165 .unwrap();
166 for result in out_results {
167 let result = result?;
168 results.push((result.file_number, result.new_bytecode));
169 mapped_names.extend(result.mapped_function_names);
170 graphs.extend(result.graphviz_graphs);
171 }
172
173 results.sort_by(|a, b| a.0.cmp(&b.0));
176
177 let output_data = self
178 .rename_vars(
179 &mut results.iter().map(|result| result.1.as_slice()),
180 &mapped_names,
181 )
182 .unwrap();
183
184 Ok(DeobfuscatedCodeObject {
185 data: output_data,
186 graphs,
187 })
188 } else {
189 Err(Error::InvalidCodeObject)
190 }
191 }
192
193 pub(crate) fn deobfuscate_nested_code_objects(
194 &'a self,
195 code: Arc<Code>,
196 scope: &Scope<'a>,
197 out_results: Arc<Mutex<Vec<Result<DeobfuscatedBytecode, Error<O>>>>>,
198 ) {
199 let file_number = self.files_processed.fetch_add(1, Ordering::Relaxed);
200
201 let task_code = Arc::clone(&code);
202 let thread_results = Arc::clone(&out_results);
203 scope.spawn(move |_scope| {
204 let res = self.deobfuscate_code(task_code, file_number);
205 thread_results.lock().unwrap().push(res);
206 });
207
208 for c in code.consts.iter() {
210 if let Obj::Code(const_code) = c {
211 let thread_results = Arc::clone(&out_results);
212 let thread_code = Arc::clone(const_code);
213
214 self.deobfuscate_nested_code_objects(thread_code, scope, thread_results);
215 }
216 }
217 }
218}
219
/// Final output of [`Deobfuscator::deobfuscate`].
///
/// The common traits are derived so the value can be printed, compared in
/// tests, and used with `Result::unwrap_err` / `expect_err`, which require
/// the `Ok` type to be `Debug`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DeobfuscatedCodeObject {
    /// Bytes returned by the variable-renaming pass over the deobfuscated
    /// bytecode of every processed code object.
    pub data: Vec<u8>,
    /// Union of the graph maps reported for each processed code object.
    pub graphs: HashMap<String, String>,
}
228
/// Result of deobfuscating a single code object, before the per-object
/// results are merged by `Deobfuscator::deobfuscate`.
///
/// `Debug` is derived so a per-task result can be logged or inspected.
#[derive(Debug)]
pub(crate) struct DeobfuscatedBytecode {
    /// Number used to sort the per-object results back into a stable order.
    pub(crate) file_number: usize,
    /// Rewritten bytecode for this code object.
    pub(crate) new_bytecode: Vec<u8>,
    /// Name mappings contributed by this code object; merged across all
    /// objects and passed to the renaming pass.
    pub(crate) mapped_function_names: HashMap<String, String>,
    /// Graphs contributed by this code object; merged into the final output.
    pub(crate) graphviz_graphs: HashMap<String, String>,
}
235
236pub fn dump_strings<'a>(
239 pyc_filename: &'a Path,
240 data: &[u8],
241) -> Result<Vec<CodeObjString<'a>>, Error<Standard>> {
242 if let py27_marshal::Obj::Code(code) = py27_marshal::read::marshal_loads(data)? {
243 Ok(dump_codeobject_strings(pyc_filename, code))
244 } else {
245 Err(Error::InvalidCodeObject)
246 }
247}
248
249fn dump_codeobject_strings(pyc_filename: &Path, code: Arc<Code>) -> Vec<CodeObjString> {
252 let new_strings = Mutex::new(vec![]);
253 code.names.par_iter().for_each(|name| {
254 new_strings.lock().unwrap().push(CodeObjString::new(
255 code.as_ref(),
256 pyc_filename,
257 crate::strings::StringType::Name,
258 name.to_string().as_ref(),
259 ))
260 });
261
262 code.varnames.par_iter().for_each(|name| {
263 new_strings.lock().unwrap().push(CodeObjString::new(
264 code.as_ref(),
265 pyc_filename,
266 crate::strings::StringType::VarName,
267 name.to_string().as_ref(),
268 ))
269 });
270
271 code.consts.as_ref().par_iter().for_each(|c| {
272 if let py27_marshal::Obj::String(s) = c {
273 new_strings.lock().unwrap().push(CodeObjString::new(
274 code.as_ref(),
275 pyc_filename,
276 crate::strings::StringType::Const,
277 s.to_string().as_ref(),
278 ))
279 }
280 });
281
282 code.consts.par_iter().for_each(|c| {
284 if let Obj::Code(const_code) = c {
285 let mut strings = dump_codeobject_strings(pyc_filename, Arc::clone(&const_code));
287 new_strings.lock().unwrap().append(&mut strings);
288 }
289 });
290
291 new_strings.into_inner().unwrap()
292}