1use std::fs::File;
7use std::io::{ Read, Cursor, Seek, SeekFrom };
8use zip::ZipArchive;
9use byteorder::{BigEndian, LittleEndian, ReadBytesExt};
10
11use crate::error::DexError;
12
/// Endian tag byte sequence that `DexReader::check_endianness` reports as
/// `DexEndianness::BigEndian` (the value 0x12345678 with its most significant
/// byte stored first).
const ENDIAN_CONSTANT: [u8; 4] = [0x12, 0x34, 0x56, 0x78];
/// Endian tag byte sequence that `DexReader::check_endianness` reports as
/// `DexEndianness::LittleEndian` (the same four bytes in reverse order).
const REVERSE_ENDIAN_CONSTANT: [u8; 4] = [0x78, 0x56, 0x34, 0x12];
17
/// Byte order used when decoding multi-byte integers from DEX data.
///
/// The value is derived from the 4-byte endian tag by
/// `DexReader::check_endianness` and selects the `byteorder` decoder used by
/// the `read_u16` / `read_u32` / `read_i32` methods.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum DexEndianness {
    /// Least significant byte first.
    LittleEndian,
    /// Most significant byte first.
    BigEndian,
}
24
/// Cursor-based reader over the raw bytes of a single DEX file.
#[derive(Debug)]
pub struct DexReader {
    /// In-memory DEX contents together with the current read position.
    pub bytes: Cursor<Vec<u8>>,
    /// Total number of bytes in `bytes`, captured once when the reader is built.
    pub bytes_len: u64,
    /// Byte order detected from the endian tag when the reader is built.
    pub endianness: DexEndianness,
}
35
36impl DexReader {
37 pub fn build_from_file(filepath: &str) -> Result<Vec<DexReader>, DexError> {
42 let raw_file = File::open(filepath)
43 .unwrap_or_else(|err| panic!("could not open input file: {err}"));
44 let mut zip_file = ZipArchive::new(raw_file)
45 .unwrap_or_else(|err| panic!("could not create ZipArchive object: {err}"));
46
47 let dex_entries_names = zip_file.file_names()
48 .filter(|name| name.ends_with(".dex"))
49 .map(|name| name.to_string())
50 .collect::<Vec<String>>();
51
52 let mut readers = Vec::new();
53 for entry in dex_entries_names.iter() {
54 let mut dex_entry = zip_file.by_name(entry)
55 .unwrap_or_else(|_| panic!("cannot find classes.dex in the APK"));
56 let mut raw_dex = Vec::new();
57 dex_entry.read_to_end(&mut raw_dex)
58 .unwrap_or_else(|err| panic!("Could not read input file: {err}"));
59 let reader = DexReader::build(raw_dex)?;
60 readers.push(reader);
61 }
62
63 Ok(readers)
64 }
65
66 pub fn build(raw_dex: Vec<u8>) -> Result<Self, DexError> {
68 let endianness = DexReader::check_endianness(&raw_dex)?;
69
70 let mut bytes = Cursor::new(raw_dex);
71 let bytes_len = bytes.seek(SeekFrom::End(0))?;
72 bytes.rewind()?;
73
74 Ok(DexReader {
75 bytes,
76 bytes_len,
77 endianness
78 })
79 }
80
81 pub fn check_endianness(bytes: &[u8]) -> Result<DexEndianness, DexError> {
83 if bytes.len() < 44 {
86 return Err(DexError::DexHeaderTooShortError);
87 }
88
89 let endian_tag = &bytes[40..44];
90
91 match endian_tag.try_into().unwrap() {
93 ENDIAN_CONSTANT => Ok(DexEndianness::BigEndian),
94 REVERSE_ENDIAN_CONSTANT => Ok(DexEndianness::LittleEndian),
95 _ => Err(DexError::InvalidEndianessTag)
96 }
97 }
98
99 pub fn align_cursor(&mut self) -> Result<(), DexError> {
102 while self.bytes.position() % 2 != 0 {
103 let _ = self.read_u8()?;
104 }
105
106 Ok(())
107 }
108
109 pub fn read_u8(&mut self) -> Result<u8, DexError> {
111 if self.bytes.position() >= self.bytes_len {
112 return Err(DexError::NoDataLeftError);
113 }
114
115 Ok(self.bytes.read_u8()?)
116 }
117
118 pub fn read_u16(&mut self) -> Result<u16, DexError> {
120 if self.bytes.position() > self.bytes_len - 2 {
121 return Err(DexError::NoDataLeftError);
122 }
123
124 match self.endianness {
125 DexEndianness::BigEndian => Ok(self.bytes.read_u16::<BigEndian>()?),
126 DexEndianness::LittleEndian => Ok(self.bytes.read_u16::<LittleEndian>()?),
127 }
128 }
129
130 pub fn read_u32(&mut self) -> Result<u32, DexError> {
132 if self.bytes.position() > self.bytes_len - 4 {
133 return Err(DexError::NoDataLeftError);
134 }
135
136 match self.endianness {
137 DexEndianness::BigEndian => Ok(self.bytes.read_u32::<BigEndian>()?),
138 DexEndianness::LittleEndian => Ok(self.bytes.read_u32::<LittleEndian>()?),
139 }
140 }
141
142 pub fn read_i32(&mut self) -> Result<i32, DexError> {
144 if self.bytes.position() > self.bytes_len - 4 {
145 return Err(DexError::NoDataLeftError);
146 }
147
148 match self.endianness {
149 DexEndianness::BigEndian => Ok(self.bytes.read_i32::<BigEndian>()?),
150 DexEndianness::LittleEndian => Ok(self.bytes.read_i32::<LittleEndian>()?),
151 }
152 }
153
154 pub fn read_uleb128(&mut self) -> Result<(u32, usize), DexError> {
156 let mut bytes_read: usize = 0;
157 let mut result: u32 = 0;
158 let mut shift = 0;
159
160 loop {
161 let byte = self.bytes.read_u8()?;
162 bytes_read += 1;
163 let payload = (byte & 0b0111_1111) as u32;
164 result |= payload << shift;
165 shift += 7;
166
167 if (byte & 0b1000_0000) == 0 {
168 break;
169 }
170
171 if bytes_read >= 5 {
172 return Err(DexError::InvalidUleb128Value);
173 }
174 }
175
176 Ok((result, bytes_read))
177 }
178
179 pub fn read_sleb128(&mut self) -> Result<(i32, usize), DexError> {
181 let mut bytes_read: usize = 0;
182 let mut result: u32 = 0;
183 let mut shift = 0;
184 let mut byte;
185
186 loop {
187 byte = self.bytes.read_u8()? as u32;
188 bytes_read += 1;
189 let payload = byte & 0b0111_1111;
190 result |= payload << shift;
191
192 shift += 7;
193
194 if (byte & 0b1000_0000) == 0 {
195 break;
196 }
197
198 if bytes_read >= 5 {
199 return Err(DexError::InvalidSleb128Value);
200 }
201 }
202
203 let mut result = result as i32;
204 if (byte & 0b0100_0000) == 0b0100_0000 {
205 result |= -(1 << shift);
207 }
208
209 Ok((result, bytes_read))
210 }
211
212 pub fn read_uleb128p1(&mut self) -> Result<(i32, usize), DexError> {
214 match self.read_uleb128() {
215 Ok((uleb128, bytes_read)) => Ok(((uleb128 as i32) - 1, bytes_read)),
216 Err(_) => Err(DexError::InvalidUleb128p1Value)
217 }
218 }
219}
220
// Unit tests for `DexReader`: header validation, fixed-width reads and the
// LEB128 decoders.
#[cfg(test)]
mod tests {
    use super::*;

    // Shared 50-byte fixture. Layout by offset:
    //   0..8    ASCII "dex\n035\0"
    //   10..20  unsigned LEB128 samples: 0x7f | 0xdf 0x80 0x01 | five 0x80 bytes (too long)
    //   20..30  signed LEB128 samples:   0x7f | 0x80 0x7f | a run of 0x80 bytes (too long)
    //   30..40  uleb128p1 samples:       0x7f | 0xdf 0x00 | 0x00 | five 0x80 bytes (too long)
    //   40..44  endian tag 0x78 0x56 0x34 0x12 (little-endian)
    //   44..50  zero padding
    const DEX_DATA: [u8; 50] = [
        0x64, 0x65, 0x78, 0x0a, 0x30, 0x33, 0x35, 0x00, 0x00, 0x00, 0x7f, 0xdf, 0x80, 0x01, 0x80, 0x80, 0x80, 0x80, 0x80, 0x00, 0x7f, 0x80, 0x7f, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x00, 0x7f, 0xdf, 0x00, 0x00, 0x80, 0x80, 0x80, 0x80, 0x80, 0x00, 0x78, 0x56, 0x34, 0x12, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ];

    // `build` records the full input length and the detected byte order.
    #[test]
    fn test_build() {
        let dex_reader = DexReader::build(DEX_DATA.to_vec()).unwrap();
        assert_eq!(dex_reader.bytes_len, DEX_DATA.len() as u64);
        assert_eq!(dex_reader.endianness, DexEndianness::LittleEndian);
    }

    // The standalone check and the value stored by `build` must agree.
    #[test]
    fn test_check_endianness() {
        let dex_reader = DexReader::build(DEX_DATA.to_vec()).unwrap();
        let endianness = DexReader::check_endianness(&DEX_DATA).unwrap();
        assert_eq!(endianness, DexEndianness::LittleEndian);
        assert_eq!(dex_reader.endianness, endianness);
    }

    // Input shorter than 44 bytes cannot contain the endian tag.
    #[test]
    fn test_check_endianness_invalid() {
        let invalid_data = vec![0x00; 10];
        let result = DexReader::check_endianness(&invalid_data);
        assert_eq!(
            result.unwrap_err().to_string(),
            "DEX header too short"
        );
    }

    // Long enough input whose tag bytes are all zero is rejected as an unknown tag.
    #[test]
    fn test_check_endianness_invalid_long() {
        let invalid_long_data = vec![0x00; 100];
        let result = DexReader::check_endianness(&invalid_long_data);
        assert_eq!(
            result.unwrap_err().to_string(),
            "invalid endianness tag"
        );
    }

    #[test]
    fn test_read_u8() {
        let mut dex_reader = DexReader::build(DEX_DATA.to_vec()).unwrap();
        let byte = dex_reader.read_u8().unwrap();
        assert_eq!(byte, 0x64);

        // Cursor exactly at the end: nothing left to read.
        dex_reader.bytes.seek(SeekFrom::End(0)).unwrap();
        let result = dex_reader.read_u8();
        assert_eq!(
            result.unwrap_err().to_string(),
            "no data left to read"
        );

        // Cursor moved beyond the end must be reported the same way.
        let bound = DEX_DATA.len() + 10;
        dex_reader.bytes.seek(SeekFrom::Start(bound as u64)).unwrap();
        let result = dex_reader.read_u8();
        assert_eq!(
            result.unwrap_err().to_string(),
            "no data left to read"
        );
    }

    #[test]
    fn test_read_u16() {
        let mut dex_reader = DexReader::build(DEX_DATA.to_vec()).unwrap();
        // Bytes 0x64 0x65 decoded little-endian.
        let u16_val = dex_reader.read_u16().unwrap();
        assert_eq!(u16_val, 0x6564);

        // Cursor exactly at the end: nothing left to read.
        dex_reader.bytes.seek(SeekFrom::End(0)).unwrap();
        let result = dex_reader.read_u16();
        assert_eq!(
            result.unwrap_err().to_string(),
            "no data left to read"
        );

        // Cursor moved beyond the end must be reported the same way.
        let bound = DEX_DATA.len() + 10;
        dex_reader.bytes.seek(SeekFrom::Start(bound as u64)).unwrap();
        let result = dex_reader.read_u16();
        assert_eq!(
            result.unwrap_err().to_string(),
            "no data left to read"
        );
    }

    #[test]
    fn test_read_u32() {
        let mut dex_reader = DexReader::build(DEX_DATA.to_vec()).unwrap();
        // Bytes 0x64 0x65 0x78 0x0a decoded little-endian.
        let u32_val = dex_reader.read_u32().unwrap();
        assert_eq!(u32_val, 0x0a786564);

        // Cursor exactly at the end: nothing left to read.
        dex_reader.bytes.seek(SeekFrom::End(0)).unwrap();
        let result = dex_reader.read_u32();
        assert_eq!(
            result.unwrap_err().to_string(),
            "no data left to read"
        );

        // Cursor moved beyond the end must be reported the same way.
        let bound = DEX_DATA.len() + 10;
        dex_reader.bytes.seek(SeekFrom::Start(bound as u64)).unwrap();
        let result = dex_reader.read_u32();
        assert_eq!(
            result.unwrap_err().to_string(),
            "no data left to read"
        );
    }

    #[test]
    fn test_read_uleb128() {
        let mut reader = DexReader::build(DEX_DATA.to_vec()).unwrap();
        // Offset 10 is where the unsigned LEB128 samples of the shared fixture start.
        reader.bytes.seek(SeekFrom::Start(10)).unwrap();

        let result = reader.read_uleb128().unwrap();
        assert_eq!(result, (0x7f, 1));

        let result = reader.read_uleb128().unwrap();
        assert_eq!(result, (0x405f, 3));

        // Five bytes in a row with the continuation bit set.
        let result = reader.read_uleb128();
        assert_eq!(
            result.unwrap_err().to_string(),
            "too many bytes in unsigned LEB128 value"
        );

        // Second fixture: a 50-byte prefix carrying only the leading bytes and
        // the endian tag, followed by the encoded samples starting at offset 50.
        let dex_data = [
            0x64, 0x65, 0x78, 0x0a, 0x30, 0x33, 0x35, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x78, 0x56, 0x34, 0x12, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x7f, 0x80, 0x7f, 0xb4, 0x07, 0x8c, 0x08, 0xff, 0xff, 0xff, 0xff, 0xf ];

        let mut reader = DexReader::build(dex_data.to_vec()).unwrap();
        reader.bytes.seek(SeekFrom::Start(50)).unwrap();

        let result = reader.read_uleb128().unwrap();
        assert_eq!(result, (0, 1));

        let result = reader.read_uleb128().unwrap();
        assert_eq!(result, (1, 1));

        let result = reader.read_uleb128().unwrap();
        assert_eq!(result, (127, 1));

        let result = reader.read_uleb128().unwrap();
        assert_eq!(result, (16256, 2));

        let result = reader.read_uleb128().unwrap();
        assert_eq!(result, (0x3b4, 2));

        let result = reader.read_uleb128().unwrap();
        assert_eq!(result, (0x40c, 2));

        // Largest value: a full five-byte encoding.
        let result = reader.read_uleb128().unwrap();
        assert_eq!(result, (0xffffffff, 5));
    }

    #[test]
    fn test_read_sleb128() {
        let mut reader = DexReader::build(DEX_DATA.to_vec()).unwrap();
        // Offset 20 is where the signed LEB128 samples of the shared fixture start.
        reader.bytes.seek(SeekFrom::Start(20)).unwrap();

        let result = reader.read_sleb128().unwrap();
        assert_eq!(result, (-1, 1));

        let result = reader.read_sleb128().unwrap();
        assert_eq!(result, (-128, 2));

        // Five bytes in a row with the continuation bit set.
        let result = reader.read_sleb128();
        assert_eq!(
            result.unwrap_err().to_string(),
            "too many bytes in signed LEB128 value"
        );

        // Second fixture: a 50-byte prefix carrying only the leading bytes and
        // the endian tag, followed by the encoded samples starting at offset 50.
        let dex_data = [
            0x64, 0x65, 0x78, 0x0a, 0x30, 0x33, 0x35, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x78, 0x56, 0x34, 0x12, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x7f, 0x80, 0x7f, 0x3c, ];

        let mut reader = DexReader::build(dex_data.to_vec()).unwrap();
        reader.bytes.seek(SeekFrom::Start(50)).unwrap();

        let result = reader.read_sleb128().unwrap();
        assert_eq!(result, (0, 1));

        let result = reader.read_sleb128().unwrap();
        assert_eq!(result, (1, 1));

        let result = reader.read_sleb128().unwrap();
        assert_eq!(result, (-1, 1));

        let result = reader.read_sleb128().unwrap();
        assert_eq!(result, (-128, 2));

        // Single byte with bit 6 clear stays positive.
        let result = reader.read_sleb128().unwrap();
        assert_eq!(result, (0x3c, 1));
    }

    #[test]
    fn test_read_uleb128p1() {
        let mut reader = DexReader::build(DEX_DATA.to_vec()).unwrap();
        // Offset 30 is where the uleb128p1 samples of the shared fixture start.
        reader.bytes.seek(SeekFrom::Start(30)).unwrap();

        let result = reader.read_uleb128p1().unwrap();
        assert_eq!(result, (0x7e, 1));

        let result = reader.read_uleb128p1().unwrap();
        assert_eq!(result, (0x5e, 2));

        // An encoded zero decodes to -1.
        let result = reader.read_uleb128p1().unwrap();
        assert_eq!(result, (-1, 1));

        // Five bytes in a row with the continuation bit set.
        let result = reader.read_uleb128p1();
        assert_eq!(
            result.unwrap_err().to_string(),
            "too many bytes in unsigned LEB128p1 value"
        );
    }
}