rusty_dex/dex/
reader.rs

1//! DEX reader
2//!
3//! This module defines all the methods to read bytes from the DEX file while respecting the
4//! endianness, which can change from one DEX file to another.
5
6use std::fs::File;
7use std::io::{ Read, Cursor, Seek, SeekFrom };
8use zip::ZipArchive;
9use byteorder::{BigEndian, LittleEndian, ReadBytesExt};
10
11use crate::error::DexError;
12
13/// `endian_tag` bytes as they appear in the file when it is big-endian (see `check_endianness`)
14const ENDIAN_CONSTANT: [u8; 4] = [0x12, 0x34, 0x56, 0x78];
15/// `endian_tag` bytes as they appear in the file when it is little-endian
16const REVERSE_ENDIAN_CONSTANT: [u8; 4] = [0x78, 0x56, 0x34, 0x12];
17
18/// Byte order used by `DexReader` to decode multi-byte integers
19#[derive(Debug, Clone, Copy, PartialEq)]
20pub enum DexEndianness {
    /// Multi-byte reads are decoded with `byteorder::LittleEndian`
21    LittleEndian,
    /// Multi-byte reads are decoded with `byteorder::BigEndian`
22    BigEndian,
23}
24
25/// A reader that decodes integers and LEB128 values from the in-memory bytes of one DEX file
26#[derive(Debug)]
27pub struct DexReader {
28    /// Cursor over the bytes of the DEX file; its position is the offset of the next read
29    pub bytes: Cursor<Vec<u8>>,
30    /// Number of bytes in the DEX file; the fixed-width `read_*` methods bounds-check against it
31    pub bytes_len: u64,
32    /// Byte order applied when decoding multi-byte integers
33    pub endianness: DexEndianness,
34}
35
36impl DexReader {
37    /// Open the file at the given path and create reader(s)
38    ///
39    /// Each APK can contain multiple DEX files. This function extracts them all, create a reader
40    /// from each, and returns a vector of readers.
41    pub fn build_from_file(filepath: &str) -> Result<Vec<DexReader>, DexError> {
42        let raw_file = File::open(filepath)
43            .unwrap_or_else(|err| panic!("could not open input file: {err}"));
44        let mut zip_file = ZipArchive::new(raw_file)
45            .unwrap_or_else(|err| panic!("could not create ZipArchive object: {err}"));
46
47        let dex_entries_names = zip_file.file_names()
48                                        .filter(|name| name.ends_with(".dex"))
49                                        .map(|name| name.to_string())
50                                        .collect::<Vec<String>>();
51
52        let mut readers = Vec::new();
53        for entry in dex_entries_names.iter() {
54            let mut dex_entry = zip_file.by_name(entry)
55                                        .unwrap_or_else(|_| panic!("cannot find classes.dex in the APK"));
56            let mut raw_dex = Vec::new();
57            dex_entry.read_to_end(&mut raw_dex)
58                     .unwrap_or_else(|err| panic!("Could not read input file: {err}"));
59            let reader = DexReader::build(raw_dex)?;
60            readers.push(reader);
61        }
62
63        Ok(readers)
64    }
65
66    /// Read a DEX file and create a reader from it
67    pub fn build(raw_dex: Vec<u8>) -> Result<Self, DexError> {
68        let endianness = DexReader::check_endianness(&raw_dex)?;
69
70        let mut bytes = Cursor::new(raw_dex);
71        let bytes_len = bytes.seek(SeekFrom::End(0))?;
72        bytes.rewind()?;
73
74        Ok(DexReader {
75            bytes,
76            bytes_len,
77            endianness
78        })
79    }
80
81    /// Check the endianness of a DEX file
82    pub fn check_endianness(bytes: &[u8]) -> Result<DexEndianness, DexError> {
83        // Cannot use self here as we need to know the endianness before anything else
84
85        if bytes.len() < 44 {
86            return Err(DexError::DexHeaderTooShortError);
87        }
88
89        let endian_tag = &bytes[40..44];
90
91        // try_into to convert the slice into an array
92        match endian_tag.try_into().unwrap() {
93            ENDIAN_CONSTANT => Ok(DexEndianness::BigEndian),
94            REVERSE_ENDIAN_CONSTANT => Ok(DexEndianness::LittleEndian),
95            _ => Err(DexError::InvalidEndianessTag)
96        }
97    }
98
99    /// Check if the cursor is on an even-numbered bytecode offsets
100    /// and, if not, consume data until it is
101    pub fn align_cursor(&mut self) -> Result<(), DexError> {
102        while self.bytes.position() % 2 != 0 {
103            let _ = self.read_u8()?;
104        }
105
106        Ok(())
107    }
108
109    /// Read an unsigned 8 bits integer from the reader
110    pub fn read_u8(&mut self) -> Result<u8, DexError> {
111        if self.bytes.position() >= self.bytes_len {
112            return Err(DexError::NoDataLeftError);
113        }
114
115        Ok(self.bytes.read_u8()?)
116    }
117
118    /// Read an unsigned 16 bits integer from the reader
119    pub fn read_u16(&mut self) -> Result<u16, DexError> {
120        if self.bytes.position() > self.bytes_len - 2 {
121            return Err(DexError::NoDataLeftError);
122        }
123
124        match self.endianness {
125            DexEndianness::BigEndian => Ok(self.bytes.read_u16::<BigEndian>()?),
126            DexEndianness::LittleEndian => Ok(self.bytes.read_u16::<LittleEndian>()?),
127        }
128    }
129
130    /// Read an unsigned 32 bits integer from the reader
131    pub fn read_u32(&mut self) -> Result<u32, DexError> {
132        if self.bytes.position() > self.bytes_len - 4 {
133            return Err(DexError::NoDataLeftError);
134        }
135
136        match self.endianness {
137            DexEndianness::BigEndian => Ok(self.bytes.read_u32::<BigEndian>()?),
138            DexEndianness::LittleEndian => Ok(self.bytes.read_u32::<LittleEndian>()?),
139        }
140    }
141
142    /// Read a signed 32 bits integer from the reader
143    pub fn read_i32(&mut self) -> Result<i32, DexError> {
144        if self.bytes.position() > self.bytes_len - 4 {
145            return Err(DexError::NoDataLeftError);
146        }
147
148        match self.endianness {
149            DexEndianness::BigEndian => Ok(self.bytes.read_i32::<BigEndian>()?),
150            DexEndianness::LittleEndian => Ok(self.bytes.read_i32::<LittleEndian>()?),
151        }
152    }
153
154    /// Read an unsigned LEB128 value from the reader
155    pub fn read_uleb128(&mut self) -> Result<(u32, usize), DexError> {
156        let mut bytes_read: usize = 0;
157        let mut result: u32 = 0;
158        let mut shift = 0;
159
160        loop {
161            let byte = self.bytes.read_u8()?;
162            bytes_read += 1;
163            let payload = (byte & 0b0111_1111) as u32;
164            result |= payload << shift;
165            shift += 7;
166
167            if (byte & 0b1000_0000) == 0 {
168                break;
169            }
170
171            if bytes_read >= 5 {
172                return Err(DexError::InvalidUleb128Value);
173            }
174        }
175
176        Ok((result, bytes_read))
177    }
178
179    /// Read a signed LEB128 value from the reader
180    pub fn read_sleb128(&mut self) -> Result<(i32, usize), DexError> {
181        let mut bytes_read: usize = 0;
182        let mut result: u32 = 0;
183        let mut shift = 0;
184        let mut byte;
185
186        loop {
187            byte = self.bytes.read_u8()? as u32;
188            bytes_read += 1;
189            let payload = byte & 0b0111_1111;
190            result |= payload << shift;
191
192            shift += 7;
193
194            if (byte & 0b1000_0000) == 0 {
195                break;
196            }
197
198            if bytes_read >= 5 {
199                return Err(DexError::InvalidSleb128Value);
200            }
201        }
202
203        let mut result = result as i32;
204        if (byte & 0b0100_0000) == 0b0100_0000 {
205            /* sign extend */
206            result |= -(1 << shift);
207        }
208
209        Ok((result, bytes_read))
210    }
211
212    /// Read a signed LEB128p1 value from the reader
213    pub fn read_uleb128p1(&mut self) -> Result<(i32, usize), DexError> {
214        match self.read_uleb128() {
215            Ok((uleb128, bytes_read)) => Ok(((uleb128 as i32) - 1, bytes_read)),
216            Err(_) => Err(DexError::InvalidUleb128p1Value)
217        }
218    }
219}
220
221#[cfg(test)]
222mod tests {
223    use super::*;
224
225    const DEX_DATA: [u8; 50] = [
226        0x64, 0x65, 0x78, 0x0a, 0x30, 0x33, 0x35, 0x00, 0x00, 0x00,  // offsets 0..10: "dex\n035\0" + 2 zero bytes
227        0x7f, 0xdf, 0x80, 0x01, 0x80, 0x80, 0x80, 0x80, 0x80, 0x00,  // offsets 10..20: uleb128 data (last value is too long)
228        0x7f, 0x80, 0x7f, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x00,  // offsets 20..30: sleb128 data (last value is too long)
229        0x7f, 0xdf, 0x00, 0x00, 0x80, 0x80, 0x80, 0x80, 0x80, 0x00,  // offsets 30..40: uleb128p1 data (last value is too long)
230        0x78, 0x56, 0x34, 0x12, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // offsets 40..50: little-endian tag + 6 zero bytes
231    ];
232
233    #[test]
234    fn test_build() {
235        let dex_reader = DexReader::build(DEX_DATA.to_vec()).unwrap();
236        assert_eq!(dex_reader.bytes_len, DEX_DATA.len() as u64);
237        assert_eq!(dex_reader.endianness, DexEndianness::LittleEndian);
238    }
239
240    #[test]
241    fn test_check_endianness() {
242        let dex_reader = DexReader::build(DEX_DATA.to_vec()).unwrap();
243        let endianness = DexReader::check_endianness(&DEX_DATA).unwrap();
244        assert_eq!(endianness, DexEndianness::LittleEndian);
245        assert_eq!(dex_reader.endianness, endianness);
246    }
247
248    #[test]
249    fn test_check_endianness_invalid() {
250        let invalid_data = vec![0x00; 10];
251        let result = DexReader::check_endianness(&invalid_data);
252        assert_eq!(
253            result.unwrap_err().to_string(),
254            "DEX header too short"
255        );
256    }
257
258    #[test]
259    fn test_check_endianness_invalid_long() {
260        let invalid_long_data = vec![0x00; 100];
261        let result = DexReader::check_endianness(&invalid_long_data);
262        assert_eq!(
263            result.unwrap_err().to_string(),
264            "invalid endianness tag"
265        );
266    }
267
268    #[test]
269    fn test_read_u8() {
270        let mut dex_reader = DexReader::build(DEX_DATA.to_vec()).unwrap();
271        let byte = dex_reader.read_u8().unwrap();
272        assert_eq!(byte, 0x64);
273
274        // Test reading at and after end of file
275        dex_reader.bytes.seek(SeekFrom::End(0)).unwrap();
276        let result = dex_reader.read_u8();
277        assert_eq!(
278            result.unwrap_err().to_string(),
279            "no data left to read"
280        );
281
282        let bound = DEX_DATA.len() + 10;
283        dex_reader.bytes.seek(SeekFrom::Start(bound as u64)).unwrap();
284        let result = dex_reader.read_u8();
285        assert_eq!(
286            result.unwrap_err().to_string(),
287            "no data left to read"
288        );
289    }
290
291    #[test]
292    fn test_read_u16() {
293        let mut dex_reader = DexReader::build(DEX_DATA.to_vec()).unwrap();
294        let u16_val = dex_reader.read_u16().unwrap();
295        assert_eq!(u16_val, 0x6564);
296
297        // Test reading at and after end of file
298        dex_reader.bytes.seek(SeekFrom::End(0)).unwrap();
299        let result = dex_reader.read_u16();
300        assert_eq!(
301            result.unwrap_err().to_string(),
302            "no data left to read"
303        );
304
305        let bound = DEX_DATA.len() + 10;
306        dex_reader.bytes.seek(SeekFrom::Start(bound as u64)).unwrap();
307        let result = dex_reader.read_u16();
308        assert_eq!(
309            result.unwrap_err().to_string(),
310            "no data left to read"
311        );
312    }
313
314    #[test]
315    fn test_read_u32() {
316        let mut dex_reader = DexReader::build(DEX_DATA.to_vec()).unwrap();
317        let u32_val = dex_reader.read_u32().unwrap();
318        assert_eq!(u32_val, 0x0a786564);
319
320        // Test reading at and after end of file
321        dex_reader.bytes.seek(SeekFrom::End(0)).unwrap();
322        let result = dex_reader.read_u32();
323        assert_eq!(
324            result.unwrap_err().to_string(),
325            "no data left to read"
326        );
327
328        let bound = DEX_DATA.len() + 10;
329        dex_reader.bytes.seek(SeekFrom::Start(bound as u64)).unwrap();
330        let result = dex_reader.read_u32();
331        assert_eq!(
332            result.unwrap_err().to_string(),
333            "no data left to read"
334        );
335    }
336
337    #[test]
338    fn test_read_uleb128() {
339        let mut reader = DexReader::build(DEX_DATA.to_vec()).unwrap();
340        reader.bytes.seek(SeekFrom::Start(10)).unwrap();
341
342        let result = reader.read_uleb128().unwrap();
343        assert_eq!(result, (0x7f, 1));
344
345        let result = reader.read_uleb128().unwrap();
346        assert_eq!(result, (0x405f, 3));
347
348        let result = reader.read_uleb128();
349        assert_eq!(
350            result.unwrap_err().to_string(),
351            "too many bytes in unsigned LEB128 value"
352        );
353
354        let dex_data = [
355            0x64, 0x65, 0x78, 0x0a, 0x30, 0x33, 0x35, 0x00, 0x00, 0x00,  // DEX magic
356            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // padding
357            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // padding
358            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // padding
359            0x78, 0x56, 0x34, 0x12, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // endianness tag
360            0x00,                                                        // 0
361            0x01,                                                        // 1
362            0x7f,                                                        // 127
363            0x80, 0x7f,                                                  // 16256
364            0xb4, 0x07,                                                  // 0x3b4
365            0x8c, 0x08,                                                  // 0x40c
366            0xff, 0xff, 0xff, 0xff, 0xf                                  // 0xffffffff
367        ];
368
369        let mut reader = DexReader::build(dex_data.to_vec()).unwrap();
370        reader.bytes.seek(SeekFrom::Start(50)).unwrap();
371
372        let result = reader.read_uleb128().unwrap();
373        assert_eq!(result, (0, 1));
374
375        let result = reader.read_uleb128().unwrap();
376        assert_eq!(result, (1, 1));
377
378        let result = reader.read_uleb128().unwrap();
379        assert_eq!(result, (127, 1));
380
381        let result = reader.read_uleb128().unwrap();
382        assert_eq!(result, (16256, 2));
383
384        let result = reader.read_uleb128().unwrap();
385        assert_eq!(result, (0x3b4, 2));
386
387        let result = reader.read_uleb128().unwrap();
388        assert_eq!(result, (0x40c, 2));
389
390        let result = reader.read_uleb128().unwrap();
391        assert_eq!(result, (0xffffffff, 5));
392    }
393
394    #[test]
395    fn test_read_sleb128() {
396        let mut reader = DexReader::build(DEX_DATA.to_vec()).unwrap();
397        reader.bytes.seek(SeekFrom::Start(20)).unwrap();
398
399        let result = reader.read_sleb128().unwrap();
400        assert_eq!(result, (-1, 1));
401
402        let result = reader.read_sleb128().unwrap();
403        assert_eq!(result, (-128, 2));
404
405        let result = reader.read_sleb128();
406        assert_eq!(
407            result.unwrap_err().to_string(),
408            "too many bytes in signed LEB128 value"
409        );
410
411        let dex_data = [
412            0x64, 0x65, 0x78, 0x0a, 0x30, 0x33, 0x35, 0x00, 0x00, 0x00,  // DEX magic
413            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // padding
414            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // padding
415            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // padding
416            0x78, 0x56, 0x34, 0x12, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // endianness tag
417            0x00,                                                        // 0
418            0x01,                                                        // 1
419            0x7f,                                                        // -1
420            0x80, 0x7f,                                                  // -128
421            0x3c,                                                        // 0x3c
422        ];
423
424        let mut reader = DexReader::build(dex_data.to_vec()).unwrap();
425        reader.bytes.seek(SeekFrom::Start(50)).unwrap();
426
427        let result = reader.read_sleb128().unwrap();
428        assert_eq!(result, (0, 1));
429
430        let result = reader.read_sleb128().unwrap();
431        assert_eq!(result, (1, 1));
432
433        let result = reader.read_sleb128().unwrap();
434        assert_eq!(result, (-1, 1));
435
436        let result = reader.read_sleb128().unwrap();
437        assert_eq!(result, (-128, 2));
438
439        let result = reader.read_sleb128().unwrap();
440        assert_eq!(result, (0x3c, 1));
441    }
442
443    #[test]
444    fn test_read_uleb128p1() {
445        let mut reader = DexReader::build(DEX_DATA.to_vec()).unwrap();
446        reader.bytes.seek(SeekFrom::Start(30)).unwrap();
447
448        let result = reader.read_uleb128p1().unwrap();
449        assert_eq!(result, (0x7e, 1));
450
451        let result = reader.read_uleb128p1().unwrap();
452        assert_eq!(result, (0x5e, 2));
453
454        let result = reader.read_uleb128p1().unwrap();
455        assert_eq!(result, (-1, 1));
456
457        let result = reader.read_uleb128p1();
458        assert_eq!(
459            result.unwrap_err().to_string(),
460            "too many bytes in unsigned LEB128p1 value"
461        );
462    }
463}