rusty_dex/dex/
reader.rs

1use std::fs::File;
2use std::io::{ Read, Cursor, Seek, SeekFrom };
3use zip::ZipArchive;
4use byteorder::{BigEndian, LittleEndian, ReadBytesExt};
5
6use crate::error::DexError;
7
/* Endianness constants */

/// Bytes of the `endian_tag` header field as they appear in a file whose
/// multi-byte values are stored most-significant byte first: the 32-bit
/// value `0x12345678` written out in big-endian order.
const ENDIAN_CONSTANT: [u8; 4] = 0x1234_5678_u32.to_be_bytes();
/// Bytes of the `endian_tag` header field as they appear in a file whose
/// multi-byte values are stored least-significant byte first: the same
/// 32-bit value `0x12345678` written out in little-endian order.
const REVERSE_ENDIAN_CONSTANT: [u8; 4] = 0x1234_5678_u32.to_le_bytes();
11
/// Byte order used to decode multi-byte integers read from DEX data.
///
/// The enum has no payload, so equality is total: `Eq` is derived alongside
/// `PartialEq` to let the type be used wherever a full equivalence relation
/// is required.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DexEndianness {
    /// Least-significant byte first.
    LittleEndian,
    /// Most-significant byte first.
    BigEndian,
}
17
18#[derive(Debug)]
19pub struct DexReader {
20    pub bytes: Cursor<Vec<u8>>,
21    pub bytes_len: u64,
22    pub endianness: DexEndianness,
23}
24
25impl DexReader {
26    pub fn build_from_file(filepath: &str) -> Vec<DexReader> {
27        let raw_file = File::open(filepath)
28            .unwrap_or_else(|err| panic!("could not open input file: {err}"));
29        let mut zip_file = ZipArchive::new(raw_file)
30            .unwrap_or_else(|err| panic!("could not create ZipArchive object: {err}"));
31
32        let dex_entries_names = zip_file.file_names()
33                                        .filter(|name| name.ends_with(".dex"))
34                                        .map(|name| name.to_string())
35                                        .collect::<Vec<String>>();
36
37        let mut readers = Vec::new();
38        for entry in dex_entries_names.iter() {
39            let mut dex_entry = zip_file.by_name(entry)
40                                        .unwrap_or_else(|_| panic!("cannot find classes.dex in the APK"));
41            let mut raw_dex = Vec::new();
42            dex_entry.read_to_end(&mut raw_dex)
43                     .unwrap_or_else(|err| panic!("Could not read input file: {err}"));
44            let reader = DexReader::build(raw_dex);
45            readers.push(reader);
46        }
47
48        readers
49    }
50
51    pub fn build(raw_dex: Vec<u8>) -> Self {
52        let endianness = DexReader::check_endianness(&raw_dex).unwrap();
53
54        let mut bytes = Cursor::new(raw_dex);
55        let bytes_len = bytes.seek(SeekFrom::End(0)).unwrap();
56        bytes.rewind().unwrap();
57
58        DexReader {
59            bytes,
60            bytes_len,
61            endianness
62        }
63    }
64
65    pub fn check_endianness(bytes: &[u8]) -> Result<DexEndianness, DexError> {
66        // Cannot use self here as we need to know the endianness before anything else
67
68        if bytes.len() < 44 {
69            return Err(DexError::new("Error: DEX header too short"));
70        }
71
72        let endian_tag = &bytes[40..44];
73        // try_into to convert the slice into an array
74        match endian_tag.try_into().unwrap() {
75            ENDIAN_CONSTANT => Ok(DexEndianness::BigEndian),
76            REVERSE_ENDIAN_CONSTANT => Ok(DexEndianness::LittleEndian),
77            _ => panic!("Error: invalid endian tag in DEX header")
78        }
79    }
80
81    /// Check if the cursor is on an even-numbered bytecode offsets
82    /// and, if not, consume data until it is
83    pub fn align_cursor(&mut self) {
84        while ! (self.bytes.position() % 4 == 0) {
85            let _ = self.read_u8().unwrap();
86        }
87    }
88
89    pub fn read_u8(&mut self) -> Result<u8, DexError> {
90        if self.bytes.position() >= self.bytes_len {
91            return Err(DexError::new("Error: no data left to read"));
92        }
93
94        Ok(self.bytes.read_u8().unwrap())
95    }
96
97    pub fn read_u16(&mut self) -> Result<u16, DexError> {
98        if self.bytes.position() > self.bytes_len - 2 {
99            return Err(DexError::new("Error: no data left to read"));
100        }
101
102        match self.endianness {
103            DexEndianness::BigEndian => Ok(self.bytes.read_u16::<BigEndian>().unwrap()),
104            DexEndianness::LittleEndian => Ok(self.bytes.read_u16::<LittleEndian>().unwrap()),
105        }
106    }
107
108    pub fn read_u32(&mut self) -> Result<u32, DexError> {
109        if self.bytes.position() > self.bytes_len - 4 {
110            return Err(DexError::new("Error: no data left to read"));
111        }
112
113        match self.endianness {
114            DexEndianness::BigEndian => Ok(self.bytes.read_u32::<BigEndian>().unwrap()),
115            DexEndianness::LittleEndian => Ok(self.bytes.read_u32::<LittleEndian>().unwrap()),
116        }
117    }
118
119    pub fn read_i32(&mut self) -> Result<i32, DexError> {
120        if self.bytes.position() > self.bytes_len - 4 {
121            return Err(DexError::new("Error: no data left to read"));
122        }
123
124        match self.endianness {
125            DexEndianness::BigEndian => Ok(self.bytes.read_i32::<BigEndian>().unwrap()),
126            DexEndianness::LittleEndian => Ok(self.bytes.read_i32::<LittleEndian>().unwrap()),
127        }
128    }
129
130    pub fn read_uleb128(&mut self) -> Result<(u32, usize), DexError> {
131        let mut bytes_read: usize = 0;
132        let mut result: u32 = 0;
133        let mut shift = 0;
134
135        loop {
136            let byte = self.bytes.read_u8().unwrap();
137            bytes_read += 1;
138            let payload = (byte & 0b0111_1111) as u32;
139            result |= payload << shift;
140            shift += 7;
141
142            if (byte & 0b1000_0000) == 0 {
143                break;
144            }
145
146            if bytes_read >= 5 {
147                return Err(DexError::new("Error: too many bytes in unsigned LEB128 value"));
148            }
149        }
150
151        Ok((result, bytes_read))
152    }
153
154    pub fn read_sleb128(&mut self) -> Result<(i32, usize), DexError> {
155        let mut bytes_read: usize = 0;
156        let mut result: u32 = 0;
157        let mut shift = 0;
158        let mut byte;
159
160        loop {
161            byte = self.bytes.read_u8().unwrap() as u32;
162            bytes_read += 1;
163            let payload = byte & 0b0111_1111;
164            result |= payload << shift;
165
166            shift += 7;
167
168            if (byte & 0b1000_0000) == 0 {
169                break;
170            }
171
172            if bytes_read >= 5 {
173                return Err(DexError::new("Error: too many bytes in unsigned LEB128 value"));
174            }
175        }
176
177        let mut result = result as i32;
178        if (byte & 0b0100_0000) == 0b0100_0000 {
179            /* sign extend */
180            result |= -(1 << shift);
181        }
182
183        Ok((result, bytes_read))
184    }
185
186    pub fn read_uleb128p1(&mut self) -> Result<(i32, usize), DexError> {
187        match self.read_uleb128() {
188            Ok((uleb128, bytes_read)) => Ok(((uleb128 as i32) - 1, bytes_read)),
189            Err(err) => Err(err)
190        }
191    }
192}
193
#[cfg(test)]
mod tests {
    use super::*;

    /// Synthetic DEX-like buffer: only the endian tag at offset 40 is
    /// meaningful as a header; the other rows are LEB128 fixtures.
    const DEX_DATA: [u8; 50] = [
        0x64, 0x65, 0x78, 0x0a, 0x30, 0x33, 0x35, 0x00, 0x00, 0x00,  // DEX magic
        0x7f, 0xdf, 0x80, 0x01, 0x80, 0x80, 0x80, 0x80, 0x80, 0x00,  // uleb128 data
        0x7f, 0x80, 0x7f, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x00,  // sleb128 data
        0x7f, 0xdf, 0x00, 0x00, 0x80, 0x80, 0x80, 0x80, 0x80, 0x00,  // uleb128p1 data
        0x78, 0x56, 0x34, 0x12, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // endianness tag
    ];

    const NO_DATA_LEFT: &str = "Error: no data left to read";
    const LEB128_TOO_LONG: &str = "Error: too many bytes in unsigned LEB128 value";

    /// Builds a reader over `DEX_DATA` with the cursor placed at `offset`.
    fn reader_at(offset: u64) -> DexReader {
        let mut reader = DexReader::build(DEX_DATA.to_vec());
        reader.bytes.seek(SeekFrom::Start(offset)).unwrap();
        reader
    }

    /// Checks that `read` reports the end-of-data error both exactly at the
    /// end of the buffer and well past it.
    fn assert_fails_at_and_after_end<T: std::fmt::Debug>(
        read: fn(&mut DexReader) -> Result<T, DexError>,
    ) {
        let mut reader = reader_at(0);

        reader.bytes.seek(SeekFrom::End(0)).unwrap();
        assert_eq!(read(&mut reader).unwrap_err().to_string(), NO_DATA_LEFT);

        let bound = DEX_DATA.len() as u64 + 10;
        reader.bytes.seek(SeekFrom::Start(bound)).unwrap();
        assert_eq!(read(&mut reader).unwrap_err().to_string(), NO_DATA_LEFT);
    }

    #[test]
    fn test_build() {
        let dex_reader = DexReader::build(DEX_DATA.to_vec());
        assert_eq!(dex_reader.bytes_len, DEX_DATA.len() as u64);
        assert_eq!(dex_reader.endianness, DexEndianness::LittleEndian);
    }

    #[test]
    fn test_check_endianness() {
        let dex_reader = DexReader::build(DEX_DATA.to_vec());
        let endianness = DexReader::check_endianness(&DEX_DATA).unwrap();
        assert_eq!(endianness, DexEndianness::LittleEndian);
        assert_eq!(dex_reader.endianness, endianness);

        let invalid_data = vec![0x00; 10];
        let error = DexReader::check_endianness(&invalid_data).unwrap_err();
        assert_eq!(error.message, "Error: DEX header too short");
    }

    /// Data long enough to hold a tag, but whose tag is unknown, makes
    /// `check_endianness` panic rather than return an error.
    #[test]
    #[should_panic]
    fn test_check_invalid_endianness() {
        let invalid_long_data = vec![0x00; 100];
        let _ = DexReader::check_endianness(&invalid_long_data).unwrap_err();
    }

    #[test]
    fn test_align_cursor() {
        // Already aligned: the cursor must not move.
        let mut reader = reader_at(0);
        reader.align_cursor();
        assert_eq!(reader.bytes.position(), 0);

        let mut reader = reader_at(8);
        reader.align_cursor();
        assert_eq!(reader.bytes.position(), 8);

        // Misaligned: the cursor moves to the next multiple of 4.
        for &(start, expected) in [(1u64, 4u64), (2, 4), (3, 4), (6, 8), (45, 48)].iter() {
            let mut reader = reader_at(start);
            reader.align_cursor();
            assert_eq!(reader.bytes.position(), expected);
        }
    }

    #[test]
    fn test_read_u8() {
        let mut dex_reader = reader_at(0);
        assert_eq!(dex_reader.read_u8().unwrap(), 0x64);

        assert_fails_at_and_after_end(DexReader::read_u8);
    }

    #[test]
    fn test_read_u16() {
        let mut dex_reader = reader_at(0);
        assert_eq!(dex_reader.read_u16().unwrap(), 0x6564);

        assert_fails_at_and_after_end(DexReader::read_u16);
    }

    #[test]
    fn test_read_u32() {
        let mut dex_reader = reader_at(0);
        assert_eq!(dex_reader.read_u32().unwrap(), 0x0a786564);

        assert_fails_at_and_after_end(DexReader::read_u32);
    }

    #[test]
    fn test_read_i32() {
        let mut dex_reader = reader_at(0);
        assert_eq!(dex_reader.read_i32().unwrap(), 0x0a786564);

        assert_fails_at_and_after_end(DexReader::read_i32);
    }

    #[test]
    fn test_read_uleb128() {
        let mut reader = reader_at(10);

        assert_eq!(reader.read_uleb128().unwrap(), (0x7f, 1));
        assert_eq!(reader.read_uleb128().unwrap(), (0x405f, 3));
        assert_eq!(
            reader.read_uleb128().unwrap_err().to_string(),
            LEB128_TOO_LONG
        );
    }

    #[test]
    fn test_read_sleb128() {
        let mut reader = reader_at(20);

        assert_eq!(reader.read_sleb128().unwrap(), (-1, 1));
        assert_eq!(reader.read_sleb128().unwrap(), (-128, 2));
        assert_eq!(
            reader.read_sleb128().unwrap_err().to_string(),
            LEB128_TOO_LONG
        );
    }

    #[test]
    fn test_read_uleb128p1() {
        let mut reader = reader_at(30);

        assert_eq!(reader.read_uleb128p1().unwrap(), (0x7e, 1));
        assert_eq!(reader.read_uleb128p1().unwrap(), (0x5e, 2));
        assert_eq!(reader.read_uleb128p1().unwrap(), (-1, 1));
        assert_eq!(
            reader.read_uleb128p1().unwrap_err().to_string(),
            LEB128_TOO_LONG
        );
    }
}