1use std::fs::File;
2use std::io::{ Read, Cursor, Seek, SeekFrom };
3use zip::ZipArchive;
4use byteorder::{BigEndian, LittleEndian, ReadBytesExt};
5
6use crate::error::DexError;
7
/// Bytes of the tag value `0x12345678` laid out most-significant byte first.
///
/// `DexReader::check_endianness` maps this byte order to `DexEndianness::BigEndian`.
const ENDIAN_CONSTANT: [u8; 4] = 0x1234_5678_u32.to_be_bytes();
/// Bytes of the same tag value laid out least-significant byte first.
///
/// `DexReader::check_endianness` maps this byte order to `DexEndianness::LittleEndian`.
const REVERSE_ENDIAN_CONSTANT: [u8; 4] = 0x1234_5678_u32.to_le_bytes();
11
/// Byte order used to decode the multi-byte integers of a DEX file.
///
/// Equality on this fieldless enum is total, so `Eq` is derived alongside `PartialEq`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DexEndianness {
    /// Least-significant byte is stored first.
    LittleEndian,
    /// Most-significant byte is stored first.
    BigEndian,
}
17
18#[derive(Debug)]
19pub struct DexReader {
20 pub bytes: Cursor<Vec<u8>>,
21 pub bytes_len: u64,
22 pub endianness: DexEndianness,
23}
24
25impl DexReader {
26 pub fn build_from_file(filepath: &str) -> Vec<DexReader> {
27 let raw_file = File::open(filepath)
28 .unwrap_or_else(|err| panic!("could not open input file: {err}"));
29 let mut zip_file = ZipArchive::new(raw_file)
30 .unwrap_or_else(|err| panic!("could not create ZipArchive object: {err}"));
31
32 let dex_entries_names = zip_file.file_names()
33 .filter(|name| name.ends_with(".dex"))
34 .map(|name| name.to_string())
35 .collect::<Vec<String>>();
36
37 let mut readers = Vec::new();
38 for entry in dex_entries_names.iter() {
39 let mut dex_entry = zip_file.by_name(entry)
40 .unwrap_or_else(|_| panic!("cannot find classes.dex in the APK"));
41 let mut raw_dex = Vec::new();
42 dex_entry.read_to_end(&mut raw_dex)
43 .unwrap_or_else(|err| panic!("Could not read input file: {err}"));
44 let reader = DexReader::build(raw_dex);
45 readers.push(reader);
46 }
47
48 readers
49 }
50
51 pub fn build(raw_dex: Vec<u8>) -> Self {
52 let endianness = DexReader::check_endianness(&raw_dex).unwrap();
53
54 let mut bytes = Cursor::new(raw_dex);
55 let bytes_len = bytes.seek(SeekFrom::End(0)).unwrap();
56 bytes.rewind().unwrap();
57
58 DexReader {
59 bytes,
60 bytes_len,
61 endianness
62 }
63 }
64
65 pub fn check_endianness(bytes: &[u8]) -> Result<DexEndianness, DexError> {
66 if bytes.len() < 44 {
69 return Err(DexError::new("Error: DEX header too short"));
70 }
71
72 let endian_tag = &bytes[40..44];
73 match endian_tag.try_into().unwrap() {
75 ENDIAN_CONSTANT => Ok(DexEndianness::BigEndian),
76 REVERSE_ENDIAN_CONSTANT => Ok(DexEndianness::LittleEndian),
77 _ => panic!("Error: invalid endian tag in DEX header")
78 }
79 }
80
81 pub fn align_cursor(&mut self) {
84 while ! (self.bytes.position() % 4 == 0) {
85 let _ = self.read_u8().unwrap();
86 }
87 }
88
89 pub fn read_u8(&mut self) -> Result<u8, DexError> {
90 if self.bytes.position() >= self.bytes_len {
91 return Err(DexError::new("Error: no data left to read"));
92 }
93
94 Ok(self.bytes.read_u8().unwrap())
95 }
96
97 pub fn read_u16(&mut self) -> Result<u16, DexError> {
98 if self.bytes.position() > self.bytes_len - 2 {
99 return Err(DexError::new("Error: no data left to read"));
100 }
101
102 match self.endianness {
103 DexEndianness::BigEndian => Ok(self.bytes.read_u16::<BigEndian>().unwrap()),
104 DexEndianness::LittleEndian => Ok(self.bytes.read_u16::<LittleEndian>().unwrap()),
105 }
106 }
107
108 pub fn read_u32(&mut self) -> Result<u32, DexError> {
109 if self.bytes.position() > self.bytes_len - 4 {
110 return Err(DexError::new("Error: no data left to read"));
111 }
112
113 match self.endianness {
114 DexEndianness::BigEndian => Ok(self.bytes.read_u32::<BigEndian>().unwrap()),
115 DexEndianness::LittleEndian => Ok(self.bytes.read_u32::<LittleEndian>().unwrap()),
116 }
117 }
118
119 pub fn read_i32(&mut self) -> Result<i32, DexError> {
120 if self.bytes.position() > self.bytes_len - 4 {
121 return Err(DexError::new("Error: no data left to read"));
122 }
123
124 match self.endianness {
125 DexEndianness::BigEndian => Ok(self.bytes.read_i32::<BigEndian>().unwrap()),
126 DexEndianness::LittleEndian => Ok(self.bytes.read_i32::<LittleEndian>().unwrap()),
127 }
128 }
129
130 pub fn read_uleb128(&mut self) -> Result<(u32, usize), DexError> {
131 let mut bytes_read: usize = 0;
132 let mut result: u32 = 0;
133 let mut shift = 0;
134
135 loop {
136 let byte = self.bytes.read_u8().unwrap();
137 bytes_read += 1;
138 let payload = (byte & 0b0111_1111) as u32;
139 result |= payload << shift;
140 shift += 7;
141
142 if (byte & 0b1000_0000) == 0 {
143 break;
144 }
145
146 if bytes_read >= 5 {
147 return Err(DexError::new("Error: too many bytes in unsigned LEB128 value"));
148 }
149 }
150
151 Ok((result, bytes_read))
152 }
153
154 pub fn read_sleb128(&mut self) -> Result<(i32, usize), DexError> {
155 let mut bytes_read: usize = 0;
156 let mut result: u32 = 0;
157 let mut shift = 0;
158 let mut byte;
159
160 loop {
161 byte = self.bytes.read_u8().unwrap() as u32;
162 bytes_read += 1;
163 let payload = byte & 0b0111_1111;
164 result |= payload << shift;
165
166 shift += 7;
167
168 if (byte & 0b1000_0000) == 0 {
169 break;
170 }
171
172 if bytes_read >= 5 {
173 return Err(DexError::new("Error: too many bytes in unsigned LEB128 value"));
174 }
175 }
176
177 let mut result = result as i32;
178 if (byte & 0b0100_0000) == 0b0100_0000 {
179 result |= -(1 << shift);
181 }
182
183 Ok((result, bytes_read))
184 }
185
186 pub fn read_uleb128p1(&mut self) -> Result<(i32, usize), DexError> {
187 match self.read_uleb128() {
188 Ok((uleb128, bytes_read)) => Ok(((uleb128 as i32) - 1, bytes_read)),
189 Err(err) => Err(err)
190 }
191 }
192}
193
#[cfg(test)]
mod tests {
    use super::*;

    /// Message expected from the fixed-width reads once the data is exhausted.
    const NO_DATA_LEFT: &str = "Error: no data left to read";
    /// Message expected when a LEB128 value runs past five bytes.
    const LEB128_TOO_LONG: &str = "Error: too many bytes in unsigned LEB128 value";

    /// 50-byte fixture, laid out ten bytes per row.
    const DEX_DATA: [u8; 50] = [
        // 0..10: "dex\n035\0" followed by two zero bytes
        0x64, 0x65, 0x78, 0x0a, 0x30, 0x33, 0x35, 0x00, 0x00, 0x00,
        // 10..20: input for `test_read_uleb128`
        0x7f, 0xdf, 0x80, 0x01, 0x80, 0x80, 0x80, 0x80, 0x80, 0x00,
        // 20..30: input for `test_read_sleb128`
        0x7f, 0x80, 0x7f, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x00,
        // 30..40: input for `test_read_uleb128p1`
        0x7f, 0xdf, 0x00, 0x00, 0x80, 0x80, 0x80, 0x80, 0x80, 0x00,
        // 40..44: endian tag, 44..50: zero bytes
        0x78, 0x56, 0x34, 0x12, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    ];

    /// Builds a reader over a fresh copy of the fixture.
    fn fresh_reader() -> DexReader {
        DexReader::build(DEX_DATA.to_vec())
    }

    /// Builds a reader over the fixture with the cursor moved to `offset`.
    fn reader_at(offset: u64) -> DexReader {
        let mut reader = fresh_reader();
        reader.bytes.seek(SeekFrom::Start(offset)).unwrap();
        reader
    }

    /// An offset ten bytes beyond the end of the fixture.
    fn beyond_end() -> u64 {
        (DEX_DATA.len() + 10) as u64
    }

    #[test]
    fn test_build() {
        let reader = fresh_reader();
        assert_eq!(reader.bytes_len, DEX_DATA.len() as u64);
        assert_eq!(reader.endianness, DexEndianness::LittleEndian);
    }

    #[test]
    fn test_check_endianness() {
        let reader = fresh_reader();
        let detected = DexReader::check_endianness(&DEX_DATA).unwrap();
        assert_eq!(detected, DexEndianness::LittleEndian);
        assert_eq!(reader.endianness, detected);

        let too_short = vec![0x00; 10];
        let error = DexReader::check_endianness(&too_short).unwrap_err();
        assert_eq!(error.message, "Error: DEX header too short");
    }

    #[test]
    #[should_panic]
    fn test_check_invalid_endianess() {
        let zeroed_header = vec![0x00; 100];
        let _ = DexReader::check_endianness(&zeroed_header).unwrap_err();
    }

    #[test]
    fn test_read_u8() {
        let mut reader = fresh_reader();
        assert_eq!(reader.read_u8().unwrap(), 0x64);

        // Cursor exactly at the end of the data.
        reader.bytes.seek(SeekFrom::End(0)).unwrap();
        assert_eq!(reader.read_u8().unwrap_err().to_string(), NO_DATA_LEFT);

        // Cursor beyond the end of the data.
        reader.bytes.seek(SeekFrom::Start(beyond_end())).unwrap();
        assert_eq!(reader.read_u8().unwrap_err().to_string(), NO_DATA_LEFT);
    }

    #[test]
    fn test_read_u16() {
        let mut reader = fresh_reader();
        assert_eq!(reader.read_u16().unwrap(), 0x6564);

        // Cursor exactly at the end of the data.
        reader.bytes.seek(SeekFrom::End(0)).unwrap();
        assert_eq!(reader.read_u16().unwrap_err().to_string(), NO_DATA_LEFT);

        // Cursor beyond the end of the data.
        reader.bytes.seek(SeekFrom::Start(beyond_end())).unwrap();
        assert_eq!(reader.read_u16().unwrap_err().to_string(), NO_DATA_LEFT);
    }

    #[test]
    fn test_read_u32() {
        let mut reader = fresh_reader();
        assert_eq!(reader.read_u32().unwrap(), 0x0a786564);

        // Cursor exactly at the end of the data.
        reader.bytes.seek(SeekFrom::End(0)).unwrap();
        assert_eq!(reader.read_u32().unwrap_err().to_string(), NO_DATA_LEFT);

        // Cursor beyond the end of the data.
        reader.bytes.seek(SeekFrom::Start(beyond_end())).unwrap();
        assert_eq!(reader.read_u32().unwrap_err().to_string(), NO_DATA_LEFT);
    }

    #[test]
    fn test_read_uleb128() {
        let mut reader = reader_at(10);

        assert_eq!(reader.read_uleb128().unwrap(), (0x7f, 1));
        assert_eq!(reader.read_uleb128().unwrap(), (0x405f, 3));
        assert_eq!(
            reader.read_uleb128().unwrap_err().to_string(),
            LEB128_TOO_LONG
        );
    }

    #[test]
    fn test_read_sleb128() {
        let mut reader = reader_at(20);

        assert_eq!(reader.read_sleb128().unwrap(), (-1, 1));
        assert_eq!(reader.read_sleb128().unwrap(), (-128, 2));
        assert_eq!(
            reader.read_sleb128().unwrap_err().to_string(),
            LEB128_TOO_LONG
        );
    }

    #[test]
    fn test_read_uleb128p1() {
        let mut reader = reader_at(30);

        assert_eq!(reader.read_uleb128p1().unwrap(), (0x7e, 1));
        assert_eq!(reader.read_uleb128p1().unwrap(), (0x5e, 2));
        assert_eq!(reader.read_uleb128p1().unwrap(), (-1, 1));
        assert_eq!(
            reader.read_uleb128p1().unwrap_err().to_string(),
            LEB128_TOO_LONG
        );
    }
}