rusty_dex/dex/
strings.rs

1#![allow(dead_code)]
2
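//! Representation of strings
//!
//! This module defines the logic to decode the strings of a DEX file, as well as the ordering
//! method applied to them. The official documentation requires the list to be sorted by string
//! contents using UTF-16 code point values; the current implementation orders the strings by
//! their data offset instead (see the TODO on `DexStrings::sort`).
//! String data is stored in MUTF-8 format and is prefixed with its length in UTF-16 code units.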
8
9use std::io::{Seek, SeekFrom};
10use std::io::BufRead;
11use std::cmp::Ordering;
12
13use crate::dex::reader::DexReader;
14use crate::error::DexError;
15
/// One string entry as read from a DEX file, before the final list is assembled.
#[derive(Debug, PartialEq)]
pub struct DexStringsItem {
    /// Size value decoded from the uleb128 prefix of the string data.
    utf16_size: u32,
    /// Offset of the string data, as read from the list of string offsets.
    offset: u32,
    /// Marks an entry that is kept in the list even though its bytes could
    /// not be decoded cleanly: an entry is still needed for every string.
    is_raw: bool,
    /// Decoded contents of the string.
    string: String,
}
25
/// The strings of a DEX file, in the order produced by `DexStrings::build`.
#[derive(Debug)]
pub struct DexStrings {
    /// Decoded string contents.
    pub strings: Vec<String>,
}
31
32impl DexStrings {
33    /// Sort the strings of a DEX file
34    fn sort(a: &DexStringsItem, b: &DexStringsItem) -> Ordering {
35        // TODO: this seems to work (tested on app for which I have
36        // the Java source code) but this is not following the
37        // documentation. The documentation says:
38        //
39        // "This list must be sorted by string contents, using UTF-16
40        // code point values (not in a locale-sensitive manner)"
41        //
42        // However for some reason this is giving me issues later on
43        // when decoding e.g., prototypes.
44        a.offset.cmp(&b.offset)
45    }
46
47    /// Parse all strings from a DEX file
48    pub fn build(dex_reader: &mut DexReader, offset: u32, size: u32) -> Result<Self, DexError> {
49        // Move to start of map list
50        dex_reader.bytes.seek(SeekFrom::Start(offset.into()))?;
51
52        let mut strings = Vec::new();
53
54        for _ in 0..size {
55            let string_offset = dex_reader.read_u32()?;
56            let current_offset = dex_reader.bytes.position();
57
58            dex_reader.bytes.seek(SeekFrom::Start(string_offset.into()))?;
59
60            let (utf16_size, _) = dex_reader.read_uleb128()?;
61            if utf16_size > 0 {
62                let mut raw_string = Vec::with_capacity(utf16_size as usize);
63                dex_reader.bytes.read_until(0, &mut raw_string)?;
64                raw_string.pop();
65
66                // TODO: `mutf8::decode()` has some issues which leads to
67                // string ordering issues. For now we use `String::from_utf8_lossy()`
68                // which works as long as the app doesn't actually use UTF-16
69                let decoded = String::from_utf8_lossy(&raw_string).to_string();
70                /*
71                let (decoded, is_raw) = match mutf8::decode(&raw_string) {
72                    Ok(decoded) => (decoded, false),
73                    Err(_) => {
74                        error!("invalid MUTF-8 string");
75                        (String::from(""), true)
76                    }
77                };
78                */
79
80                strings.push(DexStringsItem {
81                    utf16_size,
82                    offset: string_offset,
83                    is_raw: true,
84                    string: decoded,
85                });
86            } else {
87                strings.push(DexStringsItem {
88                    utf16_size,
89                    offset: string_offset,
90                    is_raw: false,
91                    string: String::new(),
92                });
93            }
94
95            dex_reader.bytes.seek(SeekFrom::Start(current_offset))?;
96
97        }
98
99        strings.sort_by(DexStrings::sort);
100        let mut uniq_strings: Vec<String> = strings.into_iter()
101                                                   .map(|x| x.string)
102                                                   .collect();
103        uniq_strings.dedup();
104
105        Ok(DexStrings { strings: uniq_strings })
106    }
107}
108
#[cfg(test)]
mod tests {
    use super::*;

    /// Build the 50-byte prefix shared by every fixture: the DEX magic,
    /// zero padding, and the endianness tag at byte 40.
    fn dex_header() -> Vec<u8> {
        let mut bytes = vec![0x64, 0x65, 0x78, 0x0a, 0x30, 0x33, 0x35, 0x00];  // DEX magic
        bytes.resize(40, 0x00);                                                // nothing
        bytes.extend_from_slice(&[0x78, 0x56, 0x34, 0x12]);                    // endianness tag
        bytes.resize(50, 0x00);                                                // nothing
        bytes
    }

    #[test]
    fn test_build_with_empty_strings() {
        let mut dex_reader = DexReader::build(dex_header()).unwrap();

        // A list of zero entries must produce an empty result
        let parsed = DexStrings::build(&mut dex_reader, 44, 0).unwrap();

        assert!(parsed.strings.len() == 0);
    }

    #[test]
    fn test_build_with_non_empty_strings() {
        let mut data = dex_header();

        // offsets of the three strings below
        data.extend_from_slice(&[0x3E, 0x00, 0x00, 0x00]);
        data.extend_from_slice(&[0x46, 0x00, 0x00, 0x00]);
        data.extend_from_slice(&[0x68, 0x00, 0x00, 0x00]);

        // string #0: size byte, value, NUL terminator
        data.push(0x0C);
        data.extend_from_slice(b"Hello!");
        data.push(0x00);

        // string #1: size byte, value, NUL terminator
        data.push(0x48);
        data.extend_from_slice(b"This is a test. \"ABCD\" in MUTF-8");
        data.push(0x00);

        // string #2: a size of zero, i.e. the empty string
        data.push(0x00);

        let mut dex_reader = DexReader::build(data).unwrap();
        let parsed = DexStrings::build(&mut dex_reader, 50, 3).unwrap();

        let expected = ["Hello!", "This is a test. \"ABCD\" in MUTF-8", ""];
        assert_eq!(parsed.strings.len(), expected.len());
        for (actual, wanted) in parsed.strings.iter().zip(expected.iter()) {
            assert_eq!(actual, &String::from(*wanted));
        }
    }

    #[test]
    fn test_build_with_invalid_string() {
        let mut data = dex_header();

        // offset of the single string
        data.extend_from_slice(&[0x36, 0x00, 0x00, 0x00]);

        // size byte, then an incomplete MUTF-8 two-byte sequence
        data.push(0x02);
        data.extend_from_slice(&[0xc3, 0x00]);

        let mut dex_reader = DexReader::build(data).unwrap();
        let parsed = DexStrings::build(&mut dex_reader, 50, 1).unwrap();

        assert_eq!(parsed.strings.len(), 1);
        // the invalid MUTF-8 sequence will be "decoded" to � (replacement character)
        assert_eq!(parsed.strings[0], String::from("\u{FFFD}"));
    }
}