imessage_database/util/
streamtyped.rs

1/*!
2 The legacy/fallback simple `typedstream` parser.
3
4 Contains logic to parse text from `attributedBody`'s `typedstream` data.
5
6 It is called `streamtyped` because that is the header string contained in the data.
7*/
8
9use crate::error::streamtyped::StreamTypedError;
10
/// Byte pair that `parse` searches for to find where the message text section begins.
///
/// Read as Unicode code points, the bytes are `[<Start of Heading> (SOH), +]`:
/// - <https://www.compart.com/en/unicode/U+0001>
/// - <https://www.compart.com/en/unicode/U+002b>
const START_PATTERN: [u8; 2] = [0x01, b'+'];
15
/// Byte pair that `parse` searches for to find where the message text section ends.
///
/// Read as Unicode code points, the bytes are
/// `[<Start of Selected Area> (SSA), <Index> (IND)]`:
/// - <https://www.compart.com/en/unicode/U+0086>
/// - <https://www.compart.com/en/unicode/U+0084>
const END_PATTERN: [u8; 2] = [0x86, 0x84];
20
21/// Parse the body [text](crate::tables::messages::message::Message::text) from a known type of `typedstream` `attributedBody` file.
22///
23/// `attributedBody` `typedstream` data looks like:
24///
25/// ```txt
26/// streamtyped���@���NSAttributedString�NSObject����NSString��+Example message  ��iI���� NSDictionary��i����__kIMMessagePartAttributeName����NSNumber��NSValue��*������
27/// ```
28///
29/// In that example, the returned body text would be `"Example message"`.
30pub fn parse(mut stream: Vec<u8>) -> Result<String, StreamTypedError> {
31    // Find the start index and drain
32    for idx in 0..stream.len() {
33        if idx + 2 > stream.len() {
34            return Err(StreamTypedError::NoStartPattern);
35        }
36        let part = &stream[idx..idx + 2];
37
38        if part == START_PATTERN {
39            // Remove the start pattern from the string
40            stream.drain(..idx + 2);
41            break;
42        }
43    }
44
45    // Find the end index and truncate
46    for idx in 1..stream.len() {
47        if idx >= stream.len() - 2 {
48            return Err(StreamTypedError::NoEndPattern);
49        }
50        let part = &stream[idx..idx + 2];
51
52        if part == END_PATTERN {
53            // Remove the end pattern from the string
54            stream.truncate(idx);
55            break;
56        }
57    }
58
59    // `from_utf8` doesn't allocate, but `from_utf8_lossy` does, so we try the allocation-free
60    // version first and only allocate if it fails
61    match String::from_utf8(stream)
62        .map_err(|non_utf8| String::from_utf8_lossy(non_utf8.as_bytes()).into_owned())
63    {
64        // If the bytes are valid unicode, only one char prefixes the actual message
65        // ['\u{6}', 'T', ...] where `T` is the first real char
66        // The prefix char is not always the same
67        Ok(string) => drop_chars(1, string),
68        // If the bytes are not valid unicode, 3 chars prefix the actual message
69        // ['�', '�', '\0', 'T', ...] where `T` is the first real char
70        // The prefix chars are not always the same
71        Err(string) => drop_chars(3, string),
72    }
73}
74
75/// Drop `offset` chars from the front of a String
76fn drop_chars(offset: usize, mut string: String) -> Result<String, StreamTypedError> {
77    // Find the index of the specified character offset
78    let (position, _) = string
79        .char_indices()
80        .nth(offset)
81        .ok_or(StreamTypedError::InvalidPrefix)?;
82
83    // Remove the prefix and give the String back
84    string.drain(..position);
85    Ok(string)
86}
87
#[cfg(test)]
mod tests {
    use std::env::current_dir;
    use std::fs;

    use crate::util::streamtyped::{drop_chars, parse};

    /// Read the raw bytes of the fixture called `name` from `test_data/typedstream/`,
    /// resolved against the current working directory.
    ///
    /// Panics with the offending path if the fixture cannot be read.
    fn read_fixture(name: &str) -> Vec<u8> {
        let path = current_dir()
            .unwrap()
            .join("test_data/typedstream")
            .join(name);
        fs::read(&path)
            .unwrap_or_else(|why| panic!("unable to read fixture {}: {}", path.display(), why))
    }

    #[test]
    fn test_parse_text_clean() {
        let parsed = parse(read_fixture("AttributedBodyTextOnly")).unwrap();

        assert_eq!(parsed, "Noter test");
    }

    #[test]
    fn test_parse_text_space() {
        let parsed = parse(read_fixture("AttributedBodyTextOnly2")).unwrap();

        assert_eq!(parsed, "Test 3");
    }

    #[test]
    fn test_parse_text_weird_font() {
        let parsed = parse(read_fixture("WeirdText")).unwrap();

        assert_eq!(parsed, "𝖍𝖊𝖑𝖑𝖔 𝖜𝖔𝖗𝖑𝖉");
    }

    #[test]
    fn test_parse_text_url() {
        let parsed = parse(read_fixture("URL")).unwrap();

        assert_eq!(parsed, "https://github.com/ReagentX/Logria");
    }

    #[test]
    fn test_parse_text_multi_part() {
        let parsed = parse(read_fixture("MultiPart")).unwrap();

        assert_eq!(parsed, "\u{FFFC}test 1\u{FFFC}test 2 \u{FFFC}test 3");
    }

    #[test]
    fn test_parse_text_app() {
        let parsed = parse(read_fixture("ExtraData")).unwrap();

        // Only the leading text is checked; the rest of the parsed output is not asserted on
        assert!(
            parsed.starts_with("This is parsing"),
            "unexpected text: {parsed:?}"
        );
    }

    #[test]
    fn test_parse_text_long() {
        let parsed = parse(read_fixture("LongMessage")).unwrap();

        assert!(
            parsed.starts_with("Sed nibh velit,"),
            "unexpected text: {parsed:?}"
        );
        assert_eq!(parsed.len(), 2359);
    }

    #[test]
    fn test_parse_text_blank() {
        let parsed = parse(read_fixture("Blank"));

        assert!(parsed.is_err());
    }

    #[test]
    fn test_parse_text_multi_part_deleted() {
        let parsed = parse(read_fixture("MultiPartWithDeleted")).unwrap();

        assert_eq!(
            parsed,
            "From arbitrary byte stream:\r\u{FFFC}To native Rust data structures:\r"
        );
    }

    #[test]
    fn test_parse_text_attachment() {
        let parsed = parse(read_fixture("Attachment")).unwrap();

        assert_eq!(
            parsed,
            "\u{FFFC}This is how the notes look to me fyi, in case it helps make sense of anything"
        );
    }

    #[test]
    fn test_parse_text_array() {
        let parsed = parse(read_fixture("Array")).unwrap();

        assert_eq!(
            parsed,
            "A single ChatGPT instance takes 5MW of power to run"
        );
    }

    #[test]
    fn test_parse_text_in_memory() {
        // Start pattern, one prefix byte, the text, end pattern, one trailing byte
        let bytes = b"\x01+\x05Hello\x86\x84\x01".to_vec();

        assert_eq!(parse(bytes).unwrap(), "Hello");
    }

    #[test]
    fn test_parse_text_no_start_pattern() {
        assert!(parse(b"no delimiters here".to_vec()).is_err());
    }

    #[test]
    fn test_can_drop_chars() {
        assert_eq!(
            drop_chars(1, String::from("Hello world")).unwrap(),
            String::from("ello world")
        );
    }

    #[test]
    fn test_can_drop_chars_none() {
        assert_eq!(
            drop_chars(0, String::from("Hello world")).unwrap(),
            String::from("Hello world")
        );
    }

    #[test]
    fn test_cant_drop_all() {
        assert!(drop_chars(1000, String::from("Hello world")).is_err());
    }
}