// imessage_database/util/streamtyped.rs
use crate::error::streamtyped::StreamTypedError;
/// Two-byte marker that [`parse`] searches for to find where the text begins.
///
/// Everything up to and including its first occurrence is discarded.
/// The second byte, `0x2b`, is ASCII `+`.
const START_PATTERN: [u8; 2] = [0x01, 0x2b];

/// Two-byte marker that [`parse`] searches for to find where the text ends.
///
/// The bytes left after removing the start marker are cut off at the
/// occurrence of this marker that [`parse`] locates.
const END_PATTERN: [u8; 2] = [0x86, 0x84];
/// Extracts the text embedded in a serialized byte stream (the test fixtures
/// refer to the format as `typedstream`).
///
/// The text is taken to be the bytes between the first [`START_PATTERN`] and
/// the first [`END_PATTERN`] found afterwards. Those bytes still begin with a
/// short prefix that is not part of the text (presumably the encoded length —
/// TODO confirm against the format), which is stripped with [`drop_chars`]:
///
/// * if the bytes are valid UTF-8, the first character is dropped;
/// * otherwise the bytes are decoded lossily and the first three characters
///   are dropped.
///
/// # Errors
///
/// * [`StreamTypedError::NoStartPattern`] when the start marker is absent.
/// * [`StreamTypedError::NoEndPattern`] when no end marker follows the start
///   marker.
/// * [`StreamTypedError::InvalidPrefix`] when the extracted text is not longer
///   than the prefix that has to be removed.
pub fn parse(mut stream: Vec<u8>) -> Result<String, StreamTypedError> {
    // Discard everything up to and including the start marker.
    let start = stream
        .windows(START_PATTERN.len())
        .position(|window| window == START_PATTERN)
        .ok_or(StreamTypedError::NoStartPattern)?;
    stream.drain(..start + START_PATTERN.len());

    // The end marker is searched for from index 1 onwards: index 0 is never a
    // candidate, because at least one prefix byte always precedes the text.
    // `windows` also yields the final two bytes, so a marker sitting at the
    // very end of the stream is found as well.
    let end = stream
        .get(1..)
        .and_then(|tail| {
            tail.windows(END_PATTERN.len())
                .position(|window| window == END_PATTERN)
        })
        .map(|offset| offset + 1)
        .ok_or(StreamTypedError::NoEndPattern)?;
    stream.truncate(end);

    match String::from_utf8(stream) {
        // Valid UTF-8: the prefix occupies a single character.
        Ok(text) => drop_chars(1, text),
        // Invalid UTF-8: decode lossily; the prefix then spans three characters.
        Err(invalid) => drop_chars(
            3,
            String::from_utf8_lossy(invalid.as_bytes()).into_owned(),
        ),
    }
}
/// Removes the first `offset` characters from `string` and returns the rest.
///
/// The cut is made on a character boundary, so multi-byte characters are
/// always removed whole.
///
/// # Errors
///
/// Returns [`StreamTypedError::InvalidPrefix`] when `string` contains `offset`
/// or fewer characters, i.e. when there is no character at position `offset`
/// to keep.
fn drop_chars(offset: usize, mut string: String) -> Result<String, StreamTypedError> {
    // Byte index at which the character numbered `offset` (zero-based) starts.
    let cut = string
        .char_indices()
        .map(|(byte_index, _)| byte_index)
        .nth(offset);

    match cut {
        Some(byte_index) => {
            string.drain(..byte_index);
            Ok(string)
        }
        None => Err(StreamTypedError::InvalidPrefix),
    }
}
#[cfg(test)]
mod tests {
    use std::env::current_dir;
    use std::fs;

    use crate::util::streamtyped::{drop_chars, parse};

    /// Reads a fixture file given its path relative to the current working
    /// directory.
    ///
    /// Panics with the offending path when the file cannot be read, so that a
    /// missing fixture is easy to tell apart from a parsing failure.
    fn read_fixture(relative_path: &str) -> Vec<u8> {
        let path = current_dir()
            .expect("current working directory should be accessible")
            .join(relative_path);
        fs::read(&path)
            .unwrap_or_else(|why| panic!("unable to read fixture {}: {}", path.display(), why))
    }

    #[test]
    fn test_parse_text_clean() {
        let parsed = parse(read_fixture("test_data/typedstream/AttributedBodyTextOnly")).unwrap();
        assert_eq!(parsed, "Noter test");
    }

    #[test]
    fn test_parse_text_space() {
        let parsed = parse(read_fixture("test_data/typedstream/AttributedBodyTextOnly2")).unwrap();
        assert_eq!(parsed, "Test 3");
    }

    #[test]
    fn test_parse_text_weird_font() {
        let parsed = parse(read_fixture("test_data/typedstream/WeirdText")).unwrap();
        assert_eq!(parsed, "𝖍𝖊𝖑𝖑𝖔 𝖜𝖔𝖗𝖑𝖉");
    }

    #[test]
    fn test_parse_text_url() {
        let parsed = parse(read_fixture("test_data/typedstream/URL")).unwrap();
        assert_eq!(parsed, "https://github.com/ReagentX/Logria");
    }

    #[test]
    fn test_parse_text_multi_part() {
        let parsed = parse(read_fixture("test_data/typedstream/MultiPart")).unwrap();
        assert_eq!(parsed, "\u{FFFC}test 1\u{FFFC}test 2 \u{FFFC}test 3");
    }

    #[test]
    fn test_parse_text_app() {
        let parsed = parse(read_fixture("test_data/typedstream/ExtraData")).unwrap();
        // Only the leading text is checked; whatever follows it is ignored.
        let expected = "This is parsing";
        assert!(
            parsed.starts_with(expected),
            "expected text starting with {:?}, got {:?}",
            expected,
            parsed
        );
    }

    #[test]
    fn test_parse_text_long() {
        let parsed = parse(read_fixture("test_data/typedstream/LongMessage")).unwrap();
        // The message is long, so only its beginning and total byte length are checked.
        let expected = "Sed nibh velit,";
        assert!(
            parsed.starts_with(expected),
            "expected text starting with {:?}, got {:?}",
            expected,
            parsed
        );
        assert_eq!(parsed.len(), 2359);
    }

    #[test]
    fn test_parse_text_blank() {
        let parsed = parse(read_fixture("test_data/typedstream/Blank"));
        assert!(parsed.is_err());
    }

    #[test]
    fn test_parse_text_multi_part_deleted() {
        let parsed = parse(read_fixture("test_data/typedstream/MultiPartWithDeleted")).unwrap();
        assert_eq!(
            parsed,
            "From arbitrary byte stream:\r\u{FFFC}To native Rust data structures:\r"
        );
    }

    #[test]
    fn test_parse_text_attachment() {
        let parsed = parse(read_fixture("test_data/typedstream/Attachment")).unwrap();
        assert_eq!(
            parsed,
            "\u{FFFC}This is how the notes look to me fyi, in case it helps make sense of anything"
        );
    }

    #[test]
    fn test_parse_text_array() {
        let parsed = parse(read_fixture("test_data/typedstream/Array")).unwrap();
        assert_eq!(parsed, "A single ChatGPT instance takes 5MW of power to run");
    }

    #[test]
    fn test_can_drop_chars() {
        assert_eq!(
            drop_chars(1, String::from("Hello world")).unwrap(),
            String::from("ello world")
        );
    }

    #[test]
    fn test_can_drop_chars_none() {
        assert_eq!(
            drop_chars(0, String::from("Hello world")).unwrap(),
            String::from("Hello world")
        );
    }

    #[test]
    fn test_cant_drop_all() {
        assert!(drop_chars(1000, String::from("Hello world")).is_err());
    }
}