use crate::ole::ppt::package::Result;
use zerocopy::{byteorder::{U16, LittleEndian}, FromBytes};
/// Outcome of classifying a single UTF-16 code unit.
#[derive(Debug, Clone, Copy)]
pub(crate) enum TextCharAction {
    /// The code unit is a complete character; append it to the output.
    Add(char),
    /// The code unit is NUL (`0x0000`); the caller should stop decoding.
    Stop,
    /// The code unit is not a valid character on its own; drop it.
    Skip,
}

impl TextCharAction {
    /// Classifies one UTF-16 code unit.
    ///
    /// * `0` yields [`TextCharAction::Stop`].
    /// * Any other unit that is a valid Unicode scalar value yields
    ///   [`TextCharAction::Add`] with that character.
    /// * Units rejected by `char::from_u32` (the surrogate range
    ///   `0xD800..=0xDFFF`) yield [`TextCharAction::Skip`].
    pub(crate) fn process_utf16_char(code_unit: u16) -> Self {
        if code_unit == 0 {
            return Self::Stop;
        }
        // ASCII and non-ASCII units are treated identically: keep the unit if
        // it is a scalar value by itself, otherwise skip it.
        char::from_u32(u32::from(code_unit)).map_or(Self::Skip, Self::Add)
    }
}
/// Decodes the raw bytes of a text-chars atom into a `String`.
///
/// The bytes are decoded as UTF-16LE by [`from_utf16le_lossy`]. Trailing
/// carriage returns are then removed, followed by any trailing NUL
/// characters. Empty input yields an empty string.
///
/// This function always returns `Ok`.
pub fn parse_text_chars_atom(data: &[u8]) -> Result<String> {
    if data.is_empty() {
        return Ok(String::new());
    }

    let mut decoded = from_utf16le_lossy(data);
    // Both trims return a prefix of `decoded`, so the length of the trimmed
    // slice is exactly the number of bytes to keep.
    let kept_len = decoded
        .trim_end_matches('\r')
        .trim_end_matches('\u{0}')
        .len();
    decoded.truncate(kept_len);
    Ok(decoded)
}
/// Decodes little-endian UTF-16 bytes into a `String`, dropping anything that
/// cannot be decoded.
///
/// Decoding rules:
/// * a NUL code unit (`0x0000`) terminates the text; everything after it is
///   ignored;
/// * a high surrogate immediately followed by a low surrogate is combined into
///   the supplementary-plane character the pair encodes;
/// * unpaired surrogates are skipped;
/// * a trailing odd byte (an incomplete code unit) is ignored.
pub fn from_utf16le_lossy(bytes: &[u8]) -> String {
    if bytes.is_empty() {
        return String::new();
    }

    // One byte of capacity per code unit: exact for ASCII, a lower bound otherwise.
    let mut result = String::with_capacity(bytes.len() / 2);
    let mut units = bytes
        .chunks_exact(2)
        .map(|pair| {
            U16::<LittleEndian>::read_from_bytes(pair)
                .map(|unit| unit.get())
                .unwrap_or(0)
        })
        .peekable();

    while let Some(unit) = units.next() {
        match TextCharAction::process_utf16_char(unit) {
            TextCharAction::Add(ch) => result.push(ch),
            TextCharAction::Stop => break,
            TextCharAction::Skip => {
                // A skipped unit may be the first half of a surrogate pair.
                // Only consume the following unit when the two really form a
                // valid pair; otherwise leave it for the next iteration so a
                // NUL or an ordinary character after a stray surrogate is
                // still handled normally.
                if (0xD800..=0xDBFF).contains(&unit) {
                    if let Some(&next) = units.peek() {
                        if let Some(Ok(ch)) = char::decode_utf16([unit, next]).next() {
                            result.push(ch);
                            units.next();
                        }
                    }
                }
            }
        }
    }

    result.shrink_to_fit();
    result
}
/// Decodes the raw bytes of a text-bytes atom into a `String`.
///
/// Each byte is widened to the Unicode scalar value with the same number
/// (`U+0000..=U+00FF`). Any trailing run of carriage returns and NUL bytes is
/// removed, regardless of the order in which they appear, so data ending in
/// `"\r\0"` no longer keeps a stray `'\r'`. Empty input yields an empty
/// string.
///
/// This function always returns `Ok`.
pub fn parse_text_bytes_atom(data: &[u8]) -> Result<String> {
    // Index one past the last byte that is neither CR nor NUL; 0 when the
    // input is empty or consists solely of such bytes.
    let end = data
        .iter()
        .rposition(|&byte| byte != b'\r' && byte != 0)
        .map_or(0, |last| last + 1);

    Ok(data[..end].iter().map(|&byte| char::from(byte)).collect())
}
/// Extracts a NUL-terminated string from `data`, filtering out placeholder
/// names and text that looks like binary noise.
///
/// Only the bytes before the first NUL (or all of `data` when there is none)
/// are considered. They are decoded as UTF-8 with invalid sequences replaced,
/// and trailing carriage returns are removed.
///
/// An empty string is returned when the resulting text
/// * is empty,
/// * equals `"___PPT10"` or `"Default Design"`, or
/// * has fewer than 80% of its characters classified as alphanumeric,
///   whitespace or ASCII punctuation.
///
/// This function always returns `Ok`.
pub fn parse_cstring(data: &[u8]) -> Result<String> {
    // `split` always yields at least one (possibly empty) piece, which is the
    // part preceding the first NUL byte.
    let raw = data.split(|&byte| byte == 0).next().unwrap_or(data);
    let decoded = String::from_utf8_lossy(raw);
    let text = decoded.trim_end_matches('\r');

    if text.is_empty() || matches!(text, "___PPT10" | "Default Design") {
        return Ok(String::new());
    }

    // Count acceptable characters and all characters in a single pass.
    // `text` is non-empty here, so `total` is at least 1.
    let (printable, total) = text
        .chars()
        .fold((0usize, 0usize), |(printable, total), c| {
            let acceptable =
                c.is_alphanumeric() || c.is_whitespace() || c.is_ascii_punctuation();
            (printable + usize::from(acceptable), total + 1)
        });

    if (printable as f32 / total as f32) < 0.8 {
        return Ok(String::new());
    }

    Ok(text.to_owned())
}
#[cfg(test)]
mod tests {
    use super::*;

    /// UTF-16LE "Hello" followed by a NUL code unit decodes to "Hello".
    #[test]
    fn test_text_chars_atom_parsing() {
        let utf16le_hello = b"H\0e\0l\0l\0o\0\0\0";
        let decoded = parse_text_chars_atom(utf16le_hello).unwrap();
        assert_eq!(decoded, "Hello");
    }

    /// Plain single-byte text is returned unchanged.
    #[test]
    fn test_text_bytes_atom_parsing() {
        let decoded = parse_text_bytes_atom(b"Hello World").unwrap();
        assert_eq!(decoded, "Hello World");
    }

    /// Placeholder names are filtered out; ordinary text is kept.
    #[test]
    fn test_cstring_filtering() {
        let parsed = |raw: &[u8]| parse_cstring(raw).unwrap();

        assert_eq!(parsed(b"___PPT10\0"), "");
        assert_eq!(parsed(b"Default Design\0"), "");
        assert_eq!(parsed(b"Normal Text\0"), "Normal Text");
    }
}