use super::record::EscherRecord;
use super::container::EscherContainer;
use super::types::EscherRecordType;
use crate::ole::ppt::records::PptRecord;
use crate::ole::ppt::package::Result;
use crate::ole::consts::PptRecordType;
/// Extracts all textbox text found in a raw Escher byte stream.
///
/// The stream is handed to `EscherParser`; if it exposes a root container,
/// that container is walked recursively and every non-blank text fragment is
/// collected. Fragments are joined with `\n`.
///
/// Returns an empty string when there is no root container or no text was
/// found.
///
/// # Errors
///
/// Propagates the error produced while obtaining the root container.
pub fn extract_text_from_escher(escher_data: &[u8]) -> Result<String> {
    let mut collected: Vec<String> = Vec::new();
    let escher = super::parser::EscherParser::new(escher_data);

    // `Option<Result<_>>` -> `Result<Option<_>>` so a failed root parse
    // bubbles up while a missing root is simply skipped.
    if let Some(root) = escher.root_container().transpose()? {
        extract_text_from_container(&root, &mut collected);
    }

    // Joining an empty list already yields an empty string.
    Ok(collected.join("\n"))
}
/// Recursively walks the children of `container`, appending the text of every
/// client textbox to `text_parts`.
///
/// Children that fail to parse are skipped. Shape containers and any other
/// record reporting itself as a container are descended into; all remaining
/// record types are ignored.
fn extract_text_from_container(container: &EscherContainer, text_parts: &mut Vec<String>) {
    for entry in container.children() {
        // Best-effort traversal: an unreadable child does not abort the walk.
        let child = match entry {
            Ok(record) => record,
            Err(_) => continue,
        };

        if matches!(child.record_type, EscherRecordType::ClientTextbox) {
            // Keep only textboxes that contain something besides whitespace.
            let text = extract_text_from_textbox(&child).filter(|t| !t.trim().is_empty());
            if let Some(text) = text {
                text_parts.push(text);
            }
        } else if matches!(child.record_type, EscherRecordType::SpContainer)
            || child.is_container()
        {
            let nested = EscherContainer::new(child);
            extract_text_from_container(&nested, text_parts);
        }
    }
}
/// Extracts the text carried by the PPT records inside a client textbox.
///
/// The textbox payload is scanned as a sequence of PPT records. For each
/// record that parses, its own text (via `PptRecord::extract_text`) is
/// collected, followed by the text of any text-atom descendants. Fragments are
/// trimmed, blank ones are dropped, and the rest are joined with `\n`.
///
/// Returns `None` when the payload is empty or yields no non-blank text.
pub(crate) fn extract_text_from_textbox(textbox: &EscherRecord) -> Option<String> {
    if textbox.data.is_empty() {
        return None;
    }

    let mut offset = 0;
    let mut text_parts = Vec::new();

    // Stop once fewer than 8 bytes remain (presumably the PPT record header
    // size; confirm against `PptRecord::parse`).
    while offset + 8 <= textbox.data.len() {
        match PptRecord::parse(textbox.data, offset) {
            Ok((record, consumed)) => {
                if let Ok(record_text) = record.extract_text() {
                    let trimmed = record_text.trim();
                    if !trimmed.is_empty() {
                        text_parts.push(trimmed.to_string());
                    }
                }

                // Only the children go through the recursive helper. The
                // helper would otherwise call `extract_text` on `record`
                // itself again and push a text atom's content a second time.
                // NOTE(review): if `PptRecord::extract_text` already gathers
                // child text for container records, nested text may still be
                // reported twice; verify against its implementation.
                for child in &record.children {
                    extract_text_from_ppt_record(child, &mut text_parts);
                }

                offset += consumed;
                // A record that consumes nothing would loop forever.
                if consumed == 0 {
                    break;
                }
            }
            // Unparseable bytes: slide forward one byte and try again.
            Err(_) => {
                offset += 1;
            }
        }
    }

    if text_parts.is_empty() {
        None
    } else {
        Some(text_parts.join("\n"))
    }
}
/// Collects text from `record` and all of its descendants into `text_parts`.
///
/// Only `TextCharsAtom`, `TextBytesAtom` and `CString` records contribute
/// text directly. Their text is trimmed and skipped when blank or when
/// extraction fails. Every child is then visited recursively, in order.
fn extract_text_from_ppt_record(record: &PptRecord, text_parts: &mut Vec<String>) {
    let carries_text = matches!(
        record.record_type,
        PptRecordType::TextCharsAtom | PptRecordType::TextBytesAtom | PptRecordType::CString
    );

    if carries_text {
        let fragment = record
            .extract_text()
            .ok()
            .map(|raw| raw.trim().to_string())
            .filter(|clean| !clean.is_empty());
        if let Some(fragment) = fragment {
            text_parts.push(fragment);
        }
    }

    for nested in &record.children {
        extract_text_from_ppt_record(nested, text_parts);
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// An empty buffer must produce an empty string rather than an error.
    #[test]
    fn test_empty_escher_data() {
        let empty: &[u8] = &[];
        let extracted = extract_text_from_escher(empty).unwrap();
        assert_eq!(extracted, "");
    }

    /// A stream holding a single record with no textbox yields no text.
    #[test]
    fn test_escher_without_text() {
        // Presumably an 8-byte little-endian Escher record header followed by
        // a 4-byte payload; confirm the layout against the parser.
        let stream: &[u8] = &[
            0x0F, 0x00, 0x02, 0xF0, // first four header bytes
            0x04, 0x00, 0x00, 0x00, // next four header bytes
            0x01, 0x02, 0x03, 0x04, // trailing bytes
        ];
        let extracted = extract_text_from_escher(stream).unwrap();
        assert_eq!(extracted, "");
    }
}