use std::io::{Cursor, Read};
use crate::error::{Error, Result};
use crate::ole::container::OleFile;
use crate::oleobj::native_stream::OleNativeStream;
use crate::ooxml::relationships::{self, Relationship};
/// One payload recovered from a document by [`OleObjExtractor`].
#[derive(Debug, Clone)]
pub struct EmbeddedObject {
    /// Where the payload was found: an OLE stream path, a ZIP entry name, or
    /// `"<zip entry>//<stream path>"` for a stream inside an embedded OLE file.
    pub source: String,
    /// File name copied from the parsed `OleNativeStream`; empty for raw payloads.
    pub filename: String,
    /// Source path copied from the parsed `OleNativeStream`; empty for raw payloads.
    pub src_path: String,
    /// Temporary path copied from the parsed `OleNativeStream`; empty for raw payloads.
    pub temp_path: String,
    /// The payload bytes (parsed native data, or the raw stream/entry contents).
    pub data: Vec<u8>,
}
/// A relationship that points at a custom UI part.
#[derive(Debug, Clone)]
pub struct CustomUiInfo {
    /// The relationship's target, as written in the `.rels` part.
    pub part_name: String,
    /// The relationship's type string, as written in the `.rels` part.
    pub rel_type: String,
}
/// Extracts embedded objects and relationship metadata from an in-memory
/// OLE or OOXML (ZIP) document.
pub struct OleObjExtractor {
    /// Private copy of the whole input document.
    data: Vec<u8>,
    /// Set when `OleFile::is_ole` accepted the input.
    is_ole: bool,
    /// Set when the input starts with the ZIP signature `PK\x03\x04`.
    is_ooxml: bool,
}
impl OleObjExtractor {
pub fn from_bytes(data: &[u8]) -> Result<Self> {
let is_ole = OleFile::is_ole(data);
let is_ooxml = data.len() >= 4 && data[0..4] == [0x50, 0x4B, 0x03, 0x04];
if !is_ole && !is_ooxml {
return Err(Error::UnsupportedFormat(
"Not an OLE or OOXML file".into(),
));
}
Ok(Self {
data: data.to_vec(),
is_ole,
is_ooxml,
})
}
/// Extracts every embedded object from the document.
///
/// Dispatches on the format detected at construction time; the OLE path is
/// taken first if both flags are set. Returns an empty list when neither
/// flag is set.
///
/// # Errors
/// Propagates whatever the format-specific extractor returns.
pub fn extract_objects(&self) -> Result<Vec<EmbeddedObject>> {
    match (self.is_ole, self.is_ooxml) {
        (true, _) => self.extract_objects_ole(),
        (false, true) => self.extract_objects_ooxml(),
        (false, false) => Ok(Vec::new()),
    }
}
pub fn find_external_relationships(&self) -> Result<Vec<Relationship>> {
if !self.is_ooxml {
return Ok(Vec::new());
}
let cursor = Cursor::new(&self.data);
let mut archive = zip::ZipArchive::new(cursor)
.map_err(|e| Error::InvalidOoxml(format!("Invalid ZIP: {e}")))?;
let mut all_external = Vec::new();
let rels_files: Vec<String> = (0..archive.len())
.filter_map(|i| {
archive
.by_index(i)
.ok()
.filter(|e| e.name().ends_with(".rels"))
.map(|e| e.name().to_string())
})
.collect();
for rels_path in &rels_files {
let mut rels_data = Vec::new();
if let Ok(mut entry) = archive.by_name(rels_path) {
entry.read_to_end(&mut rels_data)?;
}
if rels_data.is_empty() {
continue;
}
if let Ok(rels) = relationships::parse_relationships(&rels_data) {
let external = relationships::find_external_relationships(&rels);
for rel in external {
all_external.push(rel.clone());
}
}
}
Ok(all_external)
}
pub fn find_custom_ui(&self) -> Result<Vec<CustomUiInfo>> {
if !self.is_ooxml {
return Ok(Vec::new());
}
let cursor = Cursor::new(&self.data);
let mut archive = zip::ZipArchive::new(cursor)
.map_err(|e| Error::InvalidOoxml(format!("Invalid ZIP: {e}")))?;
let mut custom_ui = Vec::new();
let rels_files: Vec<String> = (0..archive.len())
.filter_map(|i| {
archive
.by_index(i)
.ok()
.filter(|e| e.name().ends_with(".rels"))
.map(|e| e.name().to_string())
})
.collect();
for rels_path in &rels_files {
let mut rels_data = Vec::new();
if let Ok(mut entry) = archive.by_name(rels_path) {
entry.read_to_end(&mut rels_data)?;
}
if rels_data.is_empty() {
continue;
}
if let Ok(rels) = relationships::parse_relationships(&rels_data) {
for rel in &rels {
if rel.rel_type.contains("customUI")
|| rel.rel_type.contains("customui")
{
custom_ui.push(CustomUiInfo {
part_name: rel.target.clone(),
rel_type: rel.rel_type.clone(),
});
}
}
}
}
Ok(custom_ui)
}
/// Extracts embedded objects from an OLE compound file.
///
/// Two passes run over the stream listing, in this order:
///
/// 1. Every stream whose path contains `ole10native` (case-insensitive) is
///    parsed with `OleNativeStream::parse`; successful parses are returned
///    with the file name and paths recorded in the native stream.
/// 2. If an `ObjectPool` entry exists, every other non-empty stream under it
///    is returned raw, with empty file name and path fields.
///
/// Extraction is best-effort per stream: a stream that cannot be opened or
/// parsed is logged at debug level and skipped, so one damaged stream does
/// not hide the remaining objects.
///
/// # Errors
/// Fails only when `OleFile::from_bytes` cannot open the container itself.
fn extract_objects_ole(&self) -> Result<Vec<EmbeddedObject>> {
    let mut ole = OleFile::from_bytes(&self.data)?;
    let streams = ole.list_streams();
    let mut objects = Vec::new();

    // Pass 1: Ole10Native streams anywhere in the container.
    for stream_path in &streams {
        if !stream_path.to_lowercase().contains("ole10native") {
            continue;
        }
        let data = match ole.open_stream(stream_path) {
            Ok(data) => data,
            Err(e) => {
                log::debug!("Failed to open stream {}: {}", stream_path, e);
                continue;
            }
        };
        match OleNativeStream::parse(&data) {
            Ok(native) => objects.push(EmbeddedObject {
                source: stream_path.clone(),
                filename: native.filename,
                src_path: native.src_path,
                temp_path: native.temp_path,
                data: native.data,
            }),
            Err(e) => {
                log::debug!(
                    "Failed to parse OleNativeStream from {}: {}",
                    stream_path,
                    e
                );
            }
        }
    }

    // Pass 2: raw ObjectPool streams that pass 1 did not already consider.
    if ole.exists("/ObjectPool") || ole.exists("ObjectPool") {
        for stream_path in &streams {
            let lower = stream_path.to_lowercase();
            let in_pool = lower.starts_with("/objectpool") || lower.starts_with("objectpool");
            if !in_pool || lower.contains("ole10native") {
                continue;
            }
            if !ole.is_stream(stream_path) {
                continue;
            }
            match ole.open_stream(stream_path) {
                // Empty streams carry no payload worth reporting.
                Ok(data) if data.is_empty() => {}
                Ok(data) => objects.push(EmbeddedObject {
                    source: stream_path.clone(),
                    filename: String::new(),
                    src_path: String::new(),
                    temp_path: String::new(),
                    data,
                }),
                Err(e) => {
                    log::debug!("Failed to open stream {}: {}", stream_path, e);
                }
            }
        }
    }
    Ok(objects)
}
/// Extracts embedded objects from an OOXML (ZIP) package.
///
/// Every ZIP entry whose lowercased name contains `embeddings/` and ends in
/// `.bin` is examined:
///
/// * If `OleFile::is_ole` accepts the entry, each stream inside it whose path
///   contains `ole10native` (case-insensitive) is parsed with
///   `OleNativeStream::parse` and returned with `source` set to
///   `"<entry>//<stream>"`.
/// * Otherwise the non-empty entry is returned raw, with empty file name and
///   path fields.
///
/// Extraction is best-effort per embedding: an entry that cannot be read, an
/// embedded OLE file that cannot be opened, or a stream that cannot be opened
/// or parsed is logged at debug level and skipped, so one damaged embedding
/// does not hide the others.
///
/// # Errors
/// Returns [`Error::InvalidOoxml`] when the data cannot be opened as a ZIP archive.
fn extract_objects_ooxml(&self) -> Result<Vec<EmbeddedObject>> {
    let mut archive = zip::ZipArchive::new(Cursor::new(&self.data))
        .map_err(|e| Error::InvalidOoxml(format!("Invalid ZIP: {e}")))?;

    // Names are gathered up front because both listing and reading borrow
    // the archive mutably, so the two passes cannot overlap.
    let ole_entries: Vec<String> = (0..archive.len())
        .filter_map(|i| {
            let name = archive.by_index(i).ok()?.name().to_string();
            let lower = name.to_lowercase();
            (lower.contains("embeddings/") && lower.ends_with(".bin")).then_some(name)
        })
        .collect();

    let mut objects = Vec::new();
    for entry_name in &ole_entries {
        let mut entry_data = Vec::new();
        match archive.by_name(entry_name) {
            Ok(mut entry) => {
                if let Err(e) = entry.read_to_end(&mut entry_data) {
                    // A truncated or corrupt entry must not abort extraction.
                    log::debug!("Failed to read embedded entry {}: {}", entry_name, e);
                    continue;
                }
            }
            Err(_) => continue,
        }
        if entry_data.is_empty() {
            continue;
        }

        if !OleFile::is_ole(&entry_data) {
            // Not an OLE container: hand the bytes back untouched.
            objects.push(EmbeddedObject {
                source: entry_name.clone(),
                filename: String::new(),
                src_path: String::new(),
                temp_path: String::new(),
                data: entry_data,
            });
            continue;
        }

        let mut ole = match OleFile::from_bytes(&entry_data) {
            Ok(ole) => ole,
            Err(e) => {
                log::debug!("Failed to open embedded OLE file {}: {}", entry_name, e);
                continue;
            }
        };
        let streams = ole.list_streams();
        for stream_path in &streams {
            if !stream_path.to_lowercase().contains("ole10native") {
                continue;
            }
            let data = match ole.open_stream(stream_path) {
                Ok(data) => data,
                Err(e) => {
                    log::debug!(
                        "Failed to open stream {}/{}: {}",
                        entry_name,
                        stream_path,
                        e
                    );
                    continue;
                }
            };
            match OleNativeStream::parse(&data) {
                Ok(native) => objects.push(EmbeddedObject {
                    source: format!("{}//{}", entry_name, stream_path),
                    filename: native.filename,
                    src_path: native.src_path,
                    temp_path: native.temp_path,
                    data: native.data,
                }),
                Err(e) => {
                    log::debug!(
                        "Failed to parse OleNativeStream from {}/{}: {}",
                        entry_name,
                        stream_path,
                        e
                    );
                }
            }
        }
    }
    Ok(objects)
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Returns a 512-byte, zero-padded buffer that starts with the OLE2
    /// compound-file signature.
    fn ole_signature_stub() -> Vec<u8> {
        let mut bytes = vec![0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1];
        bytes.resize(512, 0);
        bytes
    }

    #[test]
    fn test_from_bytes_invalid() {
        assert!(OleObjExtractor::from_bytes(&[0x00, 0x01, 0x02]).is_err());
    }

    #[test]
    fn test_from_bytes_empty() {
        assert!(OleObjExtractor::from_bytes(&[]).is_err());
    }

    #[test]
    fn test_external_relationships_non_ooxml() {
        // NOTE(review): if `from_bytes` rejects the stub this test passes
        // without asserting anything.
        let Ok(extractor) = OleObjExtractor::from_bytes(&ole_signature_stub()) else {
            return;
        };
        assert!(extractor.find_external_relationships().unwrap().is_empty());
    }

    #[test]
    fn test_custom_ui_non_ooxml() {
        // NOTE(review): if `from_bytes` rejects the stub this test passes
        // without asserting anything.
        let Ok(extractor) = OleObjExtractor::from_bytes(&ole_signature_stub()) else {
            return;
        };
        assert!(extractor.find_custom_ui().unwrap().is_empty());
    }

    #[test]
    fn test_embedded_object_debug() {
        let obj = EmbeddedObject {
            source: "test".into(),
            filename: "test.bin".into(),
            src_path: String::new(),
            temp_path: String::new(),
            data: vec![1, 2, 3],
        };
        let rendered = format!("{:?}", obj);
        assert!(rendered.contains("test.bin"));
    }

    #[test]
    fn test_target_mode_filter() {
        use crate::ooxml::relationships::TargetMode;

        let rel = Relationship {
            id: "rId1".into(),
            rel_type: "test".into(),
            target: "http://evil.com".into(),
            target_mode: TargetMode::External,
        };
        assert_eq!(rel.target_mode, TargetMode::External);
    }
}