use super::consts::*;
use super::file::{OleError, OleFile};
use std::collections::HashMap;
use std::io::{Read, Seek};
use chrono::{DateTime, Duration, Utc};
use zerocopy::{FromBytes, LE, U16, U32, I16, I32};
/// Document metadata collected from the `\u{0005}SummaryInformation` and
/// `\u{0005}DocumentSummaryInformation` property streams of an OLE file.
///
/// Every field is optional: a field stays `None` when the corresponding
/// property is absent, has an unexpected type, or cannot be decoded.
#[derive(Debug, Default)]
pub struct OleMetadata {
    /// Code page declared by property 1 of the summary-information set.
    pub codepage: Option<u32>,
    /// Summary property 2.
    pub title: Option<String>,
    /// Summary property 3.
    pub subject: Option<String>,
    /// Summary property 4.
    pub author: Option<String>,
    /// Summary property 5.
    pub keywords: Option<String>,
    /// Summary property 6.
    pub comments: Option<String>,
    /// Summary property 7.
    pub template: Option<String>,
    /// Summary property 8.
    pub last_saved_by: Option<String>,
    /// Summary property 9.
    pub revision_number: Option<String>,
    /// Summary property 10, stored as a FILETIME and exposed as a duration.
    pub edit_time: Option<Duration>,
    /// Summary property 12.
    pub create_time: Option<DateTime<Utc>>,
    /// Summary property 11.
    pub last_printed_time: Option<DateTime<Utc>>,
    /// Summary property 13.
    pub last_saved_time: Option<DateTime<Utc>>,
    /// Summary property 14.
    pub num_pages: Option<u32>,
    /// Summary property 15.
    pub num_words: Option<u32>,
    /// Summary property 16.
    pub num_chars: Option<u32>,
    /// Summary property 18.
    pub creating_application: Option<String>,
    /// Summary property 19.
    pub security: Option<u32>,
    /// Document-summary property 2.
    pub category: Option<String>,
    /// Document-summary property 14.
    pub manager: Option<String>,
    /// Document-summary property 15.
    pub company: Option<String>,
}
/// A single decoded value from an OLE property set.
#[derive(Debug, Clone)]
pub enum PropertyValue {
    /// Signed 16-bit integer (`VT_I2`).
    I2(i16),
    /// Signed 32-bit integer (`VT_I4`, `VT_INT`, `VT_ERROR`).
    I4(i32),
    /// Unsigned 16-bit integer (`VT_UI2`).
    UI2(u16),
    /// Unsigned 32-bit integer (`VT_UI4`, `VT_UINT`).
    UI4(u32),
    /// Boolean (`VT_BOOL`); any non-zero stored value is `true`.
    Bool(bool),
    /// Raw, still-encoded bytes of a length-prefixed string (`VT_LPSTR`, `VT_BSTR`).
    Lpstr(Vec<u8>),
    /// UTF-16 string (`VT_LPWSTR`), already decoded up to its first NUL.
    Lpwstr(String),
    /// Raw 64-bit FILETIME value (`VT_FILETIME`).
    Filetime(u64),
    /// Length-prefixed binary payload (`VT_BLOB`).
    Blob(Vec<u8>),
    /// `VT_EMPTY`, `VT_NULL`, or any property type the parser does not handle.
    Empty,
}
impl<R: Read + Seek> OleFile<R> {
    /// Collects document metadata from the two standard property streams.
    ///
    /// Extraction is best-effort: a stream that cannot be opened or parsed is
    /// skipped and the fields it would have supplied stay `None`. As written,
    /// this method always returns `Ok`.
    pub fn get_metadata(&mut self) -> Result<OleMetadata, OleError> {
        let mut metadata = OleMetadata::default();

        // The summary stream is processed first so that its code page is
        // recorded before the document-summary strings are decoded.
        let summary = self
            .open_stream(&["\u{0005}SummaryInformation"])
            .ok()
            .and_then(|data| parse_property_stream(&data).ok());
        if let Some(props) = summary {
            extract_summary_info(&mut metadata, &props);
        }

        let document_summary = self
            .open_stream(&["\u{0005}DocumentSummaryInformation"])
            .ok()
            .and_then(|data| parse_property_stream(&data).ok());
        if let Some(props) = document_summary {
            extract_document_summary_info(&mut metadata, &props);
        }

        Ok(metadata)
    }
}
/// Converts a Windows FILETIME (100-nanosecond ticks since 1601-01-01 UTC)
/// into a UTC timestamp.
///
/// Returns `None` when `filetime` does not fit in an `i64` or the resulting
/// instant is outside the range chrono can represent.
///
/// The conversion goes through whole seconds plus sub-second nanoseconds
/// rather than a single nanosecond count: a nanosecond count only fits in an
/// `i64` for dates between roughly 1677 and 2262, so multiplying the tick
/// delta by 100 overflowed for other inputs, including the common value `0`
/// (1601-01-01).
#[inline]
fn filetime_to_date(filetime: u64) -> Option<DateTime<Utc>> {
    /// Ticks between 1601-01-01 and the Unix epoch (1970-01-01).
    const EPOCH_DIFF: i64 = 116_444_736_000_000_000;
    /// Number of 100-nanosecond ticks in one second.
    const TICKS_PER_SECOND: i64 = 10_000_000;

    let ticks = i64::try_from(filetime).ok()?;
    // Cannot overflow: `ticks` is non-negative and `EPOCH_DIFF` is positive.
    let since_unix_epoch = ticks - EPOCH_DIFF;

    // Euclidean division keeps the sub-second part non-negative for
    // instants before 1970.
    let secs = since_unix_epoch.div_euclid(TICKS_PER_SECOND);
    let subsec_ticks = since_unix_epoch.rem_euclid(TICKS_PER_SECOND);
    let nanos = u32::try_from(subsec_ticks * 100).ok()?;

    DateTime::from_timestamp(secs, nanos)
}
/// Interprets a FILETIME-encoded value as an elapsed time span
/// (a count of 100-nanosecond ticks).
///
/// Returns `None` when the span does not fit in an `i64` nanosecond count.
/// The multiplication is checked so that oversized values yield `None`
/// instead of overflowing `u64` before the range check runs.
#[inline]
fn filetime_to_duration(filetime: u64) -> Option<Duration> {
    let nanos = filetime.checked_mul(100)?;
    let nanos = i64::try_from(nanos).ok()?;
    Some(Duration::nanoseconds(nanos))
}
/// Parses the first section of an OLE property stream into a map from
/// property ID to decoded value.
///
/// Layout relied on by this parser:
/// * bytes `44..48` of the stream hold the offset of the section;
/// * the property count sits at `section + 4`, followed from `section + 8`
///   by 8-byte entries of `(property id, value offset relative to section)`;
/// * each value starts with a 2-byte type tag, with its payload 4 bytes
///   after the start of the value.
///
/// At most 1000 entries are read. Entries whose value lies outside the
/// stream, or whose value fails to decode, are skipped. When two entries
/// share an ID the later one wins.
///
/// All offsets come from the (untrusted) stream, so every addition is
/// checked; an offset that would overflow `usize` is treated as out of range.
///
/// # Errors
///
/// Returns `OleError::InvalidFormat` when the stream is shorter than 48 bytes
/// or the section header does not fit inside it.
fn parse_property_stream(data: &[u8]) -> Result<HashMap<u32, PropertyValue>, OleError> {
    /// Upper bound on directory entries honoured, whatever the header claims.
    const MAX_PROPERTIES: u32 = 1000;
    /// Size in bytes of one directory entry.
    const ENTRY_SIZE: usize = 8;

    /// Reads a little-endian `u32` at `at`; `None` if it is not fully in bounds.
    fn read_u32(data: &[u8], at: usize) -> Option<u32> {
        let end = at.checked_add(4)?;
        let bytes = data.get(at..end)?;
        U32::<LE>::read_from_bytes(bytes).ok().map(|v| v.get())
    }

    /// Error reported when the section header is not inside the stream.
    fn invalid_section() -> OleError {
        OleError::InvalidFormat("Invalid section offset".to_string())
    }

    if data.len() < 48 {
        return Err(OleError::InvalidFormat(
            "Property stream too short".to_string(),
        ));
    }

    let section_offset = read_u32(data, 44)
        .and_then(|raw| usize::try_from(raw).ok())
        .ok_or_else(invalid_section)?;
    // First byte after the 8-byte section header (size + property count).
    let entries_start = section_offset
        .checked_add(8)
        .filter(|&end| end <= data.len())
        .ok_or_else(invalid_section)?;

    // `section_offset + 4` is in bounds because `entries_start` was validated.
    let num_props = read_u32(data, section_offset + 4)
        .unwrap_or(0)
        .min(MAX_PROPERTIES);
    let mut properties = HashMap::with_capacity(num_props as usize);

    for index in 0..num_props as usize {
        let Some(entry) = index
            .checked_mul(ENTRY_SIZE)
            .and_then(|relative| entries_start.checked_add(relative))
        else {
            break;
        };
        let Some(offset_field) = entry.checked_add(4) else {
            break;
        };
        // A truncated directory ends the scan: later entries lie even further out.
        let (Some(prop_id), Some(relative)) =
            (read_u32(data, entry), read_u32(data, offset_field))
        else {
            break;
        };

        // A value pointing outside the stream only invalidates this entry.
        let Some(value_offset) = usize::try_from(relative)
            .ok()
            .and_then(|relative| section_offset.checked_add(relative))
        else {
            continue;
        };
        let Some(payload_offset) = value_offset
            .checked_add(4)
            .filter(|&end| end <= data.len())
        else {
            continue;
        };

        let prop_type = U16::<LE>::read_from_bytes(&data[value_offset..value_offset + 2])
            .map(|v| v.get())
            .unwrap_or(0);
        if let Ok(value) = parse_property_value(data, payload_offset, prop_type) {
            properties.insert(prop_id, value);
        }
    }

    Ok(properties)
}
/// Decodes the payload of one property value.
///
/// `offset` is the position in `data` of the payload (just past the type
/// tag) and `prop_type` selects the decoding. Types this function does not
/// handle decode to `PropertyValue::Empty`.
///
/// Lengths and counts come from the (untrusted) stream, so every range is
/// computed with checked arithmetic and borrowed through `slice::get`; a
/// length that would overflow `usize` is reported like any other
/// out-of-bounds payload.
///
/// # Errors
///
/// Returns `OleError::InvalidFormat` when the payload, or the data it
/// announces via a length prefix, extends past the end of `data`.
fn parse_property_value(
    data: &[u8],
    offset: usize,
    prop_type: u16,
) -> Result<PropertyValue, OleError> {
    const BUFFER_OVERFLOW: &str = "Buffer overflow";
    const STRING_OVERFLOW: &str = "String overflow";
    const BLOB_OVERFLOW: &str = "Blob overflow";

    /// Borrows `len` bytes starting at `start`, or fails with `message`.
    fn take<'a>(
        data: &'a [u8],
        start: usize,
        len: usize,
        message: &str,
    ) -> Result<&'a [u8], OleError> {
        start
            .checked_add(len)
            .and_then(|end| data.get(start..end))
            .ok_or_else(|| OleError::InvalidFormat(message.to_string()))
    }

    /// Reads a `u32` element count at `offset` and borrows the
    /// `count * unit` bytes that follow it.
    fn counted<'a>(
        data: &'a [u8],
        offset: usize,
        unit: usize,
        message: &str,
    ) -> Result<&'a [u8], OleError> {
        let prefix = take(data, offset, 4, BUFFER_OVERFLOW)?;
        let count = U32::<LE>::read_from_bytes(prefix)
            .map(|v| v.get() as usize)
            .unwrap_or(0);
        let byte_len = count
            .checked_mul(unit)
            .ok_or_else(|| OleError::InvalidFormat(message.to_string()))?;
        // `offset + 4` cannot overflow: the prefix was just borrowed from `data`.
        take(data, offset + 4, byte_len, message)
    }

    match prop_type {
        VT_I2 => {
            let raw = take(data, offset, 2, BUFFER_OVERFLOW)?;
            let value = I16::<LE>::read_from_bytes(raw)
                .map(|v| v.get())
                .unwrap_or(0);
            Ok(PropertyValue::I2(value))
        }
        VT_I4 | VT_INT | VT_ERROR => {
            let raw = take(data, offset, 4, BUFFER_OVERFLOW)?;
            let value = I32::<LE>::read_from_bytes(raw)
                .map(|v| v.get())
                .unwrap_or(0);
            Ok(PropertyValue::I4(value))
        }
        VT_UI2 => {
            let raw = take(data, offset, 2, BUFFER_OVERFLOW)?;
            let value = U16::<LE>::read_from_bytes(raw)
                .map(|v| v.get())
                .unwrap_or(0);
            Ok(PropertyValue::UI2(value))
        }
        VT_UI4 | VT_UINT => {
            let raw = take(data, offset, 4, BUFFER_OVERFLOW)?;
            let value = U32::<LE>::read_from_bytes(raw)
                .map(|v| v.get())
                .unwrap_or(0);
            Ok(PropertyValue::UI4(value))
        }
        VT_LPSTR | VT_BSTR => {
            // Kept as raw bytes: decoding needs the property set's code page.
            let bytes = counted(data, offset, 1, STRING_OVERFLOW)?;
            Ok(PropertyValue::Lpstr(bytes.to_vec()))
        }
        VT_LPWSTR => {
            // The prefix counts UTF-16 code units, not bytes.
            let bytes = counted(data, offset, 2, STRING_OVERFLOW)?;
            let units: Vec<u16> = bytes
                .chunks_exact(2)
                .map(|pair| u16::from_le_bytes([pair[0], pair[1]]))
                .take_while(|&unit| unit != 0)
                .collect();
            Ok(PropertyValue::Lpwstr(String::from_utf16_lossy(&units)))
        }
        VT_FILETIME => {
            let raw = take(data, offset, 8, BUFFER_OVERFLOW)?;
            let (low, high) = raw.split_at(4);
            let low = U32::<LE>::read_from_bytes(low)
                .map(|v| u64::from(v.get()))
                .unwrap_or(0);
            let high = U32::<LE>::read_from_bytes(high)
                .map(|v| u64::from(v.get()))
                .unwrap_or(0);
            Ok(PropertyValue::Filetime(low | (high << 32)))
        }
        VT_BOOL => {
            let raw = take(data, offset, 2, BUFFER_OVERFLOW)?;
            let value = U16::<LE>::read_from_bytes(raw)
                .map(|v| v.get())
                .unwrap_or(0);
            Ok(PropertyValue::Bool(value != 0))
        }
        VT_BLOB => {
            let bytes = counted(data, offset, 1, BLOB_OVERFLOW)?;
            Ok(PropertyValue::Blob(bytes.to_vec()))
        }
        VT_EMPTY | VT_NULL => Ok(PropertyValue::Empty),
        // Unsupported property types are reported as empty rather than as errors.
        _ => Ok(PropertyValue::Empty),
    }
}
fn extract_summary_info(metadata: &mut OleMetadata, props: &HashMap<u32, PropertyValue>) {
let codepage = if let Some(PropertyValue::I2(v)) = props.get(&1) {
let cp = Some(*v as u32);
metadata.codepage = cp;
cp
} else {
None
};
if let Some(v) = props.get(&2) {
metadata.title = extract_string(v, codepage);
}
if let Some(v) = props.get(&3) {
metadata.subject = extract_string(v, codepage);
}
if let Some(v) = props.get(&4) {
metadata.author = extract_string(v, codepage);
}
if let Some(v) = props.get(&5) {
metadata.keywords = extract_string(v, codepage);
}
if let Some(v) = props.get(&6) {
metadata.comments = extract_string(v, codepage);
}
if let Some(v) = props.get(&7) {
metadata.template = extract_string(v, codepage);
}
if let Some(v) = props.get(&8) {
metadata.last_saved_by = extract_string(v, codepage);
}
if let Some(v) = props.get(&9) {
metadata.revision_number = extract_string(v, codepage);
}
if let Some(PropertyValue::Filetime(v)) = props.get(&10) {
metadata.edit_time = filetime_to_duration(*v);
}
if let Some(PropertyValue::Filetime(v)) = props.get(&11) {
metadata.last_printed_time = filetime_to_date(*v);
}
if let Some(PropertyValue::Filetime(v)) = props.get(&12) {
metadata.create_time = filetime_to_date(*v);
}
if let Some(PropertyValue::Filetime(v)) = props.get(&13) {
metadata.last_saved_time = filetime_to_date(*v);
}
if let Some(PropertyValue::I4(v)) = props.get(&14) {
metadata.num_pages = Some(*v as u32);
}
if let Some(PropertyValue::I4(v)) = props.get(&15) {
metadata.num_words = Some(*v as u32);
}
if let Some(PropertyValue::I4(v)) = props.get(&16) {
metadata.num_chars = Some(*v as u32);
}
if let Some(v) = props.get(&18) {
metadata.creating_application = extract_string(v, codepage);
}
if let Some(PropertyValue::I4(v)) = props.get(&19) {
metadata.security = Some(*v as u32);
}
}
fn extract_document_summary_info(metadata: &mut OleMetadata, props: &HashMap<u32, PropertyValue>) {
let codepage = metadata.codepage;
if let Some(v) = props.get(&2) {
metadata.category = extract_string(v, codepage);
}
if let Some(v) = props.get(&14) {
metadata.manager = extract_string(v, codepage);
}
if let Some(v) = props.get(&15) {
metadata.company = extract_string(v, codepage);
}
}
fn decode_ansi_string(bytes: &[u8], codepage: Option<u32>) -> Option<String> {
let bytes = bytes.iter().take_while(|&&b| b != 0).cloned().collect::<Vec<_>>();
let encoding = match codepage {
Some(cp) => {
match cp {
437 => encoding_rs::IBM866, 874 => encoding_rs::WINDOWS_874, 932 => encoding_rs::SHIFT_JIS, 936 => encoding_rs::GBK, 949 => encoding_rs::EUC_KR, 950 => encoding_rs::BIG5, 1250 => encoding_rs::WINDOWS_1250, 1251 => encoding_rs::WINDOWS_1251, 1252 => encoding_rs::WINDOWS_1252, 1253 => encoding_rs::WINDOWS_1253, 1254 => encoding_rs::WINDOWS_1254, 1255 => encoding_rs::WINDOWS_1255, 1256 => encoding_rs::WINDOWS_1256, 1257 => encoding_rs::WINDOWS_1257, 1258 => encoding_rs::WINDOWS_1258, 10000 => encoding_rs::MACINTOSH, 20932 => encoding_rs::EUC_JP, 28592 => encoding_rs::ISO_8859_2, 28593 => encoding_rs::ISO_8859_3, 28594 => encoding_rs::ISO_8859_4, 28595 => encoding_rs::ISO_8859_5, 28596 => encoding_rs::ISO_8859_6, 28597 => encoding_rs::ISO_8859_7, 28598 => encoding_rs::ISO_8859_8, 28605 => encoding_rs::ISO_8859_15, 54936 => encoding_rs::GB18030, 65001 => encoding_rs::UTF_8,
_ => { return None; },
}
}
None => { return None; },
};
Some(encoding.decode(&bytes).0.into_owned())
}
/// Turns a string-typed property value into text.
///
/// * `Lpstr` bytes are decoded with `codepage` via `decode_ansi_string`.
/// * `Lpwstr` text is returned as a copy.
///
/// Returns `None` for empty strings, for every other variant, and when the
/// byte string cannot be decoded.
fn extract_string(value: &PropertyValue, codepage: Option<u32>) -> Option<String> {
    match value {
        PropertyValue::Lpstr(raw) if !raw.is_empty() => decode_ansi_string(raw, codepage),
        PropertyValue::Lpwstr(text) if !text.is_empty() => Some(text.clone()),
        _ => None,
    }
}