use crate::doc::DocumentFile;
use crate::utils::u64_from_offset;
use crate::{Ordering, SpecimenFile};
use std::fmt::{Display, Formatter};
use anyhow::{ensure, Context, Result};
use chrono::{DateTime, Utc};
use tracing::instrument;
use uuid::{uuid, Uuid};
/// Eight-byte signature that a buffer must start with to be accepted by
/// `Office95::from`; also advertised as the `SpecimenFile::MAGIC` for `Office95`.
const DOCFILE_MAGIC: [u8; 8] = [0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1];
/// A class identifier kept in two byte orderings so that raw bytes can be
/// matched against either one.
///
/// Equality is implemented by hand (not derived): two values are equal when
/// either of the two stored representations matches.
#[derive(Clone, Debug, Eq)]
pub struct Clsid {
/// One byte ordering of the identifier.
/// NOTE(review): judging by most of the constants defined on `Clsid`, this is
/// the form whose first three groups are byte-swapped (e.g. `10080200-…` for
/// `00020810-…`) — confirm.
pub le_uuid: Uuid,
/// The other byte ordering of the same identifier.
/// NOTE(review): presumably the conventional textual form — confirm.
pub be_uuid: Uuid,
}
impl PartialEq for Clsid {
    /// Two identifiers are considered equal as soon as one of their stored
    /// representations (`be_uuid` first, then `le_uuid`) is identical.
    fn eq(&self, other: &Self) -> bool {
        if self.be_uuid == other.be_uuid {
            return true;
        }
        self.le_uuid == other.le_uuid
    }
}
impl Clsid {
    // Every constant below follows one convention: `be_uuid` holds the
    // identifier as it is conventionally written, and `le_uuid` holds the same
    // identifier with its first three groups byte-swapped. `POWERPOINT95`,
    // `MSI` and `MSP` previously had the two fields the wrong way round;
    // `equal` accepts either form, so matching behaviour is unchanged.

    /// Excel 5 workbook class.
    pub const EXCEL5: Self = Clsid {
        le_uuid: uuid!("10080200-0000-0000-c000-000000000046"),
        be_uuid: uuid!("00020810-0000-0000-c000-000000000046"),
    };

    /// Excel 97 workbook class.
    pub const EXCEL97: Self = Clsid {
        le_uuid: uuid!("20080200-0000-0000-c000-000000000046"),
        be_uuid: uuid!("00020820-0000-0000-c000-000000000046"),
    };

    /// Word 6 document class.
    pub const WORD6: Self = Clsid {
        le_uuid: uuid!("00090200-0000-0000-c000-000000000046"),
        be_uuid: uuid!("00020900-0000-0000-c000-000000000046"),
    };

    /// Word `.doc` document class.
    pub const DOC: Self = Clsid {
        le_uuid: uuid!("06090200-0000-0000-c000-000000000046"),
        be_uuid: uuid!("00020906-0000-0000-c000-000000000046"),
    };

    /// PowerPoint 4 presentation class.
    pub const POWERPOINT4: Self = Clsid {
        le_uuid: uuid!("51480400-0000-0000-c000-000000000046"),
        be_uuid: uuid!("00044851-0000-0000-c000-000000000046"),
    };

    /// PowerPoint 95 presentation class.
    pub const POWERPOINT95: Self = Clsid {
        le_uuid: uuid!("70ae7bea-3bfb-cd11-a903-00aa00510ea3"),
        be_uuid: uuid!("ea7bae70-fb3b-11cd-a903-00aa00510ea3"),
    };

    /// PowerPoint `.ppt` presentation class.
    pub const PPT: Self = Clsid {
        le_uuid: uuid!("108d8164-9b4f-cf11-86ea-00aa00b929e8"),
        be_uuid: uuid!("64818d10-4f9b-11cf-86ea-00aa00b929e8"),
    };

    /// Windows Installer package (`.msi`) class.
    pub const MSI: Self = Clsid {
        le_uuid: uuid!("84100c00-0000-0000-c000-000000000046"),
        be_uuid: uuid!("000c1084-0000-0000-c000-000000000046"),
    };

    /// Windows Installer patch (`.msp`) class.
    pub const MSP: Self = Clsid {
        le_uuid: uuid!("86100c00-0000-0000-c000-000000000046"),
        be_uuid: uuid!("000c1086-0000-0000-c000-000000000046"),
    };

    /// Returns `true` when `bytes` is byte-for-byte identical to either stored
    /// representation of this identifier.
    ///
    /// Both orderings are checked so the caller does not need to know which
    /// one the raw bytes were read in.
    #[must_use]
    pub fn equal(&self, bytes: &[u8; 16]) -> bool {
        self.be_uuid.as_bytes() == bytes || self.le_uuid.as_bytes() == bytes
    }
}
/// Coarse classification of a 16-byte CLSID, as produced by `ClsidType::from`.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum ClsidType {
/// Matched `Clsid::EXCEL5` or `Clsid::EXCEL97`.
Excel,
/// Matched `Clsid::PPT`, `Clsid::POWERPOINT4` or `Clsid::POWERPOINT95`.
PowerPoint,
/// Matched `Clsid::WORD6` or `Clsid::DOC`.
Word,
/// Matched `Clsid::MSI`; displayed as "Installer".
MSI,
/// Matched `Clsid::MSP`; displayed as "Windows Patch".
MSP,
/// Matched none of the known identifiers; carries the raw bytes unchanged.
Unknown([u8; 16]),
}
impl ClsidType {
#[instrument]
pub fn from(bytes: &[u8; 16]) -> Self {
if Clsid::EXCEL5.equal(bytes) || Clsid::EXCEL97.equal(bytes) {
return Self::Excel;
}
if Clsid::WORD6.equal(bytes) || Clsid::DOC.equal(bytes) {
return Self::Word;
}
if Clsid::PPT.equal(bytes)
|| Clsid::POWERPOINT4.equal(bytes)
|| Clsid::POWERPOINT95.equal(bytes)
{
return Self::PowerPoint;
}
if Clsid::MSI.equal(bytes) {
return Self::MSI;
}
if Clsid::MSP.equal(bytes) {
return Self::MSP;
}
Self::Unknown(*bytes)
}
}
impl ClsidType {
#[inline]
#[must_use]
pub fn is_document(&self) -> bool {
matches!(
self,
ClsidType::Excel | ClsidType::PowerPoint | ClsidType::Word
)
}
}
impl Display for ClsidType {
    /// Writes a short human-readable name for the classification; unknown
    /// identifiers are rendered as `Unknown/other ` followed by the hex-encoded
    /// raw bytes.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let label = match self {
            // The only variant that needs formatting; handle it and leave.
            Self::Unknown(raw) => return write!(f, "Unknown/other {}", hex::encode(raw)),
            Self::Excel => "Excel",
            Self::PowerPoint => "PowerPoint",
            Self::Word => "Word",
            Self::MSI => "Installer",
            Self::MSP => "Windows Patch",
        };
        f.write_str(label)
    }
}
/// Metadata extracted from a DOCFILE buffer by `Office95::from`, together with
/// a borrow of the buffer it was parsed from.
#[derive(Clone, Debug)]
pub struct Office95<'a> {
/// Classification of the CLSID read from the file; `Office95::from` only
/// succeeds when this is a document type (Excel, PowerPoint or Word).
pub clsid: ClsidType,
/// Creation timestamp, or `None` when the stored value was zero or could not
/// be read.
pub creation_time: Option<DateTime<Utc>>,
/// Modification timestamp, or `None` when the stored value was zero or could
/// not be read.
pub modification_time: Option<DateTime<Utc>>,
/// The complete input buffer this value was parsed from.
pub contents: &'a [u8],
}
impl<'a> Office95<'a> {
#[instrument(name = "Office95/Docfile parser", skip(contents))]
pub fn from(contents: &'a [u8]) -> Result<Self> {
ensure!(contents.starts_with(&DOCFILE_MAGIC), "Not a DOCFILE");
let offset: [u8; 4] = contents[48..52]
.try_into()
.context("Failed to get slice for Office95 offset")?;
let offset_int = u32::from_le_bytes(offset);
let offset_int = (512 * (1 + offset_int) + 80) as usize;
let clsid: [u8; 16] = contents[offset_int..offset_int + 16]
.try_into()
.context("Failed to get slide for Office95 clsid")?;
let creation_time = if let Some(creation_time) =
u64_from_offset(contents, offset_int + 20, Ordering::LittleEndian)
{
if creation_time > 0 {
Some(DateTime::<Utc>::from(nt_time::FileTime::new(creation_time)))
} else {
None
}
} else {
None
};
let modification_time = if let Some(modification_time) =
u64_from_offset(contents, offset_int + 28, Ordering::LittleEndian)
{
if modification_time > 0 {
Some(DateTime::<Utc>::from(nt_time::FileTime::new(
modification_time,
)))
} else {
None
}
} else {
None
};
let clsid = ClsidType::from(&clsid);
ensure!(
clsid.is_document(),
"Office95: CLSID `{clsid}` is not a known or supported document type"
);
Ok(Self {
clsid,
creation_time,
modification_time,
contents,
})
}
}
impl DocumentFile for Office95<'_> {
    // --- Values captured when the file was parsed -------------------------

    /// Creation timestamp stored on this value, if any.
    fn creation_time(&self) -> Option<DateTime<Utc>> {
        self.creation_time
    }

    /// Modification timestamp stored on this value, if any.
    fn modification_time(&self) -> Option<DateTime<Utc>> {
        self.modification_time
    }

    // --- Properties this parser does not extract --------------------------

    /// Always zero: no page count is read from the file.
    fn pages(&self) -> u32 {
        0
    }

    /// Always `None`: no author is read from the file.
    fn author(&self) -> Option<String> {
        None
    }

    /// Always `None`: no title is read from the file.
    fn title(&self) -> Option<String> {
        None
    }

    /// Always `false`: the file is not inspected for JavaScript.
    fn has_javascript(&self) -> bool {
        false
    }

    /// Always `false`: the file is not inspected for forms.
    fn has_form(&self) -> bool {
        false
    }
}
impl SpecimenFile for Office95<'_> {
/// The single signature recognised for this type: the DOCFILE magic bytes.
const MAGIC: &'static [&'static [u8]] = &[&DOCFILE_MAGIC];
/// Fixed type label, independent of which document family was detected.
fn type_name(&self) -> &'static str {
"Office95"
}
}
impl Display for Office95<'_> {
    /// Renders a one-line summary: the document type, then the creation and
    /// modification timestamps when present, then the buffer size in bytes.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let Self {
            clsid,
            creation_time,
            modification_time,
            contents,
        } = self;

        write!(f, "Type: {clsid}")?;
        // Absent timestamps are skipped entirely rather than printed as empty.
        for (label, stamp) in [("Created", creation_time), ("Modified", modification_time)] {
            if let Some(when) = stamp {
                write!(f, ", {label}: {when}")?;
            }
        }
        write!(f, ", Size: {}", contents.len())
    }
}
// Parser tests driven by sample files embedded at compile time.
#[cfg(test)]
mod tests {
use super::*;
use rstest::rstest;
// One case per sample file: each must parse successfully and be classified
// as the expected document family.
#[rstest]
#[case::word(include_bytes!("../../testdata/office95/word.doc"), ClsidType::Word)]
#[case::excel(include_bytes!("../../testdata/office95/excel.xls"), ClsidType::Excel)]
#[case::powerpoint(include_bytes!("../../testdata/office95/powerpoint.ppt"), ClsidType::PowerPoint)]
fn doc(#[case] bytes: &[u8], #[case] expected_clsid: ClsidType) {
let office = Office95::from(bytes).unwrap();
// Printing also exercises the `Display` implementation.
println!("{office}");
assert_eq!(office.clsid, expected_clsid);
}
}