1use crate::doc::DocumentFile;
4use crate::utils::u64_from_offset;
5use crate::{Ordering, SpecimenFile};
6
7use std::fmt::{Display, Formatter};
8
9use anyhow::{ensure, Context, Result};
10use chrono::{DateTime, Utc};
11use tracing::instrument;
12use uuid::{uuid, Uuid};
13
/// Eight-byte signature every DOCFILE must start with; checked by `Office95::from`
/// and advertised through `SpecimenFile::MAGIC`.
const DOCFILE_MAGIC: [u8; 8] = *b"\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1";
15
16#[derive(Clone, Debug, Eq)]
20pub struct Clsid {
21 pub le_uuid: Uuid,
23
24 pub be_uuid: Uuid,
26}
27
28impl PartialEq for Clsid {
29 fn eq(&self, other: &Self) -> bool {
30 self.be_uuid == other.be_uuid || self.le_uuid == other.le_uuid
31 }
32}
33
34impl Clsid {
35 pub const EXCEL5: Self = Clsid {
37 le_uuid: uuid!("10080200-0000-0000-c000-000000000046"),
38 be_uuid: uuid!("00020810-0000-0000-c000-000000000046"),
39 };
40
41 pub const EXCEL97: Self = Clsid {
43 le_uuid: uuid!("20080200-0000-0000-c000-000000000046"),
44 be_uuid: uuid!("00020820-0000-0000-c000-000000000046"),
45 };
46
47 pub const WORD6: Self = Clsid {
49 le_uuid: uuid!("00090200-0000-0000-c000-000000000046"),
50 be_uuid: uuid!("00020900-0000-0000-c000-000000000046"),
51 };
52
53 pub const DOC: Self = Clsid {
55 le_uuid: uuid!("06090200-0000-0000-c000-000000000046"),
56 be_uuid: uuid!("00020906-0000-0000-c000-000000000046"),
57 };
58
59 pub const POWERPOINT4: Self = Clsid {
61 le_uuid: uuid!("51480400-0000-0000-c000-000000000046"),
62 be_uuid: uuid!("00044851-0000-0000-c000-000000000046"),
63 };
64
65 pub const POWERPOINT95: Self = Clsid {
67 le_uuid: uuid!("ea7bae70-fb3b-11cd-a903-00aa00510ea3"),
68 be_uuid: uuid!("70ae7bea-3bfb-cd11-a903-00aa00510ea3"),
69 };
70
71 pub const PPT: Self = Clsid {
73 le_uuid: uuid!("108d8164-9b4f-cf11-86ea-00aa00b929e8"),
74 be_uuid: uuid!("64818d10-4f9b-11cf-86ea-00aa00b929e8"),
75 };
76
77 pub const MSI: Self = Clsid {
79 le_uuid: uuid!("000c1084-0000-0000-c000-000000000046"),
80 be_uuid: uuid!("84100c00-0000-0000-c000-000000000046"),
81 };
82
83 pub const MSP: Self = Clsid {
85 le_uuid: uuid!("000c1086-0000-0000-c000-000000000046"),
86 be_uuid: uuid!("86100c00-0000-0000-c000-000000000046"),
87 };
88
89 #[must_use]
91 pub fn equal(&self, bytes: &[u8; 16]) -> bool {
92 self.be_uuid.as_bytes() == bytes || self.le_uuid.as_bytes() == bytes
93 }
94}
95
/// Coarse classification of a 16-byte class identifier.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ClsidType {
    /// One of the known Excel identifiers.
    Excel,

    /// One of the known PowerPoint identifiers.
    PowerPoint,

    /// One of the known Word identifiers.
    Word,

    /// The installer package identifier.
    MSI,

    /// The installer patch identifier.
    MSP,

    /// Any identifier that matched none of the known ones; carries the raw bytes.
    Unknown([u8; 16]),
}
118
119impl ClsidType {
120 #[instrument]
122 pub fn from(bytes: &[u8; 16]) -> Self {
123 if Clsid::EXCEL5.equal(bytes) || Clsid::EXCEL97.equal(bytes) {
124 return Self::Excel;
125 }
126
127 if Clsid::WORD6.equal(bytes) || Clsid::DOC.equal(bytes) {
128 return Self::Word;
129 }
130
131 if Clsid::PPT.equal(bytes)
132 || Clsid::POWERPOINT4.equal(bytes)
133 || Clsid::POWERPOINT95.equal(bytes)
134 {
135 return Self::PowerPoint;
136 }
137
138 if Clsid::MSI.equal(bytes) {
139 return Self::MSI;
140 }
141
142 if Clsid::MSP.equal(bytes) {
143 return Self::MSP;
144 }
145
146 Self::Unknown(*bytes)
147 }
148}
149
150impl ClsidType {
151 #[inline]
153 #[must_use]
154 pub fn is_document(&self) -> bool {
155 matches!(
156 self,
157 ClsidType::Excel | ClsidType::PowerPoint | ClsidType::Word
158 )
159 }
160}
161
162impl Display for ClsidType {
163 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
164 match self {
165 ClsidType::Excel => write!(f, "Excel"),
166 ClsidType::PowerPoint => write!(f, "PowerPoint"),
167 ClsidType::Word => write!(f, "Word"),
168 ClsidType::MSI => write!(f, "Installer"),
169 ClsidType::MSP => write!(f, "Windows Patch"),
170 ClsidType::Unknown(uuid) => write!(f, "Unknown/other {}", hex::encode(uuid)),
171 }
172 }
173}
174
175#[derive(Clone, Debug)]
181pub struct Office95<'a> {
182 pub clsid: ClsidType,
184
185 pub creation_time: Option<DateTime<Utc>>,
187
188 pub modification_time: Option<DateTime<Utc>>,
190
191 pub contents: &'a [u8],
193}
194
195impl<'a> Office95<'a> {
196 #[instrument(name = "Office95/Docfile parser", skip(contents))]
202 pub fn from(contents: &'a [u8]) -> Result<Self> {
203 ensure!(contents.starts_with(&DOCFILE_MAGIC), "Not a DOCFILE");
204
205 let offset: [u8; 4] = contents[48..52]
206 .try_into()
207 .context("Failed to get slice for Office95 offset")?;
208 let offset_int = u32::from_le_bytes(offset);
209 let offset_int = (512 * (1 + offset_int) + 80) as usize;
210 let clsid: [u8; 16] = contents[offset_int..offset_int + 16]
211 .try_into()
212 .context("Failed to get slide for Office95 clsid")?;
213
214 let creation_time = if let Some(creation_time) =
215 u64_from_offset(contents, offset_int + 20, Ordering::LittleEndian)
216 {
217 if creation_time > 0 {
218 Some(DateTime::<Utc>::from(nt_time::FileTime::new(creation_time)))
222 } else {
223 None
224 }
225 } else {
226 None
227 };
228
229 let modification_time = if let Some(modification_time) =
230 u64_from_offset(contents, offset_int + 28, Ordering::LittleEndian)
231 {
232 if modification_time > 0 {
233 Some(DateTime::<Utc>::from(nt_time::FileTime::new(
237 modification_time,
238 )))
239 } else {
240 None
241 }
242 } else {
243 None
244 };
245
246 let clsid = ClsidType::from(&clsid);
247 ensure!(
248 clsid.is_document(),
249 "Office95: CLSID `{clsid}` is not a known or supported document type"
250 );
251
252 Ok(Self {
253 clsid,
254 creation_time,
255 modification_time,
256 contents,
257 })
258 }
259}
260
261impl DocumentFile for Office95<'_> {
263 fn pages(&self) -> u32 {
264 0
265 }
266
267 fn author(&self) -> Option<String> {
268 None
269 }
270
271 fn title(&self) -> Option<String> {
272 None
273 }
274
275 fn has_javascript(&self) -> bool {
276 false
277 }
278
279 fn has_form(&self) -> bool {
280 false
281 }
282
283 fn creation_time(&self) -> Option<DateTime<Utc>> {
284 self.creation_time
285 }
286
287 fn modification_time(&self) -> Option<DateTime<Utc>> {
288 self.modification_time
289 }
290}
291
292impl SpecimenFile for Office95<'_> {
293 const MAGIC: &'static [&'static [u8]] = &[&DOCFILE_MAGIC];
294
295 fn type_name(&self) -> &'static str {
296 "Office95"
297 }
298}
299
300impl Display for Office95<'_> {
301 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
302 write!(f, "Type: {}", self.clsid)?;
303 if let Some(created) = self.creation_time {
304 write!(f, ", Created: {created}")?;
305 }
306 if let Some(modified) = self.modification_time {
307 write!(f, ", Modified: {modified}")?;
308 }
309 write!(f, ", Size: {}", self.contents.len())
310 }
311}
312
#[cfg(test)]
mod tests {
    use super::*;
    use rstest::rstest;

    // Each case parses one bundled sample file and checks the detected document kind.
    #[rstest]
    #[case::word(include_bytes!("../../testdata/office95/word.doc"), ClsidType::Word)]
    #[case::excel(include_bytes!("../../testdata/office95/excel.xls"), ClsidType::Excel)]
    #[case::powerpoint(include_bytes!("../../testdata/office95/powerpoint.ppt"), ClsidType::PowerPoint)]
    fn doc(#[case] bytes: &[u8], #[case] expected_clsid: ClsidType) {
        let parsed = Office95::from(bytes).unwrap();
        // Printed so the summary line shows up when tests run with `--nocapture`.
        println!("{parsed}");
        assert_eq!(parsed.clsid, expected_clsid);
    }
}