1use crate::doc::DocumentFile;
4use crate::utils::u64_from_offset;
5use crate::{Ordering, SpecimenFile};
6
7use std::fmt::{Display, Formatter};
8
9use anyhow::{ensure, Context, Result};
10use chrono::{DateTime, Utc};
11use tracing::instrument;
12use uuid::{uuid, Uuid};
13
/// Eight-byte signature that must open every buffer accepted by `Office95::from`;
/// also advertised as the type's `SpecimenFile::MAGIC`.
const DOCFILE_MAGIC: [u8; 8] = *b"\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1";
15
16#[derive(Clone, Debug, Eq)]
20pub struct Clsid {
21 pub le_uuid: Uuid,
23
24 pub be_uuid: Uuid,
26}
27
28impl PartialEq for Clsid {
29 fn eq(&self, other: &Self) -> bool {
30 self.be_uuid == other.be_uuid || self.le_uuid == other.le_uuid
31 }
32}
33
34impl Clsid {
35 pub const EXCEL5: Self = Clsid {
37 le_uuid: uuid!("10080200-0000-0000-c000-000000000046"),
38 be_uuid: uuid!("00020810-0000-0000-c000-000000000046"),
39 };
40
41 pub const EXCEL97: Self = Clsid {
43 le_uuid: uuid!("20080200-0000-0000-c000-000000000046"),
44 be_uuid: uuid!("00020820-0000-0000-c000-000000000046"),
45 };
46
47 pub const WORD6: Self = Clsid {
49 le_uuid: uuid!("00090200-0000-0000-c000-000000000046"),
50 be_uuid: uuid!("00020900-0000-0000-c000-000000000046"),
51 };
52
53 pub const DOC: Self = Clsid {
55 le_uuid: uuid!("06090200-0000-0000-c000-000000000046"),
56 be_uuid: uuid!("00020906-0000-0000-c000-000000000046"),
57 };
58
59 pub const POWERPOINT4: Self = Clsid {
61 le_uuid: uuid!("51480400-0000-0000-c000-000000000046"),
62 be_uuid: uuid!("00044851-0000-0000-c000-000000000046"),
63 };
64
65 pub const POWERPOINT95: Self = Clsid {
67 le_uuid: uuid!("ea7bae70-fb3b-11cd-a903-00aa00510ea3"),
68 be_uuid: uuid!("70ae7bea-3bfb-cd11-a903-00aa00510ea3"),
69 };
70
71 pub const PPT: Self = Clsid {
73 le_uuid: uuid!("108d8164-9b4f-cf11-86ea-00aa00b929e8"),
74 be_uuid: uuid!("64818d10-4f9b-11cf-86ea-00aa00b929e8"),
75 };
76
77 pub const MSI: Self = Clsid {
79 le_uuid: uuid!("000c1084-0000-0000-c000-000000000046"),
80 be_uuid: uuid!("84100c00-0000-0000-c000-000000000046"),
81 };
82
83 pub const MSP: Self = Clsid {
85 le_uuid: uuid!("000c1086-0000-0000-c000-000000000046"),
86 be_uuid: uuid!("86100c00-0000-0000-c000-000000000046"),
87 };
88
89 #[must_use]
91 pub fn equal(&self, bytes: &[u8; 16]) -> bool {
92 self.be_uuid.as_bytes() == bytes || self.le_uuid.as_bytes() == bytes
93 }
94}
95
/// Coarse classification of a 16-byte class identifier.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ClsidType {
    /// One of the Excel identifiers.
    Excel,

    /// One of the PowerPoint identifiers.
    PowerPoint,

    /// One of the Word identifiers.
    Word,

    /// The MSI identifier (displayed as "Installer").
    MSI,

    /// The MSP identifier (displayed as "Windows Patch").
    MSP,

    /// Any identifier that is not recognised; the raw bytes are kept for reporting.
    Unknown([u8; 16]),
}
118
119impl ClsidType {
120 #[instrument]
122 pub fn from(bytes: &[u8; 16]) -> Self {
123 if Clsid::EXCEL5.equal(bytes) || Clsid::EXCEL97.equal(bytes) {
124 return Self::Excel;
125 }
126
127 if Clsid::WORD6.equal(bytes) || Clsid::DOC.equal(bytes) {
128 return Self::Word;
129 }
130
131 if Clsid::PPT.equal(bytes)
132 || Clsid::POWERPOINT4.equal(bytes)
133 || Clsid::POWERPOINT95.equal(bytes)
134 {
135 return Self::PowerPoint;
136 }
137
138 if Clsid::MSI.equal(bytes) {
139 return Self::MSI;
140 }
141
142 if Clsid::MSP.equal(bytes) {
143 return Self::MSP;
144 }
145
146 Self::Unknown(*bytes)
147 }
148}
149
150impl ClsidType {
151 #[inline]
153 #[must_use]
154 pub fn is_document(&self) -> bool {
155 matches!(
156 self,
157 ClsidType::Excel | ClsidType::PowerPoint | ClsidType::Word
158 )
159 }
160}
161
162impl Display for ClsidType {
163 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
164 match self {
165 ClsidType::Excel => write!(f, "Excel"),
166 ClsidType::PowerPoint => write!(f, "PowerPoint"),
167 ClsidType::Word => write!(f, "Word"),
168 ClsidType::MSI => write!(f, "Installer"),
169 ClsidType::MSP => write!(f, "Windows Patch"),
170 ClsidType::Unknown(uuid) => write!(f, "Unknown/other {}", hex::encode(uuid)),
171 }
172 }
173}
174
175#[derive(Clone, Debug)]
181pub struct Office95<'a> {
182 pub clsid: ClsidType,
184
185 pub creation_time: Option<DateTime<Utc>>,
187
188 pub modification_time: Option<DateTime<Utc>>,
190
191 pub contents: &'a [u8],
193}
194
195impl<'a> Office95<'a> {
196 #[instrument(name = "Office95/Docfile parser", skip(contents))]
198 pub fn from(contents: &'a [u8]) -> Result<Self> {
199 ensure!(contents.starts_with(&DOCFILE_MAGIC), "Not a DOCFILE");
200
201 let offset: [u8; 4] = contents[48..52]
202 .try_into()
203 .context("Failed to get slice for Office95 offset")?;
204 let offset_int = u32::from_le_bytes(offset);
205 let offset_int = (512 * (1 + offset_int) + 80) as usize;
206 let clsid: [u8; 16] = contents[offset_int..offset_int + 16]
207 .try_into()
208 .context("Failed to get slide for Office95 clsid")?;
209
210 let creation_time = u64_from_offset(contents, offset_int + 20, Ordering::LittleEndian);
211 let creation_time = if creation_time > 0 {
212 Some(DateTime::<Utc>::from(nt_time::FileTime::new(creation_time)))
216 } else {
217 None
218 };
219
220 let modification_time = u64_from_offset(contents, offset_int + 28, Ordering::LittleEndian);
221 let modification_time = if modification_time > 0 {
222 Some(DateTime::<Utc>::from(nt_time::FileTime::new(
226 modification_time,
227 )))
228 } else {
229 None
230 };
231
232 let clsid = ClsidType::from(&clsid);
233 ensure!(clsid.is_document(), "{clsid} is not a document type");
234
235 Ok(Self {
236 clsid,
237 creation_time,
238 modification_time,
239 contents,
240 })
241 }
242}
243
244impl DocumentFile for Office95<'_> {
246 fn pages(&self) -> u32 {
247 0
248 }
249
250 fn author(&self) -> Option<String> {
251 None
252 }
253
254 fn title(&self) -> Option<String> {
255 None
256 }
257
258 fn has_javascript(&self) -> bool {
259 false
260 }
261
262 fn has_form(&self) -> bool {
263 false
264 }
265
266 fn creation_time(&self) -> Option<DateTime<Utc>> {
267 self.creation_time
268 }
269
270 fn modification_time(&self) -> Option<DateTime<Utc>> {
271 self.modification_time
272 }
273}
274
275impl SpecimenFile for Office95<'_> {
276 const MAGIC: &'static [&'static [u8]] = &[&DOCFILE_MAGIC];
277
278 fn type_name(&self) -> &'static str {
279 "Office95"
280 }
281}
282
283impl Display for Office95<'_> {
284 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
285 write!(f, "Type: {}", self.clsid)?;
286 if let Some(created) = self.creation_time {
287 write!(f, ", Created: {created}")?;
288 }
289 if let Some(modified) = self.modification_time {
290 write!(f, ", Modified: {modified}")?;
291 }
292 write!(f, ", Size: {}", self.contents.len())
293 }
294}
295
#[cfg(test)]
mod tests {
    use super::*;
    use rstest::rstest;

    /// Parses each bundled sample file and checks that it is classified as the expected
    /// application.
    #[rstest]
    #[case::word(include_bytes!("../../testdata/office95/word.doc"), ClsidType::Word)]
    #[case::excel(include_bytes!("../../testdata/office95/excel.xls"), ClsidType::Excel)]
    #[case::powerpoint(include_bytes!("../../testdata/office95/powerpoint.ppt"), ClsidType::PowerPoint)]
    fn doc(#[case] bytes: &[u8], #[case] expected_clsid: ClsidType) {
        let parsed = Office95::from(bytes).unwrap();
        // Printed so the rendered summary shows up when the test output is captured.
        println!("{parsed}");
        assert_eq!(parsed.clsid, expected_clsid);
    }
}