1use std::io::{Cursor, Read};
39
40use forensicnomicon::olecf as k;
41use forensicnomicon::report::{Category, Finding, Location, Severity, Source, SubjectRef};
42
43pub mod raw;
44
45use raw::{DirEntry, RawCfb};
46
/// Byte cap (`1 << 24`, i.e. 16 MiB) passed as the `size` limit whenever a
/// whole sector chain is carved rather than a declared stream length — for
/// example when materialising the root entry's mini stream.
const MAX_MINI_STREAM: usize = 1 << 24;
50
/// Portion of the input an audit run covers.
///
/// The value is turned into the human-readable scope label stored in the
/// `Source` attached to each finding.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Scope {
    /// The entire file.
    Whole,
}

impl Scope {
    /// Human-readable label for this scope.
    fn label(self) -> &'static str {
        // Kept as an exhaustive match so a new variant forces a label choice.
        match self {
            Self::Whole => "whole file",
        }
    }
}
65
66#[must_use]
68pub fn source(scope: Scope) -> Source {
69 Source {
70 analyzer: "cfb-forensic".to_string(),
71 scope: scope.label().to_string(),
72 version: Some(env!("CARGO_PKG_VERSION").to_string()),
73 }
74}
75
/// Metadata recorded for a directory entry that is not reachable from the
/// live root tree.
///
/// Every field except `carved_len` is copied verbatim from the raw directory
/// entry when the orphan is detected.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct OrphanDetail {
    /// Identifier (`sid`) of the directory entry.
    pub sid: u32,
    /// Entry name as provided by the raw directory parser.
    pub name: String,
    /// Raw object-type byte of the entry; only `0x01` and `0x02` entries are
    /// reported as orphans.
    pub object_type: u8,
    /// Stream size declared by the entry, in bytes.
    pub stream_size: u64,
    /// Starting sector declared by the entry; carving begins here.
    pub start_sector: u32,
    /// Raw creation timestamp stored in the entry (the notes in this module
    /// call these values FILETIMEs).
    pub create_time: u64,
    /// Raw modification timestamp stored in the entry.
    pub modify_time: u64,
    /// Number of bytes recovered when carving the entry's chain; zero when
    /// the entry is not object type `0x02` or declares an empty stream.
    pub carved_len: usize,
}
97
/// Specific structural rule violation carried by
/// `OlecfAnomaly::StructureAnomaly`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum StructureIssue {
    /// A stream directory entry whose 16-byte CLSID is not all zero.
    StreamNonZeroClsid { sid: u32, name: String },
    /// A stream directory entry whose state bits are not zero; the raw value
    /// is kept in `state_bits`.
    StreamNonZeroStateBits {
        sid: u32,
        name: String,
        state_bits: u32,
    },
    /// A stream directory entry whose create or modify timestamp is not zero.
    StreamNonZeroFiletime { sid: u32, name: String },
    /// A sector chain that loops back on itself; `space` names the chain in
    /// the rendered note.
    ChainLoop { space: &'static str },
    /// A header DIFAT slot that references FAT sector `sid` beyond the end of
    /// the file.
    DifatOffFile { sid: u32 },
    /// The header byte-order field held `value` instead of the expected
    /// little-endian marker.
    BadByteOrder { value: u16 },
}
119
/// One observation produced by auditing a compound file.
///
/// Each variant maps to a code, severity, category, MITRE technique list and
/// note through the methods on this type, and can be converted to a
/// `Finding`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum OlecfAnomaly {
    /// A directory entry (object type `0x01` or `0x02`) that is not reachable
    /// from the live directory tree.
    OrphanedDirEntry(OrphanDetail),
    /// A sector marked free in the FAT or mini-FAT that still holds non-zero
    /// bytes.
    FreeSectorResidue {
        /// Index of the free sector within its allocation table.
        sid: u32,
        /// Allocation space the sector belongs to: `"FAT"` or `"mini-FAT"`.
        space: &'static str,
        /// Byte offset of the sector: a file offset for `"FAT"` sectors, an
        /// offset into the carved mini stream for `"mini-FAT"` sectors.
        offset: u64,
        /// Count of non-zero bytes found in the sector.
        residue_len: usize,
    },
    /// Non-zero bytes found after a live stream's declared size within its
    /// carved sector chain.
    SlackResidue {
        /// Identifier of the stream's directory entry.
        sid: u32,
        /// Name of the stream's directory entry.
        name: String,
        /// Allocation space of the stream: `"FAT"` or `"mini-FAT"`.
        space: &'static str,
        /// Count of non-zero bytes found past the declared size.
        slack_len: usize,
    },
    /// A violation of a structural rule; see [`StructureIssue`].
    StructureAnomaly(StructureIssue),
    /// Informational record of the first directory entry's CLSID and
    /// timestamps; emitted whenever that entry exists.
    RootClsid {
        /// Identifier of the first directory entry.
        sid: u32,
        /// Entry name, or `"Root Entry"` when the stored name is empty.
        name: String,
        /// CLSID rendered as an upper-case, dash-separated hex string.
        clsid: String,
        /// Raw creation timestamp of the entry.
        create_time: u64,
        /// Raw modification timestamp of the entry.
        modify_time: u64,
    },
}
167
168impl OlecfAnomaly {
169 #[must_use]
171 pub fn code(&self) -> &'static str {
172 match self {
173 OlecfAnomaly::OrphanedDirEntry(_) => "OLECF-ORPHANED-DIR-ENTRY",
174 OlecfAnomaly::FreeSectorResidue { .. } => "OLECF-FREE-SECTOR-RESIDUE",
175 OlecfAnomaly::SlackResidue { .. } => "OLECF-SLACK-RESIDUE",
176 OlecfAnomaly::StructureAnomaly(_) => "OLECF-STRUCTURE-ANOMALY",
177 OlecfAnomaly::RootClsid { .. } => "OLECF-ROOT-CLSID",
178 }
179 }
180
181 #[must_use]
183 pub fn severity(&self) -> Severity {
184 match self {
185 OlecfAnomaly::OrphanedDirEntry(_) => Severity::High,
186 OlecfAnomaly::FreeSectorResidue { .. } => Severity::Medium,
187 OlecfAnomaly::SlackResidue { slack_len, .. } => {
188 if *slack_len >= k::MINI_SECTOR_SIZE {
189 Severity::Medium
190 } else {
191 Severity::Low
192 }
193 }
194 OlecfAnomaly::StructureAnomaly(issue) => match issue {
195 StructureIssue::StreamNonZeroClsid { .. }
196 | StructureIssue::StreamNonZeroStateBits { .. }
197 | StructureIssue::StreamNonZeroFiletime { .. }
198 | StructureIssue::ChainLoop { .. }
199 | StructureIssue::DifatOffFile { .. } => Severity::High,
200 StructureIssue::BadByteOrder { .. } => Severity::Medium,
201 },
202 OlecfAnomaly::RootClsid { .. } => Severity::Info,
203 }
204 }
205
206 #[must_use]
208 pub fn category(&self) -> Category {
209 match self {
210 OlecfAnomaly::OrphanedDirEntry(_)
211 | OlecfAnomaly::FreeSectorResidue { .. }
212 | OlecfAnomaly::SlackResidue { .. } => Category::Residue,
213 OlecfAnomaly::StructureAnomaly(_) => Category::Integrity,
214 OlecfAnomaly::RootClsid { .. } => Category::Provenance,
215 }
216 }
217
218 #[must_use]
220 pub fn mitre(&self) -> &'static [&'static str] {
221 match self {
222 OlecfAnomaly::OrphanedDirEntry(_) => &["T1070", "T1564"],
223 OlecfAnomaly::FreeSectorResidue { .. } | OlecfAnomaly::SlackResidue { .. } => {
224 &["T1564"]
225 }
226 OlecfAnomaly::StructureAnomaly(_) => &["T1070", "T1027"],
227 OlecfAnomaly::RootClsid { .. } => &[],
228 }
229 }
230
231 #[must_use]
233 pub fn note(&self) -> String {
234 match self {
235 OlecfAnomaly::OrphanedDirEntry(d) => format!(
236 "Directory entry '{}' (sid {}) is not reachable from the live root tree; \
237 consistent with a deleted stream whose metadata survived. {} byte(s) carved \
238 from the resident FAT chain.",
239 d.name, d.sid, d.carved_len
240 ),
241 OlecfAnomaly::FreeSectorResidue {
242 sid,
243 space,
244 offset,
245 residue_len,
246 } => format!(
247 "{space} sector {sid} is marked free but holds {residue_len} non-zero byte(s) at \
248 offset {offset}; consistent with deleted-stream remnant."
249 ),
250 OlecfAnomaly::SlackResidue {
251 name,
252 space,
253 slack_len,
254 ..
255 } => format!(
256 "Stream '{name}' leaves {slack_len} non-zero {space} slack byte(s) past its \
257 declared size; consistent with residue from a prior, larger allocation."
258 ),
259 OlecfAnomaly::StructureAnomaly(issue) => issue.note(),
260 OlecfAnomaly::RootClsid {
261 name,
262 clsid,
263 create_time,
264 modify_time,
265 ..
266 } => format!(
267 "{name} CLSID {clsid}; create FILETIME {create_time}, modify FILETIME {modify_time}."
268 ),
269 }
270 }
271
272 fn subject(&self) -> Option<SubjectRef> {
274 let (sid, name) = match self {
275 OlecfAnomaly::OrphanedDirEntry(d) => (d.sid, d.name.clone()),
276 OlecfAnomaly::SlackResidue { sid, name, .. }
277 | OlecfAnomaly::RootClsid { sid, name, .. } => (*sid, name.clone()),
278 OlecfAnomaly::StructureAnomaly(issue) => return issue.subject(),
279 OlecfAnomaly::FreeSectorResidue { .. } => return None,
280 };
281 Some(SubjectRef {
282 scheme: "olecf".to_string(),
283 kind: "directory_entry".to_string(),
284 id: format!("sid:{sid}"),
285 label: Some(name),
286 })
287 }
288
289 #[must_use]
292 pub fn to_finding(&self, src: Source) -> Finding {
293 let mut builder = Finding::observation(self.severity(), self.category(), self.code())
294 .note(self.note())
295 .source(src);
296
297 if let Some(subject) = self.subject() {
298 builder = builder.subject(subject);
299 }
300 for technique in self.mitre() {
301 builder = builder.mitre(*technique);
302 }
303 for (field, value, loc) in self.evidence() {
304 builder = match loc {
305 Some(location) => builder.evidence_at(field, value, location),
306 None => builder.evidence(field, value),
307 };
308 }
309 builder.build()
310 }
311
312 fn evidence(&self) -> Vec<(String, String, Option<Location>)> {
314 match self {
315 OlecfAnomaly::OrphanedDirEntry(d) => vec![
316 ("name".into(), d.name.clone(), None),
317 (
318 "object_type".into(),
319 format!("0x{:02x}", d.object_type),
320 None,
321 ),
322 (
323 "stream_size".into(),
324 d.stream_size.to_string(),
325 Some(Location::RecordId(u64::from(d.sid))),
326 ),
327 ("start_sector".into(), d.start_sector.to_string(), None),
328 ("carved_len".into(), d.carved_len.to_string(), None),
329 ("create_time".into(), d.create_time.to_string(), None),
330 ("modify_time".into(), d.modify_time.to_string(), None),
331 ],
332 OlecfAnomaly::FreeSectorResidue {
333 space,
334 residue_len,
335 offset,
336 ..
337 } => vec![
338 ("space".into(), (*space).to_string(), None),
339 (
340 "residue_len".into(),
341 residue_len.to_string(),
342 Some(Location::ByteOffset(*offset)),
343 ),
344 ],
345 OlecfAnomaly::SlackResidue {
346 space, slack_len, ..
347 } => vec![
348 ("space".into(), (*space).to_string(), None),
349 ("slack_len".into(), slack_len.to_string(), None),
350 ],
351 OlecfAnomaly::StructureAnomaly(issue) => issue.evidence(),
352 OlecfAnomaly::RootClsid {
353 clsid,
354 create_time,
355 modify_time,
356 ..
357 } => vec![
358 ("clsid".into(), clsid.clone(), None),
359 ("create_time".into(), create_time.to_string(), None),
360 ("modify_time".into(), modify_time.to_string(), None),
361 ],
362 }
363 }
364}
365
366impl StructureIssue {
367 fn note(&self) -> String {
368 match self {
369 StructureIssue::StreamNonZeroClsid { name, sid } => format!(
370 "Stream entry '{name}' (sid {sid}) carries a non-zero CLSID; [MS-CFB] §2.6.3 \
371 requires it zero — consistent with tampering or a non-conformant writer."
372 ),
373 StructureIssue::StreamNonZeroStateBits {
374 name,
375 sid,
376 state_bits,
377 } => format!(
378 "Stream entry '{name}' (sid {sid}) carries non-zero state bits 0x{state_bits:08x}; \
379 [MS-CFB] §2.6.3 requires them zero — consistent with tampering."
380 ),
381 StructureIssue::StreamNonZeroFiletime { name, sid } => format!(
382 "Stream entry '{name}' (sid {sid}) carries a non-zero create/modify FILETIME; \
383 [MS-CFB] §2.6.3 requires it zero — consistent with tampering or timestomping."
384 ),
385 StructureIssue::ChainLoop { space } => format!(
386 "The {space} chain loops back on itself; consistent with structural corruption \
387 or a crafted file."
388 ),
389 StructureIssue::DifatOffFile { sid } => format!(
390 "A DIFAT slot references FAT sector {sid} beyond the end of the file; consistent \
391 with structural corruption or a crafted file."
392 ),
393 StructureIssue::BadByteOrder { value } => format!(
394 "Header byte-order mark is 0x{value:04x}, not the required little-endian 0xFFFE."
395 ),
396 }
397 }
398
399 fn subject(&self) -> Option<SubjectRef> {
400 let (sid, name) = match self {
401 StructureIssue::StreamNonZeroClsid { sid, name }
402 | StructureIssue::StreamNonZeroStateBits { sid, name, .. }
403 | StructureIssue::StreamNonZeroFiletime { sid, name } => (*sid, name.clone()),
404 StructureIssue::ChainLoop { .. }
405 | StructureIssue::DifatOffFile { .. }
406 | StructureIssue::BadByteOrder { .. } => return None,
407 };
408 Some(SubjectRef {
409 scheme: "olecf".to_string(),
410 kind: "directory_entry".to_string(),
411 id: format!("sid:{sid}"),
412 label: Some(name),
413 })
414 }
415
416 fn evidence(&self) -> Vec<(String, String, Option<Location>)> {
417 match self {
418 StructureIssue::StreamNonZeroStateBits { state_bits, .. } => {
419 vec![("state_bits".into(), format!("0x{state_bits:08x}"), None)]
420 }
421 StructureIssue::DifatOffFile { sid } => {
422 vec![("fat_sector".into(), sid.to_string(), None)]
423 }
424 StructureIssue::BadByteOrder { value } => {
425 vec![("byte_order".into(), format!("0x{value:04x}"), None)]
426 }
427 _ => Vec::new(),
428 }
429 }
430}
431
432#[must_use]
436pub fn audit_bytes(data: &[u8]) -> Vec<OlecfAnomaly> {
437 let Some(raw) = raw::decode(data) else {
438 return Vec::new();
439 };
440
441 let mut anomalies = Vec::new();
442
443 if raw.byte_order != k::BYTE_ORDER_LE {
445 anomalies.push(OlecfAnomaly::StructureAnomaly(
446 StructureIssue::BadByteOrder {
447 value: raw.byte_order,
448 },
449 ));
450 }
451
452 detect_orphans(data, &raw, &mut anomalies);
453 detect_structure(data, &raw, &mut anomalies);
454 detect_free_residue(data, &raw, &mut anomalies);
455 detect_slack(data, &raw, &mut anomalies);
456 surface_root_clsid(&raw, &mut anomalies);
457
458 anomalies
459}
460
461#[must_use]
463pub fn audit_findings(data: &[u8], scope: Scope) -> Vec<Finding> {
464 let src = source(scope);
465 audit_bytes(data)
466 .into_iter()
467 .map(|a| a.to_finding(src.clone()))
468 .collect()
469}
470
471fn detect_orphans(data: &[u8], raw: &RawCfb, out: &mut Vec<OlecfAnomaly>) {
475 let reachable = raw::reachable_sids(&raw.dir_entries);
476 for (idx, entry) in raw.dir_entries.iter().enumerate() {
477 if reachable.get(idx).copied().unwrap_or(false) {
478 continue;
479 }
480 if !matches!(entry.object_type, 0x01 | 0x02) {
483 continue;
484 }
485 let carved = carve_stream(data, raw, entry);
486 out.push(OlecfAnomaly::OrphanedDirEntry(OrphanDetail {
487 sid: entry.sid,
488 name: entry.name.clone(),
489 object_type: entry.object_type,
490 stream_size: entry.stream_size,
491 start_sector: entry.start_sector,
492 create_time: entry.create_time,
493 modify_time: entry.modify_time,
494 carved_len: carved.len(),
495 }));
496 }
497}
498
499fn carve_stream(data: &[u8], raw: &RawCfb, entry: &DirEntry) -> Vec<u8> {
504 if entry.object_type != 0x02 || entry.stream_size == 0 {
505 return Vec::new();
506 }
507 let size = usize::try_from(entry.stream_size).unwrap_or(usize::MAX);
508
509 if entry.stream_size < u64::from(raw.mini_stream_cutoff) {
510 carve_mini(data, raw, entry.start_sector, size)
511 } else {
512 carve_fat(data, raw, entry.start_sector, size)
513 }
514}
515
516fn carve_fat(data: &[u8], raw: &RawCfb, start: u32, size: usize) -> Vec<u8> {
518 let mut out = Vec::with_capacity(size.min(1 << 20));
519 let mut sid = start;
520 let mut visited = vec![false; raw.fat.len()];
521 while sid <= k::MAXREGSECT && out.len() < size {
522 if let Some(slot) = visited.get_mut(sid as usize) {
523 if *slot {
524 break;
525 }
526 *slot = true;
527 } else {
528 break;
529 }
530 let start_off = (u64::from(sid) + 1).saturating_mul(raw.sector_size as u64);
531 if let Ok(off) = usize::try_from(start_off) {
532 if let Some(s) = data.get(off..off.saturating_add(raw.sector_size)) {
533 out.extend_from_slice(s);
534 }
535 }
536 sid = raw.fat.get(sid as usize).copied().unwrap_or(k::ENDOFCHAIN);
537 }
538 out.truncate(size);
539 out
540}
541
542fn carve_mini(data: &[u8], raw: &RawCfb, start: u32, size: usize) -> Vec<u8> {
544 let Some(root) = raw.dir_entries.first() else {
546 return Vec::new();
547 };
548 let mini_stream = carve_fat(data, raw, root.start_sector, MAX_MINI_STREAM);
549 let mini_size = 1usize << raw.mini_sector_shift.clamp(1, 16);
550
551 let mut out = Vec::with_capacity(size.min(1 << 20));
552 let mut msid = start;
553 let mut visited = vec![false; raw.mini_fat.len()];
554 while msid <= k::MAXREGSECT && out.len() < size {
555 if let Some(slot) = visited.get_mut(msid as usize) {
556 if *slot {
557 break;
558 }
559 *slot = true;
560 } else {
561 break;
562 }
563 let off = (msid as usize).saturating_mul(mini_size);
564 if let Some(s) = mini_stream.get(off..off.saturating_add(mini_size)) {
565 out.extend_from_slice(s);
566 }
567 msid = raw
568 .mini_fat
569 .get(msid as usize)
570 .copied()
571 .unwrap_or(k::ENDOFCHAIN);
572 }
573 out.truncate(size);
574 out
575}
576
577fn detect_structure(data: &[u8], raw: &RawCfb, out: &mut Vec<OlecfAnomaly>) {
580 for entry in &raw.dir_entries {
581 if !entry.is_stream() {
582 continue;
583 }
584 if entry.clsid != [0u8; 16] {
585 out.push(OlecfAnomaly::StructureAnomaly(
586 StructureIssue::StreamNonZeroClsid {
587 sid: entry.sid,
588 name: entry.name.clone(),
589 },
590 ));
591 }
592 if entry.state_bits != 0 {
593 out.push(OlecfAnomaly::StructureAnomaly(
594 StructureIssue::StreamNonZeroStateBits {
595 sid: entry.sid,
596 name: entry.name.clone(),
597 state_bits: entry.state_bits,
598 },
599 ));
600 }
601 if entry.create_time != 0 || entry.modify_time != 0 {
602 out.push(OlecfAnomaly::StructureAnomaly(
603 StructureIssue::StreamNonZeroFiletime {
604 sid: entry.sid,
605 name: entry.name.clone(),
606 },
607 ));
608 }
609 }
610
611 let max_sid = (data.len() / raw.sector_size.max(1)) as u64;
613 for i in 0..k::DIFAT_HEADER_COUNT {
614 let off = k::DIFAT_HEADER_OFFSET + i * 4;
615 let mut b = [0u8; 4];
616 if let Some(s) = data.get(off..off + 4) {
617 b.copy_from_slice(s);
618 }
619 let sid = u32::from_le_bytes(b);
620 if sid <= k::MAXREGSECT && u64::from(sid) >= max_sid {
621 out.push(OlecfAnomaly::StructureAnomaly(
622 StructureIssue::DifatOffFile { sid },
623 ));
624 }
625 }
626}
627
628fn detect_free_residue(data: &[u8], raw: &RawCfb, out: &mut Vec<OlecfAnomaly>) {
630 for (sid, &slot) in raw.fat.iter().enumerate() {
632 if slot != k::FREESECT {
633 continue;
634 }
635 let sid = sid as u32;
636 let off = (u64::from(sid) + 1).saturating_mul(raw.sector_size as u64);
637 let Ok(start) = usize::try_from(off) else {
638 continue;
639 };
640 let Some(sector) = data.get(start..start.saturating_add(raw.sector_size)) else {
641 continue;
642 };
643 let residue = sector.iter().filter(|&&b| b != 0).count();
644 if residue > 0 {
645 out.push(OlecfAnomaly::FreeSectorResidue {
646 sid,
647 space: "FAT",
648 offset: off,
649 residue_len: residue,
650 });
651 }
652 }
653
654 let mini_size = 1usize << raw.mini_sector_shift.clamp(1, 16);
656 if let Some(root) = raw.dir_entries.first() {
657 let mini_stream = carve_fat(data, raw, root.start_sector, MAX_MINI_STREAM);
658 for (msid, &slot) in raw.mini_fat.iter().enumerate() {
659 if slot != k::FREESECT {
660 continue;
661 }
662 let off = msid.saturating_mul(mini_size);
663 let Some(sector) = mini_stream.get(off..off.saturating_add(mini_size)) else {
664 continue;
665 };
666 let residue = sector.iter().filter(|&&b| b != 0).count();
667 if residue > 0 {
668 out.push(OlecfAnomaly::FreeSectorResidue {
669 sid: msid as u32,
670 space: "mini-FAT",
671 offset: off as u64,
672 residue_len: residue,
673 });
674 }
675 }
676 }
677}
678
679fn detect_slack(data: &[u8], raw: &RawCfb, out: &mut Vec<OlecfAnomaly>) {
681 let reachable = raw::reachable_sids(&raw.dir_entries);
682 let mini_size = 1usize << raw.mini_sector_shift.clamp(1, 16);
683
684 for (idx, entry) in raw.dir_entries.iter().enumerate() {
685 if !entry.is_stream() || entry.stream_size == 0 {
686 continue;
687 }
688 if !reachable.get(idx).copied().unwrap_or(false) {
689 continue; }
691 let size = usize::try_from(entry.stream_size).unwrap_or(usize::MAX);
692 let in_mini = entry.stream_size < u64::from(raw.mini_stream_cutoff);
693 let (unit, space, bytes) = if in_mini {
694 (
695 mini_size,
696 "mini-FAT",
697 carve_mini(data, raw, entry.start_sector, MAX_MINI_STREAM),
698 )
699 } else {
700 (
701 raw.sector_size,
702 "FAT",
703 carve_fat(data, raw, entry.start_sector, MAX_MINI_STREAM),
704 )
705 };
706 if unit == 0 || size % unit == 0 {
707 continue; }
709 let slack_start = size;
710 let slack_end = bytes.len();
711 if slack_end > slack_start {
712 let slack = &bytes[slack_start..slack_end];
713 let nonzero = slack.iter().filter(|&&b| b != 0).count();
714 if nonzero > 0 {
715 out.push(OlecfAnomaly::SlackResidue {
716 sid: entry.sid,
717 name: entry.name.clone(),
718 space,
719 slack_len: nonzero,
720 });
721 }
722 }
723 }
724}
725
726fn surface_root_clsid(raw: &RawCfb, out: &mut Vec<OlecfAnomaly>) {
728 if let Some(root) = raw.dir_entries.first() {
729 out.push(OlecfAnomaly::RootClsid {
730 sid: root.sid,
731 name: if root.name.is_empty() {
732 "Root Entry".to_string()
733 } else {
734 root.name.clone()
735 },
736 clsid: format_clsid(&root.clsid),
737 create_time: root.create_time,
738 modify_time: root.modify_time,
739 });
740 }
741}
742
/// Formats a 16-byte CLSID as an upper-case
/// `XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX` string.
///
/// The first three groups (4, 2 and 2 bytes) are read as little-endian
/// integers, so their bytes appear reversed in the text; the remaining eight
/// bytes are printed in storage order.
fn format_clsid(b: &[u8; 16]) -> String {
    // Hex-encode a run of bytes in storage order.
    let hex = |bytes: &[u8]| -> String {
        bytes.iter().map(|byte| format!("{byte:02X}")).collect()
    };

    let data1 = u32::from_le_bytes([b[0], b[1], b[2], b[3]]);
    let data2 = u16::from_le_bytes([b[4], b[5]]);
    let data3 = u16::from_le_bytes([b[6], b[7]]);

    format!(
        "{data1:08X}-{data2:04X}-{data3:04X}-{}-{}",
        hex(&b[8..10]),
        hex(&b[10..16])
    )
}
751
752#[must_use]
756pub fn live_entry_names(data: &[u8]) -> Option<Vec<String>> {
757 let cursor = Cursor::new(data.to_vec());
758 let comp = cfb::CompoundFile::open(cursor).ok()?;
759 let mut names = Vec::new();
760 for entry in comp.walk() {
761 names.push(entry.name().to_string());
762 }
763 Some(names)
764}
765
766#[must_use]
769pub fn read_live_stream(data: &[u8], path: &str) -> Option<Vec<u8>> {
770 let cursor = Cursor::new(data.to_vec());
771 let mut comp = cfb::CompoundFile::open(cursor).ok()?;
772 let mut stream = comp.open_stream(path).ok()?;
773 let mut buf = Vec::new();
774 stream.read_to_end(&mut buf).ok()?;
775 Some(buf)
776}