Skip to main content

cfb_forensic/
lib.rs

1//! `cfb-forensic` — forensic carving over OLE Compound File Binary (`[MS-CFB]`)
2//! files.
3//!
4//! Happy-path reading — live storages/streams, clean-file metadata (CLSID,
5//! FILETIMEs, sizes) — is delegated to the mature [`cfb`] crate. This crate adds
6//! the **carving and anomaly layer** `cfb` deliberately hides: the directory
7//! entries, sectors, and slack space a spec-faithful reader skips because they
8//! are no longer part of the live tree.
9//!
10//! ```no_run
11//! let bytes: &[u8] = b"...";
12//! for anomaly in cfb_forensic::audit_bytes(bytes) {
13//!     println!("{} — {}", anomaly.code(), anomaly.note());
14//! }
15//! ```
16//!
17//! All findings are **observations**, hedged "consistent with", never verdicts —
18//! the analyst/tribunal concludes. Format constants come from
19//! [`forensicnomicon::olecf`]; nothing is hardcoded here.
20//!
21//! # Anomaly classes
22//!
23//! - [`OLECF-ORPHANED-DIR-ENTRY`](OlecfAnomaly::OrphanedDirEntry) — a stream/storage
24//!   directory entry that the live red-black tree no longer reaches: deleted-stream
25//!   metadata that survived, with name/size/timestamps/start-sector recovered and
26//!   the stream bytes carved from the still-resident FAT chain.
27//! - [`OLECF-FREE-SECTOR-RESIDUE`](OlecfAnomaly::FreeSectorResidue) — a FAT/mini-FAT
28//!   slot marked free whose backing sector still holds non-zero bytes.
29//! - [`OLECF-SLACK-RESIDUE`](OlecfAnomaly::SlackResidue) — non-zero bytes past a
30//!   stream's declared size in its final (mini-)sector.
31//! - [`OLECF-STRUCTURE-ANOMALY`](OlecfAnomaly::StructureAnomaly) — a red-black /
32//!   sibling-cycle / chain-loop / off-file-DIFAT structural violation, or a stream
33//!   entry whose CLSID / state-bits / FILETIMEs are non-zero (`[MS-CFB]` §2.6.3
34//!   requires them zero) — a tamper tell.
35//! - [`OLECF-ROOT-CLSID`](OlecfAnomaly::RootClsid) — provenance: the root/storage
36//!   CLSID and the create/modify FILETIMEs CFB carries.
37
38use std::io::{Cursor, Read};
39
40use forensicnomicon::olecf as k;
41use forensicnomicon::report::{Category, Finding, Location, Severity, Source, SubjectRef};
42
43pub mod raw;
44
45use raw::{DirEntry, RawCfb};
46
/// Upper bound, in bytes, on how much of the root mini-stream is materialized
/// for slack and mini-FAT residue analysis. The value is 16 MiB; it keeps a
/// hostile root size from driving an unbounded allocation.
const MAX_MINI_STREAM: usize = 16 * 1024 * 1024;
50
/// Extent of the file an audit covered; its label is reported as the
/// [`Source`] scope.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Scope {
    /// Every byte of the file was decoded.
    Whole,
}

impl Scope {
    /// Human-readable label for this scope.
    fn label(self) -> &'static str {
        // Irrefutable while `Whole` is the only variant; adding a variant
        // turns this into a compile error, forcing a label to be chosen.
        let Self::Whole = self;
        "whole file"
    }
}
65
66/// The producing [`Source`] for a `cfb-forensic` finding.
67#[must_use]
68pub fn source(scope: Scope) -> Source {
69    Source {
70        analyzer: "cfb-forensic".to_string(),
71        scope: scope.label().to_string(),
72        version: Some(env!("CARGO_PKG_VERSION").to_string()),
73    }
74}
75
/// Metadata recovered from a directory entry that the live tree no longer
/// reaches (an orphan).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct OrphanDetail {
    /// Stream id of the entry, i.e. its index in the directory array.
    pub sid: u32,
    /// Entry name as recovered (lossy UTF-16LE decode).
    pub name: String,
    /// Object type byte: `0x01` for a storage, `0x02` for a stream.
    pub object_type: u8,
    /// Stream size, in bytes, as declared by the entry.
    pub stream_size: u64,
    /// First sector id of the (still-resident) stream chain.
    pub start_sector: u32,
    /// Raw creation FILETIME (`0` when absent).
    pub create_time: u64,
    /// Raw modification FILETIME (`0` when absent).
    pub modify_time: u64,
    /// How many stream bytes were carved from the resident chain; `0` when
    /// nothing could be recovered.
    pub carved_len: usize,
}
97
/// The specific structural rule that an [`OlecfAnomaly::StructureAnomaly`]
/// reports as violated.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum StructureIssue {
    /// A stream entry whose CLSID is not all-zero (`[MS-CFB]` §2.6.3 requires
    /// zero). Carries the entry's stream id and name.
    StreamNonZeroClsid { sid: u32, name: String },
    /// A stream entry whose state bits are non-zero (`[MS-CFB]` §2.6.3
    /// requires zero). Carries the entry's stream id, name, and the raw bits.
    StreamNonZeroStateBits {
        sid: u32,
        name: String,
        state_bits: u32,
    },
    /// A stream entry whose create or modify FILETIME is non-zero
    /// (`[MS-CFB]` §2.6.3 requires zero). Carries the entry's stream id and name.
    StreamNonZeroFiletime { sid: u32, name: String },
    /// The directory chain or a FAT/mini-FAT chain revisits itself; `space`
    /// names the chain.
    ChainLoop { space: &'static str },
    /// A DIFAT slot points at FAT sector `sid`, which lies past the end of the
    /// file.
    DifatOffFile { sid: u32 },
    /// The header byte-order mark `value` is not the required little-endian
    /// `0xFFFE`.
    BadByteOrder { value: u16 },
}
119
120/// A forensic anomaly observed in an OLE Compound File. Each variant maps to a
121/// stable, scheme-prefixed `code` (the published contract).
122#[derive(Debug, Clone, PartialEq, Eq)]
123pub enum OlecfAnomaly {
124    /// `OLECF-ORPHANED-DIR-ENTRY` — a stream/storage entry unreachable from the
125    /// live root tree: deleted-stream metadata that survived.
126    OrphanedDirEntry(OrphanDetail),
127    /// `OLECF-FREE-SECTOR-RESIDUE` — a free FAT/mini-FAT slot whose backing
128    /// sector still holds non-zero bytes.
129    FreeSectorResidue {
130        /// The (mini-)sector id marked free.
131        sid: u32,
132        /// `"FAT"` or `"mini-FAT"`.
133        space: &'static str,
134        /// Byte offset in the file of the residual sector.
135        offset: u64,
136        /// Count of non-zero bytes recovered.
137        residue_len: usize,
138    },
139    /// `OLECF-SLACK-RESIDUE` — non-zero bytes past a stream's declared size in
140    /// its final allocated (mini-)sector.
141    SlackResidue {
142        /// The owning entry's SID.
143        sid: u32,
144        /// The owning entry's name.
145        name: String,
146        /// `"FAT"` or `"mini-FAT"`.
147        space: &'static str,
148        /// Number of non-zero slack bytes.
149        slack_len: usize,
150    },
151    /// `OLECF-STRUCTURE-ANOMALY` — a structural / tamper violation.
152    StructureAnomaly(StructureIssue),
153    /// `OLECF-ROOT-CLSID` — the root/storage CLSID and the FILETIMEs CFB carries.
154    RootClsid {
155        /// The entry's SID (`0` for the root storage).
156        sid: u32,
157        /// The entry's name.
158        name: String,
159        /// CLSID rendered as a canonical upper-case GUID string.
160        clsid: String,
161        /// Creation FILETIME (raw `u64`).
162        create_time: u64,
163        /// Modification FILETIME (raw `u64`).
164        modify_time: u64,
165    },
166}
167
168impl OlecfAnomaly {
169    /// The stable, scheme-prefixed machine code for this anomaly.
170    #[must_use]
171    pub fn code(&self) -> &'static str {
172        match self {
173            OlecfAnomaly::OrphanedDirEntry(_) => "OLECF-ORPHANED-DIR-ENTRY",
174            OlecfAnomaly::FreeSectorResidue { .. } => "OLECF-FREE-SECTOR-RESIDUE",
175            OlecfAnomaly::SlackResidue { .. } => "OLECF-SLACK-RESIDUE",
176            OlecfAnomaly::StructureAnomaly(_) => "OLECF-STRUCTURE-ANOMALY",
177            OlecfAnomaly::RootClsid { .. } => "OLECF-ROOT-CLSID",
178        }
179    }
180
181    /// Severity of this anomaly.
182    #[must_use]
183    pub fn severity(&self) -> Severity {
184        match self {
185            OlecfAnomaly::OrphanedDirEntry(_) => Severity::High,
186            OlecfAnomaly::FreeSectorResidue { .. } => Severity::Medium,
187            OlecfAnomaly::SlackResidue { slack_len, .. } => {
188                if *slack_len >= k::MINI_SECTOR_SIZE {
189                    Severity::Medium
190                } else {
191                    Severity::Low
192                }
193            }
194            OlecfAnomaly::StructureAnomaly(issue) => match issue {
195                StructureIssue::StreamNonZeroClsid { .. }
196                | StructureIssue::StreamNonZeroStateBits { .. }
197                | StructureIssue::StreamNonZeroFiletime { .. }
198                | StructureIssue::ChainLoop { .. }
199                | StructureIssue::DifatOffFile { .. } => Severity::High,
200                StructureIssue::BadByteOrder { .. } => Severity::Medium,
201            },
202            OlecfAnomaly::RootClsid { .. } => Severity::Info,
203        }
204    }
205
206    /// Analytical lens for this anomaly.
207    #[must_use]
208    pub fn category(&self) -> Category {
209        match self {
210            OlecfAnomaly::OrphanedDirEntry(_)
211            | OlecfAnomaly::FreeSectorResidue { .. }
212            | OlecfAnomaly::SlackResidue { .. } => Category::Residue,
213            OlecfAnomaly::StructureAnomaly(_) => Category::Integrity,
214            OlecfAnomaly::RootClsid { .. } => Category::Provenance,
215        }
216    }
217
218    /// MITRE ATT&CK techniques this anomaly is **consistent with** (never a verdict).
219    #[must_use]
220    pub fn mitre(&self) -> &'static [&'static str] {
221        match self {
222            OlecfAnomaly::OrphanedDirEntry(_) => &["T1070", "T1564"],
223            OlecfAnomaly::FreeSectorResidue { .. } | OlecfAnomaly::SlackResidue { .. } => {
224                &["T1564"]
225            }
226            OlecfAnomaly::StructureAnomaly(_) => &["T1070", "T1027"],
227            OlecfAnomaly::RootClsid { .. } => &[],
228        }
229    }
230
231    /// Human-readable, hedged note.
232    #[must_use]
233    pub fn note(&self) -> String {
234        match self {
235            OlecfAnomaly::OrphanedDirEntry(d) => format!(
236                "Directory entry '{}' (sid {}) is not reachable from the live root tree; \
237                 consistent with a deleted stream whose metadata survived. {} byte(s) carved \
238                 from the resident FAT chain.",
239                d.name, d.sid, d.carved_len
240            ),
241            OlecfAnomaly::FreeSectorResidue {
242                sid,
243                space,
244                offset,
245                residue_len,
246            } => format!(
247                "{space} sector {sid} is marked free but holds {residue_len} non-zero byte(s) at \
248                 offset {offset}; consistent with deleted-stream remnant."
249            ),
250            OlecfAnomaly::SlackResidue {
251                name,
252                space,
253                slack_len,
254                ..
255            } => format!(
256                "Stream '{name}' leaves {slack_len} non-zero {space} slack byte(s) past its \
257                 declared size; consistent with residue from a prior, larger allocation."
258            ),
259            OlecfAnomaly::StructureAnomaly(issue) => issue.note(),
260            OlecfAnomaly::RootClsid {
261                name,
262                clsid,
263                create_time,
264                modify_time,
265                ..
266            } => format!(
267                "{name} CLSID {clsid}; create FILETIME {create_time}, modify FILETIME {modify_time}."
268            ),
269        }
270    }
271
272    /// Build the subject reference, if this anomaly is about a named object.
273    fn subject(&self) -> Option<SubjectRef> {
274        let (sid, name) = match self {
275            OlecfAnomaly::OrphanedDirEntry(d) => (d.sid, d.name.clone()),
276            OlecfAnomaly::SlackResidue { sid, name, .. }
277            | OlecfAnomaly::RootClsid { sid, name, .. } => (*sid, name.clone()),
278            OlecfAnomaly::StructureAnomaly(issue) => return issue.subject(),
279            OlecfAnomaly::FreeSectorResidue { .. } => return None,
280        };
281        Some(SubjectRef {
282            scheme: "olecf".to_string(),
283            kind: "directory_entry".to_string(),
284            id: format!("sid:{sid}"),
285            label: Some(name),
286        })
287    }
288
289    /// Convert to a canonical [`Finding`]. Dynamic codes carry runtime detail, so
290    /// this uses the [`forensicnomicon::report`] builder directly.
291    #[must_use]
292    pub fn to_finding(&self, src: Source) -> Finding {
293        let mut builder = Finding::observation(self.severity(), self.category(), self.code())
294            .note(self.note())
295            .source(src);
296
297        if let Some(subject) = self.subject() {
298            builder = builder.subject(subject);
299        }
300        for technique in self.mitre() {
301            builder = builder.mitre(*technique);
302        }
303        for (field, value, loc) in self.evidence() {
304            builder = match loc {
305                Some(location) => builder.evidence_at(field, value, location),
306                None => builder.evidence(field, value),
307            };
308        }
309        builder.build()
310    }
311
312    /// Evidence rows for this anomaly.
313    fn evidence(&self) -> Vec<(String, String, Option<Location>)> {
314        match self {
315            OlecfAnomaly::OrphanedDirEntry(d) => vec![
316                ("name".into(), d.name.clone(), None),
317                (
318                    "object_type".into(),
319                    format!("0x{:02x}", d.object_type),
320                    None,
321                ),
322                (
323                    "stream_size".into(),
324                    d.stream_size.to_string(),
325                    Some(Location::RecordId(u64::from(d.sid))),
326                ),
327                ("start_sector".into(), d.start_sector.to_string(), None),
328                ("carved_len".into(), d.carved_len.to_string(), None),
329                ("create_time".into(), d.create_time.to_string(), None),
330                ("modify_time".into(), d.modify_time.to_string(), None),
331            ],
332            OlecfAnomaly::FreeSectorResidue {
333                space,
334                residue_len,
335                offset,
336                ..
337            } => vec![
338                ("space".into(), (*space).to_string(), None),
339                (
340                    "residue_len".into(),
341                    residue_len.to_string(),
342                    Some(Location::ByteOffset(*offset)),
343                ),
344            ],
345            OlecfAnomaly::SlackResidue {
346                space, slack_len, ..
347            } => vec![
348                ("space".into(), (*space).to_string(), None),
349                ("slack_len".into(), slack_len.to_string(), None),
350            ],
351            OlecfAnomaly::StructureAnomaly(issue) => issue.evidence(),
352            OlecfAnomaly::RootClsid {
353                clsid,
354                create_time,
355                modify_time,
356                ..
357            } => vec![
358                ("clsid".into(), clsid.clone(), None),
359                ("create_time".into(), create_time.to_string(), None),
360                ("modify_time".into(), modify_time.to_string(), None),
361            ],
362        }
363    }
364}
365
366impl StructureIssue {
367    fn note(&self) -> String {
368        match self {
369            StructureIssue::StreamNonZeroClsid { name, sid } => format!(
370                "Stream entry '{name}' (sid {sid}) carries a non-zero CLSID; [MS-CFB] §2.6.3 \
371                 requires it zero — consistent with tampering or a non-conformant writer."
372            ),
373            StructureIssue::StreamNonZeroStateBits {
374                name,
375                sid,
376                state_bits,
377            } => format!(
378                "Stream entry '{name}' (sid {sid}) carries non-zero state bits 0x{state_bits:08x}; \
379                 [MS-CFB] §2.6.3 requires them zero — consistent with tampering."
380            ),
381            StructureIssue::StreamNonZeroFiletime { name, sid } => format!(
382                "Stream entry '{name}' (sid {sid}) carries a non-zero create/modify FILETIME; \
383                 [MS-CFB] §2.6.3 requires it zero — consistent with tampering or timestomping."
384            ),
385            StructureIssue::ChainLoop { space } => format!(
386                "The {space} chain loops back on itself; consistent with structural corruption \
387                 or a crafted file."
388            ),
389            StructureIssue::DifatOffFile { sid } => format!(
390                "A DIFAT slot references FAT sector {sid} beyond the end of the file; consistent \
391                 with structural corruption or a crafted file."
392            ),
393            StructureIssue::BadByteOrder { value } => format!(
394                "Header byte-order mark is 0x{value:04x}, not the required little-endian 0xFFFE."
395            ),
396        }
397    }
398
399    fn subject(&self) -> Option<SubjectRef> {
400        let (sid, name) = match self {
401            StructureIssue::StreamNonZeroClsid { sid, name }
402            | StructureIssue::StreamNonZeroStateBits { sid, name, .. }
403            | StructureIssue::StreamNonZeroFiletime { sid, name } => (*sid, name.clone()),
404            StructureIssue::ChainLoop { .. }
405            | StructureIssue::DifatOffFile { .. }
406            | StructureIssue::BadByteOrder { .. } => return None,
407        };
408        Some(SubjectRef {
409            scheme: "olecf".to_string(),
410            kind: "directory_entry".to_string(),
411            id: format!("sid:{sid}"),
412            label: Some(name),
413        })
414    }
415
416    fn evidence(&self) -> Vec<(String, String, Option<Location>)> {
417        match self {
418            StructureIssue::StreamNonZeroStateBits { state_bits, .. } => {
419                vec![("state_bits".into(), format!("0x{state_bits:08x}"), None)]
420            }
421            StructureIssue::DifatOffFile { sid } => {
422                vec![("fat_sector".into(), sid.to_string(), None)]
423            }
424            StructureIssue::BadByteOrder { value } => {
425                vec![("byte_order".into(), format!("0x{value:04x}"), None)]
426            }
427            _ => Vec::new(),
428        }
429    }
430}
431
432/// Audit a compound file's bytes, returning every anomaly observed. Never panics
433/// on malformed or hostile input; a buffer that is not a CFB yields an empty
434/// list.
435#[must_use]
436pub fn audit_bytes(data: &[u8]) -> Vec<OlecfAnomaly> {
437    let Some(raw) = raw::decode(data) else {
438        return Vec::new();
439    };
440
441    let mut anomalies = Vec::new();
442
443    // Header sanity (structure).
444    if raw.byte_order != k::BYTE_ORDER_LE {
445        anomalies.push(OlecfAnomaly::StructureAnomaly(
446            StructureIssue::BadByteOrder {
447                value: raw.byte_order,
448            },
449        ));
450    }
451
452    detect_orphans(data, &raw, &mut anomalies);
453    detect_structure(data, &raw, &mut anomalies);
454    detect_free_residue(data, &raw, &mut anomalies);
455    detect_slack(data, &raw, &mut anomalies);
456    surface_root_clsid(&raw, &mut anomalies);
457
458    anomalies
459}
460
461/// Audit and return canonical [`Finding`]s, tagged with the producing [`Source`].
462#[must_use]
463pub fn audit_findings(data: &[u8], scope: Scope) -> Vec<Finding> {
464    let src = source(scope);
465    audit_bytes(data)
466        .into_iter()
467        .map(|a| a.to_finding(src.clone()))
468        .collect()
469}
470
471/// The headline carving pass: every allocated stream/storage entry not reachable
472/// from the live root tree is an orphan; recover its metadata and carve its
473/// resident stream bytes.
474fn detect_orphans(data: &[u8], raw: &RawCfb, out: &mut Vec<OlecfAnomaly>) {
475    let reachable = raw::reachable_sids(&raw.dir_entries);
476    for (idx, entry) in raw.dir_entries.iter().enumerate() {
477        if reachable.get(idx).copied().unwrap_or(false) {
478            continue;
479        }
480        // Only allocated stream/storage entries are forensically meaningful
481        // orphans; an unallocated 0x00 slot is just empty directory space.
482        if !matches!(entry.object_type, 0x01 | 0x02) {
483            continue;
484        }
485        let carved = carve_stream(data, raw, entry);
486        out.push(OlecfAnomaly::OrphanedDirEntry(OrphanDetail {
487            sid: entry.sid,
488            name: entry.name.clone(),
489            object_type: entry.object_type,
490            stream_size: entry.stream_size,
491            start_sector: entry.start_sector,
492            create_time: entry.create_time,
493            modify_time: entry.modify_time,
494            carved_len: carved.len(),
495        }));
496    }
497}
498
499/// Carve a stream's bytes by following its still-resident FAT (or mini-FAT)
500/// chain. Returns the recovered bytes truncated to the declared size; loop- and
501/// length-guarded. Streams below the mini-stream cutoff live in the mini-FAT,
502/// which we resolve through the root entry's mini-stream.
503fn carve_stream(data: &[u8], raw: &RawCfb, entry: &DirEntry) -> Vec<u8> {
504    if entry.object_type != 0x02 || entry.stream_size == 0 {
505        return Vec::new();
506    }
507    let size = usize::try_from(entry.stream_size).unwrap_or(usize::MAX);
508
509    if entry.stream_size < u64::from(raw.mini_stream_cutoff) {
510        carve_mini(data, raw, entry.start_sector, size)
511    } else {
512        carve_fat(data, raw, entry.start_sector, size)
513    }
514}
515
516/// Carve from the regular FAT chain.
517fn carve_fat(data: &[u8], raw: &RawCfb, start: u32, size: usize) -> Vec<u8> {
518    let mut out = Vec::with_capacity(size.min(1 << 20));
519    let mut sid = start;
520    let mut visited = vec![false; raw.fat.len()];
521    while sid <= k::MAXREGSECT && out.len() < size {
522        if let Some(slot) = visited.get_mut(sid as usize) {
523            if *slot {
524                break;
525            }
526            *slot = true;
527        } else {
528            break;
529        }
530        let start_off = (u64::from(sid) + 1).saturating_mul(raw.sector_size as u64);
531        if let Ok(off) = usize::try_from(start_off) {
532            if let Some(s) = data.get(off..off.saturating_add(raw.sector_size)) {
533                out.extend_from_slice(s);
534            }
535        }
536        sid = raw.fat.get(sid as usize).copied().unwrap_or(k::ENDOFCHAIN);
537    }
538    out.truncate(size);
539    out
540}
541
542/// Carve from the mini-FAT chain via the root entry's mini-stream.
543fn carve_mini(data: &[u8], raw: &RawCfb, start: u32, size: usize) -> Vec<u8> {
544    // The mini-stream is the root entry's own (regular-FAT) stream.
545    let Some(root) = raw.dir_entries.first() else {
546        return Vec::new();
547    };
548    let mini_stream = carve_fat(data, raw, root.start_sector, MAX_MINI_STREAM);
549    let mini_size = 1usize << raw.mini_sector_shift.clamp(1, 16);
550
551    let mut out = Vec::with_capacity(size.min(1 << 20));
552    let mut msid = start;
553    let mut visited = vec![false; raw.mini_fat.len()];
554    while msid <= k::MAXREGSECT && out.len() < size {
555        if let Some(slot) = visited.get_mut(msid as usize) {
556            if *slot {
557                break;
558            }
559            *slot = true;
560        } else {
561            break;
562        }
563        let off = (msid as usize).saturating_mul(mini_size);
564        if let Some(s) = mini_stream.get(off..off.saturating_add(mini_size)) {
565            out.extend_from_slice(s);
566        }
567        msid = raw
568            .mini_fat
569            .get(msid as usize)
570            .copied()
571            .unwrap_or(k::ENDOFCHAIN);
572    }
573    out.truncate(size);
574    out
575}
576
577/// Detect the `[MS-CFB]` §2.6.3 "must be zero on a stream" tamper tells and the
578/// off-file DIFAT structural violation.
579fn detect_structure(data: &[u8], raw: &RawCfb, out: &mut Vec<OlecfAnomaly>) {
580    for entry in &raw.dir_entries {
581        if !entry.is_stream() {
582            continue;
583        }
584        if entry.clsid != [0u8; 16] {
585            out.push(OlecfAnomaly::StructureAnomaly(
586                StructureIssue::StreamNonZeroClsid {
587                    sid: entry.sid,
588                    name: entry.name.clone(),
589                },
590            ));
591        }
592        if entry.state_bits != 0 {
593            out.push(OlecfAnomaly::StructureAnomaly(
594                StructureIssue::StreamNonZeroStateBits {
595                    sid: entry.sid,
596                    name: entry.name.clone(),
597                    state_bits: entry.state_bits,
598                },
599            ));
600        }
601        if entry.create_time != 0 || entry.modify_time != 0 {
602            out.push(OlecfAnomaly::StructureAnomaly(
603                StructureIssue::StreamNonZeroFiletime {
604                    sid: entry.sid,
605                    name: entry.name.clone(),
606                },
607            ));
608        }
609    }
610
611    // A DIFAT FAT-sector pointer beyond the file is an off-file reference.
612    let max_sid = (data.len() / raw.sector_size.max(1)) as u64;
613    for i in 0..k::DIFAT_HEADER_COUNT {
614        let off = k::DIFAT_HEADER_OFFSET + i * 4;
615        let mut b = [0u8; 4];
616        if let Some(s) = data.get(off..off + 4) {
617            b.copy_from_slice(s);
618        }
619        let sid = u32::from_le_bytes(b);
620        if sid <= k::MAXREGSECT && u64::from(sid) >= max_sid {
621            out.push(OlecfAnomaly::StructureAnomaly(
622                StructureIssue::DifatOffFile { sid },
623            ));
624        }
625    }
626}
627
628/// Detect free FAT/mini-FAT slots whose backing sector still holds non-zero bytes.
629fn detect_free_residue(data: &[u8], raw: &RawCfb, out: &mut Vec<OlecfAnomaly>) {
630    // Regular FAT: a FREESECT slot at index `sid` ⇒ sector (sid+1)<<sector_shift.
631    for (sid, &slot) in raw.fat.iter().enumerate() {
632        if slot != k::FREESECT {
633            continue;
634        }
635        let sid = sid as u32;
636        let off = (u64::from(sid) + 1).saturating_mul(raw.sector_size as u64);
637        let Ok(start) = usize::try_from(off) else {
638            continue;
639        };
640        let Some(sector) = data.get(start..start.saturating_add(raw.sector_size)) else {
641            continue;
642        };
643        let residue = sector.iter().filter(|&&b| b != 0).count();
644        if residue > 0 {
645            out.push(OlecfAnomaly::FreeSectorResidue {
646                sid,
647                space: "FAT",
648                offset: off,
649                residue_len: residue,
650            });
651        }
652    }
653
654    // Mini-FAT: free mini-sectors with non-zero residue inside the mini-stream.
655    let mini_size = 1usize << raw.mini_sector_shift.clamp(1, 16);
656    if let Some(root) = raw.dir_entries.first() {
657        let mini_stream = carve_fat(data, raw, root.start_sector, MAX_MINI_STREAM);
658        for (msid, &slot) in raw.mini_fat.iter().enumerate() {
659            if slot != k::FREESECT {
660                continue;
661            }
662            let off = msid.saturating_mul(mini_size);
663            let Some(sector) = mini_stream.get(off..off.saturating_add(mini_size)) else {
664                continue;
665            };
666            let residue = sector.iter().filter(|&&b| b != 0).count();
667            if residue > 0 {
668                out.push(OlecfAnomaly::FreeSectorResidue {
669                    sid: msid as u32,
670                    space: "mini-FAT",
671                    offset: off as u64,
672                    residue_len: residue,
673                });
674            }
675        }
676    }
677}
678
679/// Detect non-zero slack past a live stream's declared size in its final sector.
680fn detect_slack(data: &[u8], raw: &RawCfb, out: &mut Vec<OlecfAnomaly>) {
681    let reachable = raw::reachable_sids(&raw.dir_entries);
682    let mini_size = 1usize << raw.mini_sector_shift.clamp(1, 16);
683
684    for (idx, entry) in raw.dir_entries.iter().enumerate() {
685        if !entry.is_stream() || entry.stream_size == 0 {
686            continue;
687        }
688        if !reachable.get(idx).copied().unwrap_or(false) {
689            continue; // orphans carve their own bytes; slack is for live streams
690        }
691        let size = usize::try_from(entry.stream_size).unwrap_or(usize::MAX);
692        let in_mini = entry.stream_size < u64::from(raw.mini_stream_cutoff);
693        let (unit, space, bytes) = if in_mini {
694            (
695                mini_size,
696                "mini-FAT",
697                carve_mini(data, raw, entry.start_sector, MAX_MINI_STREAM),
698            )
699        } else {
700            (
701                raw.sector_size,
702                "FAT",
703                carve_fat(data, raw, entry.start_sector, MAX_MINI_STREAM),
704            )
705        };
706        if unit == 0 || size % unit == 0 {
707            continue; // exact multiple ⇒ no slack region
708        }
709        let slack_start = size;
710        let slack_end = bytes.len();
711        if slack_end > slack_start {
712            let slack = &bytes[slack_start..slack_end];
713            let nonzero = slack.iter().filter(|&&b| b != 0).count();
714            if nonzero > 0 {
715                out.push(OlecfAnomaly::SlackResidue {
716                    sid: entry.sid,
717                    name: entry.name.clone(),
718                    space,
719                    slack_len: nonzero,
720                });
721            }
722        }
723    }
724}
725
726/// Surface the root storage's CLSID and FILETIMEs as a provenance breadcrumb.
727fn surface_root_clsid(raw: &RawCfb, out: &mut Vec<OlecfAnomaly>) {
728    if let Some(root) = raw.dir_entries.first() {
729        out.push(OlecfAnomaly::RootClsid {
730            sid: root.sid,
731            name: if root.name.is_empty() {
732                "Root Entry".to_string()
733            } else {
734                root.name.clone()
735            },
736            clsid: format_clsid(&root.clsid),
737            create_time: root.create_time,
738            modify_time: root.modify_time,
739        });
740    }
741}
742
/// Format a 16-byte CLSID as an upper-case GUID string of the form
/// `XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX`. The first three groups are stored
/// little-endian and are byte-swapped for display; the remaining eight bytes
/// are printed in storage order (`[MS-DTYP]` GUID).
fn format_clsid(b: &[u8; 16]) -> String {
    let data1 = u32::from_le_bytes([b[0], b[1], b[2], b[3]]);
    let data2 = u16::from_le_bytes([b[4], b[5]]);
    let data3 = u16::from_le_bytes([b[6], b[7]]);
    let node: String = b[10..].iter().map(|byte| format!("{byte:02X}")).collect();
    format!(
        "{data1:08X}-{data2:04X}-{data3:04X}-{:02X}{:02X}-{node}",
        b[8], b[9]
    )
}
751
752/// Cross-check helper: the set of live stream/storage names the `cfb` crate
753/// reaches, used by tests/consumers as a sanity oracle against our orphan set.
754/// Returns `None` if `cfb` cannot open the bytes at all.
755#[must_use]
756pub fn live_entry_names(data: &[u8]) -> Option<Vec<String>> {
757    let cursor = Cursor::new(data.to_vec());
758    let comp = cfb::CompoundFile::open(cursor).ok()?;
759    let mut names = Vec::new();
760    for entry in comp.walk() {
761        names.push(entry.name().to_string());
762    }
763    Some(names)
764}
765
766/// Read a live stream's bytes via the `cfb` crate (happy-path extraction), for
767/// consumers that want clean-file stream content rather than carved residue.
768#[must_use]
769pub fn read_live_stream(data: &[u8], path: &str) -> Option<Vec<u8>> {
770    let cursor = Cursor::new(data.to_vec());
771    let mut comp = cfb::CompoundFile::open(cursor).ok()?;
772    let mut stream = comp.open_stream(path).ok()?;
773    let mut buf = Vec::new();
774    stream.read_to_end(&mut buf).ok()?;
775    Some(buf)
776}