use std::io::{Cursor, Read};
use forensicnomicon::olecf as k;
use forensicnomicon::report::{Category, Finding, Location, Severity, Source, SubjectRef};
pub mod raw;
use raw::{DirEntry, RawCfb};
/// Byte cap (`1 << 24`, i.e. 16 MiB) passed as the `size` limit to the carving
/// helpers when the whole mini stream, or a full chain for slack inspection, is read.
const MAX_MINI_STREAM: usize = 1 << 24;
/// Portion of the input an audit was run over.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Scope {
    /// The complete file was examined.
    Whole,
}

impl Scope {
    /// Human-readable label for this scope, used when describing the analysis source.
    fn label(self) -> &'static str {
        match self {
            Self::Whole => "whole file",
        }
    }
}
/// Builds the [`Source`] descriptor attached to every finding this crate emits.
///
/// The analyzer name is fixed, the scope text comes from [`Scope::label`], and the
/// version is the crate version baked in at compile time.
#[must_use]
pub fn source(scope: Scope) -> Source {
    let analyzer = String::from("cfb-forensic");
    let version = Some(String::from(env!("CARGO_PKG_VERSION")));
    Source {
        analyzer,
        scope: String::from(scope.label()),
        version,
    }
}
/// Snapshot of a directory entry that is not reachable from the live directory tree.
///
/// All fields except `carved_len` are copied from the decoded directory entry.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct OrphanDetail {
/// Identifier (`sid`) of the directory entry.
pub sid: u32,
/// Entry name as decoded.
pub name: String,
/// Raw object-type byte of the entry (rendered as hex in evidence).
pub object_type: u8,
/// Stream size the entry declares, in bytes.
pub stream_size: u64,
/// First sector of the entry's chain, as recorded in the entry.
pub start_sector: u32,
/// Creation timestamp copied verbatim from the entry (presumably a raw FILETIME — confirm in `raw`).
pub create_time: u64,
/// Modification timestamp copied verbatim from the entry (presumably a raw FILETIME — confirm in `raw`).
pub modify_time: u64,
/// Number of bytes recovered when the entry's chain was carved.
pub carved_len: usize,
}
/// Structural irregularities reported under the `OLECF-STRUCTURE-ANOMALY` code.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum StructureIssue {
/// A stream entry carries a CLSID that is not all zero bytes.
StreamNonZeroClsid { sid: u32, name: String },
/// A stream entry carries non-zero state bits; `state_bits` is the raw value.
StreamNonZeroStateBits {
sid: u32,
name: String,
state_bits: u32,
},
/// A stream entry carries a non-zero creation or modification timestamp.
StreamNonZeroFiletime { sid: u32, name: String },
/// A sector chain revisits a sector; `space` names the chain in the generated note.
ChainLoop { space: &'static str },
/// A header DIFAT slot names a FAT sector (`sid`) that lies beyond the file.
DifatOffFile { sid: u32 },
/// The header byte-order mark differs from the expected little-endian value.
BadByteOrder { value: u16 },
}
/// Everything the auditor can report about a compound file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum OlecfAnomaly {
/// A storage or stream directory entry that is not reachable from the live tree.
OrphanedDirEntry(OrphanDetail),
/// A sector marked free in its allocation table still holds non-zero bytes.
FreeSectorResidue {
/// Index of the free slot in the FAT or mini-FAT.
sid: u32,
/// Which table the slot belongs to: `"FAT"` or `"mini-FAT"`.
space: &'static str,
/// For the FAT: byte offset of the sector in the file. For the mini-FAT: offset
/// of the mini sector inside the carved mini stream, not a file offset.
offset: u64,
/// Number of non-zero bytes found in the sector.
residue_len: usize,
},
/// Non-zero bytes found after a live stream's declared size within its chain.
SlackResidue {
sid: u32,
name: String,
/// `"FAT"` or `"mini-FAT"`, depending on where the stream is stored.
space: &'static str,
/// Count of non-zero slack bytes (not the total slack length).
slack_len: usize,
},
/// A structural irregularity; see [`StructureIssue`].
StructureAnomaly(StructureIssue),
/// Informational record of the first directory entry's CLSID and timestamps.
RootClsid {
sid: u32,
name: String,
/// CLSID rendered in hyphenated upper-case hex form.
clsid: String,
create_time: u64,
modify_time: u64,
},
}
impl OlecfAnomaly {
#[must_use]
pub fn code(&self) -> &'static str {
match self {
OlecfAnomaly::OrphanedDirEntry(_) => "OLECF-ORPHANED-DIR-ENTRY",
OlecfAnomaly::FreeSectorResidue { .. } => "OLECF-FREE-SECTOR-RESIDUE",
OlecfAnomaly::SlackResidue { .. } => "OLECF-SLACK-RESIDUE",
OlecfAnomaly::StructureAnomaly(_) => "OLECF-STRUCTURE-ANOMALY",
OlecfAnomaly::RootClsid { .. } => "OLECF-ROOT-CLSID",
}
}
#[must_use]
pub fn severity(&self) -> Severity {
match self {
OlecfAnomaly::OrphanedDirEntry(_) => Severity::High,
OlecfAnomaly::FreeSectorResidue { .. } => Severity::Medium,
OlecfAnomaly::SlackResidue { slack_len, .. } => {
if *slack_len >= k::MINI_SECTOR_SIZE {
Severity::Medium
} else {
Severity::Low
}
}
OlecfAnomaly::StructureAnomaly(issue) => match issue {
StructureIssue::StreamNonZeroClsid { .. }
| StructureIssue::StreamNonZeroStateBits { .. }
| StructureIssue::StreamNonZeroFiletime { .. }
| StructureIssue::ChainLoop { .. }
| StructureIssue::DifatOffFile { .. } => Severity::High,
StructureIssue::BadByteOrder { .. } => Severity::Medium,
},
OlecfAnomaly::RootClsid { .. } => Severity::Info,
}
}
#[must_use]
pub fn category(&self) -> Category {
match self {
OlecfAnomaly::OrphanedDirEntry(_)
| OlecfAnomaly::FreeSectorResidue { .. }
| OlecfAnomaly::SlackResidue { .. } => Category::Residue,
OlecfAnomaly::StructureAnomaly(_) => Category::Integrity,
OlecfAnomaly::RootClsid { .. } => Category::Provenance,
}
}
#[must_use]
pub fn mitre(&self) -> &'static [&'static str] {
match self {
OlecfAnomaly::OrphanedDirEntry(_) => &["T1070", "T1564"],
OlecfAnomaly::FreeSectorResidue { .. } | OlecfAnomaly::SlackResidue { .. } => {
&["T1564"]
}
OlecfAnomaly::StructureAnomaly(_) => &["T1070", "T1027"],
OlecfAnomaly::RootClsid { .. } => &[],
}
}
#[must_use]
pub fn note(&self) -> String {
match self {
OlecfAnomaly::OrphanedDirEntry(d) => format!(
"Directory entry '{}' (sid {}) is not reachable from the live root tree; \
consistent with a deleted stream whose metadata survived. {} byte(s) carved \
from the resident FAT chain.",
d.name, d.sid, d.carved_len
),
OlecfAnomaly::FreeSectorResidue {
sid,
space,
offset,
residue_len,
} => format!(
"{space} sector {sid} is marked free but holds {residue_len} non-zero byte(s) at \
offset {offset}; consistent with deleted-stream remnant."
),
OlecfAnomaly::SlackResidue {
name,
space,
slack_len,
..
} => format!(
"Stream '{name}' leaves {slack_len} non-zero {space} slack byte(s) past its \
declared size; consistent with residue from a prior, larger allocation."
),
OlecfAnomaly::StructureAnomaly(issue) => issue.note(),
OlecfAnomaly::RootClsid {
name,
clsid,
create_time,
modify_time,
..
} => format!(
"{name} CLSID {clsid}; create FILETIME {create_time}, modify FILETIME {modify_time}."
),
}
}
fn subject(&self) -> Option<SubjectRef> {
let (sid, name) = match self {
OlecfAnomaly::OrphanedDirEntry(d) => (d.sid, d.name.clone()),
OlecfAnomaly::SlackResidue { sid, name, .. }
| OlecfAnomaly::RootClsid { sid, name, .. } => (*sid, name.clone()),
OlecfAnomaly::StructureAnomaly(issue) => return issue.subject(),
OlecfAnomaly::FreeSectorResidue { .. } => return None,
};
Some(SubjectRef {
scheme: "olecf".to_string(),
kind: "directory_entry".to_string(),
id: format!("sid:{sid}"),
label: Some(name),
})
}
#[must_use]
pub fn to_finding(&self, src: Source) -> Finding {
let mut builder = Finding::observation(self.severity(), self.category(), self.code())
.note(self.note())
.source(src);
if let Some(subject) = self.subject() {
builder = builder.subject(subject);
}
for technique in self.mitre() {
builder = builder.mitre(*technique);
}
for (field, value, loc) in self.evidence() {
builder = match loc {
Some(location) => builder.evidence_at(field, value, location),
None => builder.evidence(field, value),
};
}
builder.build()
}
fn evidence(&self) -> Vec<(String, String, Option<Location>)> {
match self {
OlecfAnomaly::OrphanedDirEntry(d) => vec![
("name".into(), d.name.clone(), None),
(
"object_type".into(),
format!("0x{:02x}", d.object_type),
None,
),
(
"stream_size".into(),
d.stream_size.to_string(),
Some(Location::RecordId(u64::from(d.sid))),
),
("start_sector".into(), d.start_sector.to_string(), None),
("carved_len".into(), d.carved_len.to_string(), None),
("create_time".into(), d.create_time.to_string(), None),
("modify_time".into(), d.modify_time.to_string(), None),
],
OlecfAnomaly::FreeSectorResidue {
space,
residue_len,
offset,
..
} => vec![
("space".into(), (*space).to_string(), None),
(
"residue_len".into(),
residue_len.to_string(),
Some(Location::ByteOffset(*offset)),
),
],
OlecfAnomaly::SlackResidue {
space, slack_len, ..
} => vec![
("space".into(), (*space).to_string(), None),
("slack_len".into(), slack_len.to_string(), None),
],
OlecfAnomaly::StructureAnomaly(issue) => issue.evidence(),
OlecfAnomaly::RootClsid {
clsid,
create_time,
modify_time,
..
} => vec![
("clsid".into(), clsid.clone(), None),
("create_time".into(), create_time.to_string(), None),
("modify_time".into(), modify_time.to_string(), None),
],
}
}
}
impl StructureIssue {
    /// Human-readable explanation of the structural issue.
    fn note(&self) -> String {
        match self {
            Self::BadByteOrder { value } => format!(
                "Header byte-order mark is 0x{value:04x}, not the required little-endian 0xFFFE."
            ),
            Self::DifatOffFile { sid } => format!(
                "A DIFAT slot references FAT sector {sid} beyond the end of the file; consistent \
                 with structural corruption or a crafted file."
            ),
            Self::ChainLoop { space } => format!(
                "The {space} chain loops back on itself; consistent with structural corruption \
                 or a crafted file."
            ),
            Self::StreamNonZeroClsid { name, sid } => format!(
                "Stream entry '{name}' (sid {sid}) carries a non-zero CLSID; [MS-CFB] §2.6.3 \
                 requires it zero — consistent with tampering or a non-conformant writer."
            ),
            Self::StreamNonZeroStateBits {
                name,
                sid,
                state_bits,
            } => format!(
                "Stream entry '{name}' (sid {sid}) carries non-zero state bits 0x{state_bits:08x}; \
                 [MS-CFB] §2.6.3 requires them zero — consistent with tampering."
            ),
            Self::StreamNonZeroFiletime { name, sid } => format!(
                "Stream entry '{name}' (sid {sid}) carries a non-zero create/modify FILETIME; \
                 [MS-CFB] §2.6.3 requires it zero — consistent with tampering or timestomping."
            ),
        }
    }

    /// Directory entry the issue is about; `None` for file-level issues.
    fn subject(&self) -> Option<SubjectRef> {
        match self {
            Self::ChainLoop { .. } | Self::DifatOffFile { .. } | Self::BadByteOrder { .. } => None,
            Self::StreamNonZeroClsid { sid, name }
            | Self::StreamNonZeroStateBits { sid, name, .. }
            | Self::StreamNonZeroFiletime { sid, name } => Some(SubjectRef {
                scheme: String::from("olecf"),
                kind: String::from("directory_entry"),
                id: format!("sid:{sid}"),
                label: Some(name.clone()),
            }),
        }
    }

    /// Evidence rows for the issue; variants that carry no extra value yield none.
    fn evidence(&self) -> Vec<(String, String, Option<Location>)> {
        // Listing every variant keeps the match exhaustive, so adding a variant
        // forces a decision here.
        let (field, value) = match self {
            Self::StreamNonZeroStateBits { state_bits, .. } => {
                ("state_bits", format!("0x{state_bits:08x}"))
            }
            Self::DifatOffFile { sid } => ("fat_sector", sid.to_string()),
            Self::BadByteOrder { value } => ("byte_order", format!("0x{value:04x}")),
            Self::StreamNonZeroClsid { .. }
            | Self::StreamNonZeroFiletime { .. }
            | Self::ChainLoop { .. } => return Vec::new(),
        };
        vec![(field.to_string(), value, None)]
    }
}
/// Audits a compound file held in memory and returns every anomaly found.
///
/// Returns an empty list when `raw::decode` cannot decode `data`. Otherwise the
/// result holds, in order: the byte-order check, orphaned entries, structure issues,
/// free-sector residue, slack residue, and the root CLSID record.
#[must_use]
pub fn audit_bytes(data: &[u8]) -> Vec<OlecfAnomaly> {
    let decoded = match raw::decode(data) {
        Some(decoded) => decoded,
        None => return Vec::new(),
    };

    let mut found = Vec::new();
    let mark = decoded.byte_order;
    if mark != k::BYTE_ORDER_LE {
        let issue = StructureIssue::BadByteOrder { value: mark };
        found.push(OlecfAnomaly::StructureAnomaly(issue));
    }

    detect_orphans(data, &decoded, &mut found);
    detect_structure(data, &decoded, &mut found);
    detect_free_residue(data, &decoded, &mut found);
    detect_slack(data, &decoded, &mut found);
    surface_root_clsid(&decoded, &mut found);
    found
}
/// Runs [`audit_bytes`] and converts each anomaly into a report [`Finding`].
///
/// Every finding is attributed to the same source descriptor built from `scope`.
#[must_use]
pub fn audit_findings(data: &[u8], scope: Scope) -> Vec<Finding> {
    let src = source(scope);
    let mut findings = Vec::new();
    for anomaly in audit_bytes(data) {
        findings.push(anomaly.to_finding(src.clone()));
    }
    findings
}
/// Reports storage and stream directory entries that are not reachable from the live tree.
///
/// Each orphan is carved with `carve_stream` so the finding can record how many
/// bytes were recovered.
fn detect_orphans(data: &[u8], raw: &RawCfb, out: &mut Vec<OlecfAnomaly>) {
    let live = raw::reachable_sids(&raw.dir_entries);
    // An index missing from the reachability table counts as unreachable.
    let is_live = |idx: usize| live.get(idx).copied().unwrap_or(false);

    let orphans = raw
        .dir_entries
        .iter()
        .enumerate()
        // Only object types 0x01 and 0x02 are reported.
        .filter(|(idx, entry)| !is_live(*idx) && matches!(entry.object_type, 0x01 | 0x02))
        .map(|(_, entry)| {
            let carved_len = carve_stream(data, raw, entry).len();
            OlecfAnomaly::OrphanedDirEntry(OrphanDetail {
                sid: entry.sid,
                name: entry.name.clone(),
                object_type: entry.object_type,
                stream_size: entry.stream_size,
                start_sector: entry.start_sector,
                create_time: entry.create_time,
                modify_time: entry.modify_time,
                carved_len,
            })
        });
    out.extend(orphans);
}
/// Recovers the bytes of a stream entry by following its chain.
///
/// Returns an empty buffer for anything other than a non-empty stream (object type
/// `0x02`). Streams smaller than the mini-stream cutoff are read through the
/// mini-FAT; all others through the FAT.
fn carve_stream(data: &[u8], raw: &RawCfb, entry: &DirEntry) -> Vec<u8> {
    let is_nonempty_stream = entry.object_type == 0x02 && entry.stream_size != 0;
    if !is_nonempty_stream {
        return Vec::new();
    }

    // A declared size that does not fit in `usize` is treated as "as much as the chain gives".
    let wanted = match usize::try_from(entry.stream_size) {
        Ok(n) => n,
        Err(_) => usize::MAX,
    };
    let cutoff = u64::from(raw.mini_stream_cutoff);
    if entry.stream_size >= cutoff {
        carve_fat(data, raw, entry.start_sector, wanted)
    } else {
        carve_mini(data, raw, entry.start_sector, wanted)
    }
}
/// Follows a FAT chain from `start` and concatenates the sectors it names.
///
/// The walk stops at a non-regular sector id, at a sector id outside the FAT, when a
/// sector is visited twice, or once `size` bytes have been collected. The result is
/// truncated to at most `size` bytes. Sectors that lie outside `data` contribute no
/// bytes, but the chain is still followed past them.
fn carve_fat(data: &[u8], raw: &RawCfb, start: u32, size: usize) -> Vec<u8> {
    // Initial capacity is capped so a bogus declared size cannot force a huge allocation.
    let mut carved = Vec::with_capacity(size.min(1 << 20));
    let mut seen = vec![false; raw.fat.len()];
    let mut current = start;

    loop {
        if current > k::MAXREGSECT || carved.len() >= size {
            break;
        }
        // Loop guard: stop on a repeated sector or one the FAT does not cover.
        match seen.get_mut(current as usize) {
            Some(flag) if !*flag => *flag = true,
            _ => break,
        }

        // Sector `n` starts at `(n + 1) * sector_size`.
        let byte_off = (u64::from(current) + 1).saturating_mul(raw.sector_size as u64);
        let sector = usize::try_from(byte_off)
            .ok()
            .and_then(|begin| data.get(begin..begin.saturating_add(raw.sector_size)));
        if let Some(bytes) = sector {
            carved.extend_from_slice(bytes);
        }

        current = raw
            .fat
            .get(current as usize)
            .copied()
            .unwrap_or(k::ENDOFCHAIN);
    }

    carved.truncate(size);
    carved
}
/// Follows a mini-FAT chain from `start` and concatenates the mini sectors it names.
///
/// The mini stream is first read from the first directory entry's FAT chain (capped
/// at `MAX_MINI_STREAM` bytes). Returns an empty buffer when there is no directory
/// entry at all. The walk stops under the same conditions as `carve_fat`, and the
/// result is truncated to at most `size` bytes.
fn carve_mini(data: &[u8], raw: &RawCfb, start: u32, size: usize) -> Vec<u8> {
    let root = match raw.dir_entries.first() {
        Some(root) => root,
        None => return Vec::new(),
    };
    let container = carve_fat(data, raw, root.start_sector, MAX_MINI_STREAM);
    // The shift is clamped so a corrupt header cannot produce an absurd sector size.
    let unit = 1usize << raw.mini_sector_shift.clamp(1, 16);

    let mut carved = Vec::with_capacity(size.min(1 << 20));
    let mut seen = vec![false; raw.mini_fat.len()];
    let mut current = start;

    loop {
        if current > k::MAXREGSECT || carved.len() >= size {
            break;
        }
        // Loop guard: stop on a repeated mini sector or one the mini-FAT does not cover.
        match seen.get_mut(current as usize) {
            Some(flag) if !*flag => *flag = true,
            _ => break,
        }

        let begin = (current as usize).saturating_mul(unit);
        if let Some(bytes) = container.get(begin..begin.saturating_add(unit)) {
            carved.extend_from_slice(bytes);
        }

        current = raw
            .mini_fat
            .get(current as usize)
            .copied()
            .unwrap_or(k::ENDOFCHAIN);
    }

    carved.truncate(size);
    carved
}
/// Reports structural irregularities in stream entries and in the header DIFAT.
///
/// For every stream entry, a non-zero CLSID, non-zero state bits, or a non-zero
/// create/modify timestamp each produce their own anomaly. For every header DIFAT
/// slot holding a regular sector id, the slot is reported when the sector it names
/// starts at or beyond the end of `data`.
fn detect_structure(data: &[u8], raw: &RawCfb, out: &mut Vec<OlecfAnomaly>) {
    for entry in raw.dir_entries.iter().filter(|entry| entry.is_stream()) {
        if entry.clsid != [0u8; 16] {
            out.push(OlecfAnomaly::StructureAnomaly(
                StructureIssue::StreamNonZeroClsid {
                    sid: entry.sid,
                    name: entry.name.clone(),
                },
            ));
        }
        if entry.state_bits != 0 {
            out.push(OlecfAnomaly::StructureAnomaly(
                StructureIssue::StreamNonZeroStateBits {
                    sid: entry.sid,
                    name: entry.name.clone(),
                    state_bits: entry.state_bits,
                },
            ));
        }
        if entry.create_time != 0 || entry.modify_time != 0 {
            out.push(OlecfAnomaly::StructureAnomaly(
                StructureIssue::StreamNonZeroFiletime {
                    sid: entry.sid,
                    name: entry.name.clone(),
                },
            ));
        }
    }

    // `max(1)` keeps a zero sector size from collapsing every sector to offset 0.
    let sector_size = raw.sector_size.max(1) as u64;
    let file_len = data.len() as u64;
    for i in 0..k::DIFAT_HEADER_COUNT {
        let off = k::DIFAT_HEADER_OFFSET + i * 4;
        // A slot that cannot be read is not evidence of anything; later slots sit at
        // higher offsets, so they are unreadable too.
        let Some(slot) = data.get(off..off + 4) else {
            break;
        };
        let mut b = [0u8; 4];
        b.copy_from_slice(slot);
        let sid = u32::from_le_bytes(b);
        if sid > k::MAXREGSECT {
            continue;
        }
        // Sector `n` starts at `(n + 1) * sector_size` because the header occupies the
        // first sector-sized slot. Comparing the start offset with the file length
        // also catches the sector that would begin exactly at end-of-file, which a
        // plain `sid >= len / sector_size` test misses.
        let sector_start = (u64::from(sid) + 1).saturating_mul(sector_size);
        if sector_start >= file_len {
            out.push(OlecfAnomaly::StructureAnomaly(
                StructureIssue::DifatOffFile { sid },
            ));
        }
    }
}
/// Reports sectors marked free in the FAT or mini-FAT that still hold non-zero bytes.
///
/// FAT sectors are read straight from `data`; mini sectors are read from the mini
/// stream carved via the first directory entry. Sectors that are not fully present
/// are skipped. For mini-FAT findings the recorded `offset` is relative to the
/// carved mini stream, not to the file.
fn detect_free_residue(data: &[u8], raw: &RawCfb, out: &mut Vec<OlecfAnomaly>) {
    let count_nonzero = |bytes: &[u8]| bytes.iter().filter(|&&b| b != 0).count();

    let free_fat_sectors = raw
        .fat
        .iter()
        .enumerate()
        .filter(|(_, slot)| **slot == k::FREESECT)
        .map(|(index, _)| index as u32);
    for sid in free_fat_sectors {
        // Sector `n` starts at `(n + 1) * sector_size`.
        let offset = (u64::from(sid) + 1).saturating_mul(raw.sector_size as u64);
        let sector = usize::try_from(offset)
            .ok()
            .and_then(|begin| data.get(begin..begin.saturating_add(raw.sector_size)));
        let Some(sector) = sector else {
            continue;
        };
        let residue_len = count_nonzero(sector);
        if residue_len > 0 {
            out.push(OlecfAnomaly::FreeSectorResidue {
                sid,
                space: "FAT",
                offset,
                residue_len,
            });
        }
    }

    let Some(root) = raw.dir_entries.first() else {
        return;
    };
    let unit = 1usize << raw.mini_sector_shift.clamp(1, 16);
    let container = carve_fat(data, raw, root.start_sector, MAX_MINI_STREAM);
    let free_mini_sectors = raw
        .mini_fat
        .iter()
        .enumerate()
        .filter(|(_, slot)| **slot == k::FREESECT)
        .map(|(index, _)| index);
    for msid in free_mini_sectors {
        let begin = msid.saturating_mul(unit);
        if let Some(sector) = container.get(begin..begin.saturating_add(unit)) {
            let residue_len = count_nonzero(sector);
            if residue_len > 0 {
                out.push(OlecfAnomaly::FreeSectorResidue {
                    sid: msid as u32,
                    space: "mini-FAT",
                    offset: begin as u64,
                    residue_len,
                });
            }
        }
    }
}
/// Reports live streams whose allocated chain holds non-zero bytes past the declared size.
///
/// Only reachable, non-empty stream entries are inspected. Streams whose size is an
/// exact multiple of their allocation unit are skipped before any carving is done,
/// since the original logic never reports them. The reported `slack_len` is the
/// number of non-zero slack bytes.
fn detect_slack(data: &[u8], raw: &RawCfb, out: &mut Vec<OlecfAnomaly>) {
    let reachable = raw::reachable_sids(&raw.dir_entries);
    let mini_size = 1usize << raw.mini_sector_shift.clamp(1, 16);
    for (idx, entry) in raw.dir_entries.iter().enumerate() {
        if !entry.is_stream() || entry.stream_size == 0 {
            continue;
        }
        if !reachable.get(idx).copied().unwrap_or(false) {
            continue;
        }
        let size = usize::try_from(entry.stream_size).unwrap_or(usize::MAX);
        let in_mini = entry.stream_size < u64::from(raw.mini_stream_cutoff);
        let (unit, space) = if in_mini {
            (mini_size, "mini-FAT")
        } else {
            (raw.sector_size, "FAT")
        };
        // Decide before carving: a carve copies up to MAX_MINI_STREAM bytes (and, for
        // mini streams, re-reads the whole mini stream), so do not pay for it when
        // the result would be discarded anyway.
        if unit == 0 || size % unit == 0 {
            continue;
        }
        let bytes = if in_mini {
            carve_mini(data, raw, entry.start_sector, MAX_MINI_STREAM)
        } else {
            carve_fat(data, raw, entry.start_sector, MAX_MINI_STREAM)
        };
        // `get` yields `None` when the carve is shorter than the declared size. That
        // includes streams larger than MAX_MINI_STREAM, whose slack is therefore not
        // inspected.
        let Some(slack) = bytes.get(size..) else {
            continue;
        };
        let nonzero = slack.iter().filter(|&&b| b != 0).count();
        if nonzero > 0 {
            out.push(OlecfAnomaly::SlackResidue {
                sid: entry.sid,
                name: entry.name.clone(),
                space,
                slack_len: nonzero,
            });
        }
    }
}
/// Records the first directory entry's CLSID and timestamps as an informational anomaly.
///
/// Does nothing when there are no directory entries. An empty entry name is
/// replaced by `"Root Entry"`.
fn surface_root_clsid(raw: &RawCfb, out: &mut Vec<OlecfAnomaly>) {
    let Some(root) = raw.dir_entries.first() else {
        return;
    };
    let name = match root.name.is_empty() {
        true => "Root Entry".to_string(),
        false => root.name.clone(),
    };
    let clsid = format_clsid(&root.clsid);
    out.push(OlecfAnomaly::RootClsid {
        sid: root.sid,
        name,
        clsid,
        create_time: root.create_time,
        modify_time: root.modify_time,
    });
}
/// Renders 16 raw CLSID bytes as an upper-case `XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX` string.
///
/// The first three groups are read as little-endian integers (bytes 0-3, 4-5, 6-7);
/// the remaining eight bytes are printed in storage order.
fn format_clsid(b: &[u8; 16]) -> String {
    let data1 = u32::from_le_bytes([b[0], b[1], b[2], b[3]]);
    let data2 = u16::from_le_bytes([b[4], b[5]]);
    let data3 = u16::from_le_bytes([b[6], b[7]]);
    // Bytes 8-9 are printed in storage order, i.e. as a big-endian pair.
    let data4_head = u16::from_be_bytes([b[8], b[9]]);
    let data4_tail: String = b[10..].iter().map(|byte| format!("{byte:02X}")).collect();
    format!("{data1:08X}-{data2:04X}-{data3:04X}-{data4_head:04X}-{data4_tail}")
}
/// Lists the names of every live entry, as seen by the `cfb` crate's directory walk.
///
/// Returns `None` when `cfb` cannot open `data` as a compound file.
#[must_use]
pub fn live_entry_names(data: &[u8]) -> Option<Vec<String>> {
    // `Cursor<&[u8]>` already implements `Read + Seek`, so the input is read in
    // place rather than copied into a fresh `Vec`.
    let comp = cfb::CompoundFile::open(Cursor::new(data)).ok()?;
    Some(
        comp.walk()
            .map(|entry| entry.name().to_string())
            .collect(),
    )
}
/// Reads the full contents of the live stream at `path` using the `cfb` crate.
///
/// Returns `None` when the file cannot be opened as a compound file, when the
/// stream cannot be opened, or when reading it fails.
#[must_use]
pub fn read_live_stream(data: &[u8], path: &str) -> Option<Vec<u8>> {
    // `Cursor<&[u8]>` already implements `Read + Seek`, so the input is read in
    // place rather than copied into a fresh `Vec`.
    let mut comp = cfb::CompoundFile::open(Cursor::new(data)).ok()?;
    let mut stream = comp.open_stream(path).ok()?;
    let mut buf = Vec::new();
    stream.read_to_end(&mut buf).ok()?;
    Some(buf)
}