Skip to main content

mbr_forensic/
analyse.rs

1//! Orchestration: the public [`analyse`] entry point and its per-stage checks.
2//!
3//! The analysis is a pipeline of small, independently-debuggable stages. Each
4//! stage receives the parsed data plus a [`Findings`] accumulator and records
5//! any anomalies it discovers. Every anomaly in the crate flows through the
6//! single [`Findings::record`] choke point, which is where tracing, breakpoints,
7//! or post-processing belong.
8
9use std::io::{Read, Seek, SeekFrom};
10
11use crate::boot_code::{self, BootCodeId};
12use crate::diag;
13use crate::ebr::{walk_ebr_chain, EbrChain};
14use crate::entropy;
15use crate::findings::{Anomaly, AnomalyKind, MbrAnalysis, PartitionSummary};
16use crate::gap::{compute_gaps, Gap, GapKind};
17use crate::mbr::{parse_mbr_sector, MbrSector, SECTOR_SIZE};
18use crate::signature::{self, DetectedFs};
19use crate::Error;
20
21// ── Layout constants ──────────────────────────────────────────────────────────
22
/// Logical sector size in bytes: [`SECTOR_SIZE`] widened to `u64` for offset
/// arithmetic. This is the default for [`AnalyseOptions::sector_size`].
const SECTOR_BYTES: u64 = SECTOR_SIZE as u64;
/// Byte offset of the partition table within the MBR sector.
const PARTITION_TABLE_OFFSET: u64 = 446;
/// Size of one partition table entry, in bytes.
const ENTRY_SIZE: u64 = 16;
/// Byte offset of the reserved field (bytes 444–445).
const RESERVED_OFFSET: u64 = 444;
/// Byte offset of the NT disk signature (bytes 440–443, little-endian u32).
const DISK_SERIAL_OFFSET: u64 = 440;
/// Byte offset of the EBR slack region (entries 2–3) within an EBR sector,
/// i.e. `PARTITION_TABLE_OFFSET + 2 * ENTRY_SIZE`.
const EBR_SLACK_OFFSET: u64 = 478;
/// First partition index assigned to logical partitions from the EBR chain.
/// Indices below this value refer to the four primary table slots.
const EBR_INDEX_BASE: usize = 4;
37
38// ── Geometry helpers ──────────────────────────────────────────────────────────
39
/// Byte offset at which sector `lba` begins, for `sector_size`-byte sectors.
///
/// The product is clamped to [`u64::MAX`] instead of wrapping or panicking.
#[inline]
fn lba_to_byte(lba: u64, sector_size: u64) -> u64 {
    match lba.checked_mul(sector_size) {
        Some(offset) => offset,
        // Overflow: clamp to the largest representable offset.
        None => u64::MAX,
    }
}
45
46/// Byte offset of primary partition entry `index` within the MBR sector.
47#[inline]
48fn entry_offset(index: usize) -> u64 {
49    PARTITION_TABLE_OFFSET + index as u64 * ENTRY_SIZE
50}
51
/// Inclusive last LBA of a disk of `disk_size_bytes`, or [`u64::MAX`] (i.e. "no
/// bound") when the bound cannot be computed.
///
/// The bound is unknown — and `u64::MAX` is returned — when:
/// - `disk_size_bytes` is `0` (size not supplied by the caller), or
/// - `sector_size` is `0` (no usable geometry; dividing by it would panic).
///
/// A disk smaller than one sector yields `0`.
#[inline]
fn disk_last_lba(disk_size_bytes: u64, sector_size: u64) -> u64 {
    if disk_size_bytes == 0 {
        return u64::MAX;
    }
    // `checked_div` turns a zero sector size into `None` instead of a
    // divide-by-zero panic; `sector_size` originates from caller options.
    match disk_size_bytes.checked_div(sector_size) {
        Some(sector_count) => sector_count.saturating_sub(1),
        None => u64::MAX,
    }
}
62
63// ── Anomaly accumulator ───────────────────────────────────────────────────────
64
65/// Accumulates anomalies across the analysis. Every anomaly the crate emits is
66/// funnelled through [`Findings::record`], giving one place to trace, set a
67/// breakpoint, or post-process findings.
68#[derive(Default)]
69struct Findings {
70    anomalies: Vec<Anomaly>,
71}
72
73impl Findings {
74    /// Build an anomaly from `kind` + `offset`, emit a trace event, and store it.
75    fn record(&mut self, kind: AnomalyKind, offset: u64) {
76        let anomaly = Anomaly::new(kind, offset);
77        diag::anomaly_recorded(&anomaly);
78        self.anomalies.push(anomaly);
79    }
80}
81
/// Primary-table scan output threaded into the EBR and gap stages.
///
/// Built by `scan_primary_entries`, then extended in place by `walk_extended`,
/// which appends one extent and one summary per logical partition.
struct PrimaryScan {
    /// `(lba_start, lba_end)` inclusive extents of every non-empty partition
    /// (including extended containers) — used for gap analysis.
    extents: Vec<(u64, u64)>,
    /// `(id, lba_start, lba_end)` for *data* partitions only — non-extended
    /// primaries and logicals, used for overlap detection. Extended containers
    /// are excluded so their logicals are not flagged as overlapping them.
    overlap_extents: Vec<(usize, u64, u64)>,
    /// Per-partition forensic summaries.
    summaries: Vec<PartitionSummary>,
}
94
/// Options controlling [`analyse_with_options`].
///
/// The type is `Copy` and is passed by value; [`Default`] selects the standard
/// logical sector size ([`SECTOR_BYTES`]).
#[derive(Debug, Clone, Copy)]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
pub struct AnalyseOptions {
    /// Logical sector size in bytes. Defaults to 512; set to 4096 for a 4Kn
    /// (Advanced Format) disk so partition-content offsets, gap sizes, and
    /// out-of-bounds bounds are computed against the correct geometry.
    ///
    /// Expected to be non-zero: every LBA-to-byte conversion multiplies by this
    /// value and the disk-size bound divides by it.
    pub sector_size: u64,
}

impl Default for AnalyseOptions {
    /// Standard geometry: `sector_size` is [`SECTOR_BYTES`].
    fn default() -> Self {
        Self {
            sector_size: SECTOR_BYTES,
        }
    }
}
112
113// ── Public entry point ────────────────────────────────────────────────────────
114
115/// Perform a full forensic analysis of an MBR-partitioned disk image.
116///
117/// `disk_size_bytes` is used for gap analysis and out-of-bounds detection.
118/// Pass `0` to skip gap analysis.
119///
120/// # Errors
121///
122/// Returns [`Error::Io`] on read failures, [`Error::TooShort`] / [`Error::BadSignature`]
123/// when the MBR sector is invalid.
124#[cfg_attr(feature = "trace", tracing::instrument(level = "debug", skip(reader)))]
125pub fn analyse<R: Read + Seek>(reader: &mut R, disk_size_bytes: u64) -> Result<MbrAnalysis, Error> {
126    analyse_with_options(reader, disk_size_bytes, AnalyseOptions::default())
127}
128
/// Like [`analyse`], but with explicit [`AnalyseOptions`] — e.g. to force a 4Kn
/// (4096-byte) logical sector size for an Advanced Format disk.
///
/// The MBR boot record is always parsed from byte 0 (it is a 512-byte structure
/// regardless of sector size); only partition-content offsets, gap sizes, and
/// out-of-bounds bounds scale with [`AnalyseOptions::sector_size`].
///
/// The stages run in a fixed order, and anomalies appear in the result in the
/// order the stages record them: boot code, disk signature, reserved field,
/// bootable flags, duplicate entries, GPT cross-validation, primary entries,
/// EBR chain, overlaps, gaps, gap content.
///
/// `disk_size_bytes == 0` means the size is unknown: gap analysis is skipped
/// and end-of-disk bounds are not enforced.
///
/// # Errors
///
/// Same as [`analyse`]. Only reading/parsing the MBR sector is fatal; the later
/// stages do not return errors to this function.
#[cfg_attr(feature = "trace", tracing::instrument(level = "debug", skip(reader)))]
pub fn analyse_with_options<R: Read + Seek>(
    reader: &mut R,
    disk_size_bytes: u64,
    opts: AnalyseOptions,
) -> Result<MbrAnalysis, Error> {
    let sector_size = opts.sector_size;
    // The only fallible step: everything below degrades to "no finding".
    let mbr = read_mbr(reader)?;
    let mut findings = Findings::default();

    let boot_code_id = boot_code::identify(&mbr.boot_code);
    // Read LBA 1 once: it decides both the GPT cross-validation and whether an
    // all-zero boot code is benign (genuine GPT disk) or suspicious (legacy).
    let gpt_header = gpt_header_present(reader, sector_size);
    let on_gpt_disk = gpt_header && is_pure_protective_mbr(&mbr);

    // Checks that need only the parsed MBR sector.
    check_boot_code(&mbr, boot_code_id, on_gpt_disk, &mut findings);
    check_disk_signature(&mbr, boot_code_id, &mut findings);
    check_reserved(&mbr, &mut findings);
    check_bootable_flags(&mbr, &mut findings);
    check_duplicate_entries(&mbr, &mut findings);

    // `u64::MAX` when the disk size is unknown (see `disk_last_lba`).
    let last_lba = disk_last_lba(disk_size_bytes, sector_size);
    check_gpt(&mbr, last_lba, gpt_header, sector_size, &mut findings);
    let mut scan = scan_primary_entries(
        reader,
        &mbr,
        disk_size_bytes,
        last_lba,
        sector_size,
        &mut findings,
    );

    // Appends the logical partitions' extents and summaries to `scan`.
    let ebr_chain = walk_extended(
        reader,
        &mbr,
        &mut scan,
        disk_size_bytes,
        sector_size,
        &mut findings,
    );
    // Overlap detection runs on data partitions (non-extended primaries +
    // logicals) AFTER the EBR walk, so logical-partition overlaps are caught.
    // Extended containers are excluded — their logicals living inside them is
    // expected, not an overlap.
    check_overlaps(&scan.overlap_extents, &mut findings);
    let gaps = check_gaps(
        &scan.extents,
        disk_size_bytes,
        last_lba,
        sector_size,
        &mut findings,
    );
    check_gap_content(reader, &gaps, sector_size, &mut findings);

    // When the disk turns out to be GPT, parse the real GUID Partition Table
    // automatically via the sibling gpt-forensic crate.
    // A failure of that analysis is discarded (`.ok()`), leaving `gpt` as `None`.
    #[cfg(feature = "gpt")]
    let gpt = if gpt_header {
        gpt_forensic::analyse(reader, disk_size_bytes).ok()
    } else {
        None
    };

    // Read before `mbr` is moved into the result below.
    let disk_serial = mbr.disk_serial;
    let era = crate::provenance::infer_era(first_partition_lba(&mbr), boot_code_id);
    diag::analysis_complete(
        findings.anomalies.len(),
        scan.summaries.len(),
        gaps.len(),
        boot_code_id,
    );

    Ok(MbrAnalysis {
        mbr,
        partitions: scan.summaries,
        ebr_chain,
        gaps,
        boot_code_id,
        disk_serial,
        era,
        #[cfg(feature = "gpt")]
        gpt,
        anomalies: findings.anomalies,
    })
}
225
226/// Lowest start LBA among real (non-empty, non-extended, non-protective) primary
227/// partitions — the geometry signal for era attribution. `None` when there are
228/// none.
229fn first_partition_lba(mbr: &MbrSector) -> Option<u64> {
230    mbr.entries
231        .iter()
232        .filter(|e| {
233            !e.is_empty() && !e.is_extended() && e.type_code.0 != crate::gpt::PROTECTIVE_TYPE_CODE
234        })
235        .map(|e| e.lba_start as u64)
236        .min()
237}
238
239/// `true` when an "EFI PART" GPT header is present at LBA 1. A read failure
240/// (e.g. a sub-1024-byte image) is treated as "absent".
241fn gpt_header_present<R: Read + Seek>(reader: &mut R, sector_size: u64) -> bool {
242    match read_first_sector(reader, sector_size) {
243        Ok(lba1) => crate::gpt::has_gpt_header(&lba1),
244        Err(e) => {
245            diag::partition_read_failed(sector_size, &e);
246            false
247        }
248    }
249}
250
251/// `true` when the MBR is a *pure* GPT protective MBR: exactly one non-empty
252/// entry, of type 0xEE. Hybrid MBRs (extra real entries) are excluded, because
253/// their boot code can still be executed by a legacy BIOS.
254fn is_pure_protective_mbr(mbr: &MbrSector) -> bool {
255    let mut nonempty = mbr.entries.iter().filter(|e| !e.is_empty());
256    matches!(
257        (nonempty.next(), nonempty.next()),
258        (Some(e), None) if e.type_code.0 == crate::gpt::PROTECTIVE_TYPE_CODE
259    )
260}
261
262// ── Stages ────────────────────────────────────────────────────────────────────
263
264/// Seek to the start, read 512 bytes, and parse the MBR sector.
265fn read_mbr<R: Read + Seek>(reader: &mut R) -> Result<MbrSector, Error> {
266    reader.seek(SeekFrom::Start(0))?;
267    let mut raw = [0u8; SECTOR_SIZE];
268    reader.read_exact(&mut raw)?;
269    parse_mbr_sector(&raw)
270}
271
272/// Flag wiped / erased / unrecognised boot code.
273///
274/// All-zero boot code is context-dependent: on a legacy BIOS/MBR-boot disk the
275/// boot code is executed first, so all-zero is suspicious ([`AnomalyKind::WipedBootCode`],
276/// High); on a genuine GPT disk (`on_gpt_disk`) the MBR boot code is never run,
277/// so all-zero is benign ([`AnomalyKind::EmptyProtectiveBootCode`], Info).
278///
279/// Unrecognised boot code is additionally entropy-scanned: near-maximal Shannon
280/// entropy in the 446-byte code area, with no matching loader, is consistent
281/// with a packed or encrypted bootkit payload and raises [`AnomalyKind::HighEntropySlack`].
282fn check_boot_code(mbr: &MbrSector, id: BootCodeId, on_gpt_disk: bool, findings: &mut Findings) {
283    let kind = match id {
284        BootCodeId::AllZeros if on_gpt_disk => Some(AnomalyKind::EmptyProtectiveBootCode),
285        BootCodeId::AllZeros => Some(AnomalyKind::WipedBootCode),
286        BootCodeId::AllOnes => Some(AnomalyKind::ErasedBootCode),
287        BootCodeId::Unknown => Some(AnomalyKind::UnknownBootCode),
288        _ => None,
289    };
290    if let Some(kind) = kind {
291        findings.record(kind, 0);
292    }
293    if id == BootCodeId::Unknown {
294        let entropy = entropy::shannon(&mbr.boot_code);
295        if entropy > entropy::HIGH_ENTROPY_THRESHOLD {
296            findings.record(AnomalyKind::HighEntropySlack { offset: 0, entropy }, 0);
297        }
298    }
299
300    // Documented boot-sector-malware markers — scanned regardless of loader
301    // identity, since a marker can coexist with otherwise-valid-looking code.
302    for name in crate::bootkit::scan(&mbr.boot_code) {
303        findings.record(AnomalyKind::KnownBootkit { name }, 0);
304    }
305}
306
307/// Flag a Windows MBR whose NT disk signature (offset 440) is zero.
308///
309/// Windows always writes a non-zero signature; its absence under a recognised
310/// bootmgr stub is consistent with a wiped or re-created boot record. Non-Windows
311/// MBRs routinely leave it zero, so the check is gated on the boot-code identity
312/// to avoid false positives. Cross-disk collision detection (the cloning signal)
313/// lives in [`crate::disk_signature`].
314fn check_disk_signature(mbr: &MbrSector, id: BootCodeId, findings: &mut Findings) {
315    let is_windows = matches!(id, BootCodeId::WindowsVista | BootCodeId::Windows7Plus);
316    if is_windows && mbr.disk_serial == 0 {
317        findings.record(AnomalyKind::ZeroDiskSignature, DISK_SERIAL_OFFSET);
318    }
319}
320
321/// Minimum hidden tail (in sectors) before an undersized protective MBR is
322/// flagged — avoids false positives from a few-sector rounding difference.
323const PROTECTIVE_UNDERSIZE_TOLERANCE: u64 = 2048;
324
325/// Cross-validate the MBR against any GPT at LBA 1.
326///
327/// `header_present` is whether an "EFI PART" header was found at LBA 1 (read
328/// once by the caller). Reconciles it with the presence/shape of a protective
329/// 0xEE entry, surfacing hybrid MBRs, undersized protective entries, hidden
330/// GPTs, and spoofed protective MBRs — all data-hiding or tampering vectors.
331fn check_gpt(
332    mbr: &MbrSector,
333    last_lba: u64,
334    header_present: bool,
335    sector_size: u64,
336    findings: &mut Findings,
337) {
338    let protective_idx = mbr
339        .entries
340        .iter()
341        .position(|e| !e.is_empty() && e.type_code.0 == crate::gpt::PROTECTIVE_TYPE_CODE);
342
343    let Some(idx) = protective_idx else {
344        // No protective entry. A GPT header with no 0xEE advertising it is hidden.
345        if header_present {
346            findings.record(AnomalyKind::HiddenGpt, lba_to_byte(1, sector_size));
347        }
348        return;
349    };
350
351    let off = entry_offset(idx);
352    if !header_present {
353        findings.record(AnomalyKind::SpoofedProtectiveMbr, off);
354        return;
355    }
356
357    // Genuine protective entry backed by a GPT header. Check for coexisting real
358    // partitions (hybrid) and incomplete disk coverage (undersized).
359    let extra = mbr
360        .entries
361        .iter()
362        .filter(|e| !e.is_empty() && e.type_code.0 != crate::gpt::PROTECTIVE_TYPE_CODE)
363        .count();
364    if extra > 0 {
365        findings.record(
366            AnomalyKind::HybridMbr {
367                extra_partition_count: extra,
368            },
369            off,
370        );
371    }
372
373    let ee = &mbr.entries[idx];
374    let covered_last_lba = ee.lba_end() as u64;
375    let spans_disk = ee.lba_count == u32::MAX; // 0xFFFFFFFF = "rest of disk"
376    if last_lba != u64::MAX
377        && !spans_disk
378        && last_lba.saturating_sub(covered_last_lba) > PROTECTIVE_UNDERSIZE_TOLERANCE
379    {
380        findings.record(
381            AnomalyKind::ProtectiveMbrUndersized {
382                covered_last_lba,
383                disk_last_lba: last_lba,
384            },
385            off,
386        );
387    }
388}
389
390/// Flag pairs of non-empty primary entries that describe the identical extent
391/// (same start LBA and sector count) — a duplicate left by hand-editing or a
392/// faulty imaging tool. Each colliding pair is reported once (lowest indices).
393fn check_duplicate_entries(mbr: &MbrSector, findings: &mut Findings) {
394    let e = &mbr.entries;
395    for a in 0..e.len() {
396        if e[a].is_empty() {
397            continue;
398        }
399        for b in (a + 1)..e.len() {
400            if !e[b].is_empty()
401                && e[a].lba_start == e[b].lba_start
402                && e[a].lba_count == e[b].lba_count
403            {
404                findings.record(
405                    AnomalyKind::DuplicatePartitionEntry { a, b },
406                    entry_offset(a),
407                );
408            }
409        }
410    }
411}
412
413/// Flag a non-zero reserved field (bytes 444–445).
414fn check_reserved(mbr: &MbrSector, findings: &mut Findings) {
415    if mbr.reserved != [0, 0] {
416        findings.record(
417            AnomalyKind::NonZeroReserved {
418                bytes: mbr.reserved,
419            },
420            RESERVED_OFFSET,
421        );
422    }
423}
424
425/// Audit the bootable flags: more than one is invalid; none with active
426/// partitions is noteworthy.
427fn check_bootable_flags(mbr: &MbrSector, findings: &mut Findings) {
428    let bootable = mbr.entries.iter().filter(|e| e.is_bootable()).count();
429    let active = mbr.entries.iter().filter(|e| !e.is_empty()).count();
430    if bootable > 1 {
431        findings.record(
432            AnomalyKind::MultipleBootable { count: bootable },
433            PARTITION_TABLE_OFFSET,
434        );
435    }
436    if active > 0 && bootable == 0 {
437        findings.record(AnomalyKind::NoBootablePartition, PARTITION_TABLE_OFFSET);
438    }
439}
440
/// Walk the four primary entries, emitting per-entry anomalies and collecting
/// extents + summaries for the overlap, EBR, and gap stages.
///
/// Per entry, in order:
/// 1. A type-0x00 entry with non-zero LBA fields is recorded as
///    [`AnomalyKind::ResidualEntry`] and otherwise ignored.
/// 2. Empty entries are skipped.
/// 3. The status byte, CHS/LBA consistency, and (when `disk_size_bytes > 0`)
///    the end-of-disk bound are checked.
/// 4. The extent is collected; extended containers are kept out of
///    `overlap_extents`.
/// 5. The first sector is checked as a VBR and the start of the partition is
///    fingerprinted; both read from `reader`.
/// 6. A [`PartitionSummary`] is appended. Its `index` is the table slot, so
///    indices may be non-contiguous when slots are empty.
///
/// `last_lba` is the inclusive last LBA of the disk as computed by the caller;
/// `sector_size` scales LBAs to byte offsets and sizes.
fn scan_primary_entries<R: Read + Seek>(
    reader: &mut R,
    mbr: &MbrSector,
    disk_size_bytes: u64,
    last_lba: u64,
    sector_size: u64,
    findings: &mut Findings,
) -> PrimaryScan {
    let mut extents = Vec::new();
    let mut overlap_extents = Vec::new();
    let mut summaries = Vec::new();

    for (i, entry) in mbr.entries.iter().enumerate() {
        // All table-level findings for this entry are anchored at its slot.
        let off = entry_offset(i);

        // Residual entry: type 0x00 but non-zero LBA fields → deleted partition.
        if entry.type_code.is_empty() && (entry.lba_start != 0 || entry.lba_count != 0) {
            findings.record(
                AnomalyKind::ResidualEntry {
                    index: i,
                    lba_start: entry.lba_start,
                    lba_count: entry.lba_count,
                },
                off,
            );
            continue;
        }
        if entry.is_empty() {
            continue;
        }

        // Status byte must be 0x00 (inactive) or 0x80 (bootable); anything else
        // is a spec violation and a manual-edit / tooling artifact.
        if entry.status != 0x00 && entry.status != 0x80 {
            findings.record(
                AnomalyKind::InvalidPartitionStatus {
                    index: i,
                    status: entry.status,
                },
                off,
            );
        }

        check_chs_lba(i, entry, findings);

        // Geometry of the partition in both LBA and byte terms.
        let lba_start = entry.lba_start as u64;
        let lba_end = entry.lba_end() as u64;
        let byte_offset = lba_to_byte(lba_start, sector_size);
        let byte_size = lba_to_byte(entry.lba_count as u64, sector_size);

        // Only meaningful when the disk size is known.
        if disk_size_bytes > 0 && lba_end > last_lba {
            findings.record(
                AnomalyKind::OutOfBounds {
                    index: i,
                    last_lba: lba_end,
                    disk_last_lba: last_lba,
                },
                off,
            );
        }

        extents.push((lba_start, lba_end));
        // Extended containers are excluded from overlap detection — their
        // logicals living inside them is expected, not an overlap.
        if !entry.is_extended() {
            overlap_extents.push((i, lba_start, lba_end));
        }

        // Content checks: both read from the partition's first bytes.
        check_vbr(reader, i, lba_start, byte_offset, disk_size_bytes, findings);

        let detected_fs = detect_and_check_fs(
            reader,
            i,
            byte_offset,
            entry.type_code,
            disk_size_bytes,
            findings,
        );

        summaries.push(PartitionSummary {
            index: i,
            lba_start,
            lba_end,
            byte_offset,
            byte_size,
            declared_type: entry.type_code,
            detected_fs,
        });
    }

    PrimaryScan {
        extents,
        overlap_extents,
        summaries,
    }
}
539
540/// Fingerprint a partition's filesystem and flag a declared-vs-detected
541/// mismatch. Shared by the primary scan and the EBR logical walk so both get
542/// identical scrutiny. Returns the detected filesystem (if any).
543fn detect_and_check_fs<R: Read + Seek>(
544    reader: &mut R,
545    index: usize,
546    byte_offset: u64,
547    declared: crate::partition::TypeCode,
548    disk_size_bytes: u64,
549    findings: &mut Findings,
550) -> Option<DetectedFs> {
551    let detected_fs = detect_partition_fs(reader, byte_offset, disk_size_bytes);
552    if let Some(detected) = detected_fs {
553        if signature::type_conflicts(declared.family(), detected) {
554            findings.record(
555                AnomalyKind::SignatureMismatch {
556                    index,
557                    declared,
558                    detected,
559                },
560                byte_offset,
561            );
562        }
563    }
564    detected_fs
565}
566
567/// Flag a primary entry whose packed CHS first/last addresses contradict their
568/// LBA companions — a hallmark of a hand-edited or tool-crafted partition table.
569///
570/// Uses the de-facto standard LBA-assist geometry; the all-zero "unused"
571/// convention and the CHS overflow marker are both accepted (see
572/// [`crate::partition::chs_consistency`]).
573fn check_chs_lba(index: usize, entry: &crate::partition::PartitionEntry, findings: &mut Findings) {
574    use crate::partition::{
575        chs_consistency, ChsConsistency, STD_HEADS_PER_CYL, STD_SECTORS_PER_TRACK,
576    };
577    let first = chs_consistency(
578        entry.chs_first,
579        entry.lba_start,
580        STD_HEADS_PER_CYL,
581        STD_SECTORS_PER_TRACK,
582    );
583    let last = chs_consistency(
584        entry.chs_last,
585        entry.lba_end(),
586        STD_HEADS_PER_CYL,
587        STD_SECTORS_PER_TRACK,
588    );
589    if first == ChsConsistency::Inconsistent || last == ChsConsistency::Inconsistent {
590        findings.record(
591            AnomalyKind::ChsLbaInconsistency { index },
592            entry_offset(index),
593        );
594    }
595}
596
597/// Parse a partition's VBR and flag a stale BPB hidden-sectors field.
598///
599/// A FAT/NTFS volume records its disk offset in the BPB; when it disagrees with
600/// the partition-table LBA the volume was relocated/copied or the table edited.
601/// Only nonzero mismatches are flagged (zero is the removable-media convention),
602/// and non-FAT/NTFS first sectors are skipped via [`crate::vbr::parse_bpb`].
603fn check_vbr<R: Read + Seek>(
604    reader: &mut R,
605    index: usize,
606    lba_start: u64,
607    byte_offset: u64,
608    disk_size_bytes: u64,
609    findings: &mut Findings,
610) {
611    if disk_size_bytes != 0 && byte_offset >= disk_size_bytes {
612        return;
613    }
614    let Ok(sector) = read_first_sector(reader, byte_offset) else {
615        return;
616    };
617    let Some(bpb) = crate::vbr::parse_bpb(&sector) else {
618        return;
619    };
620    if bpb.hidden_sectors != 0 && u64::from(bpb.hidden_sectors) != lba_start {
621        findings.record(
622            AnomalyKind::VbrHiddenSectorsMismatch {
623                index,
624                bpb_hidden: bpb.hidden_sectors,
625                lba_start,
626            },
627            byte_offset,
628        );
629    }
630}
631
632/// Detect overlapping data-partition extents.
633///
634/// Operates on `(id, lba_start, lba_end)` triples for non-extended primaries and
635/// logicals (extended containers excluded — see [`PrimaryScan::overlap_extents`]),
636/// so overlaps among logicals and between logicals and primaries are caught.
637fn check_overlaps(extents: &[(usize, u64, u64)], findings: &mut Findings) {
638    let mut sorted = extents.to_vec();
639    sorted.sort_by_key(|&(_, start, _)| start);
640    for pair in sorted.windows(2) {
641        let (a_id, _, a_end) = pair[0];
642        let (b_id, b_start, _) = pair[1];
643        if b_start <= a_end {
644            findings.record(
645                AnomalyKind::OverlappingPartitions {
646                    a: a_id,
647                    b: b_id,
648                    a_end,
649                    b_start,
650                },
651                entry_offset(a_id.min(3)),
652            );
653        }
654    }
655}
656
657/// Walk the (single) extended partition's EBR chain, recording chain anomalies
658/// and appending each logical partition's extent + summary to `scan`.
659fn walk_extended<R: Read + Seek>(
660    reader: &mut R,
661    mbr: &MbrSector,
662    scan: &mut PrimaryScan,
663    disk_size_bytes: u64,
664    sector_size: u64,
665    findings: &mut Findings,
666) -> EbrChain {
667    let Some(ext) = mbr.entries.iter().find(|e| e.is_extended()) else {
668        return EbrChain::empty();
669    };
670    let ext_start = ext.lba_start as u64;
671
672    let chain = match walk_ebr_chain(reader, ext_start, sector_size) {
673        Ok(chain) => chain,
674        Err(e) => {
675            diag::ebr_walk_failed(ext_start, &e);
676            return EbrChain::empty();
677        }
678    };
679
680    let ext_offset = lba_to_byte(ext_start, sector_size);
681    if chain.had_cycle {
682        findings.record(AnomalyKind::EbrCycle, ext_offset);
683    }
684    if chain.depth_exceeded {
685        findings.record(
686            AnomalyKind::EbrExcessiveDepth {
687                depth: chain.entries.len(),
688            },
689            ext_offset,
690        );
691    }
692
693    for ebr in &chain.entries {
694        if ebr.has_slack {
695            let entropy = entropy::shannon(&ebr.slack);
696            findings.record(
697                AnomalyKind::EbrSlackData {
698                    ebr_lba: ebr.ebr_lba,
699                    entropy,
700                },
701                ebr.ebr_offset + EBR_SLACK_OFFSET,
702            );
703        }
704
705        let lba_start = ebr.logical_lba_start;
706        let lba_end = lba_start
707            .saturating_add(ebr.logical.lba_count as u64)
708            .saturating_sub(1);
709        let byte_offset = lba_to_byte(lba_start, sector_size);
710        let index = EBR_INDEX_BASE + scan.summaries.len();
711
712        scan.extents.push((lba_start, lba_end));
713        scan.overlap_extents.push((index, lba_start, lba_end));
714
715        // Logical partitions get the same scrutiny as primaries: BPB
716        // hidden-sectors relocation check and FS signature-mismatch detection.
717        check_vbr(
718            reader,
719            index,
720            lba_start,
721            byte_offset,
722            disk_size_bytes,
723            findings,
724        );
725        let detected_fs = detect_and_check_fs(
726            reader,
727            index,
728            byte_offset,
729            ebr.logical.type_code,
730            disk_size_bytes,
731            findings,
732        );
733
734        scan.summaries.push(PartitionSummary {
735            index,
736            lba_start,
737            lba_end,
738            byte_offset,
739            byte_size: lba_to_byte(ebr.logical.lba_count as u64, sector_size),
740            declared_type: ebr.logical.type_code,
741            detected_fs,
742        });
743    }
744
745    chain
746}
747
748/// Compute unpartitioned gaps and record one anomaly per gap.
749/// Returns an empty vec (and records nothing) when `disk_size_bytes == 0`.
750fn check_gaps(
751    extents: &[(u64, u64)],
752    disk_size_bytes: u64,
753    last_lba: u64,
754    sector_size: u64,
755    findings: &mut Findings,
756) -> Vec<Gap> {
757    if disk_size_bytes == 0 {
758        return vec![];
759    }
760    let mut sorted = extents.to_vec();
761    sorted.sort_by_key(|&(start, _)| start);
762    sorted.dedup();
763
764    let gaps = compute_gaps(&sorted, 1, last_lba, sector_size);
765    for gap in &gaps {
766        findings.record(
767            gap_anomaly_kind(gap),
768            lba_to_byte(gap.lba_start, sector_size),
769        );
770    }
771    gaps
772}
773
774/// Number of bytes sampled from the start of each gap to classify its fill.
775const GAP_SAMPLE_BYTES: usize = 4096;
776
777/// Sample the start of each unpartitioned gap and flag any that carry a
778/// deliberate wipe pattern (uniform non-zero, alternating, etc.).
779///
780/// All-zero gaps — ordinary unallocated space — are never flagged. Read
781/// failures (truncated images) are skipped silently; gap *existence* is already
782/// reported by [`check_gaps`].
783fn check_gap_content<R: Read + Seek>(
784    reader: &mut R,
785    gaps: &[Gap],
786    sector_size: u64,
787    findings: &mut Findings,
788) {
789    for gap in gaps {
790        let byte_offset = lba_to_byte(gap.lba_start, sector_size);
791        let sample_len = gap.byte_size.min(GAP_SAMPLE_BYTES as u64) as usize;
792        if sample_len == 0 {
793            continue;
794        }
795        if reader.seek(SeekFrom::Start(byte_offset)).is_err() {
796            continue;
797        }
798        let mut buf = vec![0u8; sample_len];
799        if reader.read_exact(&mut buf).is_err() {
800            continue;
801        }
802        let pattern = crate::wipe::classify(&buf);
803        if pattern.is_deliberate_wipe() {
804            findings.record(
805                AnomalyKind::WipedRegion {
806                    lba_start: gap.lba_start,
807                    pattern,
808                },
809                byte_offset,
810            );
811        }
812
813        // Carve recoverable file headers from the same window — leftover data
814        // from deleted or hidden files in unallocated space.
815        for artifact in crate::carve::carve(&buf, byte_offset) {
816            findings.record(
817                AnomalyKind::CarvedArtifact {
818                    kind: artifact.kind,
819                },
820                artifact.offset,
821            );
822        }
823    }
824}
825
826// ── Pure helpers ──────────────────────────────────────────────────────────────
827
828/// Map a [`Gap`] to its corresponding [`AnomalyKind`].
829fn gap_anomaly_kind(gap: &Gap) -> AnomalyKind {
830    match gap.kind {
831        GapKind::PrePartition => AnomalyKind::PrePartitionSpace {
832            lba_start: gap.lba_start,
833            lba_end: gap.lba_end,
834            byte_size: gap.byte_size,
835        },
836        GapKind::Between => AnomalyKind::InterPartitionGap {
837            lba_start: gap.lba_start,
838            lba_end: gap.lba_end,
839            byte_size: gap.byte_size,
840        },
841        GapKind::PostPartition => AnomalyKind::PostPartitionSpace {
842            lba_start: gap.lba_start,
843            lba_end: gap.lba_end,
844            byte_size: gap.byte_size,
845        },
846    }
847}
848
849/// Bytes read from a partition's start for filesystem fingerprinting. Sized to
850/// reach the Btrfs superblock magic at 64 KiB (the deepest magic we recognise);
851/// shallower magics (NTFS@3, ext@1080, swap@4086) fall within it.
852const FS_FINGERPRINT_BYTES: usize = 65600 + 8;
853
854/// Read and fingerprint a partition's start. Returns `None` when the partition
855/// starts beyond the known disk size, or the read fails.
856///
857/// Reads up to [`FS_FINGERPRINT_BYTES`], tolerating a short read at end-of-disk
858/// — fingerprints are offset-based, so a partial window still matches every
859/// magic that fits within it.
860fn detect_partition_fs<R: Read + Seek>(
861    reader: &mut R,
862    byte_offset: u64,
863    disk_size_bytes: u64,
864) -> Option<DetectedFs> {
865    if disk_size_bytes != 0 && byte_offset >= disk_size_bytes {
866        return None;
867    }
868    match read_fingerprint(reader, byte_offset, FS_FINGERPRINT_BYTES) {
869        Ok(buf) => Some(signature::detect(&buf)),
870        Err(e) => {
871            diag::partition_read_failed(byte_offset, &e);
872            None
873        }
874    }
875}
876
877/// Read up to `max` bytes from `byte_offset`, returning however many were
878/// available (a short read at EOF is not an error).
879fn read_fingerprint<R: Read + Seek>(
880    reader: &mut R,
881    byte_offset: u64,
882    max: usize,
883) -> Result<Vec<u8>, Error> {
884    reader.seek(SeekFrom::Start(byte_offset))?;
885    let mut buf = vec![0u8; max];
886    let mut filled = 0;
887    while filled < max {
888        match reader.read(&mut buf[filled..]) {
889            Ok(0) => break,
890            Ok(n) => filled += n,
891            Err(e) if e.kind() == std::io::ErrorKind::Interrupted => {}
892            Err(e) => return Err(e.into()),
893        }
894    }
895    buf.truncate(filled);
896    Ok(buf)
897}
898
899/// Read a single 512-byte sector at `byte_offset`.
900fn read_first_sector<R: Read + Seek>(
901    reader: &mut R,
902    byte_offset: u64,
903) -> Result<[u8; SECTOR_SIZE], Error> {
904    reader.seek(SeekFrom::Start(byte_offset))?;
905    let mut buf = [0u8; SECTOR_SIZE];
906    reader.read_exact(&mut buf)?;
907    Ok(buf)
908}