Skip to main content

vmdk_forensic/
lib.rs

1//! Forensic integrity analysis for VMware VMDK images.
2//!
3//! `vmdk` is a lean `Read + Seek` reader. `vmdk-forensic` is the evidence-grade layer
4//! on top of it (the same split as `vhdx`/`vhdx-forensic` and `ewf`/`ewf-forensic`):
5//! it reparses the raw structure — so it works on images too damaged for some readers —
6//! and reports the redundant-grain-directory, dangling-pointer, recovery, and header
7//! provenance findings that `qemu-img` and `libvmdk` discard.
8//!
9//! `analyse()` returns canonical [`forensicnomicon::report::Finding`]s, so VMDK
10//! findings aggregate alongside every other `SecurityRonin` analyzer.
11//!
12//! ```no_run
13//! use vmdk_forensic::VmdkIntegrity;
14//! use forensicnomicon::report::Severity;
15//! let mut a = VmdkIntegrity::new(std::fs::File::open("disk.vmdk")?);
16//! for finding in a.analyse()? {
17//!     if finding.severity >= Some(Severity::Medium) {
18//!         println!("[{:?}] {} — {}", finding.severity, finding.code, finding.note);
19//!     }
20//! }
21//! # Ok::<(), std::io::Error>(())
22//! ```
23
24use std::io::{self, Read, Seek, SeekFrom};
25
26use forensicnomicon::report::{Category, Finding, Severity};
27
28use vmdk::header::{self, SparseExtentHeader};
29use vmdk::sesparse::{self, SeConstHeader};
30
31/// The lean reader, re-exported so this one crate covers read + forensic analysis.
32pub use vmdk::VmdkReader;
33
/// Size of one sector in bytes. Sector-denominated offsets read from the image
/// (grain directory, grain tables, grains) are multiplied by this to get byte offsets.
const SECTOR_SIZE: u64 = 512;
/// Cap on the grain-directory size read from a (possibly crafted) header (16 MiB).
/// Layouts whose directory would exceed this are treated as unparseable rather than read.
const MAX_GD_BYTES: u64 = 16 * 1024 * 1024;
37
/// Outcome of the structural pointer walk done by [`VmdkIntegrity::check_integrity`].
///
/// The walk tallies grain-directory and grain-table pointers that land outside the
/// backing file, which is what a truncated or tampered image looks like. Use
/// [`is_ok`](Self::is_ok) for the overall verdict.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct IntegrityReport {
    /// How many allocated grains were examined.
    pub grains_checked: u64,
    /// Allocated grains whose data would extend past end-of-file.
    pub out_of_bounds_grains: u64,
    /// Grain-directory entries whose grain table would extend past end-of-file.
    pub out_of_bounds_grain_tables: u64,
}

impl IntegrityReport {
    /// Verdict of the walk: `true` only when neither out-of-bounds counter is non-zero.
    ///
    /// `grains_checked` does not influence the result.
    #[must_use]
    pub fn is_ok(&self) -> bool {
        matches!(
            (self.out_of_bounds_grains, self.out_of_bounds_grain_tables),
            (0, 0)
        )
    }
}
60
/// Per-entry recovery analysis of the grain directory against its redundant copy.
///
/// VMDK keeps a redundant grain directory (RGD) so a damaged primary GD can still be
/// recovered; `qemu-img` discards it. `primary_intact + primary_damaged == total_entries`
/// and `recoverable_via_rgd + unrecoverable == primary_damaged`.
///
/// Every field keeps its `Default` value (zero / `false`) when the image is not a
/// parseable VMDK4 sparse image, or when its header records no RGD offset.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct GdRecoveryReport {
    /// `true` when the header records a redundant grain directory offset (neither `0`
    /// nor the "GD at end" sentinel). It stays `true` even when that directory lies
    /// beyond end-of-file and cannot be read; in that case nothing is counted as
    /// recoverable.
    pub has_rgd: bool,
    /// Number of grain-directory entries analysed.
    pub total_entries: usize,
    /// Primary entries usable as-is (in-bounds, or sparse and agreeing with the RGD).
    /// When the RGD cannot be read, every sparse (zero) primary entry counts as intact.
    pub primary_intact: usize,
    /// Primary entries that are damaged (out-of-bounds, or sparse where the RGD holds a pointer).
    pub primary_damaged: usize,
    /// Damaged primary entries the RGD can recover (the redundant pointer is in-bounds).
    pub recoverable_via_rgd: usize,
    /// Damaged primary entries the RGD cannot recover (damaged in both directories, or
    /// the RGD itself is unreadable).
    pub unrecoverable: usize,
}
82
/// Provenance read from the 512-byte sparse header — fields other readers discard.
///
/// Produced by `VmdkIntegrity::header_provenance`, which decodes these fields straight
/// from the first sector once the leading magic matches.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[allow(clippy::struct_excessive_bools)] // independent provenance flags, not a state enum
pub struct HeaderProvenance {
    /// Header format version (1, 2, or 3), read little-endian from bytes 4..8.
    /// The value is reported as stored; it is not range-checked.
    pub version: u32,
    /// `uncleanShutdown` (byte 72) — the disk was not closed cleanly (crash / power-loss
    /// / live image): in-flight writes may be inconsistent. Any non-zero byte counts as set.
    pub unclean_shutdown: bool,
    /// Newline-detection bytes (73..77) are exactly `0A 20 0D 0A`. `false` ⇒ the binary
    /// was mangled by an ASCII-mode FTP transfer (which rewrites `\r\n`).
    pub newline_check_intact: bool,
    /// Flag bit `0x2` — a redundant (secondary) grain directory is present.
    pub uses_redundant_gd: bool,
    /// Flag bit `0x10000` — grains carry compressed data.
    pub compressed: bool,
    /// Flag bit `0x20000` — the stream carries metadata markers (streamOptimized).
    pub has_markers: bool,
}
103
/// The kind of a forensic finding. Each variant carries the data its canonical
/// [`Finding`] needs; severity, code, note, category and MITRE refs are derived
/// from it via the [`Observation`](forensicnomicon::report::Observation) impl
/// below, so detection sites never spell out presentation logic.
///
/// The finding code each variant maps to is listed on the variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum AnomalyKind {
    /// `uncleanShutdown` flag set. Code `VMDK-UNCLEAN-SHUTDOWN`.
    UncleanShutdown,
    /// Header newline-detection bytes mangled (ASCII-mode FTP transfer).
    /// Code `VMDK-FTP-ASCII-MANGLED`.
    FtpAsciiMangled,
    /// Redundant grain directory diverges from the primary (grain-table contents differ).
    /// Code `VMDK-RGD-MISMATCH`.
    RedundantGdMismatch,
    /// The primary grain directory is damaged but (partly) recoverable via the RGD.
    /// Code `VMDK-PRIMARY-GD-RECOVERABLE`.
    PrimaryGdRecoverableViaRgd {
        /// Damaged primary GD entries.
        damaged: usize,
        /// Total GD entries.
        total: usize,
        /// Entries recoverable from the redundant copy.
        recoverable: usize,
    },
    /// The primary grain directory is damaged with no RGD recovery available.
    /// Code `VMDK-PRIMARY-GD-UNRECOVERABLE`.
    PrimaryGdUnrecoverable {
        /// Damaged entries with no redundant copy.
        unrecoverable: usize,
    },
    /// One or more grain-table pointers fall beyond end-of-file. Code `VMDK-DANGLING-GT`.
    DanglingGrainTable {
        /// Out-of-bounds grain-table pointers.
        count: u64,
    },
    /// One or more grain pointers fall beyond end-of-file. Code `VMDK-DANGLING-GRAIN`.
    DanglingGrain {
        /// Out-of-bounds grain pointers.
        count: u64,
    },
}
142
143impl forensicnomicon::report::Observation for AnomalyKind {
144    fn severity(&self) -> Option<Severity> {
145        use AnomalyKind::{
146            DanglingGrain, DanglingGrainTable, FtpAsciiMangled, PrimaryGdRecoverableViaRgd,
147            PrimaryGdUnrecoverable, RedundantGdMismatch, UncleanShutdown,
148        };
149        Some(match self {
150            UncleanShutdown => Severity::Low,
151            PrimaryGdRecoverableViaRgd { .. } => Severity::Medium,
152            FtpAsciiMangled
153            | RedundantGdMismatch
154            | PrimaryGdUnrecoverable { .. }
155            | DanglingGrainTable { .. }
156            | DanglingGrain { .. } => Severity::High,
157        })
158    }
159
160    fn category(&self) -> Category {
161        match self {
162            AnomalyKind::DanglingGrainTable { .. } | AnomalyKind::DanglingGrain { .. } => {
163                Category::Structure
164            }
165            _ => Category::Integrity,
166        }
167    }
168
169    fn code(&self) -> &'static str {
170        use AnomalyKind::{
171            DanglingGrain, DanglingGrainTable, FtpAsciiMangled, PrimaryGdRecoverableViaRgd,
172            PrimaryGdUnrecoverable, RedundantGdMismatch, UncleanShutdown,
173        };
174        match self {
175            UncleanShutdown => "VMDK-UNCLEAN-SHUTDOWN",
176            FtpAsciiMangled => "VMDK-FTP-ASCII-MANGLED",
177            RedundantGdMismatch => "VMDK-RGD-MISMATCH",
178            PrimaryGdRecoverableViaRgd { .. } => "VMDK-PRIMARY-GD-RECOVERABLE",
179            PrimaryGdUnrecoverable { .. } => "VMDK-PRIMARY-GD-UNRECOVERABLE",
180            DanglingGrainTable { .. } => "VMDK-DANGLING-GT",
181            DanglingGrain { .. } => "VMDK-DANGLING-GRAIN",
182        }
183    }
184
185    fn note(&self) -> String {
186        use AnomalyKind::{
187            DanglingGrain, DanglingGrainTable, FtpAsciiMangled, PrimaryGdRecoverableViaRgd,
188            PrimaryGdUnrecoverable, RedundantGdMismatch, UncleanShutdown,
189        };
190        match self {
191            UncleanShutdown => "uncleanShutdown flag set — the disk was not closed cleanly; \
192                                in-flight writes may be inconsistent"
193                .to_string(),
194            FtpAsciiMangled => "header newline-detection bytes mangled — the image was likely \
195                                corrupted by an ASCII-mode FTP transfer"
196                .to_string(),
197            RedundantGdMismatch => "redundant grain directory diverges from the primary — the \
198                                    grain tables they reference hold different contents"
199                .to_string(),
200            PrimaryGdRecoverableViaRgd { damaged, total, recoverable } => format!(
201                "{damaged} of {total} grain-directory entries damaged, {recoverable} recoverable via the RGD"
202            ),
203            PrimaryGdUnrecoverable { unrecoverable } => format!(
204                "{unrecoverable} grain-directory entries damaged with no RGD recovery available"
205            ),
206            DanglingGrainTable { count } => format!(
207                "{count} grain-table pointer(s) point beyond end-of-file (truncation or tampering)"
208            ),
209            DanglingGrain { count } => format!(
210                "{count} grain pointer(s) point beyond end-of-file (truncation or tampering)"
211            ),
212        }
213    }
214
215    fn mitre(&self) -> &'static [&'static str] {
216        match self {
217            // Divergent GD/RGD contents are consistent with stored-data manipulation.
218            AnomalyKind::RedundantGdMismatch => &["T1565.001"],
219            _ => &[],
220        }
221    }
222
223    fn evidence(&self) -> Vec<forensicnomicon::report::Evidence> {
224        use AnomalyKind::{
225            DanglingGrain, DanglingGrainTable, PrimaryGdRecoverableViaRgd, PrimaryGdUnrecoverable,
226        };
227        let ev = |field: &str, value: String| forensicnomicon::report::Evidence {
228            field: field.to_string(),
229            value,
230            location: None,
231        };
232        match self {
233            PrimaryGdRecoverableViaRgd {
234                damaged,
235                total,
236                recoverable,
237            } => vec![
238                ev("damaged", damaged.to_string()),
239                ev("total", total.to_string()),
240                ev("recoverable", recoverable.to_string()),
241            ],
242            PrimaryGdUnrecoverable { unrecoverable } => {
243                vec![ev("unrecoverable", unrecoverable.to_string())]
244            }
245            DanglingGrainTable { count } | DanglingGrain { count } => {
246                vec![ev("count", count.to_string())]
247            }
248            _ => Vec::new(),
249        }
250    }
251}
252
/// Reparsed VMDK4 sparse layout needed for RGD / integrity analysis.
///
/// Built by `VmdkIntegrity::sparse_layout` from the sparse header and the primary
/// grain directory it points at.
struct SparseLayout {
    /// Primary grain-directory entries, decoded as little-endian `u32`. Each entry is
    /// treated as the sector number of a grain table; `0` means no table.
    grain_dir: Vec<u32>,
    /// Redundant grain-directory offset copied from the header, in sectors.
    rgd_offset: u64,
    /// Number of grain-table entries per grain table, from the header.
    num_gtes_per_gt: u64,
    /// Grain size in bytes (header grain size in sectors × `SECTOR_SIZE`, saturating).
    grain_size_bytes: u64,
    /// Number of grain-directory entries, i.e. the number of grain tables needed to
    /// cover the header capacity.
    gd_entry_count: usize,
    /// Length of the backing source in bytes at the time the layout was parsed.
    file_len: u64,
}
262
/// Forensic integrity analyzer over any `Read + Seek` VMDK source.
///
/// Reparses the raw structure on each call, so a single instance can run several
/// analyses and tolerates partially-damaged images.
pub struct VmdkIntegrity<R: Read + Seek> {
    /// The wrapped source. Every analysis seeks within it, so its stream position is
    /// not preserved across calls.
    inner: R,
}
270
271impl<R: Read + Seek> VmdkIntegrity<R> {
272    /// Wrap a `Read + Seek` VMDK source.
273    pub fn new(reader: R) -> Self {
274        Self { inner: reader }
275    }
276
277    /// Recover the wrapped reader.
278    pub fn into_inner(self) -> R {
279        self.inner
280    }
281
282    fn file_len(&mut self) -> io::Result<u64> {
283        self.inner.seek(SeekFrom::End(0))
284    }
285
286    fn read_at(&mut self, offset: u64, len: usize) -> io::Result<Vec<u8>> {
287        self.inner.seek(SeekFrom::Start(offset))?;
288        let mut buf = vec![0u8; len];
289        self.inner.read_exact(&mut buf)?;
290        Ok(buf)
291    }
292
293    /// Reparse the VMDK4 sparse layout, or `None` if the image is not a parseable
294    /// VMDK4 sparse image (flat / COWD / seSparse / unreadable).
295    fn sparse_layout(&mut self) -> io::Result<Option<SparseLayout>> {
296        let file_len = self.file_len()?;
297        if file_len < SECTOR_SIZE {
298            return Ok(None);
299        }
300        let hdr_bytes = self.read_at(0, SECTOR_SIZE as usize)?;
301        let Ok(hdr) = SparseExtentHeader::parse(&hdr_bytes) else {
302            return Ok(None);
303        };
304        let num_grains = hdr.capacity.div_ceil(hdr.grain_size);
305        let num_gtes = u64::from(hdr.num_gtes_per_gt);
306        let num_gts = num_grains.div_ceil(num_gtes);
307        let gd_byte_len = num_gts.saturating_mul(4);
308        if gd_byte_len > MAX_GD_BYTES {
309            return Ok(None);
310        }
311        let gd_byte_len = gd_byte_len as usize;
312
313        // streamOptimized stores the real GD offset in the footer header.
314        let gd_offset = if hdr.gd_offset == header::GD_AT_END {
315            if file_len < 1024 {
316                return Ok(None);
317            }
318            let footer = self.read_at(file_len - 1024, SECTOR_SIZE as usize)?;
319            match SparseExtentHeader::parse(&footer) {
320                Ok(fh) => fh.gd_offset,
321                Err(_) => return Ok(None),
322            }
323        } else {
324            hdr.gd_offset
325        };
326
327        let gd_byte = gd_offset.saturating_mul(SECTOR_SIZE);
328        if gd_byte.saturating_add(gd_byte_len as u64) > file_len {
329            return Ok(None);
330        }
331        let gd = self.read_at(gd_byte, gd_byte_len)?;
332        let grain_dir: Vec<u32> = gd
333            .chunks_exact(4)
334            .map(|c| u32::from_le_bytes(c.try_into().expect("4 bytes")))
335            .collect();
336
337        Ok(Some(SparseLayout {
338            grain_dir,
339            rgd_offset: hdr.rgd_offset,
340            num_gtes_per_gt: num_gtes,
341            grain_size_bytes: hdr.grain_size.saturating_mul(SECTOR_SIZE),
342            gd_entry_count: num_gts as usize,
343            file_len,
344        }))
345    }
346
347    fn read_grain_table_bytes(
348        &mut self,
349        gt_sector: u32,
350        gt_byte_len: usize,
351        file_len: u64,
352    ) -> io::Result<Option<Vec<u8>>> {
353        let gt_byte = u64::from(gt_sector) * SECTOR_SIZE;
354        if gt_byte.saturating_add(gt_byte_len as u64) > file_len {
355            return Ok(None);
356        }
357        Ok(Some(self.read_at(gt_byte, gt_byte_len)?))
358    }
359
360    /// Read the redundant grain directory, bounds-checked, or `None` if absent/OOB.
361    fn read_rgd(&mut self, layout: &SparseLayout) -> io::Result<Option<Vec<u32>>> {
362        if layout.rgd_offset == 0 || layout.rgd_offset == header::GD_AT_END {
363            return Ok(None);
364        }
365        let rgd_byte = layout.rgd_offset.saturating_mul(SECTOR_SIZE);
366        let len = layout.gd_entry_count.saturating_mul(4) as u64;
367        if rgd_byte.saturating_add(len) > layout.file_len {
368            return Ok(None);
369        }
370        let bytes = self.read_at(rgd_byte, len as usize)?;
371        Ok(Some(
372            bytes
373                .chunks_exact(4)
374                .map(|c| u32::from_le_bytes(c.try_into().expect("4 bytes")))
375                .collect(),
376        ))
377    }
378
379    /// Validate the redundant grain directory by comparing the grain-table **contents**
380    /// each directory references (not the pointers, which differ by design in every
381    /// healthy multi-copy image). `Ok(true)` when they match, `Ok(false)` when the RGD
382    /// is absent or diverges.
383    pub fn validate_rgd(&mut self) -> io::Result<bool> {
384        let Some(layout) = self.sparse_layout()? else {
385            return Ok(false);
386        };
387        let Some(rgd) = self.read_rgd(&layout)? else {
388            return Ok(false);
389        };
390        let gt_byte_len = (layout.num_gtes_per_gt * 4) as usize;
391        for i in 0..layout.gd_entry_count {
392            let p = layout.grain_dir.get(i).copied().unwrap_or(0);
393            let r = rgd.get(i).copied().unwrap_or(0);
394            if p == 0 && r == 0 {
395                continue;
396            }
397            if (p == 0) != (r == 0) {
398                return Ok(false);
399            }
400            let pgt = self.read_grain_table_bytes(p, gt_byte_len, layout.file_len)?;
401            let rgt = self.read_grain_table_bytes(r, gt_byte_len, layout.file_len)?;
402            match (pgt, rgt) {
403                (Some(a), Some(b)) if a == b => {}
404                _ => return Ok(false),
405            }
406        }
407        Ok(true)
408    }
409
410    /// Classify every grain-directory entry against the redundant copy: how much of the
411    /// primary GD is intact, damaged, and recoverable via the RGD.
412    pub fn grain_directory_recovery(&mut self) -> io::Result<GdRecoveryReport> {
413        let Some(layout) = self.sparse_layout()? else {
414            return Ok(GdRecoveryReport::default());
415        };
416        let rgd = self.read_rgd(&layout)?;
417        let Some(rgd) = rgd else {
418            // rgd_offset is 0/sentinel → no RGD at all.
419            if layout.rgd_offset == 0 || layout.rgd_offset == header::GD_AT_END {
420                return Ok(GdRecoveryReport::default());
421            }
422            // RGD present in the header but its directory is out of bounds: nothing recoverable.
423            let mut report = GdRecoveryReport {
424                has_rgd: true,
425                total_entries: layout.gd_entry_count,
426                ..GdRecoveryReport::default()
427            };
428            for &p in &layout.grain_dir {
429                if Self::in_bounds(p, layout.num_gtes_per_gt, layout.file_len) || p == 0 {
430                    report.primary_intact += 1;
431                } else {
432                    report.primary_damaged += 1;
433                    report.unrecoverable += 1;
434                }
435            }
436            return Ok(report);
437        };
438
439        let mut report = GdRecoveryReport {
440            has_rgd: true,
441            total_entries: layout.gd_entry_count,
442            ..GdRecoveryReport::default()
443        };
444        for i in 0..layout.gd_entry_count {
445            let p = layout.grain_dir.get(i).copied().unwrap_or(0);
446            let r = rgd.get(i).copied().unwrap_or(0);
447            let p_ok = Self::in_bounds(p, layout.num_gtes_per_gt, layout.file_len);
448            if p_ok || (p == 0 && r == 0) {
449                report.primary_intact += 1;
450            } else {
451                report.primary_damaged += 1;
452                if Self::in_bounds(r, layout.num_gtes_per_gt, layout.file_len) {
453                    report.recoverable_via_rgd += 1;
454                } else {
455                    report.unrecoverable += 1;
456                }
457            }
458        }
459        Ok(report)
460    }
461
462    fn in_bounds(ptr: u32, num_gtes_per_gt: u64, file_len: u64) -> bool {
463        ptr != 0
464            && u64::from(ptr)
465                .saturating_mul(SECTOR_SIZE)
466                .saturating_add(num_gtes_per_gt * 4)
467                <= file_len
468    }
469
470    /// Walk the grain directory and tables, counting pointers that fall beyond
471    /// end-of-file (the signature of a truncated or tampered image). Covers VMDK4
472    /// sparse and seSparse; flat/COWD images report clean.
473    pub fn check_integrity(&mut self) -> io::Result<IntegrityReport> {
474        let file_len = self.file_len()?;
475        if file_len < SECTOR_SIZE {
476            return Ok(IntegrityReport::default());
477        }
478        let head = self.read_at(0, 8)?;
479        let magic8 = u64::from_le_bytes(head.try_into().expect("8 bytes"));
480        if magic8 == sesparse::SE_CONST_MAGIC {
481            return self.check_integrity_sesparse(file_len);
482        }
483
484        let Some(layout) = self.sparse_layout()? else {
485            return Ok(IntegrityReport::default());
486        };
487        let mut report = IntegrityReport::default();
488        let gt_byte_len = layout.num_gtes_per_gt * 4;
489        for &gt_sector in &layout.grain_dir {
490            if gt_sector == 0 {
491                continue;
492            }
493            let gt_byte = u64::from(gt_sector) * SECTOR_SIZE;
494            if gt_byte.saturating_add(gt_byte_len) > file_len {
495                report.out_of_bounds_grain_tables += 1;
496                continue;
497            }
498            let gt = self.read_at(gt_byte, gt_byte_len as usize)?;
499            for c in gt.chunks_exact(4) {
500                let gte = u32::from_le_bytes(c.try_into().expect("4 bytes"));
501                if gte <= 1 {
502                    continue; // sparse / explicitly-zeroed
503                }
504                report.grains_checked += 1;
505                let grain_byte = u64::from(gte) * SECTOR_SIZE;
506                if grain_byte.saturating_add(layout.grain_size_bytes) > file_len {
507                    report.out_of_bounds_grains += 1;
508                }
509            }
510        }
511        Ok(report)
512    }
513
514    fn check_integrity_sesparse(&mut self, file_len: u64) -> io::Result<IntegrityReport> {
515        let mut report = IntegrityReport::default();
516        let hdr_bytes = self.read_at(0, SECTOR_SIZE as usize)?;
517        let Ok(hdr) = SeConstHeader::parse(&hdr_bytes) else {
518            return Ok(report);
519        };
520        if hdr.grain_size == 0 {
521            return Ok(report);
522        }
523        let num_grains = hdr.capacity.div_ceil(hdr.grain_size);
524        let num_gts = num_grains.div_ceil(sesparse::SE_GTES_PER_GT).max(1);
525        let gd_len = num_gts.saturating_mul(8);
526        let gd_byte = hdr.gd_offset.saturating_mul(SECTOR_SIZE);
527        if gd_len > MAX_GD_BYTES || gd_byte.saturating_add(gd_len) > file_len {
528            return Ok(report);
529        }
530        let gd = self.read_at(gd_byte, gd_len as usize)?;
531        let grain_dir: Vec<u64> = gd
532            .chunks_exact(8)
533            .map(|c| u64::from_le_bytes(c.try_into().expect("8 bytes")))
534            .collect();
535        let grain_size_bytes = hdr.grain_size.saturating_mul(SECTOR_SIZE);
536        let grain_sectors = hdr.grain_size;
537        let gt_byte_len = sesparse::SE_GTES_PER_GT * 8;
538        for &gd_entry in &grain_dir {
539            if gd_entry == 0 {
540                continue;
541            }
542            if gd_entry & sesparse::SE_GD_ALLOC_MASK != sesparse::SE_GD_ALLOC_FLAG {
543                report.out_of_bounds_grain_tables += 1;
544                continue;
545            }
546            let gt_table_idx = gd_entry & sesparse::SE_GD_INDEX_MASK;
547            let gt_sector = hdr
548                .gt_offset
549                .saturating_add(gt_table_idx.saturating_mul(sesparse::SE_GT_SECTORS));
550            let gt_byte = gt_sector.saturating_mul(SECTOR_SIZE);
551            if gt_byte.saturating_add(gt_byte_len) > file_len {
552                report.out_of_bounds_grain_tables += 1;
553                continue;
554            }
555            let gt = self.read_at(gt_byte, gt_byte_len as usize)?;
556            for c in gt.chunks_exact(8) {
557                let gte = u64::from_le_bytes(c.try_into().expect("8 bytes"));
558                if gte & sesparse::SE_GTE_TYPE_MASK != sesparse::SE_GTE_TYPE_ALLOCATED {
559                    continue;
560                }
561                report.grains_checked += 1;
562                let grain_idx = sesparse::se_gte_grain_index(gte);
563                let grain_byte = hdr
564                    .grains_offset
565                    .saturating_add(grain_idx.saturating_mul(grain_sectors))
566                    .saturating_mul(SECTOR_SIZE);
567                if grain_byte.saturating_add(grain_size_bytes) > file_len {
568                    report.out_of_bounds_grains += 1;
569                }
570            }
571        }
572        Ok(report)
573    }
574
575    /// Read the 512-byte sparse-header provenance, or `None` if the header is not VMDK4.
576    pub fn header_provenance(&mut self) -> io::Result<Option<HeaderProvenance>> {
577        let file_len = self.file_len()?;
578        if file_len < SECTOR_SIZE {
579            return Ok(None);
580        }
581        let hdr = self.read_at(0, SECTOR_SIZE as usize)?;
582        if u32::from_le_bytes(hdr[0..4].try_into().expect("4 bytes")) != header::MAGIC {
583            return Ok(None);
584        }
585        let version = u32::from_le_bytes(hdr[4..8].try_into().expect("4 bytes"));
586        let flags = u32::from_le_bytes(hdr[8..12].try_into().expect("4 bytes"));
587        Ok(Some(HeaderProvenance {
588            version,
589            unclean_shutdown: hdr[72] != 0,
590            newline_check_intact: hdr[73..77] == [0x0A, 0x20, 0x0D, 0x0A],
591            uses_redundant_gd: flags & 0x0000_0002 != 0,
592            compressed: flags & 0x0001_0000 != 0,
593            has_markers: flags & 0x0002_0000 != 0,
594        }))
595    }
596
597    /// Run every analysis and aggregate the findings into a graded list of
598    /// canonical [`Finding`]s, sorted most-severe first.
599    pub fn analyse(&mut self) -> io::Result<Vec<Finding>> {
600        use forensicnomicon::report::{Observation, Source};
601
602        let mut kinds: Vec<AnomalyKind> = Vec::new();
603
604        if let Some(p) = self.header_provenance()? {
605            if p.unclean_shutdown {
606                kinds.push(AnomalyKind::UncleanShutdown);
607            }
608            if !p.newline_check_intact {
609                kinds.push(AnomalyKind::FtpAsciiMangled);
610            }
611        }
612
613        let recovery = self.grain_directory_recovery()?;
614        if recovery.has_rgd && !self.validate_rgd()? {
615            kinds.push(AnomalyKind::RedundantGdMismatch);
616        }
617        if recovery.recoverable_via_rgd > 0 {
618            kinds.push(AnomalyKind::PrimaryGdRecoverableViaRgd {
619                damaged: recovery.primary_damaged,
620                total: recovery.total_entries,
621                recoverable: recovery.recoverable_via_rgd,
622            });
623        }
624        if recovery.unrecoverable > 0 {
625            kinds.push(AnomalyKind::PrimaryGdUnrecoverable {
626                unrecoverable: recovery.unrecoverable,
627            });
628        }
629
630        let integrity = self.check_integrity()?;
631        if integrity.out_of_bounds_grain_tables > 0 {
632            kinds.push(AnomalyKind::DanglingGrainTable {
633                count: integrity.out_of_bounds_grain_tables,
634            });
635        }
636        if integrity.out_of_bounds_grains > 0 {
637            kinds.push(AnomalyKind::DanglingGrain {
638                count: integrity.out_of_bounds_grains,
639            });
640        }
641
642        let source = Source {
643            analyzer: "vmdk-forensic".to_string(),
644            scope: "VMDK".to_string(),
645            version: Some(env!("CARGO_PKG_VERSION").to_string()),
646        };
647        let mut out: Vec<Finding> = kinds.iter().map(|k| k.to_finding(source.clone())).collect();
648        // Most-severe first; unrated (None) sorts last.
649        out.sort_by(|a, b| b.severity.cmp(&a.severity));
650        Ok(out)
651    }
652}
653
654#[cfg(test)]
655mod tests {
656    use super::*;
657    use forensicnomicon::report::Severity;
658    use std::io::Cursor;
659    use vmdk::testutil::{test_sesparse_vmdk, test_sparse_vmdk};
660
661    #[test]
662    fn header_provenance_clean_image() {
663        let v = test_sparse_vmdk(&[0u8; 512]);
664        let mut a = VmdkIntegrity::new(Cursor::new(v));
665        let p = a.header_provenance().expect("io").expect("VMDK4 header");
666        assert_eq!(p.version, 1);
667        assert!(!p.unclean_shutdown);
668        assert!(p.newline_check_intact);
669    }
670
671    #[test]
672    fn validate_rgd_true_on_healthy_image() {
673        let v = test_sparse_vmdk(&[0xAB; 512]);
674        let mut a = VmdkIntegrity::new(Cursor::new(v));
675        assert!(a.validate_rgd().expect("io"));
676    }
677
678    #[test]
679    fn validate_rgd_false_on_redundant_gt_divergence() {
680        // Corrupt the redundant grain table (sector 22 in the test fixture).
681        let mut v = test_sparse_vmdk(&[0xAB; 512]);
682        v[22 * 512] ^= 0xFF;
683        let mut a = VmdkIntegrity::new(Cursor::new(v));
684        assert!(!a.validate_rgd().expect("io"));
685    }
686
687    #[test]
688    fn grain_directory_recovery_flags_recoverable_damage() {
689        let mut v = test_sparse_vmdk(&[0xAB; 512]);
690        v[21 * 512..21 * 512 + 4].copy_from_slice(&0xFFFF_FFFFu32.to_le_bytes()); // primary GD[0] damaged
691        let mut a = VmdkIntegrity::new(Cursor::new(v));
692        let r = a.grain_directory_recovery().expect("io");
693        assert!(r.has_rgd);
694        assert_eq!(r.primary_damaged, 1);
695        assert_eq!(r.recoverable_via_rgd, 1);
696    }
697
698    #[test]
699    fn check_integrity_clean_then_flags_dangling_gt() {
700        let v = test_sparse_vmdk(&[0xAB; 512]);
701        let mut a = VmdkIntegrity::new(Cursor::new(v));
702        assert!(a.check_integrity().expect("io").is_ok());
703
704        let mut v2 = test_sparse_vmdk(&[0xAB; 512]);
705        v2[21 * 512..21 * 512 + 4].copy_from_slice(&0xFFFF_FFFFu32.to_le_bytes());
706        let mut a2 = VmdkIntegrity::new(Cursor::new(v2));
707        let rep = a2.check_integrity().expect("io");
708        assert!(!rep.is_ok());
709        assert_eq!(rep.out_of_bounds_grain_tables, 1);
710    }
711
712    #[test]
713    fn analyse_reports_rgd_mismatch_anomaly() {
714        let mut v = test_sparse_vmdk(&[0xAB; 512]);
715        v[22 * 512] ^= 0xFF; // redundant GT diverges
716        let mut a = VmdkIntegrity::new(Cursor::new(v));
717        let anomalies = a.analyse().expect("io");
718        assert!(
719            anomalies
720                .iter()
721                .any(|x| x.code.as_ref() == "VMDK-RGD-MISMATCH"),
722            "expected an RGD mismatch anomaly, got: {anomalies:?}"
723        );
724    }
725
726    #[test]
727    fn into_inner_returns_reader() {
728        let v = test_sparse_vmdk(&[0u8; 512]);
729        let a = VmdkIntegrity::new(Cursor::new(v));
730        let _cursor = a.into_inner();
731    }
732
733    #[test]
734    fn header_provenance_flags_unclean_shutdown_and_ftp_mangling() {
735        let mut v = test_sparse_vmdk(&[0u8; 512]);
736        v[72] = 1; // uncleanShutdown
737        v[73] = 0x20; // break the 0A 20 0D 0A newline-detection sequence
738        let mut a = VmdkIntegrity::new(Cursor::new(v));
739        let p = a.header_provenance().expect("io").expect("vmdk4");
740        assert!(p.unclean_shutdown);
741        assert!(!p.newline_check_intact);
742    }
743
744    #[test]
745    fn header_provenance_none_for_non_vmdk4() {
746        let mut a = VmdkIntegrity::new(Cursor::new(vec![0u8; 1024]));
747        assert!(a.header_provenance().expect("io").is_none());
748    }
749
750    #[test]
751    fn validate_rgd_false_for_sesparse() {
752        let se = test_sesparse_vmdk(&[0u8; 512]);
753        let mut a = VmdkIntegrity::new(Cursor::new(se));
754        assert!(!a.validate_rgd().expect("io")); // no RGD in seSparse
755    }
756
757    #[test]
758    fn grain_directory_recovery_default_when_no_rgd() {
759        let mut v = test_sparse_vmdk(&[0u8; 512]);
760        v[48..56].copy_from_slice(&0u64.to_le_bytes()); // rgd_offset = 0
761        let mut a = VmdkIntegrity::new(Cursor::new(v));
762        let r = a.grain_directory_recovery().expect("io");
763        assert!(!r.has_rgd);
764        assert_eq!(r.total_entries, 0);
765    }
766
767    #[test]
768    fn grain_directory_recovery_counts_unrecoverable() {
769        let mut v = test_sparse_vmdk(&[0u8; 512]);
770        v[21 * 512..21 * 512 + 4].copy_from_slice(&0xFFFF_FFFFu32.to_le_bytes()); // primary GD[0]
771        v[22 * 512..22 * 512 + 4].copy_from_slice(&0xFFFF_FFFFu32.to_le_bytes()); // RGD[0]
772        let mut a = VmdkIntegrity::new(Cursor::new(v));
773        let r = a.grain_directory_recovery().expect("io");
774        assert_eq!(r.primary_damaged, 1);
775        assert_eq!(r.unrecoverable, 1);
776        assert_eq!(r.recoverable_via_rgd, 0);
777    }
778
779    #[test]
780    fn grain_directory_recovery_clean_all_intact() {
781        let v = test_sparse_vmdk(&[0xAB; 512]);
782        let mut a = VmdkIntegrity::new(Cursor::new(v));
783        let r = a.grain_directory_recovery().expect("io");
784        assert!(r.has_rgd);
785        assert_eq!(r.primary_intact, r.total_entries);
786        assert_eq!(r.primary_damaged, 0);
787    }
788
789    #[test]
790    fn sesparse_integrity_clean_and_flagged() {
791        // Clean seSparse: no out-of-bounds pointers.
792        let se = test_sesparse_vmdk(&[0xAB; 512]);
793        let mut a = VmdkIntegrity::new(Cursor::new(se));
794        assert!(a.check_integrity().expect("io").is_ok());
795
796        // Invalid GD allocation marker → flagged grain table.
797        let mut se2 = test_sesparse_vmdk(&[0xAB; 512]);
798        let gd = 2 * 512;
799        se2[gd..gd + 8].copy_from_slice(&0x5000_0000_0000_0000u64.to_le_bytes());
800        let mut a2 = VmdkIntegrity::new(Cursor::new(se2));
801        let rep = a2.check_integrity().expect("io");
802        assert!(!rep.is_ok());
803        assert_eq!(rep.out_of_bounds_grain_tables, 1);
804
805        // Allocated marker pointing to a grain table past EOF → flagged.
806        let mut se3 = test_sesparse_vmdk(&[0xAB; 512]);
807        se3[gd..gd + 8].copy_from_slice(&(0x1000_0000_0000_0000u64 | 0xFFFF_FFFF).to_le_bytes());
808        let mut a3 = VmdkIntegrity::new(Cursor::new(se3));
809        assert!(!a3.check_integrity().expect("io").is_ok());
810    }
811
812    #[test]
813    fn check_integrity_flags_grain_past_eof() {
814        // Point a primary GT entry at a grain past EOF (GT at sector 23, GTE[0]).
815        let mut v = test_sparse_vmdk(&[0xAB; 512]);
816        v[23 * 512..23 * 512 + 4].copy_from_slice(&0xFFFF_FFFFu32.to_le_bytes());
817        let mut a = VmdkIntegrity::new(Cursor::new(v));
818        let rep = a.check_integrity().expect("io");
819        assert_eq!(rep.out_of_bounds_grains, 1);
820        assert!(!rep.is_ok());
821    }
822
823    #[test]
824    fn analyse_flags_unclean_shutdown_warning() {
825        let mut v = test_sparse_vmdk(&[0u8; 512]);
826        v[72] = 1;
827        let mut a = VmdkIntegrity::new(Cursor::new(v));
828        let anomalies = a.analyse().expect("io");
829        assert!(anomalies
830            .iter()
831            .any(|x| x.code.as_ref() == "VMDK-UNCLEAN-SHUTDOWN"));
832    }
833
834    #[test]
835    fn analyse_flags_dangling_and_recoverable_for_corrupt_primary_gd() {
836        let mut v = test_sparse_vmdk(&[0xAB; 512]);
837        v[21 * 512..21 * 512 + 4].copy_from_slice(&0xFFFF_FFFFu32.to_le_bytes());
838        let mut a = VmdkIntegrity::new(Cursor::new(v));
839        let k: Vec<_> = a
840            .analyse()
841            .expect("io")
842            .into_iter()
843            .map(|x| x.code)
844            .collect();
845        assert!(k.iter().any(|c| c.as_ref() == "VMDK-DANGLING-GT"));
846        assert!(k
847            .iter()
848            .any(|c| c.as_ref() == "VMDK-PRIMARY-GD-RECOVERABLE"));
849        // first finding is the most severe (Error sorts before Warning)
850    }
851
852    #[test]
853    fn analyse_flags_unrecoverable_when_both_directories_damaged() {
854        let mut v = test_sparse_vmdk(&[0xAB; 512]);
855        v[21 * 512..21 * 512 + 4].copy_from_slice(&0xFFFF_FFFFu32.to_le_bytes());
856        v[22 * 512..22 * 512 + 4].copy_from_slice(&0xFFFF_FFFFu32.to_le_bytes());
857        let mut a = VmdkIntegrity::new(Cursor::new(v));
858        let k: Vec<_> = a
859            .analyse()
860            .expect("io")
861            .into_iter()
862            .map(|x| x.code)
863            .collect();
864        assert!(k
865            .iter()
866            .any(|c| c.as_ref() == "VMDK-PRIMARY-GD-UNRECOVERABLE"));
867    }
868
869    #[test]
870    fn tiny_and_garbage_inputs_are_safe() {
871        for bytes in [vec![0u8; 100], vec![0xFFu8; 512], vec![0u8; 600]] {
872            let mut a = VmdkIntegrity::new(Cursor::new(bytes));
873            assert!(!a.validate_rgd().expect("io"));
874            assert!(a.check_integrity().expect("io").is_ok());
875            assert!(a.grain_directory_recovery().expect("io").total_entries == 0);
876            assert!(a.header_provenance().expect("io").is_none());
877            let _ = a.analyse().expect("io");
878        }
879    }
880
881    #[test]
882    fn validate_rgd_false_when_only_one_directory_has_a_gt() {
883        let mut v = test_sparse_vmdk(&[0xAB; 512]);
884        v[21 * 512..21 * 512 + 4].copy_from_slice(&0u32.to_le_bytes()); // primary GD[0] sparse, RGD[0] not
885        let mut a = VmdkIntegrity::new(Cursor::new(v));
886        assert!(!a.validate_rgd().expect("io"));
887    }
888
889    #[test]
890    fn grain_directory_recovery_rgd_directory_out_of_bounds_is_unrecoverable() {
891        let mut v = test_sparse_vmdk(&[0xAB; 512]);
892        v[21 * 512..21 * 512 + 4].copy_from_slice(&0xFFFF_FFFFu32.to_le_bytes()); // primary damaged
893        v[48..56].copy_from_slice(&9_999_999u64.to_le_bytes()); // rgd_offset past EOF
894        let mut a = VmdkIntegrity::new(Cursor::new(v));
895        let r = a.grain_directory_recovery().expect("io");
896        assert!(r.has_rgd);
897        assert_eq!(r.unrecoverable, 1);
898        assert_eq!(r.recoverable_via_rgd, 0);
899    }
900
901    #[test]
902    fn analyse_flags_ftp_ascii_mangling() {
903        let mut v = test_sparse_vmdk(&[0u8; 512]);
904        v[73] = 0x20; // break the newline-detection sequence
905        let mut a = VmdkIntegrity::new(Cursor::new(v));
906        let k: Vec<_> = a
907            .analyse()
908            .expect("io")
909            .into_iter()
910            .map(|x| x.code)
911            .collect();
912        assert!(k.iter().any(|c| c.as_ref() == "VMDK-FTP-ASCII-MANGLED"));
913    }
914
915    #[test]
916    fn analyse_flags_dangling_grain() {
917        let mut v = test_sparse_vmdk(&[0xAB; 512]);
918        v[23 * 512..23 * 512 + 4].copy_from_slice(&0xFFFF_FFFFu32.to_le_bytes()); // GT[0] → grain past EOF
919        let mut a = VmdkIntegrity::new(Cursor::new(v));
920        let k: Vec<_> = a
921            .analyse()
922            .expect("io")
923            .into_iter()
924            .map(|x| x.code)
925            .collect();
926        assert!(k.iter().any(|c| c.as_ref() == "VMDK-DANGLING-GRAIN"));
927    }
928
929    #[test]
930    fn sesparse_grain_past_eof_is_flagged() {
931        // An allocated seSparse GTE whose grain index lands past EOF.
932        let mut se = test_sesparse_vmdk(&[0xAB; 512]);
933        // GT entries for seSparse live after the GD; corrupt the first GTE of GT 0.
934        // GD[0] at sector 2 holds the allocated GT index; the GT is at gt_offset.
935        // Easiest reachable path: set capacity huge so grains land past EOF.
936        se[16..24].copy_from_slice(&u64::MAX.to_le_bytes()); // capacity field
937        let mut a = VmdkIntegrity::new(Cursor::new(se));
938        let _ = a.check_integrity().expect("io"); // must not panic on the crafted capacity
939    }
940
941    #[test]
942    fn validate_rgd_false_when_grain_directory_out_of_bounds() {
943        let mut v = test_sparse_vmdk(&[0u8; 512]);
944        v[56..64].copy_from_slice(&9_999_999u64.to_le_bytes()); // gd_offset past EOF
945        let mut a = VmdkIntegrity::new(Cursor::new(v));
946        assert!(!a.validate_rgd().expect("io"));
947        assert!(a.check_integrity().expect("io").is_ok()); // no parseable layout
948    }
949
950    #[test]
951    fn validate_rgd_false_when_rgd_directory_out_of_bounds() {
952        let mut v = test_sparse_vmdk(&[0xAB; 512]);
953        v[48..56].copy_from_slice(&9_999_999u64.to_le_bytes()); // rgd_offset past EOF
954        let mut a = VmdkIntegrity::new(Cursor::new(v));
955        assert!(!a.validate_rgd().expect("io"));
956    }
957
958    #[test]
959    fn sesparse_zero_grain_size_and_oob_gd_are_safe() {
960        let mut se = test_sesparse_vmdk(&[0u8; 512]);
961        se[24..32].copy_from_slice(&0u64.to_le_bytes()); // grain_size = 0
962        let mut a = VmdkIntegrity::new(Cursor::new(se));
963        assert!(a.check_integrity().expect("io").is_ok());
964
965        let mut se2 = test_sesparse_vmdk(&[0u8; 512]);
966        se2[128..136].copy_from_slice(&9_999_999u64.to_le_bytes()); // gd_offset past EOF
967        let mut a2 = VmdkIntegrity::new(Cursor::new(se2));
968        assert!(a2.check_integrity().expect("io").is_ok());
969    }
970
971    #[test]
972    fn streamoptimized_gd_at_end_footer_resolution() {
973        // Build a GD_AT_END image: the primary header's gd_offset is the sentinel and the
974        // real GD offset lives in the footer header (file_end - 1024).
975        let mut v = test_sparse_vmdk(&[0xAB; 512]);
976        v[56..64].copy_from_slice(&0xFFFF_FFFF_FFFF_FFFFu64.to_le_bytes()); // gd_offset = GD_AT_END
977        let mut footer = v[0..512].to_vec();
978        footer[56..64].copy_from_slice(&21u64.to_le_bytes()); // footer points at the real GD (sector 21)
979        v.extend_from_slice(&footer);
980        v.extend_from_slice(&[0u8; 512]); // EOS marker
981        let mut a = VmdkIntegrity::new(Cursor::new(v));
982        assert!(a.check_integrity().expect("io").is_ok());
983        assert!(a.validate_rgd().expect("io"));
984    }
985
986    #[test]
987    fn sesparse_huge_capacity_grain_directory_too_large_is_safe() {
988        let mut se = test_sesparse_vmdk(&[0u8; 512]);
989        se[16..24].copy_from_slice(&u64::MAX.to_le_bytes()); // capacity → GD size exceeds the cap
990        let mut a = VmdkIntegrity::new(Cursor::new(se));
991        assert!(a.check_integrity().expect("io").is_ok()); // bails out safely
992    }
993
994    #[test]
995    fn sesparse_allocated_gte_grain_past_eof_is_flagged() {
996        // Set GT[0] to an allocated entry whose grain index lands past EOF.
997        let mut se = test_sesparse_vmdk(&[0xAB; 512]);
998        let gt0 = 3 * 512; // grain table 0 starts at sector 3 in the fixture
999        se[gt0..gt0 + 8].copy_from_slice(&(0x3000_0000_0000_0000u64 | 0x00FF_FFFF).to_le_bytes());
1000        let mut a = VmdkIntegrity::new(Cursor::new(se));
1001        let _ = a.check_integrity().expect("io"); // must not panic on the crafted grain index
1002    }
1003
1004    #[test]
1005    fn analyse_clean_image_has_no_error_anomalies() {
1006        let v = test_sparse_vmdk(&[0xAB; 512]);
1007        let mut a = VmdkIntegrity::new(Cursor::new(v));
1008        let anomalies = a.analyse().expect("io");
1009        assert!(anomalies.iter().all(|x| x.severity < Some(Severity::High)));
1010    }
1011}