Skip to main content

disk_forensic/
container.rs

//! Container-format detection (magic-sniff) — which decoder a disk image needs.
//!
//! disk4n6 analyses a `Read + Seek` view of a *disk*. Most evidence arrives
//! wrapped in a container (E01, VHD/VHDX, VMDK, QCOW2, AFF4, DMG); this sniffs
//! the magic so an opener can pick the right decoder. The magics come from the
//! `forensicnomicon` knowledge modules (single source of truth). A flat raw/`dd`
//! image has no wrapper and is analysed in place.
8
9use std::fs::File;
10use std::io::{Read, Seek, SeekFrom};
11use std::path::Path;
12
13use forensicnomicon::report::Finding;
14use forensicnomicon::{aff4, dmg, ewf, qcow2, vhd, vhdx, vmdk};
15
/// Object-safe shorthand for "readable and seekable" — the shape of disk view
/// that `analyse_disk` consumes.
///
/// The blanket impl below makes every `Read + Seek` type a `ReadSeek`, so a
/// decoder's reader and a plain `File` can both be stored as
/// `Box<dyn ReadSeek>`.
pub trait ReadSeek: Read + Seek {}

impl<T> ReadSeek for T where T: Read + Seek {}
21
22/// A decoded, analysable disk image.
23pub struct OpenedImage {
24    /// The container format it was decoded from (`Raw` for a flat image).
25    pub format: ContainerFormat,
26    /// Logical disk size in bytes (the decoded media size).
27    pub size: u64,
28    /// A `Read + Seek` view of the decoded disk, ready for `analyse_disk`.
29    pub reader: Box<dyn ReadSeek>,
30    /// Container-level forensic findings (e.g. VMDK redundant-GD / dangling-pointer
31    /// / provenance anomalies), surfaced so they aggregate into the normalized
32    /// report alongside the partition/filesystem findings. Empty for containers
33    /// without a forensic analyzer.
34    pub findings: Vec<Finding>,
35}
36
37impl core::fmt::Debug for OpenedImage {
38    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
39        f.debug_struct("OpenedImage")
40            .field("format", &self.format)
41            .field("size", &self.size)
42            .finish_non_exhaustive()
43    }
44}
45
46/// Failure opening/decoding an image.
47#[derive(Debug, thiserror::Error)]
48pub enum OpenError {
49    /// I/O failure opening or reading the file.
50    #[error("I/O error: {0}")]
51    Io(#[from] std::io::Error),
52    /// A recognized container's decoder failed (corrupt/unsupported variant).
53    #[error("{0:?} decode error: {1}")]
54    Decode(ContainerFormat, String),
55    /// The container format is recognized but its decoder is not yet wired —
56    /// decode it to a raw image first.
57    #[error("{0:?} container decoding is not yet supported — decode it to a raw image first")]
58    Unsupported(ContainerFormat),
59}
60
61/// Open `path`, sniff its container format, and return a decoded `Read + Seek`
62/// disk view: raw images pass through; E01/EWF is decoded; other recognized
63/// containers return [`OpenError::Unsupported`].
64///
65/// # Errors
66/// [`OpenError::Io`] on a read failure, [`OpenError::Decode`] on a corrupt
67/// image, or [`OpenError::Unsupported`] for a container whose decoder is not yet
68/// wired.
69pub fn open(path: &Path) -> Result<OpenedImage, OpenError> {
70    let mut file = File::open(path)?;
71    let format = sniff(&mut file)?;
72    match format {
73        ContainerFormat::Raw => {
74            let size = file.metadata()?.len();
75            Ok(OpenedImage {
76                format,
77                size,
78                reader: Box::new(file),
79                findings: Vec::new(),
80            })
81        }
82        ContainerFormat::Ewf => {
83            // `ewf` (imported) is forensicnomicon's magic module; the decoder is
84            // the external `ewf` crate, reached via the absolute path.
85            let reader = ::ewf::EwfReader::open(path)
86                .map_err(|e| OpenError::Decode(format, e.to_string()))?;
87            let size = reader.total_size();
88            Ok(OpenedImage {
89                format,
90                size,
91                reader: Box::new(reader),
92                findings: Vec::new(),
93            })
94        }
95        ContainerFormat::Vmdk => {
96            // `vmdk` (imported) is forensicnomicon's magic module; the decoder is
97            // the external `vmdk` crate, reached via the absolute path. The chain
98            // reader resolves any snapshot/delta extents to the base image.
99            let reader = ::vmdk::VmdkChainReader::open(path)
100                .map_err(|e| OpenError::Decode(format, e.to_string()))?;
101            let size = reader.virtual_disk_size();
102            // Run the VMDK forensic analyzer over the same path so its findings
103            // (RGD mismatch, dangling pointers, unclean shutdown, FTP-mangling)
104            // aggregate into the report. A failed analysis must not fail the open —
105            // the disk view is still usable — so it degrades to no findings.
106            let findings = File::open(path)
107                .ok()
108                .map(vmdk_forensic::VmdkIntegrity::new)
109                .and_then(|mut i| i.analyse().ok())
110                .unwrap_or_default();
111            Ok(OpenedImage {
112                format,
113                size,
114                reader: Box::new(reader),
115                findings,
116            })
117        }
118        ContainerFormat::Qcow2 => {
119            // Our qcow2-core reader owns the file and is Read + Seek directly;
120            // it rejects QCOW1 / encrypted / backing-file images at open().
121            let reader = ::qcow2::Qcow2Reader::open(path)
122                .map_err(|e| OpenError::Decode(format, e.to_string()))?;
123            let size = reader.virtual_disk_size();
124            Ok(OpenedImage {
125                format,
126                size,
127                reader: Box::new(reader),
128                findings: Vec::new(),
129            })
130        }
131        ContainerFormat::Vhd => {
132            // Hand-rolled decoder (no crate): handles fixed + dynamic subformats.
133            let reader = crate::vhd::VhdReader::open(File::open(path)?)
134                .map_err(|e| OpenError::Decode(format, e.to_string()))?;
135            let size = reader.virtual_size();
136            Ok(OpenedImage {
137                format,
138                size,
139                reader: Box::new(reader),
140                findings: Vec::new(),
141            })
142        }
143        ContainerFormat::Vhdx => {
144            // Our own `vhdx-core` reader (imported as `vhdx`) returns an owned
145            // `VhdxReader` that is itself `Read + Seek` with a real `Result` — box
146            // it directly; no adapter and no panic guard needed.
147            let reader = ::vhdx::VhdxReader::open(path)
148                .map_err(|e| OpenError::Decode(format, e.to_string()))?;
149            let size = reader.virtual_disk_size();
150            Ok(OpenedImage {
151                format,
152                size,
153                reader: Box::new(reader),
154                findings: Vec::new(),
155            })
156        }
157        ContainerFormat::Dmg => {
158            let bytes =
159                decode_dmg(path).map_err(|e| OpenError::Decode(format, e.to_string()))?;
160            Ok(OpenedImage {
161                format,
162                size: bytes.len() as u64,
163                reader: Box::new(std::io::Cursor::new(bytes)),
164                findings: Vec::new(),
165            })
166        }
167        ContainerFormat::Iso => {
168            // An ISO 9660 image needs no container decoding — it is a flat
169            // filesystem image. Pass the file through; disk4n6 routes the `Iso`
170            // format to the filesystem analyzer instead of the partition parsers.
171            let size = file.metadata()?.len();
172            Ok(OpenedImage {
173                format,
174                size,
175                reader: Box::new(file),
176                findings: Vec::new(),
177            })
178        }
179        other => Err(OpenError::Unsupported(other)),
180    }
181}
182
183/// Reconstruct a whole DMG (UDIF) disk image in memory. The `udif` crate exposes
184/// a DMG as ordered blkx entries (MBR, free gaps, partitions) rather than a
185/// single disk view, so we concatenate them in disk order; sparse `Apple_Free`
186/// entries (no compressed bytes) become zero-fill. Unlike the streaming
187/// decoders this buffers the entire disk in memory — a limitation of the crate's
188/// partition-oriented API.
189fn decode_dmg(path: &Path) -> std::io::Result<Vec<u8>> {
190    let to_io =
191        |e: ::udif::DppError| std::io::Error::new(std::io::ErrorKind::InvalidData, e.to_string());
192    let mut archive = ::udif::DmgArchive::open(path).map_err(to_io)?;
193    let parts = archive.partitions();
194    let mut disk = Vec::new();
195    for p in &parts {
196        let want = p.size as usize;
197        if p.compressed_size == 0 {
198            // Free / unallocated space stored sparsely → zero-fill.
199            disk.resize(disk.len() + want, 0);
200        } else {
201            let mut data = archive.extract_partition(p.id).map_err(to_io)?;
202            data.resize(want, 0);
203            disk.append(&mut data);
204        }
205    }
206    Ok(disk)
207}
208
/// How many leading bytes are read for header-magic detection (34 KiB). This
/// has to extend past the ISO 9660 PVD identifier "CD001" at offset 32769.
const HEADER_SNIFF_BYTES: usize = 34 * 1024;
/// How many trailing bytes are read for footer/trailer-magic detection
/// (VHD, DMG).
const FOOTER_SNIFF_BYTES: u64 = 512;
214
/// The container format a disk image was identified as.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
pub enum ContainerFormat {
    /// A flat raw/`dd` image with no container wrapper; it is analysed in place.
    Raw,
    /// Expert Witness Format (EnCase E01 / Ex01 / logical L01).
    Ewf,
    /// Microsoft VHD (fixed / dynamic / differencing).
    Vhd,
    /// Microsoft VHDX.
    Vhdx,
    /// VMware VMDK (sparse extent).
    Vmdk,
    /// QEMU / KVM QCOW2.
    Qcow2,
    /// Advanced Forensic Format 4 (ZIP-based).
    Aff4,
    /// Apple Disk Image (UDIF).
    Dmg,
    /// ISO 9660 optical-disc image. This is a filesystem rather than a
    /// partitioned disk, so it is analysed by `iso9660-forensic` and not by the
    /// partition parsers.
    Iso,
}
239
240/// Sniff the container format from a disk image's `header` (its first bytes,
241/// ideally ≥512) and `footer` (its last 512 bytes — VHD's `conectix` cookie and
242/// DMG's `koly` trailer live at the *end* of the file).
243///
244/// Returns [`ContainerFormat::Raw`] when no wrapper magic is present (a bare
245/// MBR/GPT/APM disk).
246#[must_use]
247pub fn detect(header: &[u8], footer: &[u8]) -> ContainerFormat {
248    // ── Offset-0 magics ──────────────────────────────────────────────────────
249    if header.starts_with(&ewf::EVF1_SIGNATURE)
250        || header.starts_with(&ewf::EVF2_SIGNATURE)
251        || header.starts_with(&ewf::LEF2_SIGNATURE)
252    {
253        return ContainerFormat::Ewf;
254    }
255    if header.starts_with(vhdx::FILE_IDENTIFIER) {
256        return ContainerFormat::Vhdx;
257    }
258    // A dynamic VHD mirrors its footer cookie at offset 0.
259    if header.starts_with(vhd::FOOTER_COOKIE) {
260        return ContainerFormat::Vhd;
261    }
262    if header.starts_with(&vmdk::VMDK4_MAGIC.to_le_bytes()) {
263        return ContainerFormat::Vmdk;
264    }
265    if header.starts_with(&qcow2::MAGIC.to_be_bytes()) {
266        return ContainerFormat::Qcow2;
267    }
268    if header.starts_with(&aff4::ZIP_LOCAL_FILE_HEADER_MAGIC) {
269        return ContainerFormat::Aff4;
270    }
271    // ── Optical (ISO 9660): "CD001" at the PVD, offset 32769 (ECMA-119) ───────
272    const ISO_PVD_OFFSET: usize = 32769;
273    if header.len() >= ISO_PVD_OFFSET + 5 && &header[ISO_PVD_OFFSET..ISO_PVD_OFFSET + 5] == b"CD001"
274    {
275        return ContainerFormat::Iso;
276    }
277    // ── Footer / trailer magics ──────────────────────────────────────────────
278    if footer.starts_with(vhd::FOOTER_COOKIE) {
279        return ContainerFormat::Vhd;
280    }
281    if footer.starts_with(&dmg::KOLY_MAGIC.to_be_bytes()) {
282        return ContainerFormat::Dmg;
283    }
284    ContainerFormat::Raw
285}
286
287/// Sniff the container format of a seekable image: read its header and trailing
288/// footer, classify via [`detect`], and **rewind the reader to 0** for the
289/// caller. A sub-512-byte image is read without a footer.
290///
291/// # Errors
292/// Propagates any I/O error from seeking/reading the image.
293pub fn sniff<R: Read + Seek>(reader: &mut R) -> std::io::Result<ContainerFormat> {
294    let len = reader.seek(SeekFrom::End(0))?;
295
296    reader.seek(SeekFrom::Start(0))?;
297    let header_len = (len as usize).min(HEADER_SNIFF_BYTES);
298    let mut header = vec![0u8; header_len];
299    reader.read_exact(&mut header)?;
300
301    let footer = if len >= FOOTER_SNIFF_BYTES {
302        reader.seek(SeekFrom::End(-(FOOTER_SNIFF_BYTES as i64)))?;
303        let mut f = vec![0u8; FOOTER_SNIFF_BYTES as usize];
304        reader.read_exact(&mut f)?;
305        f
306    } else {
307        Vec::new()
308    };
309
310    reader.seek(SeekFrom::Start(0))?;
311    Ok(detect(&header, &footer))
312}