disk_forensic/container.rs
1//! Container-format detection (magic-sniff) — which decoder a disk image needs.
2//!
3//! disk4n6 analyses a `Read + Seek` view of a *disk*. Most evidence arrives
4//! wrapped in a container (E01, VHD/VHDX, VMDK, QCOW2, AFF4, DMG); this sniffs
5//! the magic so an opener can pick the right decoder. The magics come from the
6//! `forensicnomicon` knowledge modules (single source of truth). A flat raw/`dd`
7//! image has no wrapper and is analysed in place.
8
9use std::fs::File;
10use std::io::{Read, Seek, SeekFrom};
11use std::path::Path;
12
13use forensicnomicon::report::Finding;
14use forensicnomicon::{aff4, dmg, ewf, qcow2, vhd, vhdx, vmdk};
15
/// Anything that can be both read and seeked — the disk view `analyse_disk`
/// consumes.
///
/// A trait object may name only one non-auto trait, so `Box<dyn Read + Seek>`
/// cannot be written directly; this empty supertrait is the single trait that
/// gets boxed instead (`Box<dyn ReadSeek>`).
pub trait ReadSeek: Read + Seek {}
// Blanket impl: every `Read + Seek` type is automatically a `ReadSeek`, so a
// decoder's reader or a plain `File` both box into `Box<dyn ReadSeek>` without
// any per-type impl.
impl<T: Read + Seek> ReadSeek for T {}
21
/// A decoded, analysable disk image, as returned by [`open`].
///
/// `Debug` is implemented by hand (not derived) because the boxed `reader` is a
/// `dyn ReadSeek` trait object, which carries no `Debug` bound.
pub struct OpenedImage {
    /// The container format it was decoded from (`Raw` for a flat image).
    pub format: ContainerFormat,
    /// Logical disk size in bytes (the decoded media size). For pass-through
    /// formats this is the file length reported by the file's metadata; for
    /// decoded containers it is the size reported by the decoder.
    pub size: u64,
    /// A `Read + Seek` view of the decoded disk, ready for `analyse_disk`.
    /// Boxed so every decoder's reader shares one concrete field type.
    pub reader: Box<dyn ReadSeek>,
    /// Container-level forensic findings (e.g. VMDK redundant-GD / dangling-pointer
    /// / provenance anomalies), surfaced so they aggregate into the normalized
    /// report alongside the partition/filesystem findings. Empty for containers
    /// without a forensic analyzer.
    pub findings: Vec<Finding>,
}
36
37impl core::fmt::Debug for OpenedImage {
38 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
39 f.debug_struct("OpenedImage")
40 .field("format", &self.format)
41 .field("size", &self.size)
42 .finish_non_exhaustive()
43 }
44}
45
/// Failure opening/decoding an image.
///
/// `std::io::Error` converts into [`OpenError::Io`] automatically (via
/// `#[from]`), so `?` can be used directly on I/O calls.
#[derive(Debug, thiserror::Error)]
pub enum OpenError {
    /// I/O failure opening or reading the file.
    #[error("I/O error: {0}")]
    Io(#[from] std::io::Error),
    /// A recognized container's decoder failed (corrupt/unsupported variant).
    /// Carries the detected format and the decoder's error rendered as text.
    #[error("{0:?} decode error: {1}")]
    Decode(ContainerFormat, String),
    /// The container format is recognized but its decoder is not yet wired —
    /// decode it to a raw image first.
    #[error("{0:?} container decoding is not yet supported — decode it to a raw image first")]
    Unsupported(ContainerFormat),
}
60
61/// Open `path`, sniff its container format, and return a decoded `Read + Seek`
62/// disk view: raw images pass through; E01/EWF is decoded; other recognized
63/// containers return [`OpenError::Unsupported`].
64///
65/// # Errors
66/// [`OpenError::Io`] on a read failure, [`OpenError::Decode`] on a corrupt
67/// image, or [`OpenError::Unsupported`] for a container whose decoder is not yet
68/// wired.
69pub fn open(path: &Path) -> Result<OpenedImage, OpenError> {
70 let mut file = File::open(path)?;
71 let format = sniff(&mut file)?;
72 match format {
73 ContainerFormat::Raw => {
74 let size = file.metadata()?.len();
75 Ok(OpenedImage {
76 format,
77 size,
78 reader: Box::new(file),
79 findings: Vec::new(),
80 })
81 }
82 ContainerFormat::Ewf => {
83 // `ewf` (imported) is forensicnomicon's magic module; the decoder is
84 // the external `ewf` crate, reached via the absolute path.
85 let reader = ::ewf::EwfReader::open(path)
86 .map_err(|e| OpenError::Decode(format, e.to_string()))?;
87 let size = reader.total_size();
88 Ok(OpenedImage {
89 format,
90 size,
91 reader: Box::new(reader),
92 findings: Vec::new(),
93 })
94 }
95 ContainerFormat::Vmdk => {
96 // `vmdk` (imported) is forensicnomicon's magic module; the decoder is
97 // the external `vmdk` crate, reached via the absolute path. The chain
98 // reader resolves any snapshot/delta extents to the base image.
99 let reader = ::vmdk::VmdkChainReader::open(path)
100 .map_err(|e| OpenError::Decode(format, e.to_string()))?;
101 let size = reader.virtual_disk_size();
102 // Run the VMDK forensic analyzer over the same path so its findings
103 // (RGD mismatch, dangling pointers, unclean shutdown, FTP-mangling)
104 // aggregate into the report. A failed analysis must not fail the open —
105 // the disk view is still usable — so it degrades to no findings.
106 let findings = File::open(path)
107 .ok()
108 .map(vmdk_forensic::VmdkIntegrity::new)
109 .and_then(|mut i| i.analyse().ok())
110 .unwrap_or_default();
111 Ok(OpenedImage {
112 format,
113 size,
114 reader: Box::new(reader),
115 findings,
116 })
117 }
118 ContainerFormat::Qcow2 => {
119 // Our qcow2-core reader owns the file and is Read + Seek directly;
120 // it rejects QCOW1 / encrypted / backing-file images at open().
121 let reader = ::qcow2::Qcow2Reader::open(path)
122 .map_err(|e| OpenError::Decode(format, e.to_string()))?;
123 let size = reader.virtual_disk_size();
124 Ok(OpenedImage {
125 format,
126 size,
127 reader: Box::new(reader),
128 findings: Vec::new(),
129 })
130 }
131 ContainerFormat::Vhd => {
132 // Hand-rolled decoder (no crate): handles fixed + dynamic subformats.
133 let reader = crate::vhd::VhdReader::open(File::open(path)?)
134 .map_err(|e| OpenError::Decode(format, e.to_string()))?;
135 let size = reader.virtual_size();
136 Ok(OpenedImage {
137 format,
138 size,
139 reader: Box::new(reader),
140 findings: Vec::new(),
141 })
142 }
143 ContainerFormat::Vhdx => {
144 // Our own `vhdx-core` reader (imported as `vhdx`) returns an owned
145 // `VhdxReader` that is itself `Read + Seek` with a real `Result` — box
146 // it directly; no adapter and no panic guard needed.
147 let reader = ::vhdx::VhdxReader::open(path)
148 .map_err(|e| OpenError::Decode(format, e.to_string()))?;
149 let size = reader.virtual_disk_size();
150 Ok(OpenedImage {
151 format,
152 size,
153 reader: Box::new(reader),
154 findings: Vec::new(),
155 })
156 }
157 ContainerFormat::Dmg => {
158 // Our own `dmg-core` reader is `Read + Seek` directly (no buffering)
159 // and decodes every UDIF block codec — ADC/zlib/bzip2/LZFSE/LZMA —
160 // in pure Rust. `::dmg` is the crate; `dmg` (imported) is
161 // forensicnomicon's magic module used for sniffing.
162 let reader = ::dmg::DmgReader::open(File::open(path)?)
163 .map_err(|e| OpenError::Decode(format, e.to_string()))?;
164 let size = reader.virtual_disk_size();
165 Ok(OpenedImage {
166 format,
167 size,
168 reader: Box::new(reader),
169 findings: Vec::new(),
170 })
171 }
172 ContainerFormat::Iso => {
173 // An ISO 9660 image needs no container decoding — it is a flat
174 // filesystem image. Pass the file through; disk4n6 routes the `Iso`
175 // format to the filesystem analyzer instead of the partition parsers.
176 let size = file.metadata()?.len();
177 Ok(OpenedImage {
178 format,
179 size,
180 reader: Box::new(file),
181 findings: Vec::new(),
182 })
183 }
184 other => Err(OpenError::Unsupported(other)),
185 }
186}
187
/// Bytes read from the start for header-magic detection: 17 × 2048 = 34816,
/// which is large enough to reach the ISO 9660 PVD "CD001" at offset 32769
/// (the identifier ends at byte 32773).
const HEADER_SNIFF_BYTES: usize = 17 * 2048;
/// Bytes read from the end for footer/trailer-magic detection (VHD, DMG).
const FOOTER_SNIFF_BYTES: u64 = 512;
193
/// A detected disk-image container format.
///
/// Produced by [`detect`] / [`sniff`] and consumed by [`open`], which decodes
/// every variant except `Raw` and `Iso` (passed through undecoded) and `Aff4`
/// (recognized, but rejected as unsupported).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
pub enum ContainerFormat {
    /// No container wrapper — a flat raw/`dd` image (analyse in place). Also
    /// the fallback when no known magic matches.
    Raw,
    /// Expert Witness Format (EnCase E01 / Ex01 / logical L01).
    Ewf,
    /// Microsoft VHD (fixed / dynamic / differencing).
    Vhd,
    /// Microsoft VHDX.
    Vhdx,
    /// VMware VMDK (sparse extent).
    Vmdk,
    /// QEMU / KVM QCOW2.
    Qcow2,
    /// Advanced Forensic Format 4 (ZIP-based). Detected by the ZIP
    /// local-file-header magic at offset 0.
    Aff4,
    /// Apple Disk Image (UDIF).
    Dmg,
    /// ISO 9660 optical-disc image (a filesystem, not a partitioned disk —
    /// analysed by `iso9660-forensic` rather than the partition parsers).
    Iso,
}
218
219/// Sniff the container format from a disk image's `header` (its first bytes,
220/// ideally ≥512) and `footer` (its last 512 bytes — VHD's `conectix` cookie and
221/// DMG's `koly` trailer live at the *end* of the file).
222///
223/// Returns [`ContainerFormat::Raw`] when no wrapper magic is present (a bare
224/// MBR/GPT/APM disk).
225#[must_use]
226pub fn detect(header: &[u8], footer: &[u8]) -> ContainerFormat {
227 // ── Offset-0 magics ──────────────────────────────────────────────────────
228 if header.starts_with(&ewf::EVF1_SIGNATURE)
229 || header.starts_with(&ewf::EVF2_SIGNATURE)
230 || header.starts_with(&ewf::LEF2_SIGNATURE)
231 {
232 return ContainerFormat::Ewf;
233 }
234 if header.starts_with(vhdx::FILE_IDENTIFIER) {
235 return ContainerFormat::Vhdx;
236 }
237 // A dynamic VHD mirrors its footer cookie at offset 0.
238 if header.starts_with(vhd::FOOTER_COOKIE) {
239 return ContainerFormat::Vhd;
240 }
241 if header.starts_with(&vmdk::VMDK4_MAGIC.to_le_bytes()) {
242 return ContainerFormat::Vmdk;
243 }
244 if header.starts_with(&qcow2::MAGIC.to_be_bytes()) {
245 return ContainerFormat::Qcow2;
246 }
247 if header.starts_with(&aff4::ZIP_LOCAL_FILE_HEADER_MAGIC) {
248 return ContainerFormat::Aff4;
249 }
250 // ── Optical (ISO 9660): "CD001" at the PVD, offset 32769 (ECMA-119) ───────
251 const ISO_PVD_OFFSET: usize = 32769;
252 if header.len() >= ISO_PVD_OFFSET + 5 && &header[ISO_PVD_OFFSET..ISO_PVD_OFFSET + 5] == b"CD001"
253 {
254 return ContainerFormat::Iso;
255 }
256 // ── Footer / trailer magics ──────────────────────────────────────────────
257 if footer.starts_with(vhd::FOOTER_COOKIE) {
258 return ContainerFormat::Vhd;
259 }
260 if footer.starts_with(&dmg::KOLY_MAGIC.to_be_bytes()) {
261 return ContainerFormat::Dmg;
262 }
263 ContainerFormat::Raw
264}
265
266/// Sniff the container format of a seekable image: read its header and trailing
267/// footer, classify via [`detect`], and **rewind the reader to 0** for the
268/// caller. A sub-512-byte image is read without a footer.
269///
270/// # Errors
271/// Propagates any I/O error from seeking/reading the image.
272pub fn sniff<R: Read + Seek>(reader: &mut R) -> std::io::Result<ContainerFormat> {
273 let len = reader.seek(SeekFrom::End(0))?;
274
275 reader.seek(SeekFrom::Start(0))?;
276 let header_len = (len as usize).min(HEADER_SNIFF_BYTES);
277 let mut header = vec![0u8; header_len];
278 reader.read_exact(&mut header)?;
279
280 let footer = if len >= FOOTER_SNIFF_BYTES {
281 reader.seek(SeekFrom::End(-(FOOTER_SNIFF_BYTES as i64)))?;
282 let mut f = vec![0u8; FOOTER_SNIFF_BYTES as usize];
283 reader.read_exact(&mut f)?;
284 f
285 } else {
286 Vec::new()
287 };
288
289 reader.seek(SeekFrom::Start(0))?;
290 Ok(detect(&header, &footer))
291}