Skip to main content

qcow2_forensic/
lib.rs

1//! Forensic anomaly auditor for QCOW2 images.
2//!
3//! Reads QCOW2 header facts via [`qcow2::inspect`] and grades them into
4//! severity-ranked findings on the shared [`forensicnomicon::report`] model.
5//! Each finding is an *observation* ("consistent with …"); the examiner draws
6//! the conclusions.
7
8// Production code is panic-free (enforced by the workspace lints); tests opt out.
9#![cfg_attr(test, allow(clippy::unwrap_used, clippy::expect_used))]
10
11use std::path::Path;
12
13pub use forensicnomicon::report::Severity;
14pub use qcow2::{Qcow2Error, Qcow2Info, Qcow2RefcountReport, Qcow2Snapshot};
15
// v3 incompatible-feature bits (QCOW2 spec §4). Each constant is a single-bit
// mask tested against `Qcow2Info::incompatible_features`.

/// Bit 0: dirty — reported as [`Qcow2Anomaly::Dirty`].
const FEAT_DIRTY: u64 = 0b001;
/// Bit 1: corrupt — reported as [`Qcow2Anomaly::Corrupt`].
const FEAT_CORRUPT: u64 = 0b010;
/// Bit 2: external data file — reported as [`Qcow2Anomaly::ExternalDataFile`].
const FEAT_EXTERNAL_DATA: u64 = 0b100;
20
/// One forensic observation about a QCOW2 image.
///
/// Each variant is a distinct kind of finding and carries only the facts
/// needed to describe it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Qcow2Anomaly {
    /// A backing file is referenced: the image is an overlay/differential, so
    /// on its own it does not hold the full guest data.
    BackingFile {
        /// Backing filename, if it could be extracted.
        name: Option<String>,
        /// Backing-file format (e.g. "qcow2", "raw"), if recorded.
        format: Option<String>,
    },
    /// Encryption is enabled; the contents are unreadable without the key.
    Encrypted {
        /// Encryption method code (1 = AES, 2 = LUKS).
        method: u32,
    },
    /// Internal snapshots are present — additional captured guest states.
    InternalSnapshots {
        /// How many snapshots the image carries.
        count: u32,
    },
    /// One named internal snapshot: a guest state captured at a point in time
    /// and worth examining as additional recoverable evidence.
    Snapshot {
        /// Name of the snapshot (e.g. "snap1").
        name: String,
        /// When it was created, in seconds since the Unix epoch.
        date_unix_secs: u32,
    },
    /// Dirty bit set: the image was not closed cleanly (in use or crashed).
    Dirty,
    /// Corrupt bit set: QEMU flagged the image as corrupt.
    Corrupt,
    /// Guest data lives in an external data file, so the image is not
    /// self-contained.
    ExternalDataFile,
    /// Clusters reachable through the active L1/L2 mapping whose host refcount
    /// is 0 — allocated-but-unreferenced data, a candidate for orphaned or
    /// deleted guest content.
    OrphanClusters {
        /// Count of allocated-but-unreferenced clusters detected.
        count: u64,
        /// Total allocated data clusters examined, giving `count` context.
        allocated: u64,
    },
    /// Legacy QCOW1 (v1) format: an obsolete container that predates QCOW2's
    /// refcounts, snapshots, and feature bits.
    LegacyQcow1,
}
69
70impl Qcow2Anomaly {
71    /// Severity — the single source of truth for this kind.
72    #[must_use]
73    pub fn severity(&self) -> Severity {
74        match self {
75            Qcow2Anomaly::Corrupt => Severity::High,
76            Qcow2Anomaly::BackingFile { .. }
77            | Qcow2Anomaly::Encrypted { .. }
78            | Qcow2Anomaly::OrphanClusters { .. }
79            | Qcow2Anomaly::ExternalDataFile => Severity::Medium,
80            Qcow2Anomaly::InternalSnapshots { .. }
81            | Qcow2Anomaly::Snapshot { .. }
82            | Qcow2Anomaly::LegacyQcow1
83            | Qcow2Anomaly::Dirty => Severity::Low,
84        }
85    }
86
87    /// Stable machine-readable code.
88    #[must_use]
89    pub fn code(&self) -> &'static str {
90        match self {
91            Qcow2Anomaly::BackingFile { .. } => "QCOW2-BACKING-FILE",
92            Qcow2Anomaly::Encrypted { .. } => "QCOW2-ENCRYPTED",
93            Qcow2Anomaly::InternalSnapshots { .. } => "QCOW2-INTERNAL-SNAPSHOTS",
94            Qcow2Anomaly::Snapshot { .. } => "QCOW2-SNAPSHOT",
95            Qcow2Anomaly::Dirty => "QCOW2-DIRTY",
96            Qcow2Anomaly::Corrupt => "QCOW2-CORRUPT",
97            Qcow2Anomaly::ExternalDataFile => "QCOW2-EXTERNAL-DATA",
98            Qcow2Anomaly::OrphanClusters { .. } => "QCOW2-ORPHAN-CLUSTERS",
99            Qcow2Anomaly::LegacyQcow1 => "QCOW2-QCOW1",
100        }
101    }
102
103    /// Human-readable, "consistent with" note.
104    #[must_use]
105    pub fn note(&self) -> String {
106        match self {
107            Qcow2Anomaly::BackingFile { name, format } => match (name, format) {
108                (Some(n), Some(f)) => format!("image references backing file `{n}` (format {f}) — it is an overlay and does not alone contain the full guest data"),
109                (Some(n), None) => format!("image references backing file `{n}` — it is an overlay and does not alone contain the full guest data"),
110                (None, _) => "image references a backing file — it is an overlay and does not alone contain the full guest data".to_string(),
111            },
112            Qcow2Anomaly::Encrypted { method } => format!("image is encrypted (method {method}) — contents are inaccessible without the key"),
113            Qcow2Anomaly::InternalSnapshots { count } => format!("image carries {count} internal snapshot(s) — additional captured guest states to examine"),
114            Qcow2Anomaly::Snapshot { name, date_unix_secs } => format!("internal snapshot `{name}` captured at unix time {date_unix_secs} — a recoverable point-in-time guest state to examine"),
115            Qcow2Anomaly::Dirty => "the dirty bit is set — consistent with the image not having been closed cleanly (in use or crashed)".to_string(),
116            Qcow2Anomaly::Corrupt => "the corrupt bit is set — QEMU flagged the image as corrupt".to_string(),
117            Qcow2Anomaly::ExternalDataFile => "image stores guest data in an external data file — the data is not self-contained".to_string(),
118            Qcow2Anomaly::OrphanClusters { count, allocated } => format!("{count} of {allocated} allocated cluster(s) are reachable through L1/L2 yet have a host refcount of 0 — consistent with orphaned or deleted guest data left in the image"),
119            Qcow2Anomaly::LegacyQcow1 => "image uses the legacy QCOW1 (version 1) format — an obsolete container predating QCOW2's refcounts, snapshots, and feature bits".to_string(),
120        }
121    }
122}
123
124impl forensicnomicon::report::Observation for Qcow2Anomaly {
125    fn severity(&self) -> Option<Severity> {
126        Some(self.severity())
127    }
128    fn code(&self) -> &'static str {
129        self.code()
130    }
131    fn note(&self) -> String {
132        self.note()
133    }
134    fn evidence(&self) -> Vec<forensicnomicon::report::Evidence> {
135        use forensicnomicon::report::{Evidence, Location};
136        // A single named snapshot's evidence lives in the snapshot table and
137        // carries both the name and its capture timestamp; every other anomaly
138        // is a single header field.
139        let header = |field: &str, value: String| Evidence {
140            field: field.to_string(),
141            value,
142            location: Some(Location::Field("QCOW2 header".to_string())),
143        };
144        match self {
145            Qcow2Anomaly::Snapshot { name, date_unix_secs } => {
146                let loc = || Some(Location::Field("QCOW2 snapshot table".to_string()));
147                vec![
148                    Evidence {
149                        field: "snapshot_name".to_string(),
150                        value: name.clone(),
151                        location: loc(),
152                    },
153                    Evidence {
154                        field: "date_unix_secs".to_string(),
155                        value: date_unix_secs.to_string(),
156                        location: loc(),
157                    },
158                ]
159            }
160            Qcow2Anomaly::BackingFile { name, format } => {
161                let mut ev = vec![header(
162                    "backing_file",
163                    name.clone().unwrap_or_else(|| "present".to_string()),
164                )];
165                if let Some(f) = format {
166                    ev.push(header("backing_format", f.clone()));
167                }
168                ev
169            }
170            Qcow2Anomaly::Encrypted { method } => vec![header("crypt_method", method.to_string())],
171            Qcow2Anomaly::InternalSnapshots { count } => vec![header("nb_snapshots", count.to_string())],
172            Qcow2Anomaly::Dirty => vec![header("incompatible_features", "dirty".to_string())],
173            Qcow2Anomaly::Corrupt => vec![header("incompatible_features", "corrupt".to_string())],
174            Qcow2Anomaly::ExternalDataFile => {
175                vec![header("incompatible_features", "external-data".to_string())]
176            }
177            Qcow2Anomaly::OrphanClusters { count, allocated } => {
178                let loc = || Some(Location::Field("QCOW2 refcount table".to_string()));
179                vec![
180                    Evidence {
181                        field: "orphan_clusters".to_string(),
182                        value: count.to_string(),
183                        location: loc(),
184                    },
185                    Evidence {
186                        field: "allocated_clusters".to_string(),
187                        value: allocated.to_string(),
188                        location: loc(),
189                    },
190                ]
191            }
192            Qcow2Anomaly::LegacyQcow1 => vec![header("version", "1".to_string())],
193        }
194    }
195}
196
197/// Audit parsed QCOW2 header facts for forensic anomalies.
198#[must_use]
199pub fn audit(info: &Qcow2Info) -> Vec<Qcow2Anomaly> {
200    let mut out = Vec::new();
201    if info.version == 1 {
202        out.push(Qcow2Anomaly::LegacyQcow1);
203    }
204    if info.has_backing_file {
205        out.push(Qcow2Anomaly::BackingFile {
206            name: info.backing_file.clone(),
207            format: info.backing_format.clone(),
208        });
209    }
210    if info.encryption_method != 0 {
211        out.push(Qcow2Anomaly::Encrypted {
212            method: info.encryption_method,
213        });
214    }
215    if info.snapshot_count > 0 {
216        out.push(Qcow2Anomaly::InternalSnapshots {
217            count: info.snapshot_count,
218        });
219    }
220    if info.incompatible_features & FEAT_DIRTY != 0 {
221        out.push(Qcow2Anomaly::Dirty);
222    }
223    if info.incompatible_features & FEAT_CORRUPT != 0 {
224        out.push(Qcow2Anomaly::Corrupt);
225    }
226    if info.incompatible_features & FEAT_EXTERNAL_DATA != 0 {
227        out.push(Qcow2Anomaly::ExternalDataFile);
228    }
229    out
230}
231
232/// Audit an enumerated snapshot list, emitting one `QCOW2-SNAPSHOT` finding per
233/// snapshot with its name and creation timestamp.
234#[must_use]
235pub fn audit_snapshots(snapshots: &[Qcow2Snapshot]) -> Vec<Qcow2Anomaly> {
236    snapshots
237        .iter()
238        .map(|s| Qcow2Anomaly::Snapshot {
239            name: s.name.clone(),
240            date_unix_secs: s.date_unix_secs,
241        })
242        .collect()
243}
244
245/// Audit a refcount/orphan report, emitting a single `QCOW2-ORPHAN-CLUSTERS`
246/// finding when clusters reachable through L1/L2 have a host refcount of 0.
247#[must_use]
248pub fn audit_orphans(report: &Qcow2RefcountReport) -> Option<Qcow2Anomaly> {
249    if report.orphan_clusters == 0 {
250        return None;
251    }
252    Some(Qcow2Anomaly::OrphanClusters {
253        count: report.orphan_clusters,
254        allocated: report.allocated_clusters,
255    })
256}
257
258/// Inspect and audit a QCOW2 image at `path` in one step. Surfaces the header-
259/// level anomalies, one per-snapshot finding, and an orphan-cluster finding when
260/// allocated-but-unreferenced clusters are present. Malformed input surfaces as
261/// an error rather than silent emptiness.
262pub fn audit_path(path: &Path) -> Result<Vec<Qcow2Anomaly>, Qcow2Error> {
263    let mut out = audit(&qcow2::inspect(path)?);
264    out.extend(audit_snapshots(&qcow2::snapshots(path)?));
265    out.extend(audit_orphans(&qcow2::refcount_report(path)?));
266    Ok(out)
267}
268
#[cfg(test)]
mod tests {
    use super::*;
    use forensicnomicon::report::{Observation, Source};
    use std::io::Write;

    // ---- fixtures -------------------------------------------------------

    /// Header facts for a clean v3 image: nothing here should raise a finding.
    fn info() -> Qcow2Info {
        Qcow2Info {
            version: 3,
            cluster_bits: 16,
            virtual_disk_size: 1 << 20,
            l1_size: 1,
            has_backing_file: false,
            encryption_method: 0,
            snapshot_count: 0,
            incompatible_features: 0,
            backing_file: None,
            backing_format: None,
        }
    }

    /// Refcount report with the given allocated/orphan cluster counts.
    fn report(allocated: u64, orphans: u64) -> Qcow2RefcountReport {
        Qcow2RefcountReport {
            refcount_order: 4,
            refcount_table_offset: 65_536,
            refcount_table_clusters: 1,
            allocated_clusters: allocated,
            orphan_clusters: orphans,
        }
    }

    /// Snapshot-table entry with the given name and creation time.
    fn snap(name: &str, secs: u32) -> Qcow2Snapshot {
        Qcow2Snapshot {
            id: "1".to_string(),
            name: name.to_string(),
            date_unix_secs: secs,
            date_nsecs: 0,
            vm_state_size: 0,
        }
    }

    /// The `Source` every round-trip test attributes its findings to.
    fn source() -> Source {
        Source {
            analyzer: "qcow2-forensic".to_string(),
            scope: "image".to_string(),
            version: None,
        }
    }

    /// Flattens an anomaly's evidence into `field=value;` pairs so tests can
    /// assert on a specific field rather than on a bare substring.
    fn joined_evidence(a: &Qcow2Anomaly) -> String {
        a.evidence()
            .iter()
            .map(|e| format!("{}={};", e.field, e.value))
            .collect()
    }

    /// One anomaly of every kind. `audit` checks each header fact
    /// independently, so a v1 header with feature bits set is a valid fixture
    /// even though no real image would look like this.
    fn all_anomalies() -> Vec<Qcow2Anomaly> {
        let mut i = info();
        i.version = 1;
        i.has_backing_file = true;
        i.encryption_method = 1;
        i.snapshot_count = 4;
        i.incompatible_features = FEAT_DIRTY | FEAT_CORRUPT | FEAT_EXTERNAL_DATA;
        let mut out = audit(&i);
        out.extend(audit_snapshots(&[snap("alpha", 1_700_000_000)]));
        out.extend(audit_orphans(&report(10, 3)));
        out
    }

    // ---- header audit ---------------------------------------------------

    #[test]
    fn clean_image_has_no_anomalies() {
        assert!(audit(&info()).is_empty());
    }

    #[test]
    fn backing_file_finding_names_the_referenced_image() {
        let mut i = info();
        i.has_backing_file = true;
        i.backing_file = Some("base.qcow2".to_string());
        i.backing_format = Some("qcow2".to_string());
        let out = audit(&i);
        let bf = out
            .iter()
            .find(|a| a.code() == "QCOW2-BACKING-FILE")
            .expect("backing-file finding");
        assert!(bf.note().contains("base.qcow2"), "note: {}", bf.note());
        let joined = joined_evidence(bf);
        // Assert on whole `field=value;` pairs: the name "base.qcow2" itself
        // contains "qcow2", so a bare substring check on the format would pass
        // even if the format evidence were missing.
        assert!(
            joined.contains("backing_file=base.qcow2;"),
            "evidence name: {joined}"
        );
        assert!(
            joined.contains("backing_format=qcow2;"),
            "evidence format: {joined}"
        );
    }

    #[test]
    fn backing_file_without_name_still_emits_a_generic_finding() {
        let mut i = info();
        i.has_backing_file = true; // present but name not captured
        let out = audit(&i);
        assert!(out.iter().any(|a| a.code() == "QCOW2-BACKING-FILE"));
    }

    #[test]
    fn backing_file_name_without_format_is_noted() {
        let mut i = info();
        i.has_backing_file = true;
        i.backing_file = Some("base.qcow2".to_string());
        i.backing_format = None; // name known, format not recorded
        let out = audit(&i);
        let bf = out.iter().find(|a| a.code() == "QCOW2-BACKING-FILE").unwrap();
        assert!(bf.note().contains("base.qcow2"));
        assert!(
            !bf.note().contains("format "),
            "no format clause when absent: {}",
            bf.note()
        );
    }

    #[test]
    fn every_anomaly_kind_is_emitted_and_round_trips_to_a_finding() {
        let anomalies = all_anomalies();
        let codes: Vec<&str> = anomalies.iter().map(Observation::code).collect();
        for expected in [
            "QCOW2-QCOW1",
            "QCOW2-BACKING-FILE",
            "QCOW2-ENCRYPTED",
            "QCOW2-INTERNAL-SNAPSHOTS",
            "QCOW2-DIRTY",
            "QCOW2-CORRUPT",
            "QCOW2-EXTERNAL-DATA",
            "QCOW2-SNAPSHOT",
            "QCOW2-ORPHAN-CLUSTERS",
        ] {
            assert!(codes.contains(&expected), "missing {expected}");
        }
        let src = source();
        for a in &anomalies {
            let f = a.to_finding(src.clone());
            assert!(f.code.starts_with("QCOW2-"));
            assert!(f.severity.is_some());
            assert!(!f.note.is_empty());
            assert!(!f.evidence.is_empty());
        }
    }

    #[test]
    fn qcow1_version_is_flagged() {
        let mut i = info();
        i.version = 1;
        let out = audit(&i);
        let a = out
            .iter()
            .find(|a| a.code() == "QCOW2-QCOW1")
            .expect("v1 finding");
        assert_eq!(a.severity(), Severity::Low);
        assert!(a.note().to_lowercase().contains("version 1") || a.note().contains("QCOW1"));
        assert!(!a.evidence().is_empty());
    }

    #[test]
    fn qcow2_and_qcow3_are_not_flagged_as_v1() {
        for v in [2u32, 3] {
            let mut i = info();
            i.version = v;
            assert!(audit(&i).iter().all(|a| a.code() != "QCOW2-QCOW1"));
        }
    }

    #[test]
    fn severities_are_graded() {
        let mut i = info();
        i.incompatible_features = FEAT_CORRUPT | FEAT_DIRTY;
        let a = audit(&i);
        let corrupt = a.iter().find(|x| x.code() == "QCOW2-CORRUPT").unwrap();
        let dirty = a.iter().find(|x| x.code() == "QCOW2-DIRTY").unwrap();
        assert_eq!(corrupt.severity(), Severity::High);
        assert_eq!(dirty.severity(), Severity::Low);
    }

    // ---- orphan clusters ------------------------------------------------

    #[test]
    fn no_orphans_yields_no_finding() {
        assert!(audit_orphans(&report(100, 0)).is_none());
    }

    #[test]
    fn orphans_yield_a_medium_finding_with_the_count() {
        let a = audit_orphans(&report(100, 7)).expect("orphan finding");
        assert_eq!(a.code(), "QCOW2-ORPHAN-CLUSTERS");
        assert_eq!(a.severity(), Severity::Medium);
        assert!(a.note().contains('7'), "note must carry the count: {}", a.note());
        let joined = joined_evidence(&a);
        assert!(joined.contains('7'), "evidence must carry the count: {joined}");
    }

    #[test]
    fn orphan_anomaly_round_trips_to_a_finding() {
        let a = audit_orphans(&report(10, 3)).unwrap();
        let f = a.to_finding(source());
        assert_eq!(f.code, "QCOW2-ORPHAN-CLUSTERS");
        assert_eq!(f.severity, Some(Severity::Medium));
        assert!(!f.evidence.is_empty());
    }

    // ---- audit_path -----------------------------------------------------

    #[test]
    fn audit_path_inspects_a_real_header_then_audits() {
        // Minimal big-endian header: magic, version 2, a non-zero
        // backing-file offset and 64 KiB clusters.
        let mut h = vec![0u8; 72];
        h[0..4].copy_from_slice(&0x5146_49fb_u32.to_be_bytes());
        h[4..8].copy_from_slice(&2u32.to_be_bytes());
        h[8..16].copy_from_slice(&512u64.to_be_bytes()); // backing_file_offset
        h[20..24].copy_from_slice(&16u32.to_be_bytes()); // cluster_bits
        let mut f = tempfile::NamedTempFile::new().unwrap();
        f.write_all(&h).unwrap();
        let anomalies = audit_path(f.path()).unwrap();
        assert!(anomalies
            .iter()
            .any(|a| matches!(a, Qcow2Anomaly::BackingFile { .. })));
    }

    #[test]
    fn audit_path_propagates_errors_on_non_qcow2() {
        let mut f = tempfile::NamedTempFile::new().unwrap();
        f.write_all(b"definitely not a qcow2 image header").unwrap();
        assert!(audit_path(f.path()).is_err());
    }

    // ---- snapshots ------------------------------------------------------

    #[test]
    fn audit_snapshots_emits_one_finding_per_snapshot() {
        let snaps = vec![snap("alpha", 1_700_000_000), snap("beta", 1_700_000_050)];
        let out = audit_snapshots(&snaps);
        assert_eq!(out.len(), 2);
        assert!(out.iter().all(|a| a.code() == "QCOW2-SNAPSHOT"));
    }

    #[test]
    fn audit_snapshots_empty_yields_nothing() {
        assert!(audit_snapshots(&[]).is_empty());
    }

    #[test]
    fn snapshot_finding_carries_name_and_timestamp_in_evidence() {
        let out = audit_snapshots(&[snap("alpha", 1_700_000_000)]);
        let a = &out[0];
        assert_eq!(a.code(), "QCOW2-SNAPSHOT");
        assert!(a.severity() == Severity::Low || a.severity() == Severity::Info);
        assert!(a.note().contains("alpha"), "note must name the snapshot: {}", a.note());
        let joined = joined_evidence(a);
        assert!(joined.contains("alpha"), "evidence must carry the name: {joined}");
        assert!(
            joined.contains("1700000000"),
            "evidence must carry the timestamp: {joined}"
        );
    }

    #[test]
    fn snapshot_anomaly_round_trips_to_a_finding() {
        let out = audit_snapshots(&[snap("alpha", 1_700_000_000)]);
        let f = out[0].to_finding(source());
        assert_eq!(f.code, "QCOW2-SNAPSHOT");
        assert!(f.severity.is_some());
        assert!(!f.note.is_empty());
        assert!(!f.evidence.is_empty());
    }
}