Skip to main content

gpt_forensic/
analyse.rs

1//! Orchestration: the public [`analyse`] entry point.
2//!
3//! Reads the primary GPT (LBA 1 + entry array), validates its header and array
4//! CRCs, then reads the backup GPT at the alternate LBA and reconciles the two
5//! — primary/backup divergence is a strong tampering signal. Finally checks the
6//! partition set for overlaps and out-of-bounds extents.
7
8use std::io::{Read, Seek, SeekFrom};
9
10use crate::crc32;
11use crate::entry::{parse_entry_array, GptEntry};
12use crate::findings::{Anomaly, AnomalyKind, GptAnalysis, Location};
13use crate::header::GptHeader;
14use crate::Error;
15
/// Tunable knobs accepted by [`analyse_with_options`].
///
/// The [`Default`] value leaves every knob unset, which is what plain
/// [`analyse`] uses.
#[derive(Clone, Copy, Debug, Default)]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
pub struct AnalyseOptions {
    /// Logical sector size, in bytes, to use for every LBA-to-byte conversion.
    /// With `None` (the default) the size is auto-detected as 512 or 4096 by
    /// probing for the GPT header signature.
    pub sector_size: Option<u64>,
}
24
25/// Like [`analyse`], but with explicit [`AnalyseOptions`] (e.g. to force the
26/// sector size when the header magic is corrupt).
27///
28/// # Errors
29/// Same as [`analyse`].
30pub fn analyse_with_options<R: Read + Seek>(
31    reader: &mut R,
32    disk_size_bytes: u64,
33    opts: AnalyseOptions,
34) -> Result<GptAnalysis, Error> {
35    analyse_inner(reader, disk_size_bytes, opts)
36}
37
38/// Probe the logical sector size by locating the GPT header ("EFI PART") at
39/// LBA 1 — byte 512 for 512-byte/512e sectors, byte 4096 for 4Kn. Defaults to
40/// 512 when neither matches (the primary parse then reports `BadSignature`).
41fn detect_sector_size<R: Read + Seek>(reader: &mut R) -> Result<u64, Error> {
42    for size in [512u64, 4096] {
43        reader.seek(SeekFrom::Start(size))?;
44        let mut sig = [0u8; 8];
45        if reader.read_exact(&mut sig).is_ok() && &sig == crate::header::SIGNATURE {
46            return Ok(size);
47        }
48    }
49    Ok(512)
50}
51
52/// Perform a full forensic analysis of a GPT-partitioned disk image.
53///
54/// `disk_size_bytes` bounds the backup-GPT read; pass `0` if unknown (the backup
55/// is then located solely via the primary header's `alternate_lba`).
56///
57/// # Errors
58/// [`Error::BadSignature`] if LBA 1 is not a GPT header; [`Error::Io`] on read
59/// failure of the primary structures.
60#[cfg_attr(feature = "trace", tracing::instrument(level = "debug", skip(reader)))]
61pub fn analyse<R: Read + Seek>(reader: &mut R, disk_size_bytes: u64) -> Result<GptAnalysis, Error> {
62    analyse_inner(reader, disk_size_bytes, AnalyseOptions::default())
63}
64
65fn analyse_inner<R: Read + Seek>(
66    reader: &mut R,
67    disk_size_bytes: u64,
68    opts: AnalyseOptions,
69) -> Result<GptAnalysis, Error> {
70    let mut anomalies = Vec::new();
71    let sector_size = match opts.sector_size {
72        Some(s) => s,
73        None => detect_sector_size(reader)?,
74    };
75
76    // ── Primary header + entry array ────────────────────────────────────────
77    let primary_sector = read_sector(reader, 1, sector_size)?;
78    let primary = GptHeader::parse(&primary_sector)?;
79    if !primary.header_crc_valid {
80        record(
81            &mut anomalies,
82            AnomalyKind::HeaderCrcInvalid {
83                location: Location::Primary,
84            },
85        );
86    }
87    check_header_slack(
88        &primary_sector,
89        primary.header_size,
90        Location::Primary,
91        &mut anomalies,
92    );
93    if primary.my_lba != 1 {
94        record(
95            &mut anomalies,
96            AnomalyKind::HeaderLbaMismatch {
97                location: Location::Primary,
98                claimed: primary.my_lba,
99                actual: 1,
100            },
101        );
102    }
103
104    let primary_array = read_entry_array(reader, &primary, sector_size)?;
105    if crc32::checksum(&primary_array) != primary.partition_array_crc32 {
106        record(
107            &mut anomalies,
108            AnomalyKind::PartitionArrayCrcInvalid {
109                location: Location::Primary,
110            },
111        );
112    }
113    let partitions = parse_entry_array(
114        &primary_array,
115        primary.num_partition_entries,
116        primary.partition_entry_size,
117    );
118
119    // ── Backup header + entry array, reconciled with the primary ────────────
120    let backup = read_backup(
121        reader,
122        &primary,
123        &primary_array,
124        sector_size,
125        &mut anomalies,
126    );
127
128    // The backup GPT should sit at the last LBA; anything past it is hidden.
129    if disk_size_bytes > 0 {
130        let disk_last_lba = (disk_size_bytes / sector_size).saturating_sub(1);
131        if disk_last_lba > primary.alternate_lba {
132            record(
133                &mut anomalies,
134                AnomalyKind::BackupGptNotAtDiskEnd {
135                    alternate_lba: primary.alternate_lba,
136                    disk_last_lba,
137                },
138            );
139        }
140    }
141
142    // ── Partition geometry checks ───────────────────────────────────────────
143    check_overlaps(&partitions, &mut anomalies);
144    check_bounds(
145        &partitions,
146        primary.first_usable_lba,
147        primary.last_usable_lba,
148        &mut anomalies,
149    );
150
151    for (a, b) in crate::collision::find_duplicate_partition_guids(&partitions) {
152        record(&mut anomalies, AnomalyKind::DuplicatePartitionGuid { a, b });
153    }
154    check_encrypted_volumes(reader, &partitions, sector_size, &mut anomalies);
155
156    // ── MBR ↔ GPT reconciliation (standalone — reads LBA 0 itself) ──────────
157    reconcile_mbr(
158        reader,
159        &partitions,
160        disk_size_bytes,
161        sector_size,
162        &mut anomalies,
163    );
164
165    // Tamper-evident fingerprint of the partition table (header + entry array).
166    let mut evidence = primary_sector.to_vec();
167    evidence.extend_from_slice(&primary_array);
168    let gpt_sha256 = crate::sha256::hex(&crate::sha256::digest(&evidence));
169
170    let disk_guid = primary.disk_guid;
171    Ok(GptAnalysis {
172        primary,
173        backup,
174        disk_guid,
175        partitions,
176        sector_size,
177        gpt_sha256,
178        anomalies,
179    })
180}
181
182fn record(anomalies: &mut Vec<Anomaly>, kind: AnomalyKind) {
183    anomalies.push(Anomaly::new(kind));
184}
185
186/// Flag non-zero bytes in the GPT header LBA past `header_size` (the
187/// CRC-unprotected reserved area the UEFI spec requires to be zero).
188fn check_header_slack(
189    sector: &[u8; 512],
190    header_size: u32,
191    location: Location,
192    anomalies: &mut Vec<Anomaly>,
193) {
194    let start = (header_size as usize).clamp(92, 512);
195    if sector[start..].iter().any(|&b| b != 0) {
196        record(anomalies, AnomalyKind::HeaderSlackData { location });
197    }
198}
199
200/// Read the leading 512 bytes of the sector at `lba` (enough for a GPT header;
201/// the header is 92 bytes and never spans sectors). `sector_size` sets the LBA→
202/// byte stride (512 or 4096).
203fn read_sector<R: Read + Seek>(
204    reader: &mut R,
205    lba: u64,
206    sector_size: u64,
207) -> Result<[u8; 512], Error> {
208    reader.seek(SeekFrom::Start(lba * sector_size))?;
209    let mut buf = [0u8; 512];
210    reader.read_exact(&mut buf)?;
211    Ok(buf)
212}
213
214/// Read a header's partition entry array (`num * entry_size` bytes).
215fn read_entry_array<R: Read + Seek>(
216    reader: &mut R,
217    h: &GptHeader,
218    sector_size: u64,
219) -> Result<Vec<u8>, Error> {
220    let len = h.num_partition_entries as usize * h.partition_entry_size as usize;
221    reader.seek(SeekFrom::Start(h.partition_entry_lba * sector_size))?;
222    let mut buf = vec![0u8; len];
223    reader.read_exact(&mut buf)?;
224    Ok(buf)
225}
226
227/// Read and reconcile the backup GPT. A read/parse failure yields
228/// [`AnomalyKind::BackupGptUnreadable`]; field divergences from the primary
229/// yield [`AnomalyKind::PrimaryBackupDivergence`].
230fn read_backup<R: Read + Seek>(
231    reader: &mut R,
232    primary: &GptHeader,
233    primary_array: &[u8],
234    sector_size: u64,
235    anomalies: &mut Vec<Anomaly>,
236) -> Option<GptHeader> {
237    let Ok(backup_sector) = read_sector(reader, primary.alternate_lba, sector_size) else {
238        record(anomalies, AnomalyKind::BackupGptUnreadable);
239        return None;
240    };
241    let Ok(backup) = GptHeader::parse(&backup_sector) else {
242        record(anomalies, AnomalyKind::BackupGptUnreadable);
243        return None;
244    };
245
246    if !backup.header_crc_valid {
247        record(
248            anomalies,
249            AnomalyKind::HeaderCrcInvalid {
250                location: Location::Backup,
251            },
252        );
253    }
254    check_header_slack(
255        &backup_sector,
256        backup.header_size,
257        Location::Backup,
258        anomalies,
259    );
260    if backup.my_lba != primary.alternate_lba {
261        record(
262            anomalies,
263            AnomalyKind::HeaderLbaMismatch {
264                location: Location::Backup,
265                claimed: backup.my_lba,
266                actual: primary.alternate_lba,
267            },
268        );
269    }
270    if let Ok(arr) = read_entry_array(reader, &backup, sector_size) {
271        if crc32::checksum(&arr) != backup.partition_array_crc32 {
272            record(
273                anomalies,
274                AnomalyKind::PartitionArrayCrcInvalid {
275                    location: Location::Backup,
276                },
277            );
278        }
279        // Byte-compare the two entry arrays directly: this catches tampering even
280        // when the CRC *fields* were forged to match.
281        if arr != primary_array {
282            record(
283                anomalies,
284                AnomalyKind::PrimaryBackupDivergence {
285                    field: "entry array contents",
286                },
287            );
288        }
289    }
290
291    // Fields that MUST match between the two copies (my_lba/alternate_lba/
292    // partition_entry_lba are intentionally mirrored, so they are excluded).
293    let checks: &[(&'static str, bool)] = &[
294        ("revision", primary.revision == backup.revision),
295        ("header_size", primary.header_size == backup.header_size),
296        ("disk_guid", primary.disk_guid == backup.disk_guid),
297        (
298            "first_usable_lba",
299            primary.first_usable_lba == backup.first_usable_lba,
300        ),
301        (
302            "last_usable_lba",
303            primary.last_usable_lba == backup.last_usable_lba,
304        ),
305        (
306            "num_partition_entries",
307            primary.num_partition_entries == backup.num_partition_entries,
308        ),
309        (
310            "partition_entry_size",
311            primary.partition_entry_size == backup.partition_entry_size,
312        ),
313        (
314            "partition_array_crc32",
315            primary.partition_array_crc32 == backup.partition_array_crc32,
316        ),
317    ];
318    for &(field, ok) in checks {
319        if !ok {
320            record(anomalies, AnomalyKind::PrimaryBackupDivergence { field });
321        }
322    }
323
324    Some(backup)
325}
326
327/// Minimum hidden tail (sectors) before an undersized protective MBR is flagged.
328const PROTECTIVE_UNDERSIZE_TOLERANCE: u64 = 2048;
329
330/// Reconcile the legacy/protective MBR (LBA 0) against the GPT.
331///
332/// Surfaces a missing or undersized protective entry, and hybrid-MBR partitions
333/// that match no GPT partition (legacy-visible, GPT-invisible). Reads LBA 0
334/// directly — no dependency on a full MBR parser — so standalone gpt-forensic
335/// consumers get the cross-examination too.
336fn reconcile_mbr<R: Read + Seek>(
337    reader: &mut R,
338    partitions: &[GptEntry],
339    disk_size_bytes: u64,
340    sector_size: u64,
341    anomalies: &mut Vec<Anomaly>,
342) {
343    let Ok(sector) = read_sector(reader, 0, sector_size) else {
344        return; // no readable MBR to reconcile against
345    };
346    let mbr = crate::mbr::parse_mbr_entries(&sector);
347    let active: Vec<_> = mbr.iter().filter(|e| !e.is_empty()).collect();
348
349    match active.iter().find(|e| e.is_protective()) {
350        None => record(anomalies, AnomalyKind::MissingProtectiveMbr),
351        Some(p) if disk_size_bytes > 0 && p.lba_count != u32::MAX => {
352            let disk_last_lba = (disk_size_bytes / sector_size).saturating_sub(1);
353            let covered_last_lba = p.lba_end();
354            if disk_last_lba.saturating_sub(covered_last_lba) > PROTECTIVE_UNDERSIZE_TOLERANCE {
355                record(
356                    anomalies,
357                    AnomalyKind::ProtectiveMbrUndersized {
358                        covered_last_lba,
359                        disk_last_lba,
360                    },
361                );
362            }
363        }
364        Some(_) => {}
365    }
366
367    // Hybrid entries (non-protective) that overlap no GPT partition are hidden.
368    for e in active.iter().filter(|e| !e.is_protective()) {
369        let (start, end) = (u64::from(e.lba_start), e.lba_end());
370        let overlaps_gpt = partitions
371            .iter()
372            .any(|g| start <= g.last_lba && g.first_lba <= end);
373        if !overlaps_gpt {
374            record(
375                anomalies,
376                AnomalyKind::HybridMbrHiddenPartition {
377                    mbr_index: e.index,
378                    lba_start: e.lba_start,
379                    lba_count: e.lba_count,
380                },
381            );
382        }
383    }
384}
385
386/// Flag partitions whose first sector is near-maximal entropy with no readable
387/// filesystem — a hidden encrypted container. Partitions typed as encrypted
388/// (LUKS) are skipped, since high entropy is expected there.
389fn check_encrypted_volumes<R: Read + Seek>(
390    reader: &mut R,
391    partitions: &[GptEntry],
392    sector_size: u64,
393    anomalies: &mut Vec<Anomaly>,
394) {
395    for (index, p) in partitions.iter().enumerate() {
396        if p.type_name() == Some("Linux LUKS") {
397            continue;
398        }
399        let Ok(sector) = read_sector(reader, p.first_lba, sector_size) else {
400            continue;
401        };
402        // A recognized filesystem magic means it is not an opaque encrypted blob.
403        if forensicnomicon::filesystems::detect_name(&sector).is_some() {
404            continue;
405        }
406        let entropy = crate::entropy::shannon(&sector);
407        if entropy > crate::entropy::HIGH_ENTROPY_THRESHOLD {
408            record(
409                anomalies,
410                AnomalyKind::HiddenEncryptedVolume { index, entropy },
411            );
412        }
413    }
414}
415
416/// Flag overlapping partition extents.
417fn check_overlaps(partitions: &[GptEntry], anomalies: &mut Vec<Anomaly>) {
418    let mut idx: Vec<usize> = (0..partitions.len()).collect();
419    idx.sort_by_key(|&i| partitions[i].first_lba);
420    for pair in idx.windows(2) {
421        let (a, b) = (pair[0], pair[1]);
422        if partitions[b].first_lba <= partitions[a].last_lba {
423            record(anomalies, AnomalyKind::OverlappingPartitions { a, b });
424        }
425    }
426}
427
428/// Flag partitions extending outside the usable LBA range — past `last_usable`,
429/// or starting before `first_usable` (on the reserved GPT metadata region).
430fn check_bounds(
431    partitions: &[GptEntry],
432    first_usable: u64,
433    last_usable: u64,
434    anomalies: &mut Vec<Anomaly>,
435) {
436    for (index, p) in partitions.iter().enumerate() {
437        if p.last_lba > last_usable {
438            record(
439                anomalies,
440                AnomalyKind::PartitionOutOfBounds {
441                    index,
442                    last_lba: p.last_lba,
443                    last_usable,
444                },
445            );
446        }
447        if p.first_lba < first_usable {
448            record(
449                anomalies,
450                AnomalyKind::PartitionOverlapsGptArea {
451                    index,
452                    first_lba: p.first_lba,
453                    first_usable,
454                },
455            );
456        }
457    }
458}