vhdx-forensic 0.2.0

Forensic integrity analyzer for VHDX (Hyper-V) virtual disks — tamper/anomaly findings and in-memory repair, built on vhdx-core
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
//! In-memory VHDX builder for tests.
//!
//! Constructs the minimal valid VHDX byte structure per MS-VHDX spec,
//! including correct CRC32C checksums in headers and region tables.

use std::collections::HashMap;

/// Bitwise (table-free) CRC-32C (Castagnoli) of `data`.
///
/// Reflected polynomial `0x82F63B78`, all-ones initial value, final
/// inversion. Kept as a private copy (the same routine lives in header.rs)
/// so tests do not have to depend on crate-internal items.
fn crc32c(data: &[u8]) -> u32 {
    const REFLECTED_POLY: u32 = 0x82F6_3B78;

    let state = data.iter().fold(u32::MAX, |acc, &byte| {
        (0..8).fold(acc ^ u32::from(byte), |bits, _| {
            // Shift one bit out; XOR in the polynomial only when that bit
            // was set (the mask is all ones for 1 and zero for 0).
            (bits >> 1) ^ (REFLECTED_POLY & (bits & 1).wrapping_neg())
        })
    });
    !state
}

/// Stamp `block` with its own CRC-32C.
///
/// The 4-byte checksum field at `crc_offset` is zeroed, the checksum of the
/// whole block is computed in that state, and the result is stored in the
/// field as little-endian. Panics if the field does not fit inside `block`.
fn write_crc32c(block: &mut [u8], crc_offset: usize) {
    let field = crc_offset..crc_offset + 4;
    // The checksum must be computed with its own field reading as zero.
    block[field.clone()].fill(0);
    let checksum = crc32c(block).to_le_bytes();
    block[field].copy_from_slice(&checksum);
}

/// Test-only builder that assembles a VHDX image in memory.
///
/// Configure it with the `with_*` methods, then call `build` (or
/// `build_sparse`) to obtain the image bytes.
pub struct VhdxBuilder {
    /// Virtual disk size in bytes; written to the VirtualDiskSize metadata
    /// item and used to derive the number of data blocks.
    virtual_disk_size: u64,
    /// Payload block size in bytes (`new` sets 32 MB).
    block_size: u32,
    /// Logical sector size in bytes (`new` sets 512).
    logical_sector_size: u32,
    /// Sector payloads keyed by 0-indexed logical sector number.
    sector_data: HashMap<u64, Vec<u8>>,
    /// When true, no BAT entries or data blocks are written.
    sparse: bool,
    // Adversarial overrides — applied after normal build so CRCs are correct
    // for CRC-unprotected regions (metadata, BAT) or re-CRCed (region table).
    /// Replacement for the BlockSize field in the metadata items area.
    meta_block_size_override: Option<u32>,
    /// Replacement for the LogicalSectorSize field in the metadata items area.
    meta_sector_size_override: Option<u32>,
    /// Replacement for the VirtualDiskSize field in the metadata items area.
    meta_vdisk_size_override: Option<u64>,
    /// When true, the HasParent bit is set in the FileParameters flags.
    meta_has_parent: bool,
    /// Replacement for the BAT file offset in both region table copies.
    region_bat_offset_override: Option<u64>,
    /// Raw `(BAT entry index, value)` patches applied after the BAT is built.
    bat_patches: Vec<(usize, u64)>,
    /// Number of 0xCC bytes appended after the image.
    trailing_bytes: usize,
}

impl VhdxBuilder {
    /// Create a builder for a virtual disk of `virtual_disk_size` bytes.
    ///
    /// Defaults: 32 MB blocks, 512-byte logical sectors, no sector payloads,
    /// not sparse, no trailing bytes and no adversarial overrides.
    pub fn new(virtual_disk_size: u64) -> Self {
        const DEFAULT_BLOCK_SIZE: u32 = 32 << 20; // 32 MB
        const DEFAULT_SECTOR_SIZE: u32 = 512;

        Self {
            // Disk geometry.
            virtual_disk_size,
            block_size: DEFAULT_BLOCK_SIZE,
            logical_sector_size: DEFAULT_SECTOR_SIZE,
            // Image content.
            sector_data: HashMap::new(),
            sparse: false,
            trailing_bytes: 0,
            // Adversarial overrides: all disabled.
            meta_block_size_override: None,
            meta_sector_size_override: None,
            meta_vdisk_size_override: None,
            meta_has_parent: false,
            region_bat_offset_override: None,
            bat_patches: Vec::new(),
        }
    }

    /// Override the BlockSize field in the metadata item area (bypasses CRC — not protected).
    pub fn with_meta_block_size(mut self, block_size: u32) -> Self {
        self.meta_block_size_override = Some(block_size);
        self
    }

    /// Override the LogicalSectorSize field in the metadata item area.
    pub fn with_meta_sector_size(mut self, sector_size: u32) -> Self {
        self.meta_sector_size_override = Some(sector_size);
        self
    }

    /// Override the VirtualDiskSize field in the metadata item area.
    pub fn with_meta_vdisk_size(mut self, vdisk_size: u64) -> Self {
        self.meta_vdisk_size_override = Some(vdisk_size);
        self
    }

    /// Override the BAT region file_offset in both region table copies (re-CRCs each).
    pub fn with_region_bat_offset(mut self, offset: u64) -> Self {
        self.region_bat_offset_override = Some(offset);
        self
    }

    /// Queue a raw overwrite of BAT entry `entry_idx` (an index into the BAT
    /// array) with `value`. Patches are applied in insertion order after the
    /// normal BAT is written; a patch that would land past the end of the
    /// image buffer is skipped.
    pub fn with_bat_patch(mut self, entry_idx: usize, value: u64) -> Self {
        let patch = (entry_idx, value);
        self.bat_patches.push(patch);
        self
    }

    /// Mark all data blocks as not-present (sparse). Reads return zeros.
    pub fn build_sparse(mut self) -> Vec<u8> {
        self.sparse = true;
        self.build()
    }

    /// Set the payload for logical sector `sector` (0-indexed).
    ///
    /// Supplying the same sector again replaces the earlier payload. At build
    /// time at most one logical sector's worth of `data` is copied.
    pub fn with_sector_data(mut self, sector: u64, data: Vec<u8>) -> Self {
        let _replaced = self.sector_data.insert(sector, data);
        self
    }

    /// Set HasParent=true in the FileParameters metadata item (bit 1 of Flags).
    pub fn with_has_parent(mut self) -> Self {
        self.meta_has_parent = true;
        self
    }

    /// Append N extra non-zero bytes at the end of the image (simulates trailing slack).
    pub fn with_trailing_bytes(mut self, n: usize) -> Self {
        self.trailing_bytes = n;
        self
    }

    /// Build the VHDX byte image.
    ///
    /// Steps, in order:
    /// 1. Lay out and zero-fill the file buffer (fixed layout, see below).
    /// 2. Write the file identifier, both headers, both region table copies
    ///    and the metadata region.
    /// 3. If the builder is not sparse and has sector payloads, mark every
    ///    data block FULLY_PRESENT in the BAT and copy the payloads in.
    ///    Otherwise the BAT stays all zero and the image ends where the data
    ///    blocks would start.
    /// 4. Apply the adversarial overrides (metadata fields, region table BAT
    ///    offset, raw BAT patches, HasParent flag).
    /// 5. Append the requested trailing bytes.
    ///
    /// Note that when payloads are present the buffer covers every data block
    /// of the virtual disk, so memory use scales with `virtual_disk_size`.
    pub fn build(self) -> Vec<u8> {
        // Fixed layout for test images (MS-VHDX spec §2.1):
        //   0x000000 - 0x00FFFF : File Identifier (64 KB slot)
        //   0x010000 - 0x01FFFF : Header 1 (4 KB content + 60 KB padding)
        //   0x020000 - 0x02FFFF : Header 2 (4 KB content + 60 KB padding)
        //   0x030000 - 0x03FFFF : Region Table 1 (64 KB)
        //   0x040000 - 0x04FFFF : Region Table 2 (64 KB)
        //   0x050000 - 0x0FFFFF : Reserved padding to 1 MB boundary
        //   0x100000 - 0x2FFFFF : Unused (left zero; nothing is written here)
        //   0x300000 - 0x31FFFF : Metadata region (64 KB table + 64 KB items)
        //   0x400000 - ...      : BAT region (1 MB-aligned after metadata)
        //   <bat-addressed>      : Data blocks
        const MIB: u64 = 0x0010_0000;
        const REGION_TABLE_OFFSETS: [usize; 2] = [0x0003_0000, 0x0004_0000];
        // BAT entry state PAYLOAD_BLOCK_FULLY_PRESENT, stored in bits 0-2.
        const PAYLOAD_BLOCK_FULLY_PRESENT: u64 = 6;

        let metadata_offset: u64 = 0x0030_0000; // 3 MB (1MB-aligned)
        let metadata_len: u32 = 0x0002_0000; // 128 KB

        // Compute BAT size.
        let block_size = u64::from(self.block_size);
        let sector_size = u64::from(self.logical_sector_size);
        let data_block_count = self.virtual_disk_size.div_ceil(block_size);
        let chunk_ratio = (1u64 << 23) * sector_size / block_size;
        // One extra (sector bitmap) slot is reserved per started chunk.
        let total_bat_entries = data_block_count + data_block_count.div_ceil(chunk_ratio);
        let bat_len = (total_bat_entries * 8).next_multiple_of(MIB) as u32;
        // BAT must be at a 1MB-aligned offset (BAT entries encode offsets in MB units).
        let bat_offset: u64 = (metadata_offset + u64::from(metadata_len)).next_multiple_of(MIB);
        // Data blocks must also start at a 1MB-aligned offset.
        let data_start: u64 = (bat_offset + u64::from(bat_len)).next_multiple_of(MIB);

        // Only allocate and mark data blocks when there is actual sector data:
        // an all-zero BAT (NOT_PRESENT) is a valid sparse state, whereas
        // FULLY_PRESENT entries with no allocated file space would produce
        // BatEntryBeyondContainer on a "clean" image.
        let write_blocks = !self.sparse && !self.sector_data.is_empty();

        // Allocate the file buffer. Each data block is block_size bytes at
        // data_start + index * block_size.
        let file_size = if write_blocks {
            data_start + data_block_count * block_size
        } else {
            data_start
        };
        let file_size = file_size.next_multiple_of(MIB) as usize; // align to 1MB
        let mut buf = vec![0u8; file_size];

        // File Identifier at offset 0.
        buf[0..8].copy_from_slice(b"vhdxfile");
        // Creator string: UTF-16LE "vhdx-forensic-test" plus a null
        // terminator; the rest of the field stays zero.
        let creator = "vhdx-forensic-test";
        let mut creator_utf16: Vec<u8> = creator
            .encode_utf16()
            .flat_map(|c| c.to_le_bytes())
            .collect();
        creator_utf16.extend_from_slice(&[0, 0]); // null terminator
        let copy_len = creator_utf16.len().min(504);
        buf[8..8 + copy_len].copy_from_slice(&creator_utf16[..copy_len]);

        // Header 1 at 0x10000.
        Self::write_header(&mut buf, 0x0001_0000, 1);
        // Header 2 at 0x20000 (sequence 0 — header 1 wins).
        Self::write_header(&mut buf, 0x0002_0000, 0);

        // Region Table 1 at 0x30000 and its identical copy at 0x40000.
        for rt_off in REGION_TABLE_OFFSETS {
            Self::write_region_table(
                &mut buf,
                rt_off,
                bat_offset,
                bat_len,
                metadata_offset,
                metadata_len,
            );
        }

        // Metadata region.
        Self::write_metadata(
            &mut buf,
            metadata_offset as usize,
            self.block_size,
            self.virtual_disk_size,
            self.logical_sector_size,
        );

        // BAT entries and data blocks.
        if write_blocks {
            for block_idx in 0..data_block_count {
                // Skip over the slot interleaved after every chunk_ratio
                // payload entries.
                let bat_entry_idx = (block_idx + block_idx / chunk_ratio) as usize;
                // File offset for this data block in units of 1 MB.
                let offset_mb = (data_start + block_idx * block_size) / MIB;
                let bat_entry: u64 = (offset_mb << 20) | PAYLOAD_BLOCK_FULLY_PRESENT;
                let bat_pos = bat_offset as usize + bat_entry_idx * 8;
                if bat_pos + 8 <= buf.len() {
                    buf[bat_pos..bat_pos + 8].copy_from_slice(&bat_entry.to_le_bytes());
                }
            }

            // Copy each payload straight to its sector rather than probing
            // the map once for every sector of every block.
            let sectors_per_block = block_size / sector_size;
            let sector_len = self.logical_sector_size as usize;
            // A block smaller than one sector holds no sectors at all.
            if sectors_per_block > 0 {
                for (&sector, payload) in &self.sector_data {
                    let block_idx = sector / sectors_per_block;
                    if block_idx >= data_block_count {
                        // Sector lies beyond the last data block: ignored.
                        continue;
                    }
                    let sector_in_block = sector % sectors_per_block;
                    let dst = (data_start + block_idx * block_size + sector_in_block * sector_size)
                        as usize;
                    // Payloads longer than a sector are truncated.
                    let copy_len = payload.len().min(sector_len);
                    if dst + copy_len <= buf.len() {
                        buf[dst..dst + copy_len].copy_from_slice(&payload[..copy_len]);
                    }
                }
            }
        }

        // Apply adversarial overrides.
        // Items live at metadata_offset + 0x10000 (items area, NOT CRC-protected).
        let meta_items_base = metadata_offset as usize + 0x10000;
        if let Some(bs) = self.meta_block_size_override {
            buf[meta_items_base..meta_items_base + 4].copy_from_slice(&bs.to_le_bytes());
        }
        if let Some(vds) = self.meta_vdisk_size_override {
            buf[meta_items_base + 8..meta_items_base + 16].copy_from_slice(&vds.to_le_bytes());
        }
        if let Some(ss) = self.meta_sector_size_override {
            buf[meta_items_base + 16..meta_items_base + 20].copy_from_slice(&ss.to_le_bytes());
        }
        // Region table: BAT entry's file_offset field is at byte 32 within the region table.
        // Region tables ARE CRC32C-protected, so re-CRC both copies after patching.
        if let Some(new_bat_off) = self.region_bat_offset_override {
            for rt_off in REGION_TABLE_OFFSETS {
                buf[rt_off + 32..rt_off + 40].copy_from_slice(&new_bat_off.to_le_bytes());
                write_crc32c(&mut buf[rt_off..rt_off + 65536], 4);
            }
        }
        // BAT entries are NOT CRC-protected — patch directly. Patches that
        // would fall outside the buffer are skipped.
        for &(entry_idx, value) in &self.bat_patches {
            let bat_pos = bat_offset as usize + entry_idx * 8;
            if bat_pos + 8 <= buf.len() {
                buf[bat_pos..bat_pos + 8].copy_from_slice(&value.to_le_bytes());
            }
        }

        // HasParent flag — bit 1 of the FileParameters Flags u32 at
        // meta_items_base+4. The field is little-endian, so that bit lives in
        // its first byte.
        if self.meta_has_parent {
            buf[meta_items_base + 4] |= 0b10;
        }

        // Trailing bytes (non-zero to be detectable); a count of 0 adds nothing.
        buf.resize(buf.len() + self.trailing_bytes, 0xCC);

        buf
    }

    /// Write a 4 KB VHDX header into `buf` at `offset` with sequence number
    /// `seq`, then stamp its CRC32C (checksum field at bytes 4..8).
    ///
    /// The LogGuid field [48..64] is not written, so on the zero-filled image
    /// buffer it stays all zero (no dirty log, the correct clean state).
    fn write_header(buf: &mut [u8], offset: usize, seq: u64) {
        // Copy `bytes` into `dst` starting at byte `at`.
        fn put(dst: &mut [u8], at: usize, bytes: &[u8]) {
            dst[at..at + bytes.len()].copy_from_slice(bytes);
        }

        // Both GUIDs are non-zero so that, e.g., FileWriteGuidAllZeros does
        // not fire on clean images.
        const FILE_WRITE_GUID: [u8; 16] = [
            0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E,
            0x0F, 0x10,
        ];
        const DATA_WRITE_GUID: [u8; 16] = [
            0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E,
            0x1F, 0x20,
        ];

        let header = &mut buf[offset..offset + 4096];
        put(header, 0, b"head"); // Signature
        put(header, 8, &seq.to_le_bytes()); // SequenceNumber
        put(header, 16, &FILE_WRITE_GUID); // FileWriteGuid
        put(header, 32, &DATA_WRITE_GUID); // DataWriteGuid
        // NOTE(review): MS-VHDX §2.2.2 appears to require LogVersion = 0 (and
        // Version = 1) — confirm the intended value against the spec and the
        // parser before changing it.
        put(header, 64, &1u16.to_le_bytes()); // LogVersion = 1
        put(header, 66, &1u16.to_le_bytes()); // Version = 1
        put(header, 68, &0u32.to_le_bytes()); // LogLength
        put(header, 72, &0u64.to_le_bytes()); // LogOffset
        write_crc32c(header, 4);
    }

    /// Write a 64 KB region table into `buf` at `offset` describing the BAT
    /// and metadata regions (both flagged Required), then stamp its CRC32C
    /// (checksum field at bytes 4..8).
    fn write_region_table(
        buf: &mut [u8],
        offset: usize,
        bat_offset: u64,
        bat_len: u32,
        metadata_offset: u64,
        metadata_len: u32,
    ) {
        // Copy `bytes` into `dst` starting at byte `at`.
        fn put(dst: &mut [u8], at: usize, bytes: &[u8]) {
            dst[at..at + bytes.len()].copy_from_slice(bytes);
        }

        // BAT region GUID: 2DC27766-F623-4200-9D64-115E9BFD4A08
        const BAT_REGION_GUID: [u8; 16] = [
            0x66, 0x77, 0xC2, 0x2D, 0x23, 0xF6, 0x00, 0x42, 0x9D, 0x64, 0x11, 0x5E, 0x9B, 0xFD,
            0x4A, 0x08,
        ];
        // Metadata region GUID: 8B7CA206-4790-4B9A-B8FE-575F050F886E
        const METADATA_REGION_GUID: [u8; 16] = [
            0x06, 0xA2, 0x7C, 0x8B, 0x90, 0x47, 0x9A, 0x4B, 0xB8, 0xFE, 0x57, 0x5F, 0x05, 0x0F,
            0x88, 0x6E,
        ];

        // (GUID, file offset, length) for entry 0 (BAT) and entry 1 (metadata).
        let regions = [
            (BAT_REGION_GUID, bat_offset, bat_len),
            (METADATA_REGION_GUID, metadata_offset, metadata_len),
        ];

        let table = &mut buf[offset..offset + 65536];
        put(table, 0, b"regi");
        // The checksum at [4..8] is written last.
        put(table, 8, &(regions.len() as u32).to_le_bytes()); // EntryCount = 2
        table[12..16].fill(0); // Reserved

        // 32-byte entries follow the 16-byte table header.
        for (slot, (guid, file_offset, length)) in regions.iter().enumerate() {
            let at = 16 + slot * 32;
            put(table, at, guid);
            put(table, at + 16, &file_offset.to_le_bytes());
            put(table, at + 24, &length.to_le_bytes());
            put(table, at + 28, &1u32.to_le_bytes()); // Required
        }

        write_crc32c(table, 4);
    }

    /// Write the metadata region at `region_start`: a table with three
    /// entries (FileParameters, VirtualDiskSize, LogicalSectorSize) in the
    /// first 64 KB, and the item values they point to directly after it.
    ///
    /// Nothing here is checksummed, and HasParent is written as false.
    fn write_metadata(
        buf: &mut [u8],
        region_start: usize,
        block_size: u32,
        virtual_disk_size: u64,
        logical_sector_size: u32,
    ) {
        // Copy `bytes` into `dst` starting at byte `at`.
        fn put(dst: &mut [u8], at: usize, bytes: &[u8]) {
            dst[at..at + bytes.len()].copy_from_slice(bytes);
        }

        // The table occupies the first 64 KB; items start right after it.
        const ITEMS_AREA: usize = 0x10000;
        // Table entry flags: IsVirtualDisk | IsRequired.
        const ENTRY_FLAGS: u32 = 0b110;

        // FileParameters GUID: CAA16737-FA36-4D43-B3B6-33F0AA44E76B
        const FILE_PARAMETERS_GUID: [u8; 16] = [
            0x37, 0x67, 0xA1, 0xCA, 0x36, 0xFA, 0x43, 0x4D, 0xB3, 0xB6, 0x33, 0xF0, 0xAA, 0x44,
            0xE7, 0x6B,
        ];
        // VirtualDiskSize GUID: 2FA54224-CD1B-4876-B211-5BE07A6CE32C
        const VIRTUAL_DISK_SIZE_GUID: [u8; 16] = [
            0x24, 0x42, 0xA5, 0x2F, 0x1B, 0xCD, 0x76, 0x48, 0xB2, 0x11, 0x5B, 0xE0, 0x7A, 0x6C,
            0xE3, 0x2C,
        ];
        // LogicalSectorSize GUID: 8141BF1D-A96F-4709-BA47-F233A8FAAB5F
        const LOGICAL_SECTOR_SIZE_GUID: [u8; 16] = [
            0x1D, 0xBF, 0x41, 0x81, 0x6F, 0xA9, 0x09, 0x47, 0xBA, 0x47, 0xF2, 0x33, 0xA8, 0xFA,
            0xAB, 0x5F,
        ];

        // (GUID, item offset, item length). Offsets are measured from the
        // start of the metadata region (MS-VHDX §3.3.2), so every item lands
        // in the items area.
        let entries: [([u8; 16], u32, u32); 3] = [
            (FILE_PARAMETERS_GUID, 0x10000, 8),
            (VIRTUAL_DISK_SIZE_GUID, 0x10008, 8),
            (LOGICAL_SECTOR_SIZE_GUID, 0x10010, 4),
        ];

        let table = &mut buf[region_start..region_start + ITEMS_AREA];
        put(table, 0, b"metadata");
        // Reserved u16 at [8..10] is left untouched.
        put(table, 10, &(entries.len() as u16).to_le_bytes()); // EntryCount = 3

        // 32-byte entries follow the 32-byte table header.
        for (slot, (guid, item_offset, item_len)) in entries.iter().enumerate() {
            let at = 32 + slot * 32;
            put(table, at, guid);
            put(table, at + 16, &item_offset.to_le_bytes()); // Offset
            put(table, at + 20, &item_len.to_le_bytes()); // Length
            put(table, at + 24, &ENTRY_FLAGS.to_le_bytes());
        }

        // Item values at region_start + 0x10000.
        let items_start = region_start + ITEMS_AREA;
        let items = &mut buf[items_start..items_start + 64];
        // FileParameters: BlockSize (u32) + Flags (u32, bit1=HasParent=0).
        put(items, 0, &block_size.to_le_bytes());
        put(items, 4, &0u32.to_le_bytes());
        // VirtualDiskSize: u64.
        put(items, 8, &virtual_disk_size.to_le_bytes());
        // LogicalSectorSize: u32.
        put(items, 16, &logical_sector_size.to_le_bytes());
    }
}