Skip to main content

winreg_format/
header.rs

1//! REGF base block (header) — first 4096 bytes of a hive file.
2
3use binrw::BinRead;
4
5/// REGF base block header (first 512 bytes of the 4096-byte header block).
6///
7/// Reference: research/regf-binary-format-specification.md Section 1.1
8#[derive(Debug, Clone, BinRead)]
9#[br(little, magic = b"regf")]
10pub struct BaseBlock {
11    /// Incremented on each write; must match secondary if hive was properly synced.
12    pub primary_sequence: u32,
13    /// Updated after successful write; mismatch = dirty hive.
14    pub secondary_sequence: u32,
15    /// FILETIME (UTC). Not updated as of Windows 8.1.
16    pub last_written: u64,
17    /// Always 1 for all known Windows versions.
18    pub major_version: u32,
19    /// 0-2 (NT 3.x), 3 (NT 4.0), 5 (XP+), 6 (Win10+ differencing).
20    pub minor_version: u32,
21    /// 0 = primary, 1 = transaction log, 2 = alternate (Win2000 SYSTEM.ALT).
22    pub file_type: u32,
23    /// Always 1 (direct memory load).
24    pub format: u32,
25    /// Offset to root key node cell, relative to hive bins data start.
26    pub root_cell_offset: u32,
27    /// Total size of all hive bins in bytes.
28    pub hive_bins_data_size: u32,
29    /// Logical sector size / 512. Typically 1 or 8.
30    pub clustering_factor: u32,
31    /// Internal hive path, UTF-16LE, 64 bytes. May contain remnant data.
32    pub file_name: [u8; 64],
33    /// Resource Manager GUID (Vista+). Null if CLFS not used.
34    pub rm_id: [u8; 16],
35    /// Log GUID. Usually same as `rm_id`.
36    pub log_id: [u8; 16],
37    /// Bit mask: 0x1 = pending txns, 0x2 = differencing hive.
38    pub flags: u32,
39    /// Transaction Manager GUID.
40    pub tm_id: [u8; 16],
41    /// "rmtm" signature validating GUID fields are present.
42    pub guid_signature: u32,
43    /// FILETIME of latest hive reorganization (Win8+).
44    pub last_reorganize_time: u64,
45    /// Reserved (332 bytes = 83 DWORDs).
46    #[br(count = 332)]
47    pub reserved1: Vec<u8>,
48    /// XOR-32 checksum of first 508 bytes (offsets 0x000-0x1FB).
49    pub checksum: u32,
50}
51
52impl BaseBlock {
53    /// Size of the base block in the file (always 4096 bytes).
54    pub const SIZE: usize = 4096;
55
56    /// Validate the XOR-32 checksum.
57    ///
58    /// Algorithm: XOR all 127 u32 LE words from offsets 0x000-0x1FB.
59    /// Special cases: result 0 becomes 1, result 0xFFFFFFFF becomes 0xFFFFFFFE.
60    pub fn validate_checksum(header_bytes: &[u8]) -> bool {
61        if header_bytes.len() < 512 {
62            return false;
63        }
64        let computed = Self::compute_checksum(header_bytes);
65        let stored = u32::from_le_bytes([
66            header_bytes[0x1FC],
67            header_bytes[0x1FD],
68            header_bytes[0x1FE],
69            header_bytes[0x1FF],
70        ]);
71        computed == stored
72    }
73
74    /// Compute the XOR-32 checksum over the first 508 bytes.
75    pub fn compute_checksum(header_bytes: &[u8]) -> u32 {
76        let mut checksum: u32 = 0;
77        for i in 0..127 {
78            let offset = i * 4;
79            let word = u32::from_le_bytes([
80                header_bytes[offset],
81                header_bytes[offset + 1],
82                header_bytes[offset + 2],
83                header_bytes[offset + 3],
84            ]);
85            checksum ^= word;
86        }
87        if checksum == 0 {
88            checksum = 1;
89        }
90        if checksum == 0xFFFF_FFFF {
91            checksum = 0xFFFF_FFFE;
92        }
93        checksum
94    }
95
96    /// Check if primary and secondary sequence numbers match (clean hive).
97    pub fn is_clean(&self) -> bool {
98        self.primary_sequence == self.secondary_sequence
99    }
100
101    /// Decode the internal file name from UTF-16LE.
102    pub fn file_name_string(&self) -> String {
103        let u16s: Vec<u16> = self
104            .file_name
105            .chunks_exact(2)
106            .map(|c| u16::from_le_bytes([c[0], c[1]]))
107            .take_while(|&c| c != 0)
108            .collect();
109        String::from_utf16_lossy(&u16s)
110    }
111}
112
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Cursor;

    /// File offset of the stored checksum field.
    const CHECKSUM_OFFSET: usize = 0x1FC;
    /// File offset of the 64-byte UTF-16LE file name field.
    const FILE_NAME_OFFSET: usize = 0x30;

    /// Write `value` as a little-endian u32 at `offset`.
    fn put_u32(buf: &mut [u8], offset: usize, value: u32) {
        buf[offset..offset + 4].copy_from_slice(&value.to_le_bytes());
    }

    /// Recompute the checksum over the current contents and store it in the header.
    fn store_checksum(buf: &mut [u8]) {
        let checksum = BaseBlock::compute_checksum(buf);
        put_u32(buf, CHECKSUM_OFFSET, checksum);
    }

    /// Build a minimal valid base block for testing.
    ///
    /// The buffer is a full `BaseBlock::SIZE` (4096-byte) block; only the
    /// header fields in its first 512 bytes are populated, the rest is zero.
    fn build_test_header() -> Vec<u8> {
        let mut buf = vec![0u8; BaseBlock::SIZE];
        // Signature "regf"
        buf[0..4].copy_from_slice(b"regf");
        put_u32(&mut buf, 0x04, 1); // primary sequence
        put_u32(&mut buf, 0x08, 1); // secondary sequence
        put_u32(&mut buf, 0x14, 1); // major version
        put_u32(&mut buf, 0x18, 5); // minor version
        put_u32(&mut buf, 0x20, 1); // format
        put_u32(&mut buf, 0x24, 32); // root cell offset (0x20)
        put_u32(&mut buf, 0x28, 4096); // hive bins data size
        put_u32(&mut buf, 0x2C, 1); // clustering factor
        store_checksum(&mut buf);
        buf
    }

    #[test]
    fn parse_base_block_from_bytes() {
        let buf = build_test_header();
        let mut cursor = Cursor::new(&buf[..]);
        let header = BaseBlock::read(&mut cursor).expect("should parse valid header");
        assert_eq!(header.major_version, 1);
        assert_eq!(header.minor_version, 5);
        assert_eq!(header.root_cell_offset, 32);
        assert_eq!(header.hive_bins_data_size, 4096);
        assert!(header.is_clean());
    }

    #[test]
    fn checksum_validates_on_clean_header() {
        let buf = build_test_header();
        assert!(BaseBlock::validate_checksum(&buf));
    }

    #[test]
    fn checksum_fails_on_corrupt_header() {
        let mut buf = build_test_header();
        buf[0x14] = 0xFF; // corrupt major version
        assert!(!BaseBlock::validate_checksum(&buf));
    }

    #[test]
    fn checksum_rejects_short_input() {
        let buf = build_test_header();
        // One byte short of the checksum field's end: must be rejected, not panic.
        assert!(!BaseBlock::validate_checksum(&buf[..511]));
        assert!(!BaseBlock::validate_checksum(&[]));
        // Exactly the 512-byte header is sufficient.
        assert!(BaseBlock::validate_checksum(&buf[..512]));
    }

    #[test]
    fn checksum_ignores_bytes_past_header() {
        let mut buf = build_test_header();
        // Only offsets 0x000-0x1FB are covered, so later bytes must not matter.
        buf[0x200] = 0xAB;
        assert!(BaseBlock::validate_checksum(&buf));
    }

    #[test]
    fn checksum_special_case_zero_becomes_one() {
        // Construct a header where XOR of all 127 words would be 0 before adjustment.
        // Word 0 = b"regf" = 0x66676572.
        // Place the same value at word 1 (offset 4) so they cancel: 0x66676572 ^ 0x66676572 = 0.
        // All other words are zero, so total XOR = 0 → special case returns 1.
        let mut buf = vec![0u8; 512];
        buf[0..4].copy_from_slice(b"regf");
        buf[4..8].copy_from_slice(b"regf");
        let checksum = BaseBlock::compute_checksum(&buf);
        assert_eq!(checksum, 1, "zero checksum should become 1");
    }

    #[test]
    fn checksum_special_case_all_ones_becomes_fffffffe() {
        // Word 0 = 0x66676572; word 1 = its bitwise complement, so the XOR of
        // all 127 words is 0xFFFFFFFF before adjustment.
        let mut buf = vec![0u8; 512];
        buf[0..4].copy_from_slice(b"regf");
        put_u32(&mut buf, 4, !0x6667_6572);
        let checksum = BaseBlock::compute_checksum(&buf);
        assert_eq!(checksum, 0xFFFF_FFFE, "0xFFFFFFFF checksum should become 0xFFFFFFFE");
    }

    #[test]
    fn dirty_hive_detection() {
        let mut buf = build_test_header();
        // Make primary != secondary, then refresh the checksum.
        put_u32(&mut buf, 0x04, 2);
        store_checksum(&mut buf);

        let mut cursor = Cursor::new(&buf[..]);
        let header = BaseBlock::read(&mut cursor).unwrap();
        assert!(!header.is_clean());
    }

    #[test]
    fn file_name_string_stops_at_nul() {
        let mut buf = build_test_header();
        for (i, unit) in "SYSTEM".encode_utf16().enumerate() {
            let at = FILE_NAME_OFFSET + i * 2;
            buf[at..at + 2].copy_from_slice(&unit.to_le_bytes());
        }
        // Remnant data after the NUL terminator must not leak into the result.
        let remnant = FILE_NAME_OFFSET + 14;
        buf[remnant..remnant + 2].copy_from_slice(&u16::from(b'X').to_le_bytes());
        store_checksum(&mut buf);

        let mut cursor = Cursor::new(&buf[..]);
        let header = BaseBlock::read(&mut cursor).expect("should parse valid header");
        assert_eq!(header.file_name_string(), "SYSTEM");
    }

    #[test]
    fn rejects_invalid_signature() {
        let mut buf = build_test_header();
        buf[0..4].copy_from_slice(b"nope");
        let mut cursor = Cursor::new(&buf[..]);
        assert!(BaseBlock::read(&mut cursor).is_err());
    }
}