Skip to main content

winreg_core/
hive.rs

1//! `Hive` — the entry point for reading a Windows Registry hive.
2
3use std::io::{Cursor, Read, Seek};
4
5use winreg_format::header::BaseBlock;
6use winreg_format::version::RegfVersion;
7
8use crate::error::{HiveError, Result};
9
10use binrw::BinRead;
11
/// Umbrella trait combining [`Read`] and [`Seek`].
///
/// It has no methods of its own; it exists so that bounds and signatures can
/// name a single trait. The blanket impl below covers every sized type that
/// is both `Read` and `Seek`, so it never needs to be implemented by hand.
pub trait ReadSeek: Read + Seek {}

impl<T> ReadSeek for T where T: Read + Seek {}
15
/// Descriptor for a cataloged hive bin (hbin).
///
/// One descriptor is recorded per hbin header found while walking the hive
/// bins data. It is plain data, so it can be cloned and compared by value.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct HbinDescriptor {
    /// Offset of this hbin from the start of the hive bins data.
    ///
    /// This is the value stored in the hbin header itself; the cataloging
    /// code in this file copies it as-is and does not cross-check it against
    /// `file_offset`.
    pub offset: u32,
    /// Size of this hbin in bytes, as stored in the hbin header.
    ///
    /// Cataloged bins always have a non-zero size that is a multiple of 4096.
    pub size: u32,
    /// Absolute offset in the hive file where this hbin's header starts.
    pub file_offset: u64,
}
26
27/// A parsed Windows Registry hive file.
28///
29/// Generic over `R: ReadSeek` to support mmap, in-memory buffers, and overlays.
30pub struct Hive<R: ReadSeek> {
31    // Used by CellReader (Task 8) and future I/O methods.
32    #[allow(dead_code)]
33    pub(crate) reader: R,
34    pub(crate) header: BaseBlock,
35    pub(crate) version: RegfVersion,
36    pub(crate) bins: Vec<HbinDescriptor>,
37    /// Raw header bytes (first 4096) — kept for checksum validation and transaction log replay.
38    #[allow(dead_code)]
39    pub(crate) header_bytes: Vec<u8>,
40}
41
42impl Hive<Cursor<Vec<u8>>> {
43    /// Open a hive from an in-memory byte buffer.
44    pub fn from_bytes(data: Vec<u8>) -> Result<Self> {
45        if data.len() < BaseBlock::SIZE {
46            return Err(HiveError::TruncatedHive {
47                expected: BaseBlock::SIZE as u64,
48                actual: data.len() as u64,
49            });
50        }
51
52        // Parse base block.
53        let mut cursor = Cursor::new(data.clone());
54        let header = BaseBlock::read(&mut cursor).map_err(|_| HiveError::InvalidSignature)?;
55
56        // Validate checksum.
57        if !BaseBlock::validate_checksum(&data) {
58            let computed = BaseBlock::compute_checksum(&data);
59            let expected = u32::from_le_bytes([data[0x1FC], data[0x1FD], data[0x1FE], data[0x1FF]]);
60            return Err(HiveError::ChecksumMismatch { expected, computed });
61        }
62
63        // Determine version.
64        let version =
65            RegfVersion::from_minor(header.minor_version).ok_or(HiveError::UnsupportedVersion {
66                major: header.major_version,
67                minor: header.minor_version,
68            })?;
69
70        // Catalog hive bins.
71        let header_bytes = data[..BaseBlock::SIZE].to_vec();
72        let bins_data_start = BaseBlock::SIZE as u64;
73        let bins_data_size = u64::from(header.hive_bins_data_size);
74        let bins = catalog_hbins(&data, bins_data_start, bins_data_size)?;
75
76        let reader = Cursor::new(data);
77        Ok(Self {
78            reader,
79            header,
80            version,
81            bins,
82            header_bytes,
83        })
84    }
85
86    /// Open a hive from a file path.
87    pub fn from_path(path: &std::path::Path) -> Result<Self> {
88        let data = std::fs::read(path)?;
89        Self::from_bytes(data)
90    }
91}
92
93impl<R: ReadSeek> Hive<R> {
94    /// The REGF format version of this hive.
95    pub fn version(&self) -> RegfVersion {
96        self.version
97    }
98
99    /// Whether the hive was cleanly synchronized (primary == secondary sequence).
100    pub fn is_clean(&self) -> bool {
101        self.header.is_clean()
102    }
103
104    /// Root cell offset (relative to hive bins data start).
105    pub fn root_cell_offset(&self) -> winreg_format::cells::CellOffset {
106        winreg_format::cells::CellOffset(self.header.root_cell_offset)
107    }
108
109    /// Total hive bins data size in bytes.
110    pub fn hive_bins_data_size(&self) -> u32 {
111        self.header.hive_bins_data_size
112    }
113
114    /// Number of hive bins.
115    pub fn bin_count(&self) -> usize {
116        self.bins.len()
117    }
118
119    /// Internal file name from the header.
120    pub fn file_name(&self) -> String {
121        self.header.file_name_string()
122    }
123
124    /// The hbin descriptors.
125    pub fn bins(&self) -> &[HbinDescriptor] {
126        &self.bins
127    }
128}
129
130impl Hive<Cursor<Vec<u8>>> {
131    /// Raw hive bytes, including the base block and every hbin.
132    ///
133    /// Exposed for carving/recovery, which must scan **unallocated** cells and
134    /// cell slack that the typed cell reader deliberately refuses to return.
135    /// Callers get read-only bytes and resolve cell offsets via
136    /// [`HbinDescriptor::file_offset`].
137    pub fn raw_bytes(&self) -> &[u8] {
138        self.reader.get_ref()
139    }
140}
141
142/// Walk the hive bins data and build a catalog of all hbins.
143fn catalog_hbins(data: &[u8], start: u64, expected_size: u64) -> Result<Vec<HbinDescriptor>> {
144    let mut bins = Vec::new();
145    let mut pos = start;
146    let end = start + expected_size;
147
148    while pos < end {
149        let file_offset = pos;
150        let pos_usize = usize::try_from(pos).unwrap_or(usize::MAX);
151
152        if pos_usize.saturating_add(32) > data.len() {
153            break; // Truncated — stop cataloging
154        }
155
156        // Check hbin signature.
157        let sig = &data[pos_usize..pos_usize + 4];
158        if sig != b"hbin" {
159            return Err(HiveError::InvalidHbin { file_offset });
160        }
161
162        let offset = crate::bytes::le_u32(data, pos_usize + 4);
163        let size = crate::bytes::le_u32(data, pos_usize + 8);
164
165        if size == 0 || size % 4096 != 0 {
166            break; // Invalid size — stop
167        }
168
169        bins.push(HbinDescriptor {
170            offset,
171            size,
172            file_offset,
173        });
174
175        pos += u64::from(size);
176    }
177
178    Ok(bins)
179}
180
#[cfg(test)]
mod tests {
    use super::*;
    use winreg_format::header::BaseBlock;

    /// Size in bytes of the single hbin in the synthetic hive.
    const HBIN_SIZE: u32 = 4096;

    /// Store `value` as a little-endian u32 at byte offset `at`.
    fn put_u32(image: &mut [u8], at: usize, value: u32) {
        image[at..at + 4].copy_from_slice(&value.to_le_bytes());
    }

    /// Build a minimal valid hive: a base block followed by one hbin that
    /// holds a root NK cell and a trailing free cell.
    fn build_minimal_hive() -> Vec<u8> {
        let hbin_len = HBIN_SIZE as usize;
        let mut image = vec![0u8; BaseBlock::SIZE + hbin_len];

        // Base block header fields, written before the checksum is computed.
        image[..4].copy_from_slice(b"regf");
        for (at, value) in [
            (0x04, 1),         // primary seq
            (0x08, 1),         // secondary seq
            (0x14, 1),         // major version
            (0x18, 5),         // minor version = 1.5
            (0x20, 1),         // format = 1
            (0x24, 32),        // root cell offset = 32
            (0x28, HBIN_SIZE), // hive bins data size
            (0x2C, 1),         // clustering factor
        ] {
            put_u32(&mut image, at, value);
        }

        // Header checksum, stored at 0x1FC.
        let checksum = BaseBlock::compute_checksum(&image);
        put_u32(&mut image, 0x1FC, checksum);

        // Hbin header, directly after the base block.
        let hbin_at = BaseBlock::SIZE;
        image[hbin_at..hbin_at + 4].copy_from_slice(b"hbin");
        put_u32(&mut image, hbin_at + 4, 0); // offset = 0
        put_u32(&mut image, hbin_at + 8, HBIN_SIZE); // size

        // Root NK cell at hbin offset 32 (file offset = base block size + 32).
        let nk_at = hbin_at + 32;
        let nk_cell_size: i32 = -128; // allocated, 128 bytes
        image[nk_at..nk_at + 4].copy_from_slice(&nk_cell_size.to_le_bytes());
        image[nk_at + 4..nk_at + 6].copy_from_slice(b"nk");
        // flags: HIVE_ENTRY | COMP_NAME = 0x0024
        image[nk_at + 6..nk_at + 8].copy_from_slice(&0x0024u16.to_le_bytes());

        // The rest of the hbin is a single free cell.
        let free_at = nk_at + 128;
        let free_len = hbin_len - 32 - 128;
        image[free_at..free_at + 4].copy_from_slice(&(free_len as i32).to_le_bytes());

        image
    }

    #[test]
    fn open_minimal_hive() {
        let hive = Hive::from_bytes(build_minimal_hive()).expect("should open minimal hive");
        assert_eq!(hive.version(), RegfVersion::V1_5);
        assert!(hive.is_clean());
        assert_eq!(hive.bin_count(), 1);
        assert_eq!(hive.hive_bins_data_size(), 4096);
    }

    #[test]
    fn rejects_truncated_file() {
        let outcome = Hive::from_bytes(vec![0u8; 100]);
        assert!(matches!(outcome, Err(HiveError::TruncatedHive { .. })));
    }

    #[test]
    fn rejects_bad_signature() {
        let mut image = build_minimal_hive();
        image[0..4].copy_from_slice(b"nope");
        let outcome = Hive::from_bytes(image);
        assert!(matches!(outcome, Err(HiveError::InvalidSignature)));
    }

    #[test]
    fn rejects_bad_checksum() {
        let mut image = build_minimal_hive();
        // Corrupt a header byte without updating the stored checksum.
        image[0x14] = 0xFF;
        let outcome = Hive::from_bytes(image);
        assert!(matches!(outcome, Err(HiveError::ChecksumMismatch { .. })));
    }

    #[test]
    fn catalogs_hbin_descriptors() {
        let hive = Hive::from_bytes(build_minimal_hive()).unwrap();
        let descriptors = hive.bins();
        assert_eq!(descriptors.len(), 1);

        let first = &descriptors[0];
        assert_eq!(first.offset, 0);
        assert_eq!(first.size, 4096);
        assert_eq!(first.file_offset, 4096);
    }
}