Skip to main content

hfsplus_forensic/
lib.rs

//! HFS+ / HFSX volume-header detection (Apple TN1150).
//!
//! Apple optical discs are frequently *hybrids*: an ISO 9660 filesystem and an
//! HFS/HFS+ volume sharing the same disc, so a Mac and a PC each see their own
//! filesystem.  The HFS+ volume header sits at a fixed 1024-byte offset from the
//! volume start (TN1150 §"Volume Header"), with a big-endian `H+` (HFS+) or `HX`
//! (HFSX) signature.
//!
//! This crate reads the volume header (geometry), walks the catalog B-tree to
//! list directories ([`list_root`], [`list_dir`], recursive [`walk`]), and
//! extracts file data forks ([`read_file`]).  Journal replay and resource forks
//! are out of scope.  Validated against real `hdiutil`-created HFS+ volumes.
13
/// Distance in bytes from the first byte of the volume to its HFS+ volume header.
const VOLUME_HEADER_OFFSET: usize = 1024;
/// Signature of a plain HFS+ volume: the ASCII bytes `H+` read as a big-endian
/// `u16` (TN1150).
const SIG_HFS_PLUS: u16 = u16::from_be_bytes(*b"H+");
/// Signature of an HFSX (case-sensitive variant) volume: ASCII `HX`, big-endian.
const SIG_HFSX: u16 = u16::from_be_bytes(*b"HX");
20
/// The flavour of Apple volume identified by the header signature.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum HfsKind {
    /// Signature `H+`: standard HFS Plus.
    HfsPlus,
    /// Signature `HX`: case-sensitive HFSX.
    Hfsx,
}
29
30/// Parsed HFS+ volume header fields (geometry only).
31#[derive(Debug, Clone, Copy, PartialEq, Eq)]
32pub struct HfsVolume {
33    pub kind: HfsKind,
34    /// Volume format version (4 for HFS+, 5 for HFSX).
35    pub version: u16,
36    /// Number of files in the volume's catalog.
37    pub file_count: u32,
38    /// Number of folders in the volume's catalog.
39    pub folder_count: u32,
40    /// Allocation block size in bytes.
41    pub block_size: u32,
42    /// Total allocation blocks in the volume.
43    pub total_blocks: u32,
44    /// Free allocation blocks.
45    pub free_blocks: u32,
46}
47
48impl HfsVolume {
49    /// Total volume size in bytes (`block_size * total_blocks`).
50    #[must_use]
51    pub fn volume_size(&self) -> u64 {
52        u64::from(self.block_size) * u64::from(self.total_blocks)
53    }
54}
55
56/// Parse the HFS+/HFSX volume header from a buffer that begins at the volume
57/// start (the header is read at offset 1024).  Returns `None` if the buffer is
58/// too short or carries no HFS+ signature.
59#[must_use]
60pub fn parse(volume: &[u8]) -> Option<HfsVolume> {
61    let h = VOLUME_HEADER_OFFSET;
62    if volume.len() < h + 52 {
63        return None;
64    }
65    let hdr = &volume[h..];
66    let kind = match be16(&hdr[0..2]) {
67        SIG_HFS_PLUS => HfsKind::HfsPlus,
68        SIG_HFSX => HfsKind::Hfsx,
69        _ => return None,
70    };
71    Some(HfsVolume {
72        kind,
73        version: be16(&hdr[2..4]),
74        file_count: be32(&hdr[32..36]),
75        folder_count: be32(&hdr[36..40]),
76        block_size: be32(&hdr[40..44]),
77        total_blocks: be32(&hdr[44..48]),
78        free_blocks: be32(&hdr[48..52]),
79    })
80}
81
/// Catalog node ID (CNID) that TN1150 assigns to the root folder.
const ROOT_FOLDER_CNID: u32 = 2;
/// Catalog record type of a folder leaf record (TN1150).
const RECORD_FOLDER: i16 = 1;
/// Catalog record type of a file leaf record (TN1150).
const RECORD_FILE: i16 = 2;
/// Upper bound on the catalog leaf nodes visited in one walk, so that a corrupt
/// `fLink` chain cannot keep the walk running forever.
const MAX_LEAF_NODES: u32 = 1 << 16;
89
/// One child of an HFS+ directory, as found in the catalog.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct HfsEntry {
    /// Name of the file or folder, decoded from the catalog key's UTF-16.
    pub name: String,
    /// `true` when the entry is a folder, `false` when it is a file.
    pub is_dir: bool,
    /// The entry's own catalog node ID (CNID).
    pub cnid: u32,
}
100
/// Where the catalog B-tree lives inside a volume buffer, plus the sizes needed
/// to address its nodes and the volume's allocation blocks.
struct CatalogLoc {
    /// Allocation block size of the volume, in bytes.
    block_size: usize,
    /// Byte offset of the catalog's node 0 from the start of the volume buffer.
    cat_base: usize,
    /// Size of one B-tree node, in bytes.
    node_size: usize,
    /// Node number of the first leaf node, taken from the B-tree header record.
    first_leaf: u32,
}
108
109/// Locate the catalog B-tree from the volume header (its first extent).
110fn locate_catalog(volume: &[u8]) -> Option<CatalogLoc> {
111    let h = VOLUME_HEADER_OFFSET;
112    if volume.len() < h + 352 {
113        return None;
114    }
115    match be16(&volume[h..h + 2]) {
116        SIG_HFS_PLUS | SIG_HFSX => {}
117        _ => return None,
118    }
119    let block_size = be32(&volume[h + 40..h + 44]) as usize;
120    if block_size == 0 {
121        return None;
122    }
123    // catalogFile fork is at header offset 272; its first extent's start at +16.
124    let cat_fork = h + 272;
125    let start_block = be32(&volume[cat_fork + 16..cat_fork + 20]) as usize;
126    let cat_base = start_block.checked_mul(block_size)?;
127    // B-tree header record follows the 14-byte node descriptor of node 0.
128    let hdr = cat_base.checked_add(14)?;
129    if volume.len() < hdr + 20 {
130        return None;
131    }
132    let first_leaf = be32(&volume[hdr + 10..hdr + 14]);
133    let node_size = be16(&volume[hdr + 18..hdr + 20]) as usize;
134    if node_size < 14 {
135        return None;
136    }
137    Some(CatalogLoc {
138        cat_base,
139        node_size,
140        first_leaf,
141        block_size,
142    })
143}
144
145/// Walk the catalog leaf-node chain, invoking `f` with each record slice.
146fn for_each_record(volume: &[u8], loc: &CatalogLoc, mut f: impl FnMut(&[u8])) {
147    let mut node = loc.first_leaf;
148    let mut walked = 0u32;
149    while node != 0 && walked < MAX_LEAF_NODES {
150        walked += 1;
151        let Some(node_off) = (node as usize)
152            .checked_mul(loc.node_size)
153            .and_then(|x| x.checked_add(loc.cat_base))
154        else {
155            break;
156        };
157        if volume.len() < node_off + loc.node_size {
158            break;
159        }
160        let nd = &volume[node_off..node_off + loc.node_size];
161        let f_link = be32(&nd[0..4]);
162        let num_records = be16(&nd[10..12]) as usize;
163        for i in 0..num_records {
164            // Record offsets are stored backwards from the node end.
165            let Some(slot) = loc.node_size.checked_sub(2 * (i + 1)) else {
166                break;
167            };
168            let rec = be16(&nd[slot..slot + 2]) as usize;
169            if rec + 8 <= loc.node_size {
170                f(&nd[rec..]);
171            }
172        }
173        node = f_link;
174    }
175}
176
177/// List the root directory of an HFS+ volume.  See [`list_dir`].
178#[must_use]
179pub fn list_root(volume: &[u8]) -> Option<Vec<HfsEntry>> {
180    list_dir(volume, ROOT_FOLDER_CNID)
181}
182
183/// List the immediate children of the folder `parent_cnid` by walking the HFS+
184/// catalog B-tree.
185///
186/// `volume` must contain the whole HFS+ volume from its first byte (header at
187/// offset 1024).  Entries include HFS+ private metadata directories (real, not
188/// hidden); thread records are skipped.  Returns `None` if this is not an HFS+
189/// volume or the catalog cannot be located.  Assumes the catalog fits in its
190/// first extent (true for typical optical/hybrid volumes).
191#[must_use]
192pub fn list_dir(volume: &[u8], parent_cnid: u32) -> Option<Vec<HfsEntry>> {
193    let loc = locate_catalog(volume)?;
194    let mut entries = Vec::new();
195    for_each_record(volume, &loc, |rec| {
196        if let Some((parent, entry)) = record_entry(rec) {
197            if parent == parent_cnid {
198                entries.push(entry);
199            }
200        }
201    });
202    Some(entries)
203}
204
205/// Read a file's data-fork contents by catalog node ID.
206///
207/// Returns the file's bytes (concatenated from its data-fork extents, truncated
208/// to the logical size), or `None` if `cnid` is not a file in this volume.
209#[must_use]
210pub fn read_file(volume: &[u8], cnid: u32) -> Option<Vec<u8>> {
211    let loc = locate_catalog(volume)?;
212    let mut found: Option<(u64, Vec<(u32, u32)>)> = None;
213    for_each_record(volume, &loc, |rec| {
214        if found.is_none() {
215            found = file_data_fork(rec, cnid);
216        }
217    });
218    let (logical, extents) = found?;
219    let logical = logical as usize;
220    let mut data = Vec::with_capacity(logical.min(1 << 20));
221    for (start, count) in extents {
222        if data.len() >= logical {
223            break;
224        }
225        let begin = (start as usize).checked_mul(loc.block_size)?;
226        let len = (count as usize).checked_mul(loc.block_size)?;
227        let end = begin.checked_add(len)?.min(volume.len());
228        if begin >= volume.len() {
229            break;
230        }
231        data.extend_from_slice(&volume[begin..end]);
232    }
233    data.truncate(logical);
234    Some(data)
235}
236
237/// Parse a catalog record into `(parentID, entry)` for file/folder records.
238fn record_entry(rec: &[u8]) -> Option<(u32, HfsEntry)> {
239    if rec.len() < 8 {
240        return None;
241    }
242    let key_len = be16(&rec[0..2]) as usize;
243    let parent_id = be32(&rec[2..6]);
244    let name_len = be16(&rec[6..8]) as usize;
245    let name_end = 8 + name_len * 2;
246    if name_end > rec.len() {
247        return None;
248    }
249    let name = decode_utf16(&rec[8..name_end]);
250    let data = 2 + key_len;
251    if data + 12 > rec.len() {
252        return None;
253    }
254    let is_dir = match i16::from_be_bytes([rec[data], rec[data + 1]]) {
255        RECORD_FOLDER => true,
256        RECORD_FILE => false,
257        _ => return None, // thread records and anything else
258    };
259    // folderID / fileID at offset 8 of the folder/file record.
260    let cnid = be32(&rec[data + 8..data + 12]);
261    Some((parent_id, HfsEntry { name, is_dir, cnid }))
262}
263
264/// If `rec` is the file record for `cnid`, return its data fork as
265/// `(logical_size, extents)`.
266fn file_data_fork(rec: &[u8], cnid: u32) -> Option<(u64, Vec<(u32, u32)>)> {
267    if rec.len() < 8 {
268        return None;
269    }
270    let key_len = be16(&rec[0..2]) as usize;
271    let data = 2 + key_len;
272    // File record + data fork (HFSPlusForkData at +88, 80 bytes).
273    if data + 168 > rec.len() {
274        return None;
275    }
276    if i16::from_be_bytes([rec[data], rec[data + 1]]) != RECORD_FILE {
277        return None;
278    }
279    if be32(&rec[data + 8..data + 12]) != cnid {
280        return None;
281    }
282    let fork = data + 88;
283    let logical = u64::from_be_bytes(rec[fork..fork + 8].try_into().ok()?);
284    let mut extents = Vec::new();
285    for i in 0..8 {
286        let e = fork + 16 + i * 8;
287        let start = be32(&rec[e..e + 4]);
288        let count = be32(&rec[e + 4..e + 8]);
289        if count != 0 {
290            extents.push((start, count));
291        }
292    }
293    Some((logical, extents))
294}
295
/// Turn big-endian UTF-16 bytes into a `String`.
///
/// Decoding is lossy: every unpaired surrogate becomes U+FFFD.  A trailing odd
/// byte that does not complete a 16-bit unit is ignored.
fn decode_utf16(bytes: &[u8]) -> String {
    let units = bytes
        .chunks_exact(2)
        .map(|pair| (u16::from(pair[0]) << 8) | u16::from(pair[1]));
    std::char::decode_utf16(units)
        .map(|decoded| decoded.unwrap_or(std::char::REPLACEMENT_CHARACTER))
        .collect()
}
304
/// Read a big-endian `u16` from the first two bytes of `b`.
///
/// Panics if `b` holds fewer than two bytes.
fn be16(b: &[u8]) -> u16 {
    let high = u16::from(b[0]);
    let low = u16::from(b[1]);
    (high << 8) | low
}
/// Read a big-endian `u32` from the first four bytes of `b`.
///
/// Panics if `b` holds fewer than four bytes.
fn be32(b: &[u8]) -> u32 {
    b[..4]
        .iter()
        .fold(0u32, |acc, &byte| (acc << 8) | u32::from(byte))
}
311
/// A file or folder together with its full path, as returned by [`walk`].
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct HfsPathEntry {
    /// Path from the volume root with components joined by `/`
    /// (for example `"SUB/NESTED.TXT"`).
    pub path: String,
    /// `true` when the entry is a folder.
    pub is_dir: bool,
    /// The entry's catalog node ID (CNID).
    pub cnid: u32,
}
322
323/// Recursively list every file and folder in an HFS+ volume, depth-first from
324/// the root, returning `/`-joined paths.
325///
326/// Returns `None` if this is not an HFS+ volume.  A visited-CNID set guards
327/// against cycles in a corrupt catalog.
328#[must_use]
329pub fn walk(volume: &[u8]) -> Option<Vec<HfsPathEntry>> {
330    // Confirm this is an HFS+ volume up front so a non-HFS buffer yields None.
331    list_dir(volume, ROOT_FOLDER_CNID)?;
332    let mut out = Vec::new();
333    let mut visited = std::collections::HashSet::new();
334    visited.insert(ROOT_FOLDER_CNID);
335    let mut stack = vec![(ROOT_FOLDER_CNID, String::new())];
336    while let Some((parent, prefix)) = stack.pop() {
337        let Some(entries) = list_dir(volume, parent) else {
338            continue;
339        };
340        for e in entries {
341            let path = if prefix.is_empty() {
342                e.name.clone()
343            } else {
344                format!("{prefix}/{}", e.name)
345            };
346            if e.is_dir && visited.insert(e.cnid) {
347                stack.push((e.cnid, path.clone()));
348            }
349            out.push(HfsPathEntry {
350                path,
351                is_dir: e.is_dir,
352                cnid: e.cnid,
353            });
354        }
355    }
356    Some(out)
357}