// lamexfat 0.1.0
//
// no_std read-only exFAT reader for UEFI bootloaders (removable media)
// Documentation
// SPDX-License-Identifier: Apache-2.0
//! Directory walk: materialize a directory's entries, then decode each entry
//! set (File `0x85` + StreamExtension `0xC0` + FileName `0xC1`) with its
//! `SetChecksum`.
//!
//! Adapted from exfat-slim @2ffd2c2 `directory_entry.rs` (Apache-2.0) — the
//! 32-byte entry types/offsets and the rotate-right SetChecksum are the on-disk
//! format; the contiguous-buffer materialization (so an entry set never straddles
//! a cluster read) and the end-of-directory and cycle bounds are original to
//! `lamexfat`.

use alloc::{vec, vec::Vec};

use crate::{
    block_read::{read_exact, u16_at, u32_at, u64_at, BlockRead},
    error::{Error, Result},
    fat::next_cluster,
    vbr::Geometry,
};

/// Size in bytes of one on-disk directory entry.
const ENTRY: usize = 32;
/// Entry type of the end-of-directory marker.
const TYPE_END: u8 = 0x00;
/// In-use bit of the entry type byte; clear on deleted or padding entries.
const TYPE_IN_USE: u8 = 0x80;
/// Entry type of the up-case table entry (scanned for in the root directory).
const TYPE_UPCASE: u8 = 0x82;
/// Entry type of the volume label entry (scanned for in the root directory).
const TYPE_LABEL: u8 = 0x83;
/// Entry type of the File entry, the primary entry of an entry set.
const TYPE_FILE: u8 = 0x85;
/// Entry type of the StreamExtension entry, the first secondary of a set.
const TYPE_STREAM: u8 = 0xC0;
/// Entry type of a FileName entry (15 UTF-16 units of the name each).
const TYPE_FILENAME: u8 = 0xC1;

/// Directory bit in the File entry's attributes field (bytes 4..6).
const ATTR_DIRECTORY: u16 = 0x0010;
/// Bit in the StreamExtension flags byte (byte 1); when set, the entry's
/// data is treated as contiguous rather than following the FAT chain.
const FLAG_NO_FAT_CHAIN: u8 = 0x02;

/// One decoded directory entry set — the exFAT "inode".
///
/// Produced by `decode_set` from a File entry, its StreamExtension entry and
/// the FileName entries that follow.
pub(crate) struct EntrySet {
    /// `true` when the File entry's attributes have the directory bit set.
    pub is_dir: bool,
    /// First cluster field of the StreamExtension entry (offset 20).
    pub first_cluster: u32,
    /// Data length field of the StreamExtension entry (offset 24).
    pub data_length: u64,
    /// Valid data length field of the StreamExtension entry (offset 8).
    pub valid_data_length: u64,
    /// `true` when the StreamExtension flags carry the no-FAT-chain bit.
    pub contiguous: bool,
    /// The name as raw UTF-16 code units, exactly as many as the
    /// StreamExtension's name length byte declares (never empty).
    pub name: Vec<u16>,
}

/// The up-case table + volume label locations found while scanning the root.
///
/// Both fields stay `None` (the `Default`) when the corresponding entry was
/// not seen by `scan_root_meta`.
#[derive(Default)]
pub(crate) struct RootMeta {
    /// 32-bit value read at offset 20 of the up-case table entry.
    pub upcase_cluster: Option<u32>,
    /// Volume label as raw UTF-16 code units (at most 11 are read).
    pub label: Option<Vec<u16>>,
}

/// Read a directory's raw entry bytes (all clusters of its chain, up to the
/// end-of-directory marker), bounded against the read cap and a per-cluster cycle
/// guard. Used for both the root and subdirectories — the `0x00` terminator is
/// authoritative for both, so the (absent-for-root) `DataLength` is not needed.
fn materialize(
    reader: &mut impl BlockRead,
    geo: &Geometry,
    first_cluster: u32,
    contiguous: bool,
) -> Result<Vec<u8>> {
    let csize = geo.cluster_size() as usize;
    let mut out: Vec<u8> = Vec::new();
    let mut cluster = first_cluster;
    let mut steps = 0u32;
    loop {
        let base = geo.cluster_byte(cluster).ok_or(Error::Inconsistent {
            token: "cluster_oob",
        })?;
        let start = out.len();
        out.resize(start + csize, 0);
        read_exact(reader, base, &mut out[start..], "io_dir")?;
        // The end-of-directory marker (entry type 0x00) terminates the walk.
        if let Some(p) = (start..out.len())
            .step_by(ENTRY)
            .find(|&p| out[p] == TYPE_END)
        {
            out.truncate(p);
            return Ok(out);
        }
        if out.len() as u64 > crate::MAX_FILE_BYTES {
            return Err(Error::FileTooLarge {
                size: out.len() as u64,
                max: crate::MAX_FILE_BYTES,
            });
        }
        steps += 1;
        if steps >= geo.cluster_count {
            return Err(Error::Inconsistent { token: "dir_cycle" });
        }
        let next = if contiguous {
            cluster
                .checked_add(1)
                .filter(|n| n.checked_sub(2).is_some_and(|r| r < geo.cluster_count))
        } else {
            next_cluster(reader, geo, cluster)?
        };
        match next {
            Some(c) => cluster = c,
            None => return Ok(out), // chain ended without an explicit 0x00
        }
    }
}

/// Compute the exFAT `SetChecksum` of a whole entry set.
///
/// `entries` is the raw bytes of the File entry followed by its secondaries.
/// Starting from zero, the running 16-bit sum is rotated right by one bit and
/// the next byte is added (wrapping). Positions 2 and 3 — the checksum field
/// of the File entry itself — are left out so the stored value does not feed
/// into its own computation.
fn set_checksum(entries: &[u8]) -> u16 {
    entries
        .iter()
        .enumerate()
        .filter(|&(pos, _)| !matches!(pos, 2 | 3))
        .fold(0u16, |acc, (_, &byte)| {
            acc.rotate_right(1).wrapping_add(u16::from(byte))
        })
}

/// Decode every valid entry set in a directory.
///
/// The directory starting at `first_cluster` is first materialized into one
/// buffer (following the FAT chain unless `contiguous` is set), then scanned
/// entry by entry. Each in-use File entry (`0x85`) opens a candidate set of
/// `SecondaryCount + 1` entries which is handed to `decode_set`.
///
/// A set that fails its checksum, is structurally invalid, or claims more
/// secondaries than the buffer holds is skipped — a single bad name must not
/// hide siblings. To honour that, the cursor jumps over a whole set only
/// after the set has been accepted. The `SecondaryCount` byte of a rejected
/// set is not trusted; the scan resumes at the very next entry, so a corrupt
/// count cannot swallow the sets that follow it.
///
/// # Errors
///
/// Propagates any error from `materialize`; decoding itself never fails.
pub(crate) fn read_entries(
    reader: &mut impl BlockRead,
    geo: &Geometry,
    first_cluster: u32,
    contiguous: bool,
) -> Result<Vec<EntrySet>> {
    let bytes = materialize(reader, geo, first_cluster, contiguous)?;
    let mut out = Vec::new();
    let mut i = 0usize;
    while i + ENTRY <= bytes.len() {
        let etype = bytes[i];
        if etype & TYPE_IN_USE == 0 {
            // not in use (deleted or padding) — skip one entry
            i += ENTRY;
            continue;
        }
        if etype != TYPE_FILE {
            // Some other in-use entry; secondaries of a rejected set also
            // land here and are stepped over one at a time.
            i += ENTRY;
            continue;
        }
        // `secondary` comes from a single byte, so `set_len` cannot overflow.
        let secondary = usize::from(bytes[i + 1]);
        let set_len = (secondary + 1) * ENTRY;
        let decoded = bytes
            .get(i..i + set_len)
            .and_then(|set| decode_set(set, secondary));
        match decoded {
            Some(es) => {
                out.push(es);
                i += set_len;
            }
            // Truncated or invalid set: its length field is unverified, so
            // advance by one entry only and keep looking for siblings.
            None => i += ENTRY,
        }
    }
    Ok(out)
}

/// Decode one candidate entry set into an `EntrySet`.
///
/// `set` holds the File entry followed by its secondaries and `secondary` is
/// the number of secondaries the File entry declares. Returns `None` when:
///
/// * the stored `SetChecksum` (File entry bytes 2..4) does not match the
///   computed one,
/// * the first secondary is missing or is not a StreamExtension (`0xC0`),
/// * a declared secondary lies outside `set`,
/// * the declared name length is zero, or the FileName (`0xC1`) entries do
///   not supply that many UTF-16 units.
fn decode_set(set: &[u8], secondary: usize) -> Option<EntrySet> {
    // Integrity first: the stored checksum must match the computed one.
    if u16_at(set, 2)? != set_checksum(set) {
        return None;
    }
    let is_dir = (u16_at(set, 4)? & ATTR_DIRECTORY) != 0;

    // The entry right after the File entry has to be the StreamExtension.
    let stream = set
        .get(ENTRY..2 * ENTRY)
        .filter(|candidate| candidate[0] == TYPE_STREAM)?;
    let contiguous = (stream[1] & FLAG_NO_FAT_CHAIN) != 0;
    let name_length = usize::from(stream[3]);
    let valid_data_length = u64_at(stream, 8)?;
    let first_cluster = u32_at(stream, 20)?;
    let data_length = u64_at(stream, 24)?;

    // An empty name is never accepted, whatever the secondaries contain.
    if name_length == 0 {
        return None;
    }

    // Collect the name from the FileName entries, 15 UTF-16 units apiece,
    // stopping once the declared length has been reached.
    let mut name: Vec<u16> = Vec::with_capacity(name_length);
    for index in 2..=secondary {
        let entry = set.get(index * ENTRY..(index + 1) * ENTRY)?;
        if entry[0] != TYPE_FILENAME {
            break;
        }
        let wanted = (name_length - name.len()).min(15);
        for unit in 0..wanted {
            name.push(u16_at(entry, 2 + unit * 2)?);
        }
    }
    if name.len() != name_length {
        return None;
    }

    Some(EntrySet {
        is_dir,
        first_cluster,
        data_length,
        valid_data_length,
        contiguous,
        name,
    })
}

/// Scan the root directory's first cluster for the up-case table location and
/// the volume label (read once at mount, before the up-case table is available,
/// so it walks only the root's first cluster — both entries live there).
pub(crate) fn scan_root_meta(reader: &mut impl BlockRead, geo: &Geometry) -> Result<RootMeta> {
    let base = geo
        .cluster_byte(geo.root_cluster)
        .ok_or(Error::Inconsistent {
            token: "cluster_oob",
        })?;
    let mut buf = vec![0u8; geo.cluster_size() as usize];
    read_exact(reader, base, &mut buf, "io_root")?;
    let mut meta = RootMeta::default();
    let mut i = 0usize;
    while i + ENTRY <= buf.len() {
        match buf[i] {
            TYPE_END => break,
            TYPE_UPCASE => meta.upcase_cluster = u32_at(&buf, i + 20),
            TYPE_LABEL => {
                let count = usize::from(buf[i + 1]).min(11);
                let mut label = Vec::with_capacity(count);
                for k in 0..count {
                    if let Some(c) = u16_at(&buf, i + 2 + k * 2) {
                        label.push(c);
                    }
                }
                meta.label = Some(label);
            }
            _ => {}
        }
        i += ENTRY;
    }
    Ok(meta)
}

#[cfg(test)]
mod tests {
    use super::*;

    /// The checksum must ignore bytes 2..4 (its own storage) and nothing else.
    #[test]
    fn set_checksum_excludes_its_own_field() {
        // A two-entry set filled with an ascending byte ramp.
        let mut set = [0u8; 64];
        set.iter_mut()
            .zip(0u8..)
            .for_each(|(slot, value)| *slot = value);
        let base = set_checksum(&set);

        // Rewriting the checksum field itself leaves the result untouched.
        let mut with_field_changed = set;
        with_field_changed[2..4].fill(0xFF);
        assert_eq!(set_checksum(&with_field_changed), base);

        // Flipping a byte outside that field changes the result.
        let mut with_data_changed = set;
        with_data_changed[5] = !with_data_changed[5];
        assert_ne!(set_checksum(&with_data_changed), base);
    }
}