lamexfat 0.1.0

no_std read-only exFAT reader for UEFI bootloaders (removable media)
Documentation
// SPDX-License-Identifier: Apache-2.0
//! The up-case table: exFAT is case-insensitive, case-preserving, so name
//! lookup up-cases both sides.
//!
//! Adapted from exfat-slim @2ffd2c2 `upcase_table.rs` (Apache-2.0): like it, we
//! keep only the first 128 entries (the ASCII range) — the standard table stores
//! those explicitly and uncompressed, and boot-media file names are effectively
//! ASCII. Characters >= 128 map to themselves, so a non-ASCII name still matches
//! case-sensitively (exact-case lookup always works).

use crate::{
    block_read::{read_exact, BlockRead},
    error::{Error, Result},
    vbr::Geometry,
};

/// Up-case lookup table restricted to the first 128 UTF-16 code units.
///
/// Code units at or above 128 are not stored; `up` returns them unchanged.
pub(crate) struct Upcase {
    /// `mapping[c]` holds the up-cased form of code unit `c`, for `c < 128`.
    mapping: [u16; 128],
}

impl Upcase {
    /// The standard ASCII up-casing (`a..=z` -> `A..=Z`, identity elsewhere) —
    /// the default until the on-disk table is loaded, and the correct mapping for
    /// the first 128 entries of every conformant volume.
    pub(crate) fn ascii() -> Self {
        let mut mapping = [0u16; 128];
        for (i, slot) in mapping.iter_mut().enumerate() {
            let c = i as u16;
            // a..=z (0x61..=0x7a) up-case to A..=Z by clearing bit 5.
            *slot = if (0x61..=0x7a).contains(&c) {
                c - 0x20
            } else {
                c
            };
        }
        Self { mapping }
    }

    /// Load the first 128 entries from the on-disk up-case table at
    /// `first_cluster`. The standard table stores the ASCII range uncompressed at
    /// the start, so a raw read of the first 256 bytes is the mapping.
    pub(crate) fn load<R: BlockRead>(
        &mut self,
        reader: &mut R,
        geo: &Geometry,
        first_cluster: u32,
    ) -> Result<()> {
        let base = geo
            .cluster_byte(first_cluster)
            .ok_or(Error::BadUpcaseTable)?;
        let mut b = [0u8; 256];
        read_exact(reader, base, &mut b, "io_upcase")?;
        for (i, slot) in self.mapping.iter_mut().enumerate() {
            *slot = u16::from_le_bytes([b[i * 2], b[i * 2 + 1]]);
        }
        Ok(())
    }

    /// Up-case one UTF-16 code unit.
    pub(crate) fn up(&self, c: u16) -> u16 {
        match self.mapping.get(usize::from(c)) {
            Some(&m) => m,
            None => c,
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Lowercase ASCII letters are up-cased; every other ASCII character is
    /// returned as-is.
    #[test]
    fn ascii_table_upcases_lowercase_only() {
        let table = Upcase::ascii();
        // (input, expected) pairs.
        let cases = [
            (b'a', b'A'),
            (b'z', b'Z'),
            (b'A', b'A'), // already upper
            (b'5', b'5'), // digit unchanged
            (b'_', b'_'), // punctuation unchanged
        ];
        for (input, expected) in cases {
            assert_eq!(table.up(u16::from(input)), u16::from(expected));
        }
    }

    /// Code units beyond the 128-entry table pass through untouched.
    #[test]
    fn codepoints_outside_the_table_map_to_themselves() {
        let table = Upcase::ascii();
        // 200: just past the table; 0x00E9: é; 0xFFFF: largest code unit.
        for unit in [200u16, 0x00E9, 0xFFFF] {
            assert_eq!(table.up(unit), unit);
        }
    }
}