lamexfat 0.1.0

no_std read-only exFAT reader for UEFI bootloaders (removable media)
Documentation
// SPDX-License-Identifier: MIT OR Apache-2.0
//! UTF-16 file-name handling: case-insensitive compare against a UTF-8 query,
//! and lossy decode for directory listings.

use alloc::string::String;

use crate::upcase::Upcase;

/// Compare a UTF-8 path component with an on-disk UTF-16 name, ignoring case.
///
/// `query` is re-encoded as UTF-16 and every code unit on both sides is mapped
/// through `up.up` before being compared. The names are equal only when both
/// sequences have the same length and agree unit for unit. A `query` that is
/// not valid UTF-8 is reported as a mismatch rather than an error.
pub(crate) fn name_eq(query: &[u8], ondisk: &[u16], up: &Upcase) -> bool {
    let text = match core::str::from_utf8(query) {
        Ok(text) => text,
        Err(_) => return false,
    };
    let wanted = text.encode_utf16().map(|unit| up.up(unit));
    let stored = ondisk.iter().map(|&unit| up.up(unit));
    // `Iterator::eq` stops at the first differing unit and also rejects a
    // length mismatch, so neither a prefix nor an extension can match.
    wanted.eq(stored)
}

/// Turn an on-disk UTF-16 name into an owned `String` for display.
///
/// Decoding is lossy: each code unit that does not form a valid scalar value
/// (an unpaired surrogate) becomes U+FFFD, so a single bad name can never make
/// a directory listing fail.
pub(crate) fn decode_lossy(ondisk: &[u16]) -> String {
    let mut name = String::new();
    for decoded in char::decode_utf16(ondisk.iter().copied()) {
        match decoded {
            Ok(ch) => name.push(ch),
            // Unpaired surrogate: substitute instead of aborting the decode.
            Err(_) => name.push(char::REPLACEMENT_CHARACTER),
        }
    }
    name
}

#[cfg(test)]
mod tests {
    extern crate alloc;
    use alloc::vec::Vec;

    use super::*;

    /// Encode `s` as the UTF-16 code units an on-disk name would hold.
    fn utf16(s: &str) -> Vec<u16> {
        let mut units = Vec::new();
        units.extend(s.encode_utf16());
        units
    }

    #[test]
    fn ascii_compare_is_case_insensitive() {
        let table = Upcase::ascii();
        let stored = utf16("README.TXT");

        // Any mix of ASCII case must match the stored name.
        for query in ["readme.txt", "ReAdMe.TxT", "README.TXT"] {
            assert!(name_eq(query.as_bytes(), &stored, &table));
        }

        // Length must match exactly: a prefix, an extension and an unrelated
        // name are all rejected.
        for query in ["readme.tx", "readme.txtx", "other"] {
            assert!(!name_eq(query.as_bytes(), &stored, &table));
        }
    }

    #[test]
    fn non_ascii_matches_exact_case() {
        // The 128-entry ASCII table maps >= 128 to identity, so a non-ASCII name
        // still matches case-sensitively (exact-case lookup always works).
        let table = Upcase::ascii();
        let name = "café";
        let stored = utf16(name);
        assert!(name_eq(name.as_bytes(), &stored, &table));
    }

    #[test]
    fn invalid_utf8_query_never_matches() {
        let table = Upcase::ascii();
        let stored = utf16("x");
        // 0xFF 0xFE is not a valid UTF-8 sequence.
        let bad_query = [0xFF, 0xFE];
        assert!(!name_eq(&bad_query, &stored, &table));
    }

    #[test]
    fn decode_replaces_lone_surrogate() {
        // A well-formed name round-trips unchanged.
        let well_formed = utf16("grub.cfg");
        assert_eq!(decode_lossy(&well_formed), "grub.cfg");

        // A lone high surrogate decodes to the replacement character.
        let lone_surrogate = [0xD800];
        assert_eq!(decode_lossy(&lone_surrogate), "\u{FFFD}");
    }
}