lunar-lib 0.9.0

Common utilities for lunar applications
Documentation
use std::{
    ffi::{OsStr, OsString},
    path::PathBuf,
    sync::LazyLock,
};

/// Path of the null device: `NUL` is the Windows device name that discards everything written to
/// it (the counterpart of `/dev/null`).
///
/// NOTE(review): no use of this value is visible in this part of the file — confirm against callers.
pub(crate) static NULL_PATH: &str = "NUL";
/// Lookup table of characters that must not appear in a file name, as
/// `(illegal ASCII byte, replacement character)` pairs.
///
/// The set (`< > : " / \ | ? *`) matches the characters Windows reserves in file names.
/// Every key must be an ASCII byte: [`sanitize_string`] scans the platform encoding of an
/// [`OsStr`] byte by byte and only ASCII bytes can be replaced safely there.
pub(crate) const ILLEGAL_CHARACTERS: [(u8, char); 9] = [
    (b'<', '_'),
    (b'>', '_'),
    (b':', '_'),
    (b'\"', '_'),
    (b'/', '_'),
    (b'\\', '_'),
    (b'|', '_'),
    (b'?', '_'),
    (b'*', '_'),
];

/// Returns a copy of `os_str` in which every character listed in [`ILLEGAL_CHARACTERS`] has been
/// swapped for its replacement character.
///
/// All other content, including non-ASCII text and data that is not valid Unicode, is copied
/// through unchanged. An empty input produces an empty output.
///
/// # Implementation notes
///
/// [`OsStr::as_encoded_bytes`] exposes a self-synchronizing superset of UTF-8 on every platform
/// (it is *not* UTF-16, even on Windows). The string is therefore scanned one byte at a time: a
/// byte below `0x80` is always a complete ASCII character and never part of a longer sequence.
pub(crate) fn sanitize_string(os_str: impl AsRef<OsStr>) -> OsString {
    let encoded_bytes = os_str.as_ref().as_encoded_bytes();
    let mut buf: Vec<u8> = Vec::with_capacity(encoded_bytes.len());

    for &byte in encoded_bytes {
        // Only ASCII keys are honoured; this keeps the `unsafe` block below sound even if a
        // non-ASCII byte were ever added to the table by mistake.
        let replacement = ILLEGAL_CHARACTERS.iter().find_map(|&(illegal, replacement)| {
            (illegal.is_ascii() && illegal == byte).then_some(replacement)
        });

        match replacement {
            Some(replacement) => {
                // Replacements are written as UTF-8 so the buffer stays in the OsStr encoding.
                let mut char_buf = [0u8; 4];
                buf.extend_from_slice(replacement.encode_utf8(&mut char_buf).as_bytes());
            }
            None => buf.push(byte),
        }
    }

    // SAFETY: `buf` is a mixture of bytes taken from `OsStr::as_encoded_bytes` and validated
    // UTF-8 (the encoded replacement characters). The original bytes are only ever split
    // immediately before and after a single ASCII byte, which is a valid non-empty UTF-8
    // substring, so every retained piece is still validly encoded.
    unsafe { OsString::from_encoded_bytes_unchecked(buf) }
}

/// Returns all paths marked as explicitly protected. These paths should be treated carefully, like preventing deletion or moves
///
/// Protected paths are explicitly protected, meaning descendents of this path SHOULD NOT BE protected
/// If you would like to get all paths that have inexplict protections, see [`protected_directories()`]
///
/// # Notes
///
/// Users can specify their own protections using the environment variable `PROTECTED_PATHS`. Users can use the path 'default' to use additional default protections
/// If no environment variable is set, defaults will be used
///
/// The root directory will always be marked as protected regardless
///
/// The value will be computed once and stored internally in a [`LazyLock`], any other call will return the cached result
pub fn protected_paths() -> &'static [PathBuf] {
    &PROTECTED_PATHS
}
static PROTECTED_PATHS: LazyLock<Box<[PathBuf]>> = LazyLock::new(|| Box::new([]));

/// Returns all paths marked as inexplicitly protected. These paths should be treated carefully, like preventing deletion or moves
///
/// Protected paths are inexplicitly protected, meaning descendents of this path SHOULD BE protected
/// If you would like to get all paths that have inexplict protections, see [`protected_directories()`]
///
/// # Notes
///
/// Users can specify their own protections using the environment variable `PROTECTED_DIRS`
///
/// The value will be computed once and stored internally in a [`LazyLock`], any other call will return the cached result
pub fn protected_directories() -> &'static [PathBuf] {
    &PROTECTED_DIRECTORIES
}
static PROTECTED_DIRECTORIES: LazyLock<Box<[PathBuf]>> = LazyLock::new(|| Box::new([]));

#[cfg(test)]
mod tests {
    /// Every character that has an entry in `ILLEGAL_CHARACTERS` must be replaced.
    #[test]
    fn windows_sanitize_str_replaces_chars() {
        use std::ffi::OsString;

        let os_str = OsString::from("<>:\"/\\|?*");
        let sanitized = super::sanitize_string(os_str);

        // Nine illegal characters go in, so nine replacement underscores must come out;
        // expecting the input back unchanged would contradict what this test is named for.
        assert_eq!(sanitized, OsString::from("_________"));
    }

    /// Sanitizing a whole path must replace illegal characters inside a component while leaving
    /// the separators between components intact.
    #[test]
    fn windows_sanitize_path_keeps_structure() {
        use crate::paths::sys::sanitize_path;
        use std::path::PathBuf;

        let path = PathBuf::from("test/invalid<char/test");
        let sanitized = sanitize_path(path);

        assert_eq!(sanitized, PathBuf::from("test/invalid_char/test"));
    }
}