constr 1.0.2

Constant string generics.
Documentation
//! Constant string generics.
//!
//! See [`Constr`] trait and [`constr!`] macro for more info.
#![no_std]

/// Constant string "type".
///
/// To get around the lack of constant string generics, we use types that implement this trait to
/// provide generic strings instead: the string travels as a type parameter and is read back
/// through the associated constant [`Constr::STR`].
///
/// To easily construct zero-sized types which implement this trait, see [`constr!`].
///
/// # Examples
///
/// ```
/// use constr::Constr;
///
/// struct Hello;
/// impl Constr for Hello {
///     const STR: &'static str = "hello";
/// }
///
/// fn len<S: Constr>() -> usize {
///     S::STR.len()
/// }
///
/// assert_eq!(len::<Hello>(), 5);
/// ```
pub trait Constr {
    /// The string value carried by the implementing type.
    const STR: &'static str;
}

/// Assertion that [`Constr`] is ASCII.
///
/// Types created by [`constr!`] with the `Ascii` flag implement this trait; the macro checks the
/// literal at compile time before doing so.
///
/// # Safety
///
/// Implementors must guarantee that [`Constr::STR`] consists solely of ASCII characters.
/// This assumption can be relied upon in unsafe code.
pub unsafe trait Ascii: Constr {}

/// Assertion that [`Constr`] is caseless, i.e., none of its characters have the `Cased` Unicode
/// property.
///
/// Types created by [`constr!`] with the `Caseless` flag implement this trait; the macro checks
/// the literal at compile time before doing so.
///
/// # Safety
///
/// Implementors must guarantee that no character of [`Constr::STR`] is cased.
/// This assumption can be relied upon in unsafe code.
pub unsafe trait Caseless: Constr {}

/// Assertion that [`Constr`] is nonempty.
///
/// Every type created by [`constr!`] implements this trait; the macro rejects empty literals at
/// compile time.
///
/// # Safety
///
/// Implementors must guarantee that [`Constr::STR`] contains at least one character.
/// This assumption can be relied upon in unsafe code.
pub unsafe trait Nonempty: Constr {}

/// The empty string as a ready-made [`Constr`] type, so you don't have to define it yourself.
///
/// Types created by [`constr!`] always implement [`Nonempty`], and the macro rejects empty
/// literals, so use this type whenever an empty string is needed. It implements [`Ascii`] and
/// [`Caseless`].
// NOTE(review): `Debug` is presumably left out because this type is only meant to be used at the
// type level, never as a value — confirm.
#[allow(missing_debug_implementations)]
pub struct Empty;
impl Constr for Empty {
    const STR: &'static str = "";
}

// SAFETY: The empty string has no characters, so vacuously none of them are cased.
unsafe impl Caseless for Empty {}

// SAFETY: The empty string has no characters, so vacuously all of them are ASCII.
unsafe impl Ascii for Empty {}

/// UTF-8 decoding functions modified from libcore versions to work on stable Rust.
///
/// Everything here is a `const fn` operating on a `&[u8]` cursor, which lets the compile-time
/// checks in `verify` decode characters during constant evaluation.
mod utf8 {
    /// Taken from libcore.
    ///
    /// Mask selecting the six payload bits of a continuation byte.
    const CONT_MASK: u8 = 0b0011_1111;

    /// Taken from libcore.
    ///
    /// Extracts the payload bits of a leading byte: the low `7 - width` bits of `byte`.
    const fn utf8_first_byte(byte: u8, width: u32) -> u32 {
        (byte & (0x7F >> width)) as u32
    }

    /// Taken from libcore.
    ///
    /// Shifts the accumulator `ch` left by six bits and appends the payload bits of the
    /// continuation byte `byte`.
    const fn utf8_acc_cont_byte(ch: u32, byte: u8) -> u32 {
        (ch << 6) | (byte & CONT_MASK) as u32
    }

    /// Taken from libcore.
    ///
    /// Decodes the code point at the front of `*bytes` and advances `*bytes` past the bytes that
    /// were consumed. Returns `None`, leaving `*bytes` untouched, when the slice is empty.
    ///
    /// # Safety
    ///
    /// `*bytes` must be valid UTF-8 (for example, the bytes of a `str`, or what is left of them
    /// after earlier calls to this function). Continuation bytes are read without checking that
    /// they exist and the result is converted to `char` without validation, so any other input
    /// is undefined behavior.
    pub(super) const unsafe fn next_code_point(bytes: &mut &[u8]) -> Option<char> {
        // Decode UTF-8
        let Some((x, xs)) = bytes.split_first() else {
            return None;
        };
        *bytes = xs;
        let x = *x;
        // Single-byte (ASCII) fast path.
        if x < 128 {
            return Some(x as char);
        }

        // Multibyte case follows
        // Decode from a byte combination out of: [[[x y] z] w]
        // NOTE: Performance is sensitive to the exact formulation here
        let init = utf8_first_byte(x, 2);
        // SAFETY: The caller guarantees `bytes` is valid UTF-8, and a leading byte >= 128 is
        // always followed by at least one continuation byte, so the slice is nonempty here.
        let (y, ys) = unsafe { bytes.split_first().unwrap_unchecked() };
        *bytes = ys;
        let y = *y;
        let mut ch = utf8_acc_cont_byte(init, y);
        if x >= 0xE0 {
            // [[x y z] w] case
            // 5th bit in 0xE0 .. 0xEF is always clear, so `init` is still valid
            // SAFETY: The caller guarantees `bytes` is valid UTF-8, and a leading byte >= 0xE0
            // is followed by at least two continuation bytes, so the slice is nonempty here.
            let (z, zs) = unsafe { bytes.split_first().unwrap_unchecked() };
            *bytes = zs;
            let z = *z;
            let y_z = utf8_acc_cont_byte((y & CONT_MASK) as u32, z);
            ch = init << 12 | y_z;
            if x >= 0xF0 {
                // [x y z w] case
                // use only the lower 3 bits of `init`
                // SAFETY: The caller guarantees `bytes` is valid UTF-8, and a leading byte
                // >= 0xF0 is followed by three continuation bytes, so the slice is nonempty here.
                let (w, ws) = unsafe { bytes.split_first().unwrap_unchecked() };
                *bytes = ws;
                let w = *w;
                ch = (init & 7) << 18 | utf8_acc_cont_byte(y_z, w);
            }
        }

        // SAFETY: Came from valid UTF-8, so, we get a valid character.
        Some(unsafe { char::from_u32_unchecked(ch) })
    }
}

/// Reports whether `c` is a titlecase letter (`General_Category=Titlecase_Letter`).
///
/// libcore carries the title-case tables internally but does not expose them, and the set is
/// small enough to list by hand here.
const fn is_titlecase(c: char) -> bool {
    // Code points taken from General_Category=Titlecase_Letter in DerivedGeneralCategory.txt.
    match c as u32 {
        // Isolated code points in the Latin Extended-B block.
        0x01C5 | 0x01C8 | 0x01CB | 0x01F2 => true,
        // Three eight-wide runs in the Greek Extended block.
        0x1F88..=0x1F8F | 0x1F98..=0x1F9F | 0x1FA8..=0x1FAF => true,
        // Isolated code points in the Greek Extended block.
        0x1FBC | 0x1FCC | 0x1FFC => true,
        _ => false,
    }
}

/// Compile-time verification for the [`constr!`] macro.
///
/// Each function is named exactly like the marker trait it validates so that the macro can
/// expand a flag to `$crate::verify::$flag`; that is why the names are not snake case.
#[doc(hidden)]
#[allow(non_snake_case)]
pub mod verify {
    /// Returns `true` when `s` is entirely ASCII.
    #[doc(hidden)]
    pub const fn Ascii(s: &'static str) -> bool {
        s.is_ascii()
    }

    /// Returns `true` when no character of `s` is lowercase, uppercase, or titlecase.
    #[doc(hidden)]
    pub const fn Caseless(s: &'static str) -> bool {
        let mut rest = s.as_bytes();
        // SAFETY: `rest` starts out as the bytes of a `str`, which are valid UTF-8, and
        // `next_code_point` only ever strips one whole code point from the front, so what
        // remains is valid UTF-8 on every iteration.
        while let Some(ch) = unsafe { crate::utf8::next_code_point(&mut rest) } {
            let cased = ch.is_lowercase() || ch.is_uppercase() || crate::is_titlecase(ch);
            if cased {
                return false;
            }
        }
        // Ran out of characters without meeting a cased one.
        true
    }
}

/// Creates an <code>impl [Constr]</code> type for a string.
///
/// Every `type Name = "literal";` entry expands to a unit struct `Name` that implements
/// [`Constr`] with `STR` set to the literal, and [`Nonempty`].
///
/// Allows implementing marker traits as well: in order to avoid using a proc macro, these have to
/// be explicitly specified as a parenthesized, comma-separated list before the literal.
/// The flags are the marker trait names ([`Ascii`], [`Caseless`]). Validity is ensured at compile
/// time to avoid accidental unsoundness: a literal that does not satisfy a requested flag fails
/// to compile.
///
/// This macro will only accept nonempty strings. For the empty string, use [`Empty`] instead.
///
/// # Examples
///
/// ```
/// use constr::constr;
///
/// constr! {
///     /// Caseless string with non-ASCII characters.
///     pub type Caseless = (Caseless) "中 ?";
///
///     /// Caseless ASCII string.
///     pub(crate) type All = (Ascii, Caseless) "???";
///
///     /// Cased string.
///     type Cased = "AAAあ";
/// }
/// ```
#[macro_export]
macro_rules! constr {
    (
        $(
            $(#[$attr:meta])*
            $vis:vis type $name:ident = $(($($flag:ident),* $(,)?))? $lit:literal;
        )*
    ) => {
        $(
            $(#[$attr])*
            #[allow(missing_debug_implementations)]
            $vis struct $name;
            impl $crate::Constr for $name {
                const STR: &'static str = $lit;
            }

            // SAFETY: Verified by constant assertion.
            unsafe impl $crate::Nonempty for $name {}
            const _: () = {
                // The message is passed as an argument to `"{}"` rather than used as the format
                // string itself, so a literal containing `{` or `}` is never parsed as a
                // format placeholder.
                assert!(
                    !$lit.is_empty(),
                    "{}",
                    concat!(stringify!($lit), " was empty"),
                );
            };

            $(
                $(
                    // SAFETY: Verified by constant assertion.
                    unsafe impl $crate::$flag for $name {}
                    const _: () = {
                        // Same `"{}"` indirection as above, for literals containing braces.
                        assert!(
                            $crate::verify::$flag($lit),
                            "{}",
                            concat!(stringify!($lit), " did not satisfy ", stringify!($flag)),
                        );
                    };
                )*
            )*
        )*
    };
}