// xocomil 0.3.0 - Docs.rs

//! HTTP-significant byte constants as a zero-cost `#[repr(u8)]` enum.
//!
//! Replaces scattered `b'\r'`, `b'\n'`, `b':'` literals with named,
//! autocomplete-friendly variants. The `#[repr(u8)]` layout guarantees
//! each variant compiles to a single immediate byte, and the symmetric
//! `PartialEq<u8>` impls allow direct comparison against raw bytes.

/// Bytes that carry special meaning while parsing HTTP messages, by name.
///
/// # Layout and cost
///
/// The enum is `#[repr(u8)]`: a value occupies exactly one byte and its
/// discriminant *is* the ASCII code. Going through `as_u8`, `as_i8` or the
/// `From` impls is therefore a plain cast, so a variant should cost no more
/// at runtime than the bare `b'\r'`-style literal it replaces.
///
/// # Examples
///
/// ```
/// use xocomil::ascii::HttpChar;
///
/// let byte: u8 = b'\r';
///
/// // A raw byte compares directly against a variant.
/// assert!(byte == HttpChar::CarriageReturn);
///
/// // Byte-wise SIMD intrinsics take `i8` lanes.
/// let _lane: i8 = HttpChar::CarriageReturn.as_i8();
/// ```
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
#[repr(u8)]
pub enum HttpChar {
    // ---- Control characters -------------------------------------------------
    /// `NUL` — must not appear in header values (response-splitting vector).
    Null = 0x00,
    /// `HTAB` (`b'\t'`) — one of the two optional-whitespace (OWS) characters,
    /// see RFC 7230 §3.2.3.
    HorizontalTab = 0x09,
    /// `LF` (`b'\n'`) — the byte that ends a CRLF line terminator.
    LineFeed = 0x0A,
    /// `CR` (`b'\r'`) — the byte that starts a CRLF line terminator.
    CarriageReturn = 0x0D,

    // ---- Printable characters -----------------------------------------------
    /// `SP` (`b' '`) — separates request-line elements; also an OWS character.
    Space = 0x20,
    /// ASCII `'0'` — add a digit value `0..=9` to obtain that digit's character.
    Zero = 0x30,
    /// `':'` — separates a header name from its value.
    Colon = 0x3A,

    // ---- Top of the 7-bit range ---------------------------------------------
    /// `DEL` — not allowed inside a request target (RFC 7230 §3.1.1).
    Delete = 0x7F,
}

impl HttpChar {
    /// Returns the variant's ASCII code as an unsigned byte.
    #[inline]
    #[must_use]
    pub const fn as_u8(self) -> u8 {
        self as u8
    }

    /// Returns the same bit pattern as an `i8`, the lane type taken by
    /// byte-wise SIMD intrinsics such as `_mm_set1_epi8`.
    #[inline]
    #[must_use]
    // Reinterpreting the bits is the intent of this accessor, so the lint does
    // not apply.
    #[allow(clippy::cast_possible_wrap)]
    pub const fn as_i8(self) -> i8 {
        self.as_u8() as i8
    }
}

// ---------------------------------------------------------------------------
// Conversions
// ---------------------------------------------------------------------------

/// Lossless conversion to the variant's ASCII code.
impl From<HttpChar> for u8 {
    #[inline]
    fn from(ch: HttpChar) -> Self {
        ch.as_u8()
    }
}

/// Conversion to the signed byte with the same bit pattern as the variant's
/// ASCII code (equivalent to [`HttpChar::as_i8`]).
impl From<HttpChar> for i8 {
    #[inline]
    fn from(ch: HttpChar) -> Self {
        ch.as_i8()
    }
}

// ---------------------------------------------------------------------------
// Arithmetic — allows `HttpChar::Zero + digit` for decimal encoding
// ---------------------------------------------------------------------------

/// `variant + offset` yields the byte `offset` positions after the variant's
/// ASCII code, e.g. `HttpChar::Zero + 5 == b'5'`.
///
/// The sum is ordinary `u8` addition: on overflow it panics when overflow
/// checks are enabled and wraps otherwise.
impl std::ops::Add<u8> for HttpChar {
    type Output = u8;

    #[inline]
    fn add(self, rhs: u8) -> Self::Output {
        let base = self.as_u8();
        base + rhs
    }
}

// ---------------------------------------------------------------------------
// Content-Length parsing (shared between request and response)
// ---------------------------------------------------------------------------

/// Upper bound on an accepted Content-Length: 2^50 bytes (1 PiB).
///
/// The cap is far larger than any realistic HTTP body, yet smaller than
/// `u64::MAX / 2`, so adding a sane second operand to a parsed length
/// cannot overflow `u64`. It fits in `usize` on 64-bit targets; on 32-bit
/// targets it does not, so a narrowing conversion there still has to be
/// checked by the caller.
pub(crate) const MAX_CONTENT_LENGTH: u64 = 1 << 50;

/// Parses the value of a Content-Length header as a decimal `u64`.
///
/// Returns `Some(n)` only when `value` consists solely of ASCII digits,
/// has no redundant leading zero, and `n <= MAX_CONTENT_LENGTH`.
/// Everything else yields `None`: empty input, any non-digit byte
/// (signs and whitespace included), arithmetic overflow, and values above
/// [`MAX_CONTENT_LENGTH`].
///
/// A multi-digit value starting with `0` (e.g. `007`) is refused on
/// purpose. Some intermediaries read such values as octal, and two parties
/// disagreeing on the body length is a request-smuggling vector.
#[inline]
pub(crate) fn parse_content_length(value: &[u8]) -> Option<u64> {
    match value {
        // Nothing to parse.
        [] => return None,
        // `0` followed by more bytes: decimal/octal ambiguity.
        [b'0', _, ..] => return None,
        _ => {}
    }

    // Accumulate digit by digit; the first non-digit or overflow aborts.
    let parsed = value.iter().try_fold(0u64, |acc, &byte| {
        let digit = match byte {
            b'0'..=b'9' => u64::from(byte - b'0'),
            _ => return None,
        };
        acc.checked_mul(10)?.checked_add(digit)
    })?;

    if parsed <= MAX_CONTENT_LENGTH {
        Some(parsed)
    } else {
        None
    }
}

// ---------------------------------------------------------------------------
// Symmetric PartialEq<u8> — allows `byte == HttpChar::CarriageReturn` and vice versa
// ---------------------------------------------------------------------------

/// `HttpChar == u8`: true when the byte equals the variant's ASCII code.
impl PartialEq<u8> for HttpChar {
    #[inline]
    fn eq(&self, byte: &u8) -> bool {
        self.as_u8() == *byte
    }
}

/// `u8 == HttpChar`: true when the byte equals the variant's ASCII code.
impl PartialEq<HttpChar> for u8 {
    #[inline]
    fn eq(&self, ch: &HttpChar) -> bool {
        let code = ch.as_u8();
        *self == code
    }
}

#[cfg(test)]
// The `as i8` casts in the expectations deliberately mirror `as_i8`.
#[allow(clippy::cast_possible_wrap)]
mod tests {
    use super::*;

    // ---- HttpChar: values and layout ---------------------------------------

    #[test]
    fn repr_values() {
        assert_eq!(HttpChar::Null as u8, 0x00);
        assert_eq!(HttpChar::HorizontalTab as u8, 0x09);
        assert_eq!(HttpChar::LineFeed as u8, 0x0A);
        assert_eq!(HttpChar::CarriageReturn as u8, 0x0D);
        assert_eq!(HttpChar::Space as u8, 0x20);
        assert_eq!(HttpChar::Zero as u8, 0x30);
        assert_eq!(HttpChar::Colon as u8, 0x3A);
        assert_eq!(HttpChar::Delete as u8, 0x7F);
    }

    #[test]
    fn size_of_is_one() {
        assert_eq!(std::mem::size_of::<HttpChar>(), 1);
    }

    // ---- HttpChar: comparisons ---------------------------------------------

    #[test]
    fn partial_eq_u8_both_directions() {
        let cr: u8 = b'\r';
        assert!(cr == HttpChar::CarriageReturn);
        assert!(HttpChar::CarriageReturn == cr);
        assert!(cr != HttpChar::LineFeed);
        assert!(HttpChar::LineFeed != cr);
    }

    // ---- HttpChar: conversions ---------------------------------------------

    #[test]
    fn as_u8_matches_byte_literal() {
        assert_eq!(HttpChar::Colon.as_u8(), b':');
        assert_eq!(HttpChar::Null.as_u8(), 0);
        assert_eq!(HttpChar::Delete.as_u8(), 0x7F);
    }

    #[test]
    fn as_i8_for_simd() {
        assert_eq!(HttpChar::CarriageReturn.as_i8(), b'\r' as i8);
        assert_eq!(HttpChar::Delete.as_i8(), 0x7F_u8 as i8);
    }

    #[test]
    fn accessors_work_in_const_context() {
        // Both accessors are `const fn`; make sure they stay that way.
        const CR: u8 = HttpChar::CarriageReturn.as_u8();
        const CR_LANE: i8 = HttpChar::CarriageReturn.as_i8();
        assert_eq!(CR, b'\r');
        assert_eq!(CR_LANE, 0x0D);
    }

    #[test]
    fn from_conversions() {
        let u: u8 = HttpChar::Space.into();
        assert_eq!(u, b' ');
        let i: i8 = HttpChar::Space.into();
        assert_eq!(i, b' ' as i8);
    }

    #[test]
    fn add_u8_for_digit_encoding() {
        assert_eq!(HttpChar::Zero + 0, b'0');
        assert_eq!(HttpChar::Zero + 5, b'5');
        assert_eq!(HttpChar::Zero + 9, b'9');
    }

    // ---- parse_content_length ----------------------------------------------

    #[test]
    fn content_length_simple_values() {
        assert_eq!(parse_content_length(b"0"), Some(0));
        assert_eq!(parse_content_length(b"5"), Some(5));
        assert_eq!(parse_content_length(b"10"), Some(10));
        assert_eq!(parse_content_length(b"1024"), Some(1024));
    }

    #[test]
    fn content_length_rejects_empty() {
        assert_eq!(parse_content_length(b""), None);
    }

    #[test]
    fn content_length_rejects_non_digits() {
        // Signs, whitespace, separators and hex are all refused, wherever
        // they appear in the value.
        let rejected: [&[u8]; 8] = [
            b"+5", b"-1", b" 5", b"5 ", b"1.5", b"12a", b"0x10", b"1,000",
        ];
        for input in rejected {
            assert_eq!(parse_content_length(input), None, "input: {input:?}");
        }
    }

    #[test]
    fn content_length_rejects_leading_zero() {
        assert_eq!(parse_content_length(b"007"), None);
        assert_eq!(parse_content_length(b"01"), None);
        assert_eq!(parse_content_length(b"00"), None);
    }

    #[test]
    fn content_length_rejects_above_cap() {
        // u64::MAX would have overflowed, but a plausible-looking
        // 2 PiB request must also be rejected to stop downstream
        // arithmetic footguns.
        let two_pib = (1u64 << 51).to_string();
        assert_eq!(parse_content_length(two_pib.as_bytes()), None);
        let just_over = (MAX_CONTENT_LENGTH + 1).to_string();
        assert_eq!(parse_content_length(just_over.as_bytes()), None);
        assert_eq!(parse_content_length(b"99999999999999999999"), None);
    }

    #[test]
    fn content_length_at_cap_succeeds() {
        let max = MAX_CONTENT_LENGTH.to_string();
        assert_eq!(
            parse_content_length(max.as_bytes()),
            Some(MAX_CONTENT_LENGTH)
        );
    }
}