xocomil 0.3.0

A lightweight, zero-allocation HTTP/1.1 request parser and response writer
Documentation
mod parse;
mod read;

#[cfg(test)]
mod tests;

pub use read::ReadRequest;

use std::mem::MaybeUninit;
use std::num::NonZeroU8;

use crate::headers::{Header, HeaderName, HttpVersion, Method, RequestHeader};

/// Default maximum byte size for HTTP headers (8 KiB = 8192 bytes).
///
/// NOTE(review): the consumer of this limit is not in this part of the
/// file — presumably the request reader/parser; confirm at the use site.
pub const DEFAULT_MAX_HEADER_SIZE: usize = 8192;

/// Default maximum number of headers to parse.
///
/// Used as the default value of the `MAX_HDRS` const generic parameter
/// of [`Request`], i.e. the capacity of its inline header array.
pub const DEFAULT_MAX_HEADERS: usize = 32;

/// One-byte slot of the known-header lookup table.
///
/// Wraps an `Option<NonZeroU8>`. A header index `i` is stored as `i + 1`,
/// which leaves the raw value 0 free to mean "absent"; thanks to the
/// `NonZeroU8` niche the `Option` costs no extra byte, so the whole slot
/// is exactly 1 byte wide.
#[derive(Clone, Copy)]
#[repr(transparent)]
struct HeaderSlot(Option<NonZeroU8>);

impl HeaderSlot {
    /// A slot that holds no header index.
    const EMPTY: Self = Self(None);

    /// Encodes header index `idx` into a slot.
    ///
    /// The caller must guarantee `idx < 255`.
    #[inline]
    const fn new(idx: u8) -> Self {
        // Shift by one so that index 0 does not collide with the "absent"
        // encoding. Callers rely on the `MAX_HDRS <= 255` compile-time
        // assert (defined elsewhere) to keep `idx` below 255, hence the
        // shifted value lies in 1..=255 and is a valid `NonZeroU8`.
        let encoded = idx + 1;
        Self(NonZeroU8::new(encoded))
    }

    /// Decodes the slot back into the header index it holds, or `None`
    /// when the slot is empty.
    #[inline]
    const fn get(self) -> Option<u8> {
        // Written without closures so the function can stay `const`.
        if let Some(encoded) = self.0 {
            Some(encoded.get() - 1)
        } else {
            None
        }
    }

    /// `true` when the slot holds a header index.
    #[inline]
    const fn is_some(self) -> bool {
        matches!(self.0, Some(_))
    }

    /// `true` when the slot is empty.
    #[inline]
    const fn is_none(self) -> bool {
        matches!(self.0, None)
    }
}

/// A parsed HTTP request whose path and header data borrow from a
/// buffer with lifetime `'buf`.
///
/// `MAX_HDRS` is the capacity of the inline header array and defaults
/// to [`DEFAULT_MAX_HEADERS`]. All fields are private; read them through
/// the accessor methods on this type.
pub struct Request<'buf, const MAX_HDRS: usize = DEFAULT_MAX_HEADERS> {
    /// Request method, returned by [`Self::method`].
    method: Method,
    /// HTTP version, returned by [`Self::version`].
    version: HttpVersion,
    /// Raw request-target bytes (path plus any `?query`), borrowed from
    /// the `'buf` buffer.
    path: &'buf [u8],
    /// Header slots. Only the first `header_count` are initialized.
    /// Reading beyond that boundary is undefined behavior; all
    /// internal accessors gate on `header_count` via
    /// [`Self::headers_init`].
    ///
    /// `MaybeUninit` avoids zeroing all `MAX_HDRS` slots up front —
    /// previously a noticeable fraction of small-request parse cost
    /// (1 KiB of stack stores at the default 32 slots × 32 B `Header`).
    headers: [MaybeUninit<Header<'buf>>; MAX_HDRS],
    /// Number of initialized entries at the front of `headers`.
    header_count: usize,
    /// O(1) lookup table: maps `RequestHeader` discriminant → index in
    /// `headers`. `None` means "not present". Uses +1 encoding via
    /// `NonZeroU8` for 1-byte niche-optimized representation.
    known: [HeaderSlot; RequestHeader::COUNT],
    /// Cached parsed Content-Length value. Validated at parse time.
    content_length: Option<u64>,
}

impl<const MAX_HDRS: usize> std::fmt::Debug for Request<'_, MAX_HDRS> {
    /// Formats a compact summary of the request: method, version, the
    /// path as a UTF-8 decoding `Result`, and the header count. The
    /// header array itself is not printed (most of it may be
    /// uninitialized).
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Decoded once up front; shown as `Ok("…")` or `Err(Utf8Error)`.
        let path = self.path_str();

        let mut summary = f.debug_struct("Request");
        summary.field("method", &self.method);
        summary.field("version", &self.version);
        summary.field("path", &path);
        summary.field("header_count", &self.header_count);
        summary.finish()
    }
}

impl<'buf, const MAX_HDRS: usize> Request<'buf, MAX_HDRS> {
    /// Borrows the initialized front of the header array as a plain
    /// `&[Header<'buf>]`.
    ///
    /// # Safety invariant
    ///
    /// `self.headers[0..self.header_count]` must be initialized; this is
    /// established by `parse_impl` before it returns. Every accessor and
    /// validator goes through this helper, making it the one place where
    /// the `[MaybeUninit<Header>]` prefix is reinterpreted as `[Header]`.
    #[inline]
    fn headers_init(&self) -> &[Header<'buf>] {
        // Bounds-checked: never exposes more than `header_count` slots.
        let filled = &self.headers[..self.header_count];
        let first = filled.as_ptr().cast::<Header<'buf>>();
        // SAFETY: `parse_impl` writes exactly `self.header_count` slots
        // before storing the count and never decrements it, so every
        // element of `filled` is initialized. `MaybeUninit<T>` has the
        // same layout as `T`, so viewing those elements as `[T]` with the
        // same length is sound; the result borrows from `self`.
        unsafe { std::slice::from_raw_parts(first, filled.len()) }
    }

    /// Returns the parsed HTTP method.
    ///
    /// This is a plain read of the stored field; the `Method` value is
    /// returned by value.
    #[inline]
    #[must_use]
    pub const fn method(&self) -> Method {
        self.method
    }

    /// Returns the parsed HTTP version.
    ///
    /// This is a plain read of the stored field; the `HttpVersion` value
    /// is returned by value.
    #[inline]
    #[must_use]
    pub const fn version(&self) -> HttpVersion {
        self.version
    }

    /// Returns the request path as raw bytes.
    ///
    /// This is the full request-target including any query string. For
    /// the path component alone (before `?`), use
    /// [`path_only`](Self::path_only); for the query string, use
    /// [`query`](Self::query).
    ///
    /// The returned slice carries the `'buf` lifetime, so it borrows from
    /// the underlying buffer rather than from this `Request`.
    #[inline]
    #[must_use]
    pub const fn path(&self) -> &'buf [u8] {
        self.path
    }

    /// Returns the part of the request target that precedes the first
    /// `?`.
    ///
    /// When the target contains no `?`, the result is the same slice as
    /// [`path`](Self::path).
    #[inline]
    #[must_use]
    pub fn path_only(&self) -> &'buf [u8] {
        let target = self.path;
        let Some(mark) = target.iter().position(|&byte| byte == b'?') else {
            // No query separator: the whole target is the path.
            return target;
        };
        &target[..mark]
    }

    /// Returns the raw query string: everything after the first `?` of
    /// the request target, with the `?` itself excluded.
    ///
    /// Yields an empty slice when the target contains no `?`. Use
    /// [`query_pairs`](Self::query_pairs) to iterate the parsed pairs.
    #[inline]
    #[must_use]
    pub fn query(&self) -> &'buf [u8] {
        let target = self.path;
        match target.iter().position(|&byte| byte == b'?') {
            // Skip the separator itself; a trailing `?` gives an empty slice.
            Some(mark) => &target[mark + 1..],
            None => &[],
        }
    }

    /// Iterator over query string `(name, value)` pairs (raw, undecoded).
    ///
    /// Feeds the output of [`query`](Self::query) to
    /// `crate::query::parse`. Each item is borrowed from the request
    /// buffer. Decode individual names and values with
    /// [`pct::decode`](crate::pct::decode) using
    /// [`Mode::Form`](crate::pct::Mode::Form).
    #[inline]
    #[must_use]
    pub fn query_pairs(&self) -> crate::query::QueryIter<'buf> {
        crate::query::parse(self.query())
    }

    /// Returns the request path as a UTF-8 string.
    ///
    /// Validates the same bytes that [`path`](Self::path) returns, so the
    /// string includes any query string. Validation runs on every call;
    /// the result is not cached.
    ///
    /// # Errors
    ///
    /// Returns `std::str::Utf8Error` if the path is not valid UTF-8.
    #[inline]
    pub const fn path_str(&self) -> Result<&'buf str, std::str::Utf8Error> {
        std::str::from_utf8(self.path)
    }

    /// Returns the percent-decoded request path.
    ///
    /// Zero-copy when the raw path contains no `%` escapes: returns a
    /// slice borrowed from the parser buffer and `out` is not touched.
    /// Otherwise, the decoded bytes are written into the prefix of `out`.
    ///
    /// `+` is left as-is — paths use RFC 3986 decoding, not form decoding.
    /// To decode the query component, iterate
    /// [`query_pairs`](Self::query_pairs) and pass each value through
    /// [`pct::decode`](crate::pct::decode) with [`Mode::Form`](crate::pct::Mode::Form).
    ///
    /// # Errors
    ///
    /// Returns <code>[Error::Pct]([PctErrorKind::InvalidEscape])</code>
    /// if a `%` is not followed by two hex digits, or
    /// <code>[Error::Pct]([PctErrorKind::BufferTooSmall])</code> if `out`
    /// cannot hold the decoded output.
    ///
    /// [Error::Pct]: crate::error::Error::Pct
    /// [PctErrorKind::InvalidEscape]: crate::error::PctErrorKind::InvalidEscape
    /// [PctErrorKind::BufferTooSmall]: crate::error::PctErrorKind::BufferTooSmall
    #[inline]
    pub fn path_decoded<'a>(&self, out: &'a mut [u8]) -> Result<&'a [u8], crate::error::Error>
    where
        'buf: 'a,
    {
        crate::pct::decode(self.path, crate::pct::Mode::Path, out).map_err(Into::into)
    }

    /// Finds a header by name (case-insensitive) and returns the raw
    /// bytes of its value.
    ///
    /// A name that resolves to a known-header index (for example a
    /// [`crate::headers::RequestHeader`] enum variant) is answered with
    /// an O(1) table lookup; if that table slot is empty the result is
    /// `None` and no scan is performed. Any other name falls back to a
    /// linear scan that returns the first stored header whose name
    /// matches.
    #[inline]
    pub fn header<'name>(&self, name: impl HeaderName<'name>) -> Option<&'buf [u8]> {
        match name.known_index() {
            // Fast path: known header → direct index into the table.
            Some(slot) => {
                let idx = self.known[slot].get()?;
                Some(self.headers_init()[usize::from(idx)].value())
            }
            // Slow path: dynamic name → compare against each stored header.
            None => {
                let wanted = name.as_header_bytes();
                self.headers_init()
                    .iter()
                    .find(|candidate| candidate.name().eq_ignore_ascii_case(wanted))
                    .map(|found| found.value())
            }
        }
    }

    /// Finds a header by name (case-insensitive) and returns its value as
    /// a UTF-8 string.
    ///
    /// Yields `Ok(None)` when [`header`](Self::header) finds no such
    /// header.
    ///
    /// # Errors
    ///
    /// Returns `std::str::Utf8Error` if the header value is not valid UTF-8.
    #[inline]
    pub fn header_str<'name>(
        &self,
        name: impl HeaderName<'name>,
    ) -> Result<Option<&'buf str>, std::str::Utf8Error> {
        match self.header(name) {
            Some(raw) => std::str::from_utf8(raw).map(Some),
            None => Ok(None),
        }
    }

    /// Returns the number of parsed headers.
    ///
    /// This equals the length of the slice returned by
    /// [`headers`](Self::headers).
    #[inline]
    #[must_use]
    pub const fn header_count(&self) -> usize {
        self.header_count
    }

    /// Returns all parsed headers as a slice.
    ///
    /// The slice covers only the initialized front of the internal header
    /// array, so its length is [`header_count`](Self::header_count);
    /// unused capacity up to `MAX_HDRS` is never exposed.
    #[inline]
    #[must_use]
    pub fn headers(&self) -> &[Header<'buf>] {
        self.headers_init()
    }

    /// Parses the `Content-Type` header value, if one is present.
    ///
    /// Yields `None` when the header is absent. When it is present the
    /// value is handed to `MediaType::parse` and that outcome is wrapped
    /// in `Some`, so a malformed header shows up as `Some(Err(_))`.
    #[inline]
    pub fn content_type(
        &self,
    ) -> Option<Result<crate::media::MediaType<'buf>, crate::error::MediaErrorKind>> {
        let raw = self.header(RequestHeader::ContentType)?;
        Some(crate::media::MediaType::parse(raw))
    }

    /// Returns the Content-Length value if the header is present.
    ///
    /// The value was parsed and validated at parse time; this accessor
    /// only reads the cached field and performs no header lookup.
    /// Returns `None` only when the header is absent.
    #[inline]
    #[must_use]
    pub const fn content_length(&self) -> Option<u64> {
        self.content_length
    }

    /// Reports how the request body is framed.
    ///
    /// Precedence, as implemented here:
    /// 1. a Transfer-Encoding header is present → `BodyKind::Chunked`;
    /// 2. otherwise a cached Content-Length → `BodyKind::ContentLength`;
    /// 3. otherwise → `BodyKind::None`.
    ///
    /// **Invariant:** This assumes Transfer-Encoding has been validated as
    /// "chunked" at parse time (`parse_impl` rejects unknown TE values).
    /// Do not call on a `Request` constructed outside `parse()`.
    #[inline]
    #[must_use]
    pub const fn body_kind(&self) -> crate::body::BodyKind {
        use crate::body::BodyKind;

        // Only presence is checked; the value itself is not re-inspected.
        let te_slot = self.known[RequestHeader::TransferEncoding as usize];

        if te_slot.is_some() {
            BodyKind::Chunked
        } else if let Some(len) = self.content_length {
            BodyKind::ContentLength(len)
        } else {
            BodyKind::None
        }
    }
}