set-cookie-parser 0.1.1

Parse Set-Cookie response headers into structured cookies, and split a comma-joined Set-Cookie header without choking on commas in Expires dates. A faithful port of the set-cookie-parser npm package. Zero dependencies, no_std.
Documentation
//! # set-cookie-parser — parse `Set-Cookie` response headers
//!
//! Parse a `Set-Cookie` header value into a structured [`Cookie`], and split a
//! comma-joined `Set-Cookie` string into individual cookies *without* choking on the
//! commas inside an `Expires` date. A faithful Rust port of the
//! [`set-cookie-parser`](https://www.npmjs.com/package/set-cookie-parser) npm package.
//! Zero dependencies and `#![no_std]`.
//!
//! ```
//! use set_cookie_parser::{parse, parse_all, split_cookies_string};
//!
//! let c = parse("sid=abc123; Path=/; HttpOnly; SameSite=Lax").unwrap();
//! assert_eq!(c.name, "sid");
//! assert_eq!(c.value, "abc123");
//! assert_eq!(c.path.as_deref(), Some("/"));
//! assert!(c.http_only);
//! assert_eq!(c.same_site.as_deref(), Some("Lax"));
//!
//! // A single combined header with two cookies (note the comma inside Expires):
//! let header = "a=1; Expires=Wed, 09 Jun 2021 10:18:14 GMT, b=2";
//! assert_eq!(split_cookies_string(header), ["a=1; Expires=Wed, 09 Jun 2021 10:18:14 GMT", "b=2"]);
//! assert_eq!(parse_all(header).len(), 2);
//! ```

#![no_std]
#![doc(html_root_url = "https://docs.rs/set-cookie-parser/0.1.0")]

extern crate alloc;

use alloc::string::{String, ToString};
use alloc::vec;
use alloc::vec::Vec;

// Compile-test the README's examples as part of `cargo test`: this placeholder item
// only exists under `cfg(doctest)`, and its doc attribute pulls in `../README.md`
// so the code blocks in the README are built and run as doctests.
#[cfg(doctest)]
#[doc = include_str!("../README.md")]
struct ReadmeDoctests;

/// A parsed cookie from a `Set-Cookie` header.
///
/// `name` and `value` come from the first `name=value` pair; the remaining fields are
/// the cookie's attributes (present only when set). `expires` is kept as the raw
/// header value (parse it with a date crate if you need a timestamp). Attributes other
/// than the well-known ones are collected, in order, into [`other`](Cookie::other).
///
/// Attribute names are matched case-insensitively (they are lower-cased before
/// matching); attribute values are stored exactly as they appear after the `=`.
/// The `Default` value is a cookie with empty name/value and no attributes.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct Cookie {
    /// The cookie name: the text before the first `=` of the first segment, taken
    /// verbatim (empty for a bare `value` with no `=`).
    pub name: String,
    /// The cookie value: everything after the first `=`. URI-decoded unless decoding
    /// is disabled; if decoding fails the raw text is kept.
    pub value: String,
    /// The raw `Expires` attribute value, if present.
    pub expires: Option<String>,
    /// The `Max-Age` attribute, parsed as an integer (à la JS `parseInt`). Stays
    /// `None` when the attribute is absent or does not start with an integer.
    pub max_age: Option<i64>,
    /// The `Domain` attribute.
    pub domain: Option<String>,
    /// The `Path` attribute.
    pub path: Option<String>,
    /// Whether the `Secure` attribute is present (any `=value` on it is ignored).
    pub secure: bool,
    /// Whether the `HttpOnly` attribute is present (any `=value` on it is ignored).
    pub http_only: bool,
    /// The `SameSite` attribute value (e.g. `Lax`, `Strict`, `None`).
    pub same_site: Option<String>,
    /// Whether the `Partitioned` attribute is present (any `=value` on it is ignored).
    pub partitioned: bool,
    /// Any other attributes, as `(lower-cased key, value)` pairs, in order of first
    /// appearance. A repeated key overwrites the earlier value in place.
    pub other: Vec<(String, String)>,
}

/// Parse one `Set-Cookie` header value into a [`Cookie`], with the cookie value
/// URI-decoded. Shorthand for [`parse_with`] with decoding switched on.
///
/// Yields `None` when the input is empty/blank, or when the cookie name is one of the
/// reserved JavaScript object keys (a prototype-pollution guard kept for fidelity).
///
/// ```
/// assert_eq!(set_cookie_parser::parse("foo=bar").unwrap().value, "bar");
/// assert_eq!(set_cookie_parser::parse("enc=a%20b").unwrap().value, "a b");
/// ```
#[must_use]
pub fn parse(set_cookie: &str) -> Option<Cookie> {
    const DECODE_VALUES: bool = true;
    parse_with(set_cookie, DECODE_VALUES)
}

/// Parse a single `Set-Cookie` header value, controlling whether the cookie value is
/// URI-decoded (`decodeURIComponent`). On a decode error the raw value is kept.
#[must_use]
pub fn parse_with(set_cookie: &str, decode_values: bool) -> Option<Cookie> {
    let parts: Vec<&str> = set_cookie
        .split(';')
        .filter(|p| !p.trim_matches(is_js_whitespace).is_empty())
        .collect();
    let name_value = *parts.first()?;
    let (name, raw_value) = parse_name_value_pair(name_value);
    if is_forbidden_key(name) {
        return None;
    }

    let value = if decode_values {
        decode_uri_component(raw_value).unwrap_or_else(|| raw_value.to_string())
    } else {
        raw_value.to_string()
    };

    let mut cookie = Cookie {
        name: name.to_string(),
        value,
        ..Cookie::default()
    };

    for part in &parts[1..] {
        let (key, val) = match part.split_once('=') {
            Some((k, v)) => (k, v),
            None => (*part, ""),
        };
        let key = key.trim_start_matches(is_js_whitespace).to_lowercase();
        if is_forbidden_key(&key) {
            continue;
        }
        match key.as_str() {
            "expires" => cookie.expires = Some(val.to_string()),
            "max-age" => {
                if let Some(n) = parse_int_js(val) {
                    cookie.max_age = Some(n);
                }
            }
            "domain" => cookie.domain = Some(val.to_string()),
            "path" => cookie.path = Some(val.to_string()),
            "secure" => cookie.secure = true,
            "httponly" => cookie.http_only = true,
            "samesite" => cookie.same_site = Some(val.to_string()),
            "partitioned" => cookie.partitioned = true,
            "" => {}
            _ => {
                // Mirror JS object assignment: a repeated key overwrites in place.
                if let Some(slot) = cookie.other.iter_mut().find(|(k, _)| *k == key) {
                    slot.1 = val.to_string();
                } else {
                    cookie.other.push((key, val.to_string()));
                }
            }
        }
    }

    Some(cookie)
}

/// Split a combined `Set-Cookie` header string into individual cookie strings.
///
/// Some servers/proxies join multiple `Set-Cookie` field values with commas. This
/// splits on those separators while leaving alone the commas inside a single value,
/// such as the date in an `Expires` attribute.
///
/// ```
/// assert_eq!(set_cookie_parser::split_cookies_string("a=1, b=2"), ["a=1", "b=2"]);
/// ```
#[must_use]
pub fn split_cookies_string(cookies_string: &str) -> Vec<String> {
    let chars: Vec<char> = cookies_string.chars().collect();
    let len = chars.len();
    let mut result = Vec::new();
    let mut pos = 0;

    while pos < len {
        let mut start = pos;
        let mut separator_found = false;

        loop {
            while pos < len && is_js_whitespace(chars[pos]) {
                pos += 1;
            }
            if pos >= len {
                break;
            }
            if chars[pos] == ',' {
                let last_comma = pos;
                pos += 1;
                while pos < len && is_js_whitespace(chars[pos]) {
                    pos += 1;
                }
                let next_start = pos;
                while pos < len && {
                    let c = chars[pos];
                    c != '=' && c != ';' && c != ','
                } {
                    pos += 1;
                }
                if pos < len && chars[pos] == '=' {
                    // A real cookie separator: the next token reaches a '='.
                    separator_found = true;
                    pos = next_start;
                    result.push(chars[start..last_comma].iter().collect());
                    start = pos;
                } else {
                    // A comma inside a value (e.g. an Expires date) — keep going.
                    pos = last_comma + 1;
                }
            } else {
                pos += 1;
            }
        }

        if !separator_found || pos >= len {
            result.push(chars[start..len].iter().collect());
        }
    }

    result
}

/// Split a comma-joined `Set-Cookie` header and parse every cookie in it, with the
/// cookie values URI-decoded. Shorthand for [`parse_all_with`] with decoding on.
///
/// ```
/// let cookies = set_cookie_parser::parse_all("a=1, b=2; Path=/");
/// assert_eq!(cookies.len(), 2);
/// assert_eq!(cookies[1].path.as_deref(), Some("/"));
/// ```
#[must_use]
pub fn parse_all(combined: &str) -> Vec<Cookie> {
    const DECODE_VALUES: bool = true;
    parse_all_with(combined, DECODE_VALUES)
}

/// Split a comma-joined `Set-Cookie` header and parse every cookie in it;
/// `decode_values` selects whether cookie values are URI-decoded.
///
/// A blank (empty or whitespace-only) input yields an empty vector. Pieces that
/// [`parse_with`] rejects are silently left out of the result.
#[must_use]
pub fn parse_all_with(combined: &str, decode_values: bool) -> Vec<Cookie> {
    if combined.chars().all(is_js_whitespace) {
        Vec::new()
    } else {
        split_cookies_string(combined)
            .iter()
            .filter_map(|piece| parse_with(piece, decode_values))
            .collect()
    }
}

/// Cut a `name=value` pair at its first `=`: the name is everything before it, the
/// value everything after it (later `=` signs stay in the value). A string without
/// any `=` is all value, with an empty name. Nothing is trimmed.
fn parse_name_value_pair(s: &str) -> (&str, &str) {
    s.split_once('=').unwrap_or(("", s))
}

/// Property names that exist on every plain JavaScript object. The reference
/// implementation guards against prototype pollution with `key in {}`; this list
/// reproduces that check, so these are rejected as cookie names and skipped as
/// attribute keys.
const FORBIDDEN_KEYS: &[&str] = &[
    "constructor",
    "__proto__",
    "__defineGetter__",
    "__defineSetter__",
    "hasOwnProperty",
    "__lookupGetter__",
    "__lookupSetter__",
    "isPrototypeOf",
    "propertyIsEnumerable",
    "toString",
    "valueOf",
    "toLocaleString",
];

/// Report whether `key` is exactly (case-sensitively) one of the `FORBIDDEN_KEYS`.
fn is_forbidden_key(key: &str) -> bool {
    FORBIDDEN_KEYS.iter().any(|&reserved| reserved == key)
}

/// Parse a leading base-10 integer like JS `parseInt`: skip leading whitespace, an
/// optional sign, then digits; ignore any trailing characters. `None` if no digits.
fn parse_int_js(s: &str) -> Option<i64> {
    let chars: Vec<char> = s.chars().collect();
    let mut i = 0;
    while i < chars.len() && is_js_whitespace(chars[i]) {
        i += 1;
    }
    let negative = match chars.get(i) {
        Some('+') => {
            i += 1;
            false
        }
        Some('-') => {
            i += 1;
            true
        }
        _ => false,
    };
    let start = i;
    let mut acc: i64 = 0;
    while i < chars.len() && chars[i].is_ascii_digit() {
        let d = i64::from(chars[i] as u32 - '0' as u32);
        acc = acc.saturating_mul(10).saturating_add(d);
        i += 1;
    }
    if i == start {
        return None;
    }
    Some(if negative { -acc } else { acc })
}

/// Decode `s` the way JS `decodeURIComponent` does: each run of `%XX` escapes is
/// interpreted as the UTF-8 encoding of one character, and every other character is
/// copied through unchanged.
///
/// Returns `None` (the counterpart of a JS `URIError`) when an escape is truncated or
/// not hexadecimal, when a lead byte is not a valid UTF-8 lead, when a continuation
/// escape is missing or malformed, or when the assembled bytes are not valid UTF-8.
fn decode_uri_component(s: &str) -> Option<String> {
    let units: Vec<char> = s.chars().collect();
    let mut decoded = String::new();
    let mut cursor = 0;

    while let Some(&ch) = units.get(cursor) {
        if ch != '%' {
            decoded.push(ch);
            cursor += 1;
            continue;
        }

        // Every escape occupies three characters: '%' plus two hex digits.
        let lead = read_hex_byte(&units, cursor)?;
        cursor += 3;
        if lead.is_ascii() {
            decoded.push(char::from(lead));
            continue;
        }

        // Multi-byte character: the lead byte fixes how many escapes must follow.
        let total = utf8_sequence_len(lead)?;
        let mut bytes = vec![lead];
        for _ in 1..total {
            if units.get(cursor) != Some(&'%') {
                return None;
            }
            let continuation = read_hex_byte(&units, cursor)?;
            if continuation & 0xC0 != 0x80 {
                return None;
            }
            bytes.push(continuation);
            cursor += 3;
        }
        // Rejects overlong forms, surrogates and out-of-range code points.
        decoded.push_str(core::str::from_utf8(&bytes).ok()?);
    }

    Some(decoded)
}

/// Combine the two hex digits at `chars[i + 1]` and `chars[i + 2]` (the positions
/// following a `%` at `chars[i]`) into one byte. `None` if either position is past
/// the end of the slice or does not hold a hexadecimal digit.
fn read_hex_byte(chars: &[char], i: usize) -> Option<u8> {
    let nibble = |offset: usize| -> Option<u8> {
        let digit = chars.get(i + offset)?.to_digit(16)?;
        u8::try_from(digit).ok()
    };
    let high = nibble(1)?;
    let low = nibble(2)?;
    Some((high << 4) | low)
}

/// Length in bytes of the UTF-8 sequence introduced by lead byte `b`, read off the
/// count of leading one-bits: `110xxxxx` → 2, `1110xxxx` → 3, `11110xxx` → 4.
/// `None` for anything else (ASCII, continuation bytes, and `0xF8..=0xFF`).
fn utf8_sequence_len(b: u8) -> Option<usize> {
    match b.leading_ones() {
        2 => Some(2),
        3 => Some(3),
        4 => Some(4),
        _ => None,
    }
}

/// Whether `c` counts as whitespace for JavaScript's regex `\s`: Rust's `White_Space`
/// set with NEL (`U+0085`) taken out and the BOM (`U+FEFF`) added.
fn is_js_whitespace(c: char) -> bool {
    match c {
        '\u{feff}' => true,
        '\u{0085}' => false,
        other => other.is_whitespace(),
    }
}