cellos-core 0.7.2

CellOS domain types and ports — typed authority, formation DAG, CloudEvent envelopes, RBAC primitives. No I/O.
Documentation
//! Shared hostname allowlist matcher used by every L7 / DNS gate.
//!
//! Both the SEAM-1 DNS proxy ([`crate::dns_proxy`] in `cellos-supervisor`) and
//! the SEC-22 Phase 2 SNI proxy enforce the same matching semantics on
//! `dnsAuthority.hostnameAllowlist` — literal exact match or single-leading-`*.`
//! wildcard suffix match. Centralising the matcher here lets both gates evolve
//! together; if the contract ever admits more permissive forms (e.g. multi-`*`
//! patterns) the change lands in one place.
//!
//! # Match semantics
//!
//! - **Literal** (`api.example.com`): exact case-insensitive match. The query
//!   name `api.example.com` matches; `x.api.example.com` does NOT (subdomains
//!   of literal entries are not implicitly admitted).
//! - **Wildcard** (`*.cdn.example.com`): the `*.` prefix matches one or more
//!   subdomain labels. `foo.cdn.example.com` and `a.b.cdn.example.com` both
//!   match; `cdn.example.com` does NOT (the wildcard requires at least one
//!   extra label). Like RFC 2818 / RFC 6125 wildcards, the `*` is anchored to
//!   the leftmost position and does NOT match the bare apex; unlike those
//!   RFCs, it may span more than one label.
//! - Empty `host` always returns `false` (defensive; the SNI / DNS parsers
//!   are expected to reject empty names upstream).
//! - The `allowlist` itself is treated as case-insensitive on each entry; the
//!   caller does NOT need to lowercase entries before passing them.
//!
//! # What this matcher does NOT do
//!
//! - It does not validate that an entry is well-formed — that is
//!   [`crate::spec_validation::is_fqdn_or_wildcard`]'s job at admission time.
//! - It does not strip ports, schemes, or paths — pass the bare hostname.
//! - It does not handle IDN / punycode normalisation. Hosts and entries are
//!   compared as ASCII byte sequences after lowercasing; operators that need
//!   to admit IDN names should encode the punycode form in the allowlist.

/// Match `host` against an allowlist of literal or single-leading-`*.`
/// wildcard entries.
///
/// Returns `true` iff at least one entry matches `host`:
///
/// - A literal entry matches only the identical name; subdomains of a literal
///   entry are not admitted.
/// - A `*.<suffix>` entry matches any host of the form `<prefix>.<suffix>`
///   where `<prefix>` is non-empty. The bare `<suffix>` never matches.
/// - A wildcard entry with an empty suffix (the entry `*.`) matches nothing.
///   Without this rule such an entry would admit every host that ends in a
///   trailing dot, so it is rejected here even though entry validation is
///   expected to happen before the allowlist reaches this function.
///
/// Comparison is ASCII case-insensitive on both the host and each entry;
/// non-ASCII bytes are compared verbatim. An empty `host` never matches.
/// No allocation is performed.
///
/// # Examples
///
/// ```
/// use cellos_core::hostname_allowlist::matches_allowlist;
///
/// // Literal match.
/// assert!(matches_allowlist("api.example.com", &["api.example.com".into()]));
/// assert!(!matches_allowlist("x.api.example.com", &["api.example.com".into()]));
///
/// // Wildcard match.
/// assert!(matches_allowlist(
///     "foo.cdn.example.com",
///     &["*.cdn.example.com".into()],
/// ));
/// // Wildcard does NOT match the bare apex.
/// assert!(!matches_allowlist(
///     "cdn.example.com",
///     &["*.cdn.example.com".into()],
/// ));
///
/// // Case-insensitive on both sides.
/// assert!(matches_allowlist("API.Example.COM", &["api.example.com".into()]));
///
/// // A suffix-less wildcard entry admits nothing.
/// assert!(!matches_allowlist("example.com.", &["*.".into()]));
/// ```
pub fn matches_allowlist(host: &str, allowlist: &[String]) -> bool {
    if host.is_empty() {
        return false;
    }
    let host_bytes = host.as_bytes();
    allowlist.iter().any(|entry| match entry.strip_prefix("*.") {
        // `*.` carries no suffix to anchor on; treating it as a wildcard would
        // admit any host with a trailing dot, so fail closed.
        Some("") => false,
        Some(suffix) => {
            let suffix = suffix.as_bytes();
            // Index of the `.` that must separate the wildcard-covered prefix
            // from the suffix. `checked_sub` rejects hosts too short to hold
            // `.<suffix>`; `dot > 0` demands at least one byte before the dot,
            // which is what keeps the bare apex out.
            match host_bytes.len().checked_sub(suffix.len() + 1) {
                // The dot-anchor prevents `evil-cdn.example.com` from matching
                // `*.cdn.example.com`. Working on bytes avoids any char-boundary
                // panic when the host contains non-ASCII text.
                Some(dot) if dot > 0 => {
                    host_bytes[dot] == b'.'
                        && host_bytes[dot + 1..].eq_ignore_ascii_case(suffix)
                }
                _ => false,
            }
        }
        None => host.eq_ignore_ascii_case(entry),
    })
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Build an owned allowlist from string literals.
    fn allow(entries: &[&str]) -> Vec<String> {
        entries.iter().map(|entry| (*entry).to_string()).collect()
    }

    /// Shorthand: does `host` pass an allowlist made of `entries`?
    fn admitted(host: &str, entries: &[&str]) -> bool {
        matches_allowlist(host, &allow(entries))
    }

    #[test]
    fn literal_match_exact() {
        assert!(admitted("api.example.com", &["api.example.com"]));
    }

    #[test]
    fn literal_does_not_match_subdomain() {
        assert!(!admitted("x.api.example.com", &["api.example.com"]));
    }

    #[test]
    fn wildcard_matches_one_label() {
        assert!(admitted("foo.cdn.example.com", &["*.cdn.example.com"]));
    }

    #[test]
    fn wildcard_matches_multiple_labels() {
        assert!(admitted("deep.foo.cdn.example.com", &["*.cdn.example.com"]));
    }

    #[test]
    fn wildcard_does_not_match_apex() {
        assert!(!admitted("cdn.example.com", &["*.cdn.example.com"]));
    }

    #[test]
    fn wildcard_does_not_match_sibling_with_dash() {
        // Without the dot-anchor, `evil-cdn.example.com` would pass as a
        // plain string suffix of `*.cdn.example.com`.
        assert!(!admitted("evil-cdn.example.com", &["*.cdn.example.com"]));
    }

    #[test]
    fn empty_host_returns_false() {
        assert!(!admitted("", &["api.example.com", "*.cdn.example.com"]));
    }

    #[test]
    fn empty_allowlist_returns_false() {
        assert!(!admitted("api.example.com", &[]));
    }

    #[test]
    fn case_insensitive_on_host() {
        assert!(admitted("API.Example.COM", &["api.example.com"]));
    }

    #[test]
    fn case_insensitive_on_entry() {
        assert!(admitted("api.example.com", &["API.EXAMPLE.COM"]));
    }

    #[test]
    fn multiple_entries_or_semantics() {
        // One shared list: a host is admitted when any single entry matches.
        let entries = allow(&["api.example.com", "*.cdn.example.com"]);
        assert!(matches_allowlist("api.example.com", &entries));
        assert!(matches_allowlist("img.cdn.example.com", &entries));
        assert!(!matches_allowlist("blocked.example.com", &entries));
    }
}