Skip to main content

solid_pod_rs/wac/
origin.rs

//! `acl:origin` value objects and enforcement helpers (WAC §4.3 / F4).
//!
//! Implements the Origin gate described in
//! `docs/design/jss-parity/03-wac-enforcement-context.md`. The gate runs
//! **after** the existing agent / agent-class / mode / accessTo checks;
//! if any authorisation in the effective ACL declares `acl:origin`
//! triples, the request's `Origin` header must match one of them.
//!
//! This module is strictly additive: consumers that never pass an
//! `Origin` value object observe no behavioural change, because an
//! ACL with zero `acl:origin` triples yields [`OriginDecision::NoPolicySet`].
//!
//! # Ubiquitous language
//!
//! - **Origin**: RFC 6454 web origin, canonicalised as
//!   `scheme://host[:port]` with default ports (80/443) elided.
//! - **OriginPattern**: a rule's declared origin list entry; exact
//!   origin, wildcard subdomain (`https://*.example.org`), or global
//!   wildcard (`*`). Global wildcard disables the gate for that rule.
//! - **Origin gate**: the additional check that runs after agent matching.
21
22use std::collections::HashSet;
23
24use url::Url;
25
26use crate::wac::AclAuthorization;
27
// ---------------------------------------------------------------------------
// Origin — canonicalised `scheme://host[:port]`
// ---------------------------------------------------------------------------
31
/// A web origin (RFC 6454) held in canonical serialised form.
///
/// The wrapped string is always `scheme://host` or `scheme://host:port`,
/// lowercased and with the scheme's default port omitted. Only the
/// `(scheme, host, port)` tuple is kept: any path, query or fragment
/// present in the input is dropped during construction.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub struct Origin(String);
40
41impl Origin {
42    /// Parse a string into a canonical [`Origin`].
43    ///
44    /// Accepts raw origin forms (`https://example.org`,
45    /// `https://example.org:8443`) as well as full URLs; in the latter
46    /// case path/query/fragment are discarded.
47    ///
48    /// Returns `None` if the input is not a parseable URL, has no host,
49    /// or uses a scheme without a hierarchical origin (e.g. `data:`).
50    pub fn parse(s: &str) -> Option<Self> {
51        let trimmed = s.trim();
52        if trimmed.is_empty() {
53            return None;
54        }
55        let url = Url::parse(trimmed).ok()?;
56        Self::from_url(&url)
57    }
58
59    /// Extract a canonical [`Origin`] from a parsed URL.
60    ///
61    /// Returns `None` for opaque-origin schemes (schemes without a host
62    /// such as `data:`, `javascript:`, `file:` without host).
63    pub fn from_url(url: &Url) -> Option<Self> {
64        let scheme = url.scheme().to_ascii_lowercase();
65        let host = url.host_str()?.to_ascii_lowercase();
66        let port = url.port(); // None when the URL uses the default port
67        let serialised = match port {
68            None => format!("{scheme}://{host}"),
69            Some(p) => format!("{scheme}://{host}:{p}"),
70        };
71        Some(Origin(serialised))
72    }
73
74    /// Canonical serialised form (e.g. `https://example.org`,
75    /// `https://example.org:8443`).
76    pub fn as_str(&self) -> &str {
77        &self.0
78    }
79}
80
81impl std::fmt::Display for Origin {
82    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
83        f.write_str(&self.0)
84    }
85}
86
// ---------------------------------------------------------------------------
// OriginPattern — exact / wildcard-subdomain / global wildcard
// ---------------------------------------------------------------------------
90
91/// A rule-declared origin pattern.
92///
93/// Three forms are supported:
94///
95/// - **Exact**: `https://example.org` — matches only that origin.
96/// - **Wildcard subdomain**: `https://*.example.org` — matches any
97///   single-or-multi-level subdomain of `example.org` on the same
98///   scheme. Does **not** match the bare `example.org`.
99/// - **Global wildcard**: `*` — matches any origin. Equivalent to
100///   "origin gate effectively disabled for this rule". Discouraged;
101///   requires explicit opt-in (the caller must write `*` literally).
102#[derive(Debug, Clone, PartialEq, Eq)]
103pub enum OriginPattern {
104    /// Exact origin match.
105    Exact(Origin),
106    /// Wildcard subdomain: scheme + suffix (e.g. `https` + `example.org`).
107    Wildcard { scheme: String, suffix: String },
108    /// Global wildcard (`*`). Matches any origin.
109    Any,
110}
111
112impl OriginPattern {
113    /// Parse a pattern string.
114    ///
115    /// - `"*"` → [`OriginPattern::Any`]
116    /// - `"https://*.example.org"` → [`OriginPattern::Wildcard`]
117    /// - `"https://example.org"` / `"https://example.org:8443"` →
118    ///   [`OriginPattern::Exact`]
119    ///
120    /// Returns `None` for malformed input (missing scheme, empty host,
121    /// trailing slashes, etc). The invariant in the DDD doc is that
122    /// only canonical origins are stored; the exact-origin branch uses
123    /// [`Origin::parse`] for strict validation.
124    pub fn parse(s: &str) -> Option<Self> {
125        let trimmed = s.trim();
126        if trimmed.is_empty() {
127            return None;
128        }
129        if trimmed == "*" {
130            return Some(OriginPattern::Any);
131        }
132        // Wildcard subdomain: scheme://*.suffix[:port]
133        if let Some(rest) = trimmed.strip_prefix("https://*.") {
134            return Self::parse_wildcard("https", rest);
135        }
136        if let Some(rest) = trimmed.strip_prefix("http://*.") {
137            return Self::parse_wildcard("http", rest);
138        }
139        // Exact origin — must round-trip through Origin::parse.
140        // Reject trailing slashes to match the DDD invariant
141        // "canonical scheme://host[:port]" only.
142        if trimmed.ends_with('/') {
143            return None;
144        }
145        let origin = Origin::parse(trimmed)?;
146        // Reject if Origin::parse canonicalised away user-supplied
147        // content (e.g. user supplied a path we quietly stripped).
148        let lc = trimmed.to_ascii_lowercase();
149        if origin.as_str() != lc {
150            return None;
151        }
152        Some(OriginPattern::Exact(origin))
153    }
154
155    fn parse_wildcard(scheme: &str, suffix_part: &str) -> Option<Self> {
156        // suffix_part is e.g. "example.org" or "example.org:8443".
157        if suffix_part.is_empty() {
158            return None;
159        }
160        // Basic host-shape validation: reject whitespace, empty labels,
161        // stray wildcard characters in the suffix itself.
162        if suffix_part.contains(char::is_whitespace) || suffix_part.contains('*') {
163            return None;
164        }
165        // Reject trailing slash / path segments.
166        if suffix_part.contains('/') {
167            return None;
168        }
169        Some(OriginPattern::Wildcard {
170            scheme: scheme.to_string(),
171            suffix: suffix_part.to_ascii_lowercase(),
172        })
173    }
174
175    /// Test whether a request origin matches this pattern.
176    pub fn matches(&self, origin: &Origin) -> bool {
177        match self {
178            OriginPattern::Any => true,
179            OriginPattern::Exact(expected) => expected == origin,
180            OriginPattern::Wildcard { scheme, suffix } => {
181                // Rebuild expected tuple from the request origin.
182                let serialised = origin.as_str();
183                let (req_scheme, req_rest) = match serialised.split_once("://") {
184                    Some(v) => v,
185                    None => return false,
186                };
187                if req_scheme != scheme {
188                    return false;
189                }
190                // req_rest is host[:port]; match if host ends with
191                // ".{suffix}" with at least one non-empty label in
192                // front. Ports are not part of the "which suffix"
193                // question so they are stripped before comparison.
194                let req_host = match req_rest.split_once(':') {
195                    Some((h, _)) => h,
196                    None => req_rest,
197                };
198                let pattern_suffix = match suffix.split_once(':') {
199                    Some((h, _)) => h,
200                    None => suffix.as_str(),
201                };
202                let needle = format!(".{pattern_suffix}");
203                req_host.ends_with(&needle) && req_host.len() > needle.len()
204            }
205        }
206    }
207}
208
// ---------------------------------------------------------------------------
// Extraction from ACL rules + aggregate-level decision
// ---------------------------------------------------------------------------
212
213/// Extract `acl:origin` patterns from a single authorisation.
214///
215/// Malformed entries are silently dropped (matches the forgiving
216/// posture of [`super::parse_turtle_acl`]); strict validation happens
217/// at write time, not read time.
218pub fn extract_origin_patterns(auth: &AclAuthorization) -> Vec<OriginPattern> {
219    let mut out = Vec::new();
220    if let Some(ids) = &auth.origin {
221        for id in iter_ids(ids) {
222            if let Some(p) = OriginPattern::parse(id) {
223                out.push(p);
224            }
225        }
226    }
227    out
228}
229
230fn iter_ids(ids: &crate::wac::IdOrIds) -> Vec<&str> {
231    match ids {
232        crate::wac::IdOrIds::Single(r) => vec![r.id.as_str()],
233        crate::wac::IdOrIds::Multiple(v) => v.iter().map(|r| r.id.as_str()).collect(),
234    }
235}
236
/// Outcome of running the origin gate for one request against an ACL
/// document.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum OriginDecision {
    /// The ACL has no `acl:origin` policy at all, so the gate is
    /// inactive. Permissive, which keeps pre-F4 ACLs working unchanged.
    NoPolicySet,
    /// The request origin matched a pattern of at least one
    /// authorisation.
    Permitted,
    /// A policy exists, the request carried an origin, and no pattern
    /// matched it.
    RejectedMismatch,
    /// A policy exists but the request carried no `Origin` header.
    RejectedNoOrigin,
}
250
251/// Check whether the request origin is permitted by any authorisation's
252/// origin patterns in the supplied ACL document.
253///
254/// Semantics:
255///
256/// - If no authorisation declares `acl:origin` → [`OriginDecision::NoPolicySet`]
257///   (gate inactive; backward compatible).
258/// - If any authorisation declares patterns and the request origin
259///   matches at least one → [`OriginDecision::Permitted`].
260/// - If patterns exist but the request origin doesn't match any →
261///   [`OriginDecision::RejectedMismatch`].
262/// - If patterns exist and no `Origin` header was supplied →
263///   [`OriginDecision::RejectedNoOrigin`].
264///
265/// This check is ACL-document-wide, matching the doc's integration
266/// semantics for v0.4.0; rule-level short-circuiting is performed by
267/// the evaluator when combined with agent matching.
268pub fn check_origin(
269    acl: &crate::wac::AclDocument,
270    request_origin: Option<&Origin>,
271) -> OriginDecision {
272    let graph = match acl.graph.as_ref() {
273        Some(g) => g,
274        None => return OriginDecision::NoPolicySet,
275    };
276    let mut any_patterns = false;
277    let mut matched = false;
278    // Deduplicate patterns via a HashSet of their canonical string forms
279    // to avoid quadratic work when the same origin is repeated across
280    // many rules. Using Vec would also work; HashSet documents intent.
281    let mut seen: HashSet<String> = HashSet::new();
282    for auth in graph {
283        for pattern in extract_origin_patterns(auth) {
284            let key = pattern_key(&pattern);
285            if !seen.insert(key) {
286                continue;
287            }
288            any_patterns = true;
289            if let Some(req) = request_origin {
290                if pattern.matches(req) {
291                    matched = true;
292                }
293            }
294        }
295    }
296    if !any_patterns {
297        OriginDecision::NoPolicySet
298    } else if matched {
299        OriginDecision::Permitted
300    } else if request_origin.is_none() {
301        OriginDecision::RejectedNoOrigin
302    } else {
303        OriginDecision::RejectedMismatch
304    }
305}
306
307fn pattern_key(p: &OriginPattern) -> String {
308    match p {
309        OriginPattern::Any => "*".to_string(),
310        OriginPattern::Exact(o) => format!("exact:{}", o.as_str()),
311        OriginPattern::Wildcard { scheme, suffix } => {
312            format!("wild:{scheme}://*.{suffix}")
313        }
314    }
315}
316
// ---------------------------------------------------------------------------
// Tests (module-local; broader integration lives in tests/acl_origin_test.rs)
// ---------------------------------------------------------------------------
320
#[cfg(test)]
mod tests {
    use super::*;

    /// Parse `s` as an origin, panicking with the offending input if it
    /// is rejected.
    fn origin(s: &str) -> Origin {
        Origin::parse(s).unwrap_or_else(|| panic!("test origin should parse: {s}"))
    }

    /// Parse `s` as a pattern, panicking with the offending input if it
    /// is rejected.
    fn pattern(s: &str) -> OriginPattern {
        OriginPattern::parse(s).unwrap_or_else(|| panic!("test pattern should parse: {s}"))
    }

    #[test]
    fn origin_parse_strips_default_https_port() {
        assert_eq!(
            origin("https://example.org:443/foo").as_str(),
            "https://example.org"
        );
    }

    #[test]
    fn origin_parse_preserves_non_default_port() {
        assert_eq!(
            origin("https://example.org:8443/foo").as_str(),
            "https://example.org:8443"
        );
    }

    #[test]
    fn origin_parse_lowercases_host_and_scheme() {
        assert_eq!(origin("HTTPS://Example.ORG").as_str(), "https://example.org");
    }

    #[test]
    fn origin_parse_rejects_empty_and_opaque() {
        for input in ["", "not a url", "data:text/plain,hello"] {
            assert!(Origin::parse(input).is_none(), "should reject {input:?}");
        }
    }

    #[test]
    fn pattern_any_matches_everything() {
        let any = pattern("*");
        for input in ["https://example.org", "http://foo.test:9000"] {
            assert!(any.matches(&origin(input)), "`*` should match {input}");
        }
    }

    #[test]
    fn pattern_exact_requires_canonical_input() {
        assert!(OriginPattern::parse("https://example.org/").is_none());
        assert_eq!(
            pattern("https://example.org"),
            OriginPattern::Exact(origin("https://example.org"))
        );
    }

    #[test]
    fn pattern_wildcard_rejects_bare_apex() {
        let wildcard = pattern("https://*.example.org");
        assert!(!wildcard.matches(&origin("https://example.org")));
        assert!(wildcard.matches(&origin("https://app.example.org")));
        assert!(wildcard.matches(&origin("https://a.b.example.org:8443")));
    }
}