solid_pod_rs/wac/origin.rs
1//! `acl:origin` value objects and enforcement helpers (WAC §4.3 / F4).
2//!
3//! Implements the Origin gate described in
4//! `docs/design/jss-parity/03-wac-enforcement-context.md`. The gate runs
5//! **after** the existing agent / agent-class / mode / accessTo checks;
6//! if any authorisation in the effective ACL declares `acl:origin`
7//! triples, the request's `Origin` header must match one of them.
8//!
9//! This module is strictly additive: consumers that never pass an
10//! `Origin` value object observe no behavioural change, because an
11//! ACL with zero `acl:origin` triples yields [`OriginDecision::NoPolicySet`].
12//!
13//! # Ubiquitous language
14//!
15//! - **Origin**: RFC 6454 web origin, canonicalised as
16//! `scheme://host[:port]` with default ports (80/443) elided.
17//! - **OriginPattern**: a rule's declared origin list entry; exact
18//! origin, wildcard subdomain (`https://*.example.org`), or global
19//! wildcard (`*`). Global wildcard disables the gate for that rule.
20//! - **Origin gate**: the additional check that runs after agent matching.
21
22use std::collections::HashSet;
23
24use url::Url;
25
26use crate::wac::AclAuthorization;
27
28// ---------------------------------------------------------------------------
29// Origin — canonicalised `scheme://host[:port]`
30// ---------------------------------------------------------------------------
31
/// A web origin (RFC 6454) stored in its canonical serialised form.
///
/// The wrapped string always has the shape `scheme://host` or
/// `scheme://host:port`: scheme and host are lowercased and a port is
/// only present when it is not the scheme's default. Nothing but the
/// `(scheme, host, port)` tuple survives construction; path, query and
/// fragment are dropped.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub struct Origin(String);
40
41impl Origin {
42 /// Parse a string into a canonical [`Origin`].
43 ///
44 /// Accepts raw origin forms (`https://example.org`,
45 /// `https://example.org:8443`) as well as full URLs; in the latter
46 /// case path/query/fragment are discarded.
47 ///
48 /// Returns `None` if the input is not a parseable URL, has no host,
49 /// or uses a scheme without a hierarchical origin (e.g. `data:`).
50 pub fn parse(s: &str) -> Option<Self> {
51 let trimmed = s.trim();
52 if trimmed.is_empty() {
53 return None;
54 }
55 let url = Url::parse(trimmed).ok()?;
56 Self::from_url(&url)
57 }
58
59 /// Extract a canonical [`Origin`] from a parsed URL.
60 ///
61 /// Returns `None` for opaque-origin schemes (schemes without a host
62 /// such as `data:`, `javascript:`, `file:` without host).
63 pub fn from_url(url: &Url) -> Option<Self> {
64 let scheme = url.scheme().to_ascii_lowercase();
65 let host = url.host_str()?.to_ascii_lowercase();
66 let port = url.port(); // None when the URL uses the default port
67 let serialised = match port {
68 None => format!("{scheme}://{host}"),
69 Some(p) => format!("{scheme}://{host}:{p}"),
70 };
71 Some(Origin(serialised))
72 }
73
74 /// Canonical serialised form (e.g. `https://example.org`,
75 /// `https://example.org:8443`).
76 pub fn as_str(&self) -> &str {
77 &self.0
78 }
79}
80
81impl std::fmt::Display for Origin {
82 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
83 f.write_str(&self.0)
84 }
85}
86
87// ---------------------------------------------------------------------------
88// OriginPattern — exact / wildcard-subdomain / global wildcard
89// ---------------------------------------------------------------------------
90
91/// A rule-declared origin pattern.
92///
93/// Three forms are supported:
94///
95/// - **Exact**: `https://example.org` — matches only that origin.
96/// - **Wildcard subdomain**: `https://*.example.org` — matches any
97/// single-or-multi-level subdomain of `example.org` on the same
98/// scheme. Does **not** match the bare `example.org`.
99/// - **Global wildcard**: `*` — matches any origin. Equivalent to
100/// "origin gate effectively disabled for this rule". Discouraged;
101/// requires explicit opt-in (the caller must write `*` literally).
102#[derive(Debug, Clone, PartialEq, Eq)]
103pub enum OriginPattern {
104 /// Exact origin match.
105 Exact(Origin),
106 /// Wildcard subdomain: scheme + suffix (e.g. `https` + `example.org`).
107 Wildcard { scheme: String, suffix: String },
108 /// Global wildcard (`*`). Matches any origin.
109 Any,
110}
111
112impl OriginPattern {
113 /// Parse a pattern string.
114 ///
115 /// - `"*"` → [`OriginPattern::Any`]
116 /// - `"https://*.example.org"` → [`OriginPattern::Wildcard`]
117 /// - `"https://example.org"` / `"https://example.org:8443"` →
118 /// [`OriginPattern::Exact`]
119 ///
120 /// Returns `None` for malformed input (missing scheme, empty host,
121 /// trailing slashes, etc). The invariant in the DDD doc is that
122 /// only canonical origins are stored; the exact-origin branch uses
123 /// [`Origin::parse`] for strict validation.
124 pub fn parse(s: &str) -> Option<Self> {
125 let trimmed = s.trim();
126 if trimmed.is_empty() {
127 return None;
128 }
129 if trimmed == "*" {
130 return Some(OriginPattern::Any);
131 }
132 // Wildcard subdomain: scheme://*.suffix[:port]
133 if let Some(rest) = trimmed.strip_prefix("https://*.") {
134 return Self::parse_wildcard("https", rest);
135 }
136 if let Some(rest) = trimmed.strip_prefix("http://*.") {
137 return Self::parse_wildcard("http", rest);
138 }
139 // Exact origin — must round-trip through Origin::parse.
140 // Reject trailing slashes to match the DDD invariant
141 // "canonical scheme://host[:port]" only.
142 if trimmed.ends_with('/') {
143 return None;
144 }
145 let origin = Origin::parse(trimmed)?;
146 // Reject if Origin::parse canonicalised away user-supplied
147 // content (e.g. user supplied a path we quietly stripped).
148 let lc = trimmed.to_ascii_lowercase();
149 if origin.as_str() != lc {
150 return None;
151 }
152 Some(OriginPattern::Exact(origin))
153 }
154
155 fn parse_wildcard(scheme: &str, suffix_part: &str) -> Option<Self> {
156 // suffix_part is e.g. "example.org" or "example.org:8443".
157 if suffix_part.is_empty() {
158 return None;
159 }
160 // Basic host-shape validation: reject whitespace, empty labels,
161 // stray wildcard characters in the suffix itself.
162 if suffix_part.contains(char::is_whitespace) || suffix_part.contains('*') {
163 return None;
164 }
165 // Reject trailing slash / path segments.
166 if suffix_part.contains('/') {
167 return None;
168 }
169 Some(OriginPattern::Wildcard {
170 scheme: scheme.to_string(),
171 suffix: suffix_part.to_ascii_lowercase(),
172 })
173 }
174
175 /// Test whether a request origin matches this pattern.
176 pub fn matches(&self, origin: &Origin) -> bool {
177 match self {
178 OriginPattern::Any => true,
179 OriginPattern::Exact(expected) => expected == origin,
180 OriginPattern::Wildcard { scheme, suffix } => {
181 // Rebuild expected tuple from the request origin.
182 let serialised = origin.as_str();
183 let (req_scheme, req_rest) = match serialised.split_once("://") {
184 Some(v) => v,
185 None => return false,
186 };
187 if req_scheme != scheme {
188 return false;
189 }
190 // req_rest is host[:port]; match if host ends with
191 // ".{suffix}" with at least one non-empty label in
192 // front. Ports are not part of the "which suffix"
193 // question so they are stripped before comparison.
194 let req_host = match req_rest.split_once(':') {
195 Some((h, _)) => h,
196 None => req_rest,
197 };
198 let pattern_suffix = match suffix.split_once(':') {
199 Some((h, _)) => h,
200 None => suffix.as_str(),
201 };
202 let needle = format!(".{pattern_suffix}");
203 req_host.ends_with(&needle) && req_host.len() > needle.len()
204 }
205 }
206 }
207}
208
209// ---------------------------------------------------------------------------
210// Extraction from ACL rules + aggregate-level decision
211// ---------------------------------------------------------------------------
212
213/// Extract `acl:origin` patterns from a single authorisation.
214///
215/// Malformed entries are silently dropped (matches the forgiving
216/// posture of [`super::parse_turtle_acl`]); strict validation happens
217/// at write time, not read time.
218pub fn extract_origin_patterns(auth: &AclAuthorization) -> Vec<OriginPattern> {
219 let mut out = Vec::new();
220 if let Some(ids) = &auth.origin {
221 for id in iter_ids(ids) {
222 if let Some(p) = OriginPattern::parse(id) {
223 out.push(p);
224 }
225 }
226 }
227 out
228}
229
230fn iter_ids(ids: &crate::wac::IdOrIds) -> Vec<&str> {
231 match ids {
232 crate::wac::IdOrIds::Single(r) => vec![r.id.as_str()],
233 crate::wac::IdOrIds::Multiple(v) => v.iter().map(|r| r.id.as_str()).collect(),
234 }
235}
236
/// Outcome of running the origin gate for one request against one ACL
/// document.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum OriginDecision {
    /// The ACL carries no origin policy, so the gate is inactive.
    /// Permissive, which keeps pre-F4 ACLs working unchanged.
    NoPolicySet,
    /// The request origin matched a pattern of at least one
    /// authorisation.
    Permitted,
    /// An origin policy exists, the request supplied an origin, and no
    /// pattern matched it.
    RejectedMismatch,
    /// An origin policy exists but the request carried no `Origin`
    /// header.
    RejectedNoOrigin,
}
250
251/// Check whether the request origin is permitted by any authorisation's
252/// origin patterns in the supplied ACL document.
253///
254/// Semantics:
255///
256/// - If no authorisation declares `acl:origin` → [`OriginDecision::NoPolicySet`]
257/// (gate inactive; backward compatible).
258/// - If any authorisation declares patterns and the request origin
259/// matches at least one → [`OriginDecision::Permitted`].
260/// - If patterns exist but the request origin doesn't match any →
261/// [`OriginDecision::RejectedMismatch`].
262/// - If patterns exist and no `Origin` header was supplied →
263/// [`OriginDecision::RejectedNoOrigin`].
264///
265/// This check is ACL-document-wide, matching the doc's integration
266/// semantics for v0.4.0; rule-level short-circuiting is performed by
267/// the evaluator when combined with agent matching.
268pub fn check_origin(
269 acl: &crate::wac::AclDocument,
270 request_origin: Option<&Origin>,
271) -> OriginDecision {
272 let graph = match acl.graph.as_ref() {
273 Some(g) => g,
274 None => return OriginDecision::NoPolicySet,
275 };
276 let mut any_patterns = false;
277 let mut matched = false;
278 // Deduplicate patterns via a HashSet of their canonical string forms
279 // to avoid quadratic work when the same origin is repeated across
280 // many rules. Using Vec would also work; HashSet documents intent.
281 let mut seen: HashSet<String> = HashSet::new();
282 for auth in graph {
283 for pattern in extract_origin_patterns(auth) {
284 let key = pattern_key(&pattern);
285 if !seen.insert(key) {
286 continue;
287 }
288 any_patterns = true;
289 if let Some(req) = request_origin {
290 if pattern.matches(req) {
291 matched = true;
292 }
293 }
294 }
295 }
296 if !any_patterns {
297 OriginDecision::NoPolicySet
298 } else if matched {
299 OriginDecision::Permitted
300 } else if request_origin.is_none() {
301 OriginDecision::RejectedNoOrigin
302 } else {
303 OriginDecision::RejectedMismatch
304 }
305}
306
307fn pattern_key(p: &OriginPattern) -> String {
308 match p {
309 OriginPattern::Any => "*".to_string(),
310 OriginPattern::Exact(o) => format!("exact:{}", o.as_str()),
311 OriginPattern::Wildcard { scheme, suffix } => {
312 format!("wild:{scheme}://*.{suffix}")
313 }
314 }
315}
316
317// ---------------------------------------------------------------------------
318// Tests (module-local; broader integration lives in tests/acl_origin_test.rs)
319// ---------------------------------------------------------------------------
320
#[cfg(test)]
mod tests {
    use super::*;

    /// Parse an origin the test expects to be valid.
    fn origin(raw: &str) -> Origin {
        Origin::parse(raw).unwrap()
    }

    #[test]
    fn origin_parse_strips_default_https_port() {
        let parsed = origin("https://example.org:443/foo");
        assert_eq!(parsed.as_str(), "https://example.org");
    }

    #[test]
    fn origin_parse_preserves_non_default_port() {
        let parsed = origin("https://example.org:8443/foo");
        assert_eq!(parsed.as_str(), "https://example.org:8443");
    }

    #[test]
    fn origin_parse_lowercases_host_and_scheme() {
        let parsed = origin("HTTPS://Example.ORG");
        assert_eq!(parsed.as_str(), "https://example.org");
    }

    #[test]
    fn origin_parse_rejects_empty_and_opaque() {
        for raw in ["", "not a url", "data:text/plain,hello"] {
            assert!(Origin::parse(raw).is_none());
        }
    }

    #[test]
    fn pattern_any_matches_everything() {
        let any = OriginPattern::parse("*").unwrap();
        for raw in ["https://example.org", "http://foo.test:9000"] {
            assert!(any.matches(&origin(raw)));
        }
    }

    #[test]
    fn pattern_exact_requires_canonical_input() {
        assert!(OriginPattern::parse("https://example.org/").is_none());
        let parsed = OriginPattern::parse("https://example.org").unwrap();
        if let OriginPattern::Exact(exact) = parsed {
            assert_eq!(exact.as_str(), "https://example.org");
        } else {
            panic!("expected Exact");
        }
    }

    #[test]
    fn pattern_wildcard_rejects_bare_apex() {
        let wildcard = OriginPattern::parse("https://*.example.org").unwrap();
        assert!(!wildcard.matches(&origin("https://example.org")));
        assert!(wildcard.matches(&origin("https://app.example.org")));
        assert!(wildcard.matches(&origin("https://a.b.example.org:8443")));
    }
}
374}