Skip to main content

solid_pod_rs/
multitenant.rs

1//! Pod resolution from request host (Sprint 7 §6.3, ADR-057).
2//!
3//! JSS parity: `src/utils/url.js::urlToPathWithPod` and the
4//! `subdomainsEnabled && podName` branch in `getPathFromRequest`.
5//! We lift the policy ("which pod owns this request?") out of the
6//! URL-to-filesystem mapper so that call sites (LDP, WAC, quota) can
7//! consult it uniformly without each re-parsing the Host header.
8//!
9//! # Model
10//!
11//! - [`PathResolver`] — default single-tenant behaviour. The URL path
12//!   is the storage path verbatim and `pod` is `None`.
13//! - [`SubdomainResolver`] — `<pod>.<base_domain>` maps the first label
14//!   to a pod identifier; bare `<base_domain>` returns the root pod
15//!   (`pod: None`). Anything else (unknown subdomain tree) falls back
16//!   to path-based semantics.
17//!
18//! # Security
19//!
20//! Pod labels are scrubbed of `..` sequences with the same **double-pass**
21//! algorithm JSS uses in `urlToPathWithPod` (`..` is replaced until the
22//! string stops changing, defeating the `....//` bypass). Any resulting
23//! empty or path-containing label is rejected by falling back to path
24//! mode with `pod: None`.
25
/// Outcome of mapping a request onto a pod and a storage path.
///
/// Produced by every [`PodResolver`] implementation.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ResolvedPath {
    /// Pod identifier, or `None` for single-tenant mode / the root pod.
    pub pod: Option<String>,
    /// Storage path relative to the pod root (or to the global root
    /// when `pod` is `None`). Taken verbatim from the URL: no
    /// percent-decoding happens here; callers handle encoding via
    /// their storage trait.
    pub storage_path: String,
}
36
/// Policy that maps `(host, url_path)` onto a [`ResolvedPath`].
///
/// Implementations must be `Send + Sync` so a single resolver can be
/// shared across threads.
pub trait PodResolver: Send + Sync {
    /// Decide which pod (if any) owns the request and which storage
    /// path it addresses.
    ///
    /// * `host` — the request host as received; implementations in
    ///   this module accept an optional `:<port>` suffix.
    /// * `url_path` — the request URL path.
    ///
    /// The signature is infallible: a resolver that cannot derive a
    /// pod reports `pod: None` rather than an error.
    fn resolve(&self, host: &str, url_path: &str) -> ResolvedPath;
}
41
42// ---------------------------------------------------------------------------
43// PathResolver — single-tenant pass-through.
44// ---------------------------------------------------------------------------
45
/// Single-tenant / path-based resolver.
///
/// Equivalent to JSS's `subdomainsEnabled=false` mode: the URL path
/// *is* the storage path and there is no notion of a per-host pod.
/// The type is a stateless unit struct.
pub struct PathResolver;
50
51impl PodResolver for PathResolver {
52    fn resolve(&self, _host: &str, url_path: &str) -> ResolvedPath {
53        ResolvedPath {
54            pod: None,
55            storage_path: url_path.to_string(),
56        }
57    }
58}
59
60// ---------------------------------------------------------------------------
61// SubdomainResolver — `<pod>.<base_domain>` → pod = first label.
62// ---------------------------------------------------------------------------
63
/// Subdomain-based resolver.
///
/// Matches hosts of the form `<pod>.<base_domain>` and yields
/// `pod = Some(<pod>)`. The bare base domain yields `pod = None`
/// (root pod). Hosts outside the base domain tree fall back to
/// path-based semantics.
pub struct SubdomainResolver {
    /// Authoritative base domain (e.g. `"example.org"`). It is trimmed
    /// and compared ASCII-case-insensitively. The request host's
    /// `:<port>` suffix is stripped before matching; see
    /// [`strip_port`].
    pub base_domain: String,
}
73
74impl PodResolver for SubdomainResolver {
75    fn resolve(&self, host: &str, url_path: &str) -> ResolvedPath {
76        let host_no_port = strip_port(host);
77        let base = self.base_domain.trim().to_ascii_lowercase();
78        let host_lc = host_no_port.to_ascii_lowercase();
79
80        // Bare base domain → root pod.
81        if host_lc == base {
82            return ResolvedPath {
83                pod: None,
84                storage_path: url_path.to_string(),
85            };
86        }
87
88        // `<pod>.<base_domain>` — peel the suffix. Require the
89        // separator dot so `fooexample.org` doesn't match `example.org`.
90        let suffix = format!(".{base}");
91        if let Some(stripped) = host_lc.strip_suffix(&suffix) {
92            // Sprint 11 (row 125/162, JSS PR #307 commit 6d43e66):
93            // "subdomain mode: don't rewrite file-like paths as pod
94            // subdomains". If the leftmost label looks like a filename
95            // (ends in a common web asset extension), pass it through
96            // to the base apex instead of treating it as a pod name.
97            // This prevents `favicon.ico.pods.example.com` requests
98            // being rerouted to a non-existent pod named `favicon.ico`.
99            if is_file_like_label(stripped) {
100                return ResolvedPath {
101                    pod: None,
102                    storage_path: url_path.to_string(),
103                };
104            }
105
106            // Scrub `..` *first* (JSS double-pass) so that a label
107            // like `al..ice` normalises to `alice` before we decide
108            // whether it is a multi-label subdomain.
109            let safe = scrub_dotdot(stripped);
110            // Only accept single-label subdomains after scrubbing;
111            // multi-level subdomains (`a.b.example.org`) fall back to
112            // path mode so we don't accidentally promote DNS labels to
113            // pod names. Reject labels containing `/` or any residual
114            // `..` that somehow survived scrubbing.
115            if !safe.is_empty()
116                && !safe.contains('.')
117                && !safe.contains('/')
118                && !safe.contains("..")
119            {
120                return ResolvedPath {
121                    pod: Some(safe),
122                    storage_path: url_path.to_string(),
123                };
124            }
125        }
126
127        // Fallback policy: unknown host → path-based semantics. This
128        // mirrors JSS's `subdomainsEnabled && podName` guard: when no
129        // pod can be derived the server still serves from the shared
130        // root instead of rejecting.
131        ResolvedPath {
132            pod: None,
133            storage_path: url_path.to_string(),
134        }
135    }
136}
137
/// Sprint 11 (row 125/162, JSS PR #307 `6d43e66`): report whether a
/// hostname label looks like a filename that should be served from the
/// base apex rather than promoted to a pod subdomain.
///
/// The heuristic is intentionally conservative: the text from the last
/// `.` to the end of `label` must equal one of a small list of common
/// web-asset extensions. DNS labels are case-insensitive, so the
/// comparison ignores ASCII case.
///
/// Matching extensions (case-insensitive):
/// `.ttl`, `.html`, `.ico`, `.svg`, `.json`, `.jsonld`, `.png`, `.jpg`,
/// `.jpeg`, `.gif`, `.css`, `.js`, `.woff`, `.woff2`, `.txt`.
///
/// Returns `false` for any label without a `.`.
pub fn is_file_like_label(label: &str) -> bool {
    // Known web-asset extensions that JSS routes to static-serve rather
    // than pod-rewrite. None of them contains an inner dot, so only the
    // final dot-suffix of the label can ever match.
    const FILE_EXTENSIONS: &[&str] = &[
        ".ttl", ".html", ".ico", ".svg", ".json", ".jsonld", ".png", ".jpg", ".jpeg", ".gif",
        ".css", ".js", ".woff", ".woff2", ".txt",
    ];

    match label.rfind('.') {
        // No dot → no extension → cannot be file-like.
        None => false,
        Some(dot) => {
            let tail = &label[dot..];
            FILE_EXTENSIONS
                .iter()
                .any(|known| known.eq_ignore_ascii_case(tail))
        }
    }
}
166
167// ---------------------------------------------------------------------------
168// Helpers
169// ---------------------------------------------------------------------------
170
/// Strip an optional `:<port>` suffix from a host string.
///
/// * `example.org:8080` → `example.org`
/// * `example.org` → `example.org`
/// * `[::1]:8080` and `[::1]` → `[::1]` — a bracketed IPv6 literal is
///   returned intact (brackets included), so the colons inside it are
///   never mistaken for a port separator.
///
/// A host that opens a bracket but never closes it is returned
/// unchanged rather than guessed at. Unbracketed IPv6 literals are not
/// supported; operators running IPv6-native setups should prefer
/// [`PathResolver`].
fn strip_port(host: &str) -> &str {
    if host.starts_with('[') {
        // IP-literal form: everything up to and including `]` is the
        // host; anything after it can only be `:<port>`.
        return match host.find(']') {
            Some(close) => &host[..=close],
            None => host,
        };
    }
    match host.rfind(':') {
        Some(i) => &host[..i],
        None => host,
    }
}
180
/// Remove every `..` from `s` until none is left (JSS parity:
/// `urlToPathWithPod` lines 62-66 and 70-74), defeating the `....//`
/// bypass.
///
/// Implemented as a single left-to-right scan that cancels adjacent
/// dot pairs: a `.` arriving directly after a kept `.` removes that
/// dot instead of being appended. Each run of consecutive dots is
/// thereby reduced to zero dots (even length) or one dot (odd length),
/// which is exactly the fixed point of repeatedly deleting `..`.
/// All other characters are copied through untouched.
fn scrub_dotdot(s: &str) -> String {
    let mut cleaned = String::with_capacity(s.len());
    for ch in s.chars() {
        if ch == '.' && cleaned.ends_with('.') {
            // Second dot of a pair: drop both.
            cleaned.pop();
        } else {
            cleaned.push(ch);
        }
    }
    cleaned
}
194
195// ---------------------------------------------------------------------------
196// Unit tests — exercise helpers; integration coverage lives in
197// `tests/tenancy_subdomain.rs`.
198// ---------------------------------------------------------------------------
199
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn strip_port_handles_missing_port() {
        assert_eq!(strip_port("example.org"), "example.org");
        assert_eq!(strip_port("example.org:8080"), "example.org");
    }

    #[test]
    fn scrub_dotdot_is_double_pass() {
        assert_eq!(scrub_dotdot("al..ice"), "alice");
        // `....` is two back-to-back `..` pairs; the scrub must strip
        // both, leaving no dot behind.
        assert_eq!(scrub_dotdot("al....ice"), "alice");
        assert_eq!(scrub_dotdot("safe"), "safe");
    }

    /// Sprint 12 security hardening (JSS commit 2569811): the bypass
    /// `"....//foo"` must NOT produce `"../foo"` — iterative removal
    /// collapses all `..` sequences so the final result is `"//foo"`.
    #[test]
    fn scrub_dotdot_iterative_defeats_bypass() {
        // `....//foo` starts with two adjacent `..` pairs; both must
        // be removed, leaving `//foo` with no `..` anywhere.
        let result = scrub_dotdot("....//foo");
        assert!(
            !result.contains(".."),
            "iterative scrub must eliminate all `..`: got {result:?}"
        );
        assert_eq!(result, "//foo");
    }

    /// Verify the full subdomain resolver rejects the bypass attempt.
    /// `"....//foo"` as a subdomain label, after scrubbing, contains `/`
    /// and therefore falls back to path mode (pod: None).
    #[test]
    fn subdomain_rejects_dotdot_bypass_as_pod() {
        let r = SubdomainResolver {
            base_domain: "pods.example.com".into(),
        };
        // Host header: `....//foo.pods.example.com`
        let got = r.resolve("....//foo.pods.example.com", "/index.html");
        assert_eq!(
            got.pod, None,
            "bypass attempt must not produce a pod name"
        );
    }

    /// The path resolver must give the same answer whatever the host.
    #[test]
    fn path_resolver_ignores_host() {
        let r = PathResolver;
        let a = r.resolve("anything", "/x");
        let b = r.resolve("", "/x");
        assert_eq!(a, b);
        assert_eq!(a.pod, None);
    }

    // -----------------------------------------------------------------
    // Sprint 11 (row 125, 162): subdomain hardening — JSS PR #307.
    // -----------------------------------------------------------------

    /// Happy path: a single clean label in front of the base domain
    /// becomes the pod, and the URL path is passed through verbatim.
    #[test]
    fn subdomain_extracts_pod_name() {
        let r = SubdomainResolver {
            base_domain: "pods.example.com".into(),
        };
        let got = r.resolve("alice.pods.example.com", "/index.html");
        assert_eq!(got.pod.as_deref(), Some("alice"));
        assert_eq!(got.storage_path, "/index.html");
    }

    #[test]
    fn subdomain_file_like_label_passes_through() {
        // PR #307 regression: `favicon.ico.pods.example.com` must NOT
        // be rewritten to a pod named `favicon.ico`.
        let r = SubdomainResolver {
            base_domain: "pods.example.com".into(),
        };
        let got = r.resolve("favicon.ico.pods.example.com", "/");
        assert_eq!(got.pod, None, "file-like label must pass through");
        assert_eq!(got.storage_path, "/");
    }

    /// Same pass-through rule as above, exercised with `.html`.
    #[test]
    fn subdomain_html_label_passes_through() {
        let r = SubdomainResolver {
            base_domain: "pods.example.com".into(),
        };
        let got = r.resolve("index.html.pods.example.com", "/");
        assert_eq!(got.pod, None);
    }

    /// The bare base domain resolves to the root pod (`pod: None`).
    #[test]
    fn subdomain_base_domain_root() {
        let r = SubdomainResolver {
            base_domain: "pods.example.com".into(),
        };
        let got = r.resolve("pods.example.com", "/hello");
        assert_eq!(got.pod, None);
        assert_eq!(got.storage_path, "/hello");
    }

    /// Covers listed extensions (including a mixed-case one), an
    /// unlisted extension, and labels without any dot.
    #[test]
    fn is_file_like_label_matches_known_extensions() {
        assert!(is_file_like_label("favicon.ico"));
        assert!(is_file_like_label("style.css"));
        assert!(is_file_like_label("bundle.js"));
        assert!(is_file_like_label("icon.SVG"));
        assert!(is_file_like_label("profile.jsonld"));
        assert!(!is_file_like_label("hero.webp"), "unknown ext must not match");
        assert!(!is_file_like_label("alice"));
        assert!(!is_file_like_label("bob-smith"));
        // A label with a dot but unknown extension must not match.
        assert!(!is_file_like_label("foo.bar"));
    }
}
317}