solid_pod_rs/multitenant.rs
1//! Pod resolution from request host (Sprint 7 §6.3, ADR-057).
2//!
3//! JSS parity: `src/utils/url.js::urlToPathWithPod` and the
4//! `subdomainsEnabled && podName` branch in `getPathFromRequest`.
5//! We lift the policy ("which pod owns this request?") out of the
6//! URL-to-filesystem mapper so that call sites (LDP, WAC, quota) can
7//! consult it uniformly without each re-parsing the Host header.
8//!
9//! # Model
10//!
11//! - [`PathResolver`] — default single-tenant behaviour. The URL path
12//! is the storage path verbatim and `pod` is `None`.
13//! - [`SubdomainResolver`] — `<pod>.<base_domain>` maps the first label
14//! to a pod identifier; bare `<base_domain>` returns the root pod
15//! (`pod: None`). Anything else (unknown subdomain tree) falls back
16//! to path-based semantics.
17//!
18//! # Security
19//!
20//! Pod labels are scrubbed of `..` sequences with the same **double-pass**
21//! algorithm JSS uses in `urlToPathWithPod` (`..` is replaced until the
22//! string stops changing, defeating the `....//` bypass). Any resulting
23//! empty or path-containing label is rejected by falling back to path
24//! mode with `pod: None`.
25
/// Result of resolving a request to a pod + storage path.
///
/// Returned by [`PodResolver::resolve`]. Two values compare equal when
/// both `pod` and `storage_path` are equal (derived `PartialEq`/`Eq`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ResolvedPath {
    /// Pod identifier, or `None` for single-tenant / root pod.
    pub pod: Option<String>,
    /// Storage path relative to the pod root (or global root when
    /// `pod` is `None`). Verbatim from the URL — no percent-decoding
    /// here; callers handle encoding via their storage trait.
    pub storage_path: String,
}
36
/// Policy that maps `(host, url_path)` onto a [`ResolvedPath`].
///
/// The `Send + Sync` supertraits mean every implementation can be
/// shared between threads.
pub trait PodResolver: Send + Sync {
    /// Decide which pod, if any, owns a request.
    ///
    /// * `host` — the request host; it may carry a `:<port>` suffix
    ///   ([`SubdomainResolver`] strips it, [`PathResolver`] ignores the
    ///   host entirely).
    /// * `url_path` — the request URL path.
    ///
    /// The method is infallible: the signature has no error channel, and
    /// the resolvers in this module answer with `pod: None` when no pod
    /// can be derived.
    fn resolve(&self, host: &str, url_path: &str) -> ResolvedPath;
}
41
42// ---------------------------------------------------------------------------
43// PathResolver — single-tenant pass-through.
44// ---------------------------------------------------------------------------
45
46/// Single-tenant / path-based resolver. Equivalent to JSS's
47/// `subdomainsEnabled=false` mode: the URL path *is* the storage path
48/// and there is no notion of a per-host pod.
49pub struct PathResolver;
50
51impl PodResolver for PathResolver {
52 fn resolve(&self, _host: &str, url_path: &str) -> ResolvedPath {
53 ResolvedPath {
54 pod: None,
55 storage_path: url_path.to_string(),
56 }
57 }
58}
59
60// ---------------------------------------------------------------------------
61// SubdomainResolver — `<pod>.<base_domain>` → pod = first label.
62// ---------------------------------------------------------------------------
63
/// Subdomain-based resolver. Matches hosts of the form
/// `<pod>.<base_domain>` and yields `pod = Some(<pod>)`. The bare
/// base domain yields `pod = None` (root pod). Hosts outside the base
/// domain tree fall back to path-based semantics.
pub struct SubdomainResolver {
    /// Authoritative base domain (e.g. `"example.org"`).
    ///
    /// At match time this value is trimmed of surrounding whitespace and
    /// ASCII-lower-cased. Only the *request host* has its port removed
    /// (see `strip_port`); this value is compared as configured, so a
    /// base domain that itself contains `:<port>` can never match.
    pub base_domain: String,
}
73
74impl PodResolver for SubdomainResolver {
75 fn resolve(&self, host: &str, url_path: &str) -> ResolvedPath {
76 let host_no_port = strip_port(host);
77 let base = self.base_domain.trim().to_ascii_lowercase();
78 let host_lc = host_no_port.to_ascii_lowercase();
79
80 // Bare base domain → root pod.
81 if host_lc == base {
82 return ResolvedPath {
83 pod: None,
84 storage_path: url_path.to_string(),
85 };
86 }
87
88 // `<pod>.<base_domain>` — peel the suffix. Require the
89 // separator dot so `fooexample.org` doesn't match `example.org`.
90 let suffix = format!(".{base}");
91 if let Some(stripped) = host_lc.strip_suffix(&suffix) {
92 // Sprint 11 (row 125/162, JSS PR #307 commit 6d43e66):
93 // "subdomain mode: don't rewrite file-like paths as pod
94 // subdomains". If the leftmost label looks like a filename
95 // (ends in a common web asset extension), pass it through
96 // to the base apex instead of treating it as a pod name.
97 // This prevents `favicon.ico.pods.example.com` requests
98 // being rerouted to a non-existent pod named `favicon.ico`.
99 if is_file_like_label(stripped) {
100 return ResolvedPath {
101 pod: None,
102 storage_path: url_path.to_string(),
103 };
104 }
105
106 // Scrub `..` *first* (JSS double-pass) so that a label
107 // like `al..ice` normalises to `alice` before we decide
108 // whether it is a multi-label subdomain.
109 let safe = scrub_dotdot(stripped);
110 // Only accept single-label subdomains after scrubbing;
111 // multi-level subdomains (`a.b.example.org`) fall back to
112 // path mode so we don't accidentally promote DNS labels to
113 // pod names. Reject labels containing `/` or any residual
114 // `..` that somehow survived scrubbing.
115 if !safe.is_empty()
116 && !safe.contains('.')
117 && !safe.contains('/')
118 && !safe.contains("..")
119 {
120 return ResolvedPath {
121 pod: Some(safe),
122 storage_path: url_path.to_string(),
123 };
124 }
125 }
126
127 // Fallback policy: unknown host → path-based semantics. This
128 // mirrors JSS's `subdomainsEnabled && podName` guard: when no
129 // pod can be derived the server still serves from the shared
130 // root instead of rejecting.
131 ResolvedPath {
132 pod: None,
133 storage_path: url_path.to_string(),
134 }
135 }
136}
137
/// Sprint 11 (row 125/162, JSS PR #307 `6d43e66`): report whether a
/// hostname prefix looks like a file name, in which case it should be
/// served from the base apex rather than promoted to a pod subdomain.
///
/// Only the text after the last `.` is examined, and it must equal one
/// of a short, deliberately conservative list of web-asset extensions.
/// The comparison ignores ASCII case because DNS labels are
/// case-insensitive.
///
/// Recognised extensions:
/// `.ttl`, `.html`, `.ico`, `.svg`, `.json`, `.jsonld`, `.png`, `.jpg`,
/// `.jpeg`, `.gif`, `.css`, `.js`, `.woff`, `.woff2`, `.txt`.
///
/// Returns `false` for input without any `.` and for unknown extensions.
pub fn is_file_like_label(label: &str) -> bool {
    // Web-asset extensions that JSS routes to static-serve rather than
    // pod-rewrite, stored without the leading dot.
    const ASSET_EXTENSIONS: &[&str] = &[
        "ttl", "html", "ico", "svg", "json", "jsonld", "png", "jpg", "jpeg", "gif", "css", "js",
        "woff", "woff2", "txt",
    ];

    match label.rsplit_once('.') {
        // No dot at all: there is no extension to inspect.
        None => false,
        Some((_, extension)) => ASSET_EXTENSIONS
            .iter()
            .any(|known| extension.eq_ignore_ascii_case(known)),
    }
}
166
167// ---------------------------------------------------------------------------
168// Helpers
169// ---------------------------------------------------------------------------
170
/// Strip an optional `:<port>` suffix from a request host.
///
/// * `example.org:8080` → `example.org`
/// * `example.org` → `example.org`
/// * `[::1]:8080` and `[::1]` → `[::1]`
///
/// A bracketed IPv6 literal is cut immediately after its closing `]`, so
/// the colons inside the address are never mistaken for a port
/// separator. Any other host is cut at its last `:`; a host that opens
/// with `[` but never closes it is handled the same way.
///
/// Subdomain matching still compares the result against a DNS base
/// domain, so operators running IPv6-native setups should prefer
/// [`PathResolver`].
fn strip_port(host: &str) -> &str {
    if host.starts_with('[') {
        if let Some(close) = host.find(']') {
            // `]` is a single ASCII byte, so the inclusive range ends on
            // a character boundary.
            return &host[..=close];
        }
    }

    match host.rfind(':') {
        Some(colon) => &host[..colon],
        None => host,
    }
}
180
/// Remove every `..` from `s`, repeating the removal until none is left
/// (JSS parity: `urlToPathWithPod` lines 62-66 and 70-74).
///
/// Re-checking after each removal is what defeats inputs such as
/// `....//`, which are crafted so that deleting one `..` would leave
/// another behind. The result never contains `..`; a lone `.` is kept.
fn scrub_dotdot(s: &str) -> String {
    let mut scrubbed = s.to_owned();
    while scrubbed.contains("..") {
        scrubbed = scrubbed.replace("..", "");
    }
    scrubbed
}
194
195// ---------------------------------------------------------------------------
196// Unit tests — exercise helpers; integration coverage lives in
197// `tests/tenancy_subdomain.rs`.
198// ---------------------------------------------------------------------------
199
#[cfg(test)]
mod tests {
    use super::*;

    /// Resolver rooted at the base domain shared by the subdomain tests.
    fn pods_resolver() -> SubdomainResolver {
        SubdomainResolver {
            base_domain: String::from("pods.example.com"),
        }
    }

    // -----------------------------------------------------------------
    // Helpers.
    // -----------------------------------------------------------------

    #[test]
    fn strip_port_handles_missing_port() {
        let without_port = strip_port("example.org");
        let with_port = strip_port("example.org:8080");
        assert_eq!(without_port, "example.org");
        assert_eq!(with_port, "example.org");
    }

    #[test]
    fn scrub_dotdot_is_double_pass() {
        assert_eq!(scrub_dotdot("al..ice"), "alice");
        // Four dots are two back-to-back `..` sequences; nothing of them
        // may survive.
        assert_eq!(scrub_dotdot("al....ice"), "alice");
        // Input without `..` comes back untouched.
        assert_eq!(scrub_dotdot("safe"), "safe");
    }

    /// Sprint 12 security hardening (JSS commit 2569811): the bypass
    /// `"....//foo"` must NOT produce `"../foo"`. Every `..` is removed,
    /// leaving exactly `"//foo"`.
    #[test]
    fn scrub_dotdot_iterative_defeats_bypass() {
        let result = scrub_dotdot("....//foo");
        // However many passes it takes, no `..` may remain.
        assert!(
            !result.contains(".."),
            "iterative scrub must eliminate all `..`: got {result:?}"
        );
        assert_eq!(result, "//foo");
    }

    #[test]
    fn is_file_like_label_matches_known_extensions() {
        // Known extensions, including one in upper case.
        assert!(is_file_like_label("favicon.ico"));
        assert!(is_file_like_label("style.css"));
        assert!(is_file_like_label("bundle.js"));
        assert!(is_file_like_label("icon.SVG"));
        assert!(is_file_like_label("profile.jsonld"));
        assert!(!is_file_like_label("hero.webp"), "unknown ext must not match");
        // Plain labels carry no extension at all.
        assert!(!is_file_like_label("alice"));
        assert!(!is_file_like_label("bob-smith"));
        // A dot followed by an unlisted extension is still not file-like.
        assert!(!is_file_like_label("foo.bar"));
    }

    // -----------------------------------------------------------------
    // PathResolver.
    // -----------------------------------------------------------------

    #[test]
    fn path_resolver_ignores_host() {
        let r = PathResolver;
        let with_host = r.resolve("anything", "/x");
        let empty_host = r.resolve("", "/x");
        assert_eq!(with_host, empty_host);
        assert_eq!(with_host.pod, None);
    }

    // -----------------------------------------------------------------
    // SubdomainResolver — Sprint 11 (row 125, 162) subdomain hardening,
    // JSS PR #307, plus the Sprint 12 `..` bypass check.
    // -----------------------------------------------------------------

    /// The whole resolver must refuse the bypass attempt: once scrubbed,
    /// the prefix `"....//foo"` still contains `/`, so the request falls
    /// back to path mode (`pod: None`).
    #[test]
    fn subdomain_rejects_dotdot_bypass_as_pod() {
        // Host header: `....//foo.pods.example.com`
        let got = pods_resolver().resolve("....//foo.pods.example.com", "/index.html");
        assert_eq!(
            got.pod, None,
            "bypass attempt must not produce a pod name"
        );
    }

    #[test]
    fn subdomain_extracts_pod_name() {
        let got = pods_resolver().resolve("alice.pods.example.com", "/index.html");
        assert_eq!(got.pod.as_deref(), Some("alice"));
        assert_eq!(got.storage_path, "/index.html");
    }

    #[test]
    fn subdomain_file_like_label_passes_through() {
        // PR #307 regression: `favicon.ico.pods.example.com` must NOT
        // be rewritten to a pod named `favicon.ico`.
        let got = pods_resolver().resolve("favicon.ico.pods.example.com", "/");
        assert_eq!(got.pod, None, "file-like label must pass through");
        assert_eq!(got.storage_path, "/");
    }

    #[test]
    fn subdomain_html_label_passes_through() {
        let got = pods_resolver().resolve("index.html.pods.example.com", "/");
        assert_eq!(got.pod, None);
    }

    #[test]
    fn subdomain_base_domain_root() {
        // The bare base domain is the root pod.
        let got = pods_resolver().resolve("pods.example.com", "/hello");
        assert_eq!(got.pod, None);
        assert_eq!(got.storage_path, "/hello");
    }
}
317}