multistore_path_mapping/lib.rs
1//! Hierarchical path mapping for the multistore S3 proxy gateway.
2//!
3//! S3 uses a flat namespace: each bucket is an independent container resolved
4//! to a single backend. Some applications need a *hierarchical* URL scheme
5//! where multiple path segments determine which backend to use. For example,
6//! a data catalog might expose `/{account}/{product}/{key}` but store each
7//! account/product pair in its own backend bucket.
8//!
9//! This crate bridges those two worlds:
10//!
11//! - **[`PathMapping`]** defines *how many* leading URL segments form the
12//! logical "bucket", what separator joins them into an internal name, and
13//! how many segments appear as the display name in S3 XML responses.
14//!
15//! - **[`PathMapping::rewrite_request`]** rewrites an incoming `(path, query)`
16//! pair so the gateway sees a single-segment bucket. It handles both
17//! path-based routing (`/{a}/{b}/{key}` → `/{a:b}/{key}`) and query-based
18//! prefix routing (`/{a}?prefix=b/sub/` → `/{a:b}?prefix=sub/`).
19//!
20//! - **[`MappedRegistry`]** wraps any [`BucketRegistry`] and automatically
21//! applies display-name and list-rewrite rules so XML responses show the
22//! original hierarchical names to clients.
23//!
24//! # Example
25//!
26//! ```rust
27//! use multistore_path_mapping::PathMapping;
28//!
29//! let mapping = PathMapping {
30//! bucket_segments: 2,
31//! bucket_separator: ":".into(),
32//! display_bucket_segments: 1,
33//! };
34//!
35//! // Path-based: two segments become one internal bucket
36//! let mapped = mapping.parse("/acme/data/report.csv").unwrap();
37//! assert_eq!(mapped.bucket, "acme:data");
38//! assert_eq!(mapped.key, Some("report.csv".to_string()));
39//! assert_eq!(mapped.display_bucket, "acme");
40//!
41//! // Full request rewrite (path + query)
42//! let result = mapping.rewrite_request(
43//! "/acme/data/report.csv",
44//! None,
45//! );
46//! assert_eq!(result.path, "/acme:data/report.csv");
47//! assert_eq!(result.query, None);
48//! assert_eq!(result.signing_path, "/acme/data/report.csv");
49//!
50//! // Prefix-based list rewrite
51//! let result = mapping.rewrite_request(
52//! "/acme",
53//! Some("list-type=2&prefix=data/subdir/"),
54//! );
55//! assert_eq!(result.path, "/acme:data");
56//! assert_eq!(result.query, Some("list-type=2&prefix=subdir/".to_string()));
57//! assert_eq!(result.signing_query, Some("list-type=2&prefix=data/subdir/".to_string()));
58//! ```
59
60use multistore::api::list_rewrite::ListRewrite;
61use multistore::registry::{BucketRegistry, ResolvedBucket};
62
/// Defines how URL path segments map to internal bucket names.
///
/// The first `bucket_segments` path segments are joined with
/// `bucket_separator` to form the internal bucket name; the first
/// `display_bucket_segments` of those are joined with `/` to form the name
/// shown to clients, and the remaining bucket segments become a key prefix.
#[derive(Debug, Clone)]
pub struct PathMapping {
    /// Number of path segments that form the "bucket" portion.
    /// E.g., 2 for `/{account}/{product}/...`
    pub bucket_segments: usize,

    /// Separator to join segments into an internal bucket name.
    /// E.g., ":" produces `account:product`.
    pub bucket_separator: String,

    /// How many leading segments form the "display bucket" name for XML responses.
    /// E.g., 1 means `<Name>` shows just `account`.
    ///
    /// Should not exceed `bucket_segments`, since the display name is taken
    /// from the bucket segments.
    pub display_bucket_segments: usize,
}
78
/// Result of rewriting a request path and query string.
///
/// Contains both the rewritten values (for S3 operation parsing) and the
/// original values (for SigV4 signature verification). The `signing_*`
/// fields are always verbatim copies of what was passed to
/// [`PathMapping::rewrite_request`]; when no rewrite applies, `path` and
/// `query` equal them.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RewriteResult {
    /// Rewritten path for S3 operation parsing.
    pub path: String,
    /// Rewritten query for operation parsing (`None` if the request had no query).
    pub query: Option<String>,
    /// Original client path for SigV4 verification.
    pub signing_path: String,
    /// Original client query for SigV4 verification.
    pub signing_query: Option<String>,
}
94
/// Result of mapping a request path.
///
/// Produced by [`PathMapping::parse`] (from a URL path) and
/// [`PathMapping::parse_bucket_name`] (from an already-joined bucket name).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MappedPath {
    /// Internal bucket name (e.g., "account:product")
    pub bucket: String,
    /// Remaining key after bucket segments (e.g., "file.parquet").
    /// `None` when nothing (or only an empty string) follows the bucket segments.
    pub key: Option<String>,
    /// Display bucket name for XML responses (e.g., "account")
    pub display_bucket: String,
    /// Key prefix to add in XML responses (e.g., "product/").
    /// Built from the bucket segments that are not part of `display_bucket`,
    /// joined by `/` with a trailing `/`; empty when there are none.
    pub key_prefix: String,
    /// The individual path segments that formed the bucket
    pub segments: Vec<String>,
}
109
110impl PathMapping {
111 /// Parse a URL path into a [`MappedPath`].
112 ///
113 /// The path is expected to start with `/`. Segments are split on `/`,
114 /// and the first `bucket_segments` segments form the internal bucket name.
115 /// Any remaining content becomes the key.
116 ///
117 /// Returns `None` if there are fewer than `bucket_segments` non-empty segments.
118 pub fn parse(&self, path: &str) -> Option<MappedPath> {
119 let trimmed = path.strip_prefix('/').unwrap_or(path);
120 if trimmed.is_empty() {
121 return None;
122 }
123
124 // Split into at most bucket_segments + 1 parts so the key portion
125 // preserves any internal `/` characters.
126 let parts: Vec<&str> = trimmed.splitn(self.bucket_segments + 1, '/').collect();
127
128 if parts.len() < self.bucket_segments {
129 return None;
130 }
131
132 // Verify none of the bucket segments are empty.
133 for part in &parts[..self.bucket_segments] {
134 if part.is_empty() {
135 return None;
136 }
137 }
138
139 let segments: Vec<String> = parts[..self.bucket_segments]
140 .iter()
141 .map(|s| s.to_string())
142 .collect();
143
144 let bucket = segments.join(&self.bucket_separator);
145
146 let key = if parts.len() > self.bucket_segments {
147 let k = parts[self.bucket_segments];
148 if k.is_empty() {
149 None
150 } else {
151 Some(k.to_string())
152 }
153 } else {
154 None
155 };
156
157 let display_bucket = segments[..self.display_bucket_segments].join("/");
158
159 let key_prefix = if self.display_bucket_segments < self.bucket_segments {
160 let prefix_parts = &segments[self.display_bucket_segments..self.bucket_segments];
161 format!("{}/", prefix_parts.join("/"))
162 } else {
163 String::new()
164 };
165
166 Some(MappedPath {
167 bucket,
168 key,
169 display_bucket,
170 key_prefix,
171 segments,
172 })
173 }
174
175 /// Parse a bucket name (e.g., "account:product") back into a [`MappedPath`].
176 ///
177 /// Used by [`MappedRegistry`] when it receives an already-mapped bucket name.
178 /// Returns `None` if the bucket name does not split into exactly `bucket_segments` parts.
179 pub fn parse_bucket_name(&self, bucket_name: &str) -> Option<MappedPath> {
180 let segments: Vec<String> = bucket_name
181 .split(&self.bucket_separator)
182 .map(|s| s.to_string())
183 .collect();
184
185 if segments.len() != self.bucket_segments {
186 return None;
187 }
188
189 // Verify none of the segments are empty.
190 for seg in &segments {
191 if seg.is_empty() {
192 return None;
193 }
194 }
195
196 let display_bucket = segments[..self.display_bucket_segments].join("/");
197
198 let key_prefix = if self.display_bucket_segments < self.bucket_segments {
199 let prefix_parts = &segments[self.display_bucket_segments..self.bucket_segments];
200 format!("{}/", prefix_parts.join("/"))
201 } else {
202 String::new()
203 };
204
205 Some(MappedPath {
206 bucket: bucket_name.to_string(),
207 key: None,
208 display_bucket,
209 key_prefix,
210 segments,
211 })
212 }
213
214 /// Rewrite an incoming request path and query string for the gateway.
215 ///
216 /// Translates hierarchical paths into internal single-segment bucket paths:
217 ///
218 /// 1. **Path-based**: if the path has enough segments, they are joined into
219 /// a single bucket name.
220 /// `/{a}/{b}/{key}` → `/{a:b}/{key}`
221 ///
222 /// 2. **Prefix-based**: if the path has fewer segments than required but the
223 /// query string contains a `list-type=` param with a non-empty `prefix=`,
224 /// the first component of the prefix is folded into the bucket name.
225 /// `/{a}?list-type=2&prefix=b/sub/` → `/{a:b}?list-type=2&prefix=sub/`
226 ///
227 /// 3. **Pass-through**: all other paths are returned unchanged. Route handlers
228 /// or the gateway itself will handle them.
229 pub fn rewrite_request(&self, path: &str, query: Option<&str>) -> RewriteResult {
230 let signing_path = path.to_string();
231 let signing_query = query.map(|q| q.to_string());
232
233 // Case 1: enough path segments to map directly
234 if let Some(mapped) = self.parse(path) {
235 let rewritten_path = match mapped.key {
236 Some(ref key) => format!("/{}/{}", mapped.bucket, key),
237 None => format!("/{}", mapped.bucket),
238 };
239 return RewriteResult {
240 path: rewritten_path,
241 query: query.map(|q| q.to_string()),
242 signing_path,
243 signing_query,
244 };
245 }
246
247 // Case 2: single-segment path with a list-type query and non-empty prefix
248 let trimmed = path.trim_matches('/');
249 if !trimmed.is_empty() && !trimmed.contains('/') {
250 let query_str = query.unwrap_or("");
251 if is_list_request(query_str) {
252 if let Some(prefix) = extract_query_param(query_str, "prefix") {
253 if !prefix.is_empty() {
254 let (rewritten_path, rewritten_query) =
255 self.rewrite_prefix_to_bucket(trimmed, &prefix, query_str);
256 return RewriteResult {
257 path: rewritten_path,
258 query: rewritten_query,
259 signing_path,
260 signing_query,
261 };
262 }
263 }
264 }
265 }
266
267 // Case 3: pass through unchanged
268 RewriteResult {
269 path: path.to_string(),
270 query: query.map(|q| q.to_string()),
271 signing_path,
272 signing_query,
273 }
274 }
275
276 /// Fold the first prefix component into the bucket name.
277 ///
278 /// `/{account}?prefix=product/sub/` → `/{account:product}?prefix=sub/`
279 fn rewrite_prefix_to_bucket(
280 &self,
281 account: &str,
282 prefix: &str,
283 query_str: &str,
284 ) -> (String, Option<String>) {
285 let (product, remaining_prefix) = if let Some(slash_pos) = prefix.find('/') {
286 (&prefix[..slash_pos], &prefix[slash_pos + 1..])
287 } else {
288 (prefix, "")
289 };
290
291 let bucket = format!("{}{}{}", account, self.bucket_separator, product);
292 let new_query = rewrite_prefix_in_query(query_str, remaining_prefix);
293 (format!("/{}", bucket), Some(new_query))
294 }
295}
296
297// ── Query-string helpers (private) ──────────────────────────────────
298
/// Report whether any `&`-separated parameter of `query` begins with
/// `list-type=`.
///
/// The match is anchored at the start of each parameter, so names that
/// merely contain `list-type=` (e.g. `not-list-type=2`) do not count.
fn is_list_request(query: &str) -> bool {
    for param in query.split('&') {
        if param.starts_with("list-type=") {
            return true;
        }
    }
    false
}
303
304/// Extract and percent-decode a single query parameter value.
305fn extract_query_param(query: &str, key: &str) -> Option<String> {
306 query.split('&').find_map(|pair| {
307 pair.split_once('=')
308 .filter(|(k, _)| *k == key)
309 .map(|(_, v)| {
310 percent_encoding::percent_decode_str(v)
311 .decode_utf8_lossy()
312 .into_owned()
313 })
314 })
315}
316
317/// Characters that must be percent-encoded when placed in a query parameter value.
318const QUERY_VALUE_ENCODE: &percent_encoding::AsciiSet = &percent_encoding::CONTROLS
319 .add(b' ')
320 .add(b'#')
321 .add(b'&')
322 .add(b'=')
323 .add(b'+');
324
325/// Replace the `prefix=` value in a query string, percent-encoding the new value.
326fn rewrite_prefix_in_query(query: &str, new_prefix: &str) -> String {
327 let encoded: String =
328 percent_encoding::utf8_percent_encode(new_prefix, QUERY_VALUE_ENCODE).to_string();
329 query
330 .split('&')
331 .map(|pair| {
332 if pair.starts_with("prefix=") {
333 format!("prefix={}", encoded)
334 } else {
335 pair.to_string()
336 }
337 })
338 .collect::<Vec<_>>()
339 .join("&")
340}
341
/// Unit tests for the private query-string helpers.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn is_list_request_detects_list_type() {
        assert!(is_list_request("list-type=2"));
        assert!(is_list_request("foo=bar&list-type=2&baz=qux"));
        assert!(!is_list_request("foo=bar"));
        assert!(!is_list_request(""));
    }

    #[test]
    fn is_list_request_rejects_substring_match() {
        assert!(!is_list_request("not-list-type=2"));
        // The look-alike parameter must follow a real `&` separator so the
        // per-parameter anchoring is what gets exercised.
        assert!(!is_list_request("foo=bar&not-list-type=2"));
    }

    #[test]
    fn extract_query_param_finds_value() {
        assert_eq!(
            extract_query_param("list-type=2&prefix=foo/", "prefix"),
            Some("foo/".to_string())
        );
    }

    #[test]
    fn extract_query_param_missing() {
        assert_eq!(extract_query_param("list-type=2", "prefix"), None);
    }

    #[test]
    fn extract_query_param_decodes_percent() {
        assert_eq!(
            extract_query_param("prefix=hello%20world", "prefix"),
            Some("hello world".to_string())
        );
    }

    #[test]
    fn rewrite_prefix_replaces_value() {
        assert_eq!(
            rewrite_prefix_in_query("list-type=2&prefix=old/", "new/"),
            "list-type=2&prefix=new/"
        );
    }

    #[test]
    fn rewrite_prefix_to_empty() {
        assert_eq!(
            rewrite_prefix_in_query("prefix=old/&max-keys=100", ""),
            "prefix=&max-keys=100"
        );
    }

    #[test]
    fn rewrite_prefix_encodes_special_chars() {
        assert_eq!(
            rewrite_prefix_in_query("list-type=2&prefix=old/", "sub dir/"),
            "list-type=2&prefix=sub%20dir/"
        );
    }
}
405
406// ── MappedRegistry ──────────────────────────────────────────────────
407
/// Wraps a [`BucketRegistry`] to add path-based routing.
///
/// When `get_bucket` is called, the bucket name is parsed via
/// [`PathMapping::parse_bucket_name`] and the resulting [`ListRewrite`]
/// and `display_name` are applied to the resolved bucket. This allows the
/// gateway to present hierarchical names in S3 XML responses while storing
/// data in flat internal buckets.
#[derive(Debug, Clone)]
pub struct MappedRegistry<R> {
    /// The wrapped registry that performs the actual bucket resolution.
    inner: R,
    /// Rules used to derive the display name and key prefix from a bucket name.
    mapping: PathMapping,
}
420
421impl<R> MappedRegistry<R> {
422 /// Create a new `MappedRegistry` wrapping the given registry with a path mapping.
423 pub fn new(inner: R, mapping: PathMapping) -> Self {
424 Self { inner, mapping }
425 }
426}
427
428impl<R: BucketRegistry> BucketRegistry for MappedRegistry<R> {
429 async fn get_bucket(
430 &self,
431 name: &str,
432 identity: &multistore::types::ResolvedIdentity,
433 operation: &multistore::types::S3Operation,
434 ) -> Result<ResolvedBucket, multistore::error::ProxyError> {
435 let mapped = self.mapping.parse_bucket_name(name);
436
437 let mut resolved = self.inner.get_bucket(name, identity, operation).await?;
438
439 if let Some(mapped) = mapped {
440 tracing::debug!(
441 bucket = %name,
442 display_name = %mapped.display_bucket,
443 key_prefix = %mapped.key_prefix,
444 "Applying path mapping to resolved bucket"
445 );
446
447 resolved.display_name = Some(mapped.display_bucket);
448
449 if !mapped.key_prefix.is_empty() {
450 resolved.list_rewrite = Some(ListRewrite {
451 strip_prefix: String::new(),
452 add_prefix: mapped.key_prefix,
453 });
454 }
455 }
456
457 Ok(resolved)
458 }
459
460 async fn list_buckets(
461 &self,
462 identity: &multistore::types::ResolvedIdentity,
463 ) -> Result<Vec<multistore::api::response::BucketEntry>, multistore::error::ProxyError> {
464 self.inner.list_buckets(identity).await
465 }
466
467 fn bucket_owner(&self) -> multistore::types::BucketOwner {
468 self.inner.bucket_owner()
469 }
470}