Skip to main content

multistore_path_mapping/
lib.rs

1//! Hierarchical path mapping for the multistore S3 proxy gateway.
2//!
3//! S3 uses a flat namespace: each bucket is an independent container resolved
4//! to a single backend. Some applications need a *hierarchical* URL scheme
5//! where multiple path segments determine which backend to use. For example,
6//! a data catalog might expose `/{account}/{product}/{key}` but store each
7//! account/product pair in its own backend bucket.
8//!
9//! This crate bridges those two worlds:
10//!
11//! - **[`PathMapping`]** defines *how many* leading URL segments form the
12//!   logical "bucket", what separator joins them into an internal name, and
13//!   how many segments appear as the display name in S3 XML responses.
14//!
15//! - **[`PathMapping::rewrite_request`]** rewrites an incoming `(path, query)`
16//!   pair so the gateway sees a single-segment bucket. It handles both
17//!   path-based routing (`/{a}/{b}/{key}` → `/{a:b}/{key}`) and query-based
18//!   prefix routing (`/{a}?prefix=b/sub/` → `/{a:b}?prefix=sub/`).
19//!
20//! - **[`MappedRegistry`]** wraps any [`BucketRegistry`] and automatically
21//!   applies display-name and list-rewrite rules so XML responses show the
22//!   original hierarchical names to clients.
23//!
24//! # Example
25//!
26//! ```rust
27//! use multistore_path_mapping::PathMapping;
28//!
29//! let mapping = PathMapping {
30//!     bucket_segments: 2,
31//!     bucket_separator: ":".into(),
32//!     display_bucket_segments: 1,
33//! };
34//!
35//! // Path-based: two segments become one internal bucket
36//! let mapped = mapping.parse("/acme/data/report.csv").unwrap();
37//! assert_eq!(mapped.bucket, "acme:data");
38//! assert_eq!(mapped.key, Some("report.csv".to_string()));
39//! assert_eq!(mapped.display_bucket, "acme");
40//!
41//! // Full request rewrite (path + query)
42//! let result = mapping.rewrite_request(
43//!     "/acme/data/report.csv",
44//!     None,
45//! );
46//! assert_eq!(result.path, "/acme:data/report.csv");
47//! assert_eq!(result.query, None);
48//! assert_eq!(result.signing_path, "/acme/data/report.csv");
49//!
50//! // Prefix-based list rewrite
51//! let result = mapping.rewrite_request(
52//!     "/acme",
53//!     Some("list-type=2&prefix=data/subdir/"),
54//! );
55//! assert_eq!(result.path, "/acme:data");
56//! assert_eq!(result.query, Some("list-type=2&prefix=subdir/".to_string()));
57//! assert_eq!(result.signing_query, Some("list-type=2&prefix=data/subdir/".to_string()));
58//! ```
59
60use multistore::api::list_rewrite::ListRewrite;
61use multistore::registry::{BucketRegistry, ResolvedBucket};
62
/// Configuration describing how leading URL path segments collapse into a
/// single internal bucket name.
#[derive(Debug, Clone)]
pub struct PathMapping {
    /// How many leading path segments make up the "bucket" part of a URL.
    /// For a layout like `/{account}/{product}/...` this is 2.
    pub bucket_segments: usize,

    /// String placed between the bucket segments when they are joined into
    /// the internal bucket name; `":"` yields `account:product`.
    pub bucket_separator: String,

    /// How many of the leading bucket segments are shown as the bucket name
    /// in XML responses; with 1, `<Name>` contains only `account`. The
    /// remaining bucket segments become a key prefix. Expected to be at most
    /// `bucket_segments`.
    pub display_bucket_segments: usize,
}
78
/// Outcome of [`PathMapping::rewrite_request`].
///
/// Carries two views of the same request: the rewritten path/query that S3
/// operation parsing should use, and the untouched client path/query that
/// SigV4 signature verification must be computed over.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RewriteResult {
    /// Path after rewriting; feed this to S3 operation parsing.
    pub path: String,
    /// Query string after rewriting; feed this to S3 operation parsing.
    pub query: Option<String>,
    /// Path exactly as the client sent it, kept for SigV4 verification.
    pub signing_path: String,
    /// Query string exactly as the client sent it, kept for SigV4 verification.
    pub signing_query: Option<String>,
}
94
/// A request path (or internal bucket name) broken down into its mapped parts.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MappedPath {
    /// Bucket segments joined with the separator, e.g. "account:product".
    pub bucket: String,
    /// Whatever follows the bucket segments, e.g. "file.parquet"; `None`
    /// when nothing (or only an empty string) follows.
    pub key: Option<String>,
    /// Name shown as the bucket in XML responses, e.g. "account".
    pub display_bucket: String,
    /// Prefix to prepend to keys in XML responses, e.g. "product/"; empty
    /// when every bucket segment is part of the display name.
    pub key_prefix: String,
    /// The bucket segments, in order, before they were joined.
    pub segments: Vec<String>,
}
109
110impl PathMapping {
111    /// Parse a URL path into a [`MappedPath`].
112    ///
113    /// The path is expected to start with `/`. Segments are split on `/`,
114    /// and the first `bucket_segments` segments form the internal bucket name.
115    /// Any remaining content becomes the key.
116    ///
117    /// Returns `None` if there are fewer than `bucket_segments` non-empty segments.
118    pub fn parse(&self, path: &str) -> Option<MappedPath> {
119        let trimmed = path.strip_prefix('/').unwrap_or(path);
120        if trimmed.is_empty() {
121            return None;
122        }
123
124        // Split into at most bucket_segments + 1 parts so the key portion
125        // preserves any internal `/` characters.
126        let parts: Vec<&str> = trimmed.splitn(self.bucket_segments + 1, '/').collect();
127
128        if parts.len() < self.bucket_segments {
129            return None;
130        }
131
132        // Verify none of the bucket segments are empty.
133        for part in &parts[..self.bucket_segments] {
134            if part.is_empty() {
135                return None;
136            }
137        }
138
139        let segments: Vec<String> = parts[..self.bucket_segments]
140            .iter()
141            .map(|s| s.to_string())
142            .collect();
143
144        let bucket = segments.join(&self.bucket_separator);
145
146        let key = if parts.len() > self.bucket_segments {
147            let k = parts[self.bucket_segments];
148            if k.is_empty() {
149                None
150            } else {
151                Some(k.to_string())
152            }
153        } else {
154            None
155        };
156
157        let display_bucket = segments[..self.display_bucket_segments].join("/");
158
159        let key_prefix = if self.display_bucket_segments < self.bucket_segments {
160            let prefix_parts = &segments[self.display_bucket_segments..self.bucket_segments];
161            format!("{}/", prefix_parts.join("/"))
162        } else {
163            String::new()
164        };
165
166        Some(MappedPath {
167            bucket,
168            key,
169            display_bucket,
170            key_prefix,
171            segments,
172        })
173    }
174
175    /// Parse a bucket name (e.g., "account:product") back into a [`MappedPath`].
176    ///
177    /// Used by [`MappedRegistry`] when it receives an already-mapped bucket name.
178    /// Returns `None` if the bucket name does not split into exactly `bucket_segments` parts.
179    pub fn parse_bucket_name(&self, bucket_name: &str) -> Option<MappedPath> {
180        let segments: Vec<String> = bucket_name
181            .split(&self.bucket_separator)
182            .map(|s| s.to_string())
183            .collect();
184
185        if segments.len() != self.bucket_segments {
186            return None;
187        }
188
189        // Verify none of the segments are empty.
190        for seg in &segments {
191            if seg.is_empty() {
192                return None;
193            }
194        }
195
196        let display_bucket = segments[..self.display_bucket_segments].join("/");
197
198        let key_prefix = if self.display_bucket_segments < self.bucket_segments {
199            let prefix_parts = &segments[self.display_bucket_segments..self.bucket_segments];
200            format!("{}/", prefix_parts.join("/"))
201        } else {
202            String::new()
203        };
204
205        Some(MappedPath {
206            bucket: bucket_name.to_string(),
207            key: None,
208            display_bucket,
209            key_prefix,
210            segments,
211        })
212    }
213
214    /// Rewrite an incoming request path and query string for the gateway.
215    ///
216    /// Translates hierarchical paths into internal single-segment bucket paths:
217    ///
218    /// 1. **Path-based**: if the path has enough segments, they are joined into
219    ///    a single bucket name.
220    ///    `/{a}/{b}/{key}` → `/{a:b}/{key}`
221    ///
222    /// 2. **Prefix-based**: if the path has fewer segments than required but the
223    ///    query string contains a `list-type=` param with a non-empty `prefix=`,
224    ///    the first component of the prefix is folded into the bucket name.
225    ///    `/{a}?list-type=2&prefix=b/sub/` → `/{a:b}?list-type=2&prefix=sub/`
226    ///
227    /// 3. **Pass-through**: all other paths are returned unchanged. Route handlers
228    ///    or the gateway itself will handle them.
229    pub fn rewrite_request(&self, path: &str, query: Option<&str>) -> RewriteResult {
230        let signing_path = path.to_string();
231        let signing_query = query.map(|q| q.to_string());
232
233        // Case 1: enough path segments to map directly
234        if let Some(mapped) = self.parse(path) {
235            let rewritten_path = match mapped.key {
236                Some(ref key) => format!("/{}/{}", mapped.bucket, key),
237                None => format!("/{}", mapped.bucket),
238            };
239            return RewriteResult {
240                path: rewritten_path,
241                query: query.map(|q| q.to_string()),
242                signing_path,
243                signing_query,
244            };
245        }
246
247        // Case 2: single-segment path with a list-type query and non-empty prefix
248        let trimmed = path.trim_matches('/');
249        if !trimmed.is_empty() && !trimmed.contains('/') {
250            let query_str = query.unwrap_or("");
251            if is_list_request(query_str) {
252                if let Some(prefix) = extract_query_param(query_str, "prefix") {
253                    if !prefix.is_empty() {
254                        let (rewritten_path, rewritten_query) =
255                            self.rewrite_prefix_to_bucket(trimmed, &prefix, query_str);
256                        return RewriteResult {
257                            path: rewritten_path,
258                            query: rewritten_query,
259                            signing_path,
260                            signing_query,
261                        };
262                    }
263                }
264            }
265        }
266
267        // Case 3: pass through unchanged
268        RewriteResult {
269            path: path.to_string(),
270            query: query.map(|q| q.to_string()),
271            signing_path,
272            signing_query,
273        }
274    }
275
276    /// Fold the first prefix component into the bucket name.
277    ///
278    /// `/{account}?prefix=product/sub/` → `/{account:product}?prefix=sub/`
279    fn rewrite_prefix_to_bucket(
280        &self,
281        account: &str,
282        prefix: &str,
283        query_str: &str,
284    ) -> (String, Option<String>) {
285        let (product, remaining_prefix) = if let Some(slash_pos) = prefix.find('/') {
286            (&prefix[..slash_pos], &prefix[slash_pos + 1..])
287        } else {
288            (prefix, "")
289        };
290
291        let bucket = format!("{}{}{}", account, self.bucket_separator, product);
292        let new_query = rewrite_prefix_in_query(query_str, remaining_prefix);
293        (format!("/{}", bucket), Some(new_query))
294    }
295}
296
297// ── Query-string helpers (private) ──────────────────────────────────
298
/// Report whether any `&`-separated parameter of `query` begins with
/// `list-type=`.
///
/// The match is anchored at the start of each parameter, so a name that
/// merely ends in `list-type` (e.g. `not-list-type=2`) does not count.
fn is_list_request(query: &str) -> bool {
    for param in query.split('&') {
        if param.starts_with("list-type=") {
            return true;
        }
    }
    false
}
303
304/// Extract and percent-decode a single query parameter value.
305fn extract_query_param(query: &str, key: &str) -> Option<String> {
306    query.split('&').find_map(|pair| {
307        pair.split_once('=')
308            .filter(|(k, _)| *k == key)
309            .map(|(_, v)| {
310                percent_encoding::percent_decode_str(v)
311                    .decode_utf8_lossy()
312                    .into_owned()
313            })
314    })
315}
316
317/// Characters that must be percent-encoded when placed in a query parameter value.
318const QUERY_VALUE_ENCODE: &percent_encoding::AsciiSet = &percent_encoding::CONTROLS
319    .add(b' ')
320    .add(b'#')
321    .add(b'&')
322    .add(b'=')
323    .add(b'+');
324
325/// Replace the `prefix=` value in a query string, percent-encoding the new value.
326fn rewrite_prefix_in_query(query: &str, new_prefix: &str) -> String {
327    let encoded: String =
328        percent_encoding::utf8_percent_encode(new_prefix, QUERY_VALUE_ENCODE).to_string();
329    query
330        .split('&')
331        .map(|pair| {
332            if pair.starts_with("prefix=") {
333                format!("prefix={}", encoded)
334            } else {
335                pair.to_string()
336            }
337        })
338        .collect::<Vec<_>>()
339        .join("&")
340}
341
#[cfg(test)]
mod tests {
    use super::*;

    // ── is_list_request ─────────────────────────────────────────────

    #[test]
    fn is_list_request_detects_list_type() {
        for query in ["list-type=2", "foo=bar&list-type=2&baz=qux"] {
            assert!(is_list_request(query), "should be a list request: {query:?}");
        }
        for query in ["foo=bar", ""] {
            assert!(!is_list_request(query), "should not be a list request: {query:?}");
        }
    }

    #[test]
    fn is_list_request_rejects_substring_match() {
        // A parameter whose name merely ends in `list-type` must not match.
        for query in ["not-list-type=2", "foo=bar&not-list-type=2"] {
            assert!(!is_list_request(query), "should not be a list request: {query:?}");
        }
    }

    // ── extract_query_param ─────────────────────────────────────────

    #[test]
    fn extract_query_param_finds_value() {
        let found = extract_query_param("list-type=2&prefix=foo/", "prefix");
        assert_eq!(found.as_deref(), Some("foo/"));
    }

    #[test]
    fn extract_query_param_missing() {
        let found = extract_query_param("list-type=2", "prefix");
        assert!(found.is_none());
    }

    #[test]
    fn extract_query_param_decodes_percent() {
        let found = extract_query_param("prefix=hello%20world", "prefix");
        assert_eq!(found.as_deref(), Some("hello world"));
    }

    // ── rewrite_prefix_in_query ─────────────────────────────────────

    #[test]
    fn rewrite_prefix_replaces_value() {
        let rewritten = rewrite_prefix_in_query("list-type=2&prefix=old/", "new/");
        assert_eq!(rewritten, "list-type=2&prefix=new/");
    }

    #[test]
    fn rewrite_prefix_to_empty() {
        let rewritten = rewrite_prefix_in_query("prefix=old/&max-keys=100", "");
        assert_eq!(rewritten, "prefix=&max-keys=100");
    }

    #[test]
    fn rewrite_prefix_encodes_special_chars() {
        let rewritten = rewrite_prefix_in_query("list-type=2&prefix=old/", "sub dir/");
        assert_eq!(rewritten, "list-type=2&prefix=sub%20dir/");
    }
}
405
406// ── MappedRegistry ──────────────────────────────────────────────────
407
408/// Wraps a [`BucketRegistry`] to add path-based routing.
409///
410/// When `get_bucket` is called, the bucket name is parsed via
411/// [`PathMapping::parse_bucket_name`] and the resulting [`ListRewrite`]
412/// and `display_name` are applied to the resolved bucket. This allows the
413/// gateway to present hierarchical names in S3 XML responses while storing
414/// data in flat internal buckets.
415#[derive(Debug, Clone)]
416pub struct MappedRegistry<R> {
417    inner: R,
418    mapping: PathMapping,
419}
420
421impl<R> MappedRegistry<R> {
422    /// Create a new `MappedRegistry` wrapping the given registry with a path mapping.
423    pub fn new(inner: R, mapping: PathMapping) -> Self {
424        Self { inner, mapping }
425    }
426}
427
428impl<R: BucketRegistry> BucketRegistry for MappedRegistry<R> {
429    async fn get_bucket(
430        &self,
431        name: &str,
432        identity: &multistore::types::ResolvedIdentity,
433        operation: &multistore::types::S3Operation,
434    ) -> Result<ResolvedBucket, multistore::error::ProxyError> {
435        let mapped = self.mapping.parse_bucket_name(name);
436
437        let mut resolved = self.inner.get_bucket(name, identity, operation).await?;
438
439        if let Some(mapped) = mapped {
440            tracing::debug!(
441                bucket = %name,
442                display_name = %mapped.display_bucket,
443                key_prefix = %mapped.key_prefix,
444                "Applying path mapping to resolved bucket"
445            );
446
447            resolved.display_name = Some(mapped.display_bucket);
448
449            if !mapped.key_prefix.is_empty() {
450                resolved.list_rewrite = Some(ListRewrite {
451                    strip_prefix: String::new(),
452                    add_prefix: mapped.key_prefix,
453                });
454            }
455        }
456
457        Ok(resolved)
458    }
459
460    async fn list_buckets(
461        &self,
462        identity: &multistore::types::ResolvedIdentity,
463    ) -> Result<Vec<multistore::api::response::BucketEntry>, multistore::error::ProxyError> {
464        self.inner.list_buckets(identity).await
465    }
466
467    fn bucket_owner(&self) -> multistore::types::BucketOwner {
468        self.inner.bucket_owner()
469    }
470}