Skip to main content

multistore_path_mapping/
lib.rs

//! Hierarchical path mapping for the multistore S3 proxy gateway.
//!
//! S3 uses a flat namespace: each bucket is an independent container resolved
//! to a single backend. Some applications need a *hierarchical* URL scheme
//! where multiple path segments determine which backend to use. For example,
//! a data catalog might expose `/{account}/{product}/{key}` but store each
//! account/product pair in its own backend bucket.
//!
//! This crate bridges those two worlds:
//!
//! - **[`PathMapping`]** defines *how many* leading URL segments form the
//!   logical "bucket", what separator joins them into an internal name, and
//!   how many segments appear as the display name in S3 XML responses.
//!
//! - **[`PathMapping::rewrite_request`]** rewrites an incoming `(path, query)`
//!   pair so the gateway sees a single-segment bucket. It handles both
//!   path-based routing (`/{a}/{b}/{key}` → `/{a:b}/{key}`) and query-based
//!   prefix routing (`/{a}?prefix=b/sub/` → `/{a:b}?prefix=sub/`).
//!
//! - **[`MappedRegistry`]** wraps any [`BucketRegistry`] and automatically
//!   applies display-name and list-rewrite rules so XML responses show the
//!   original hierarchical names to clients.
//!
//! # Example
//!
//! ```rust
//! use multistore_path_mapping::PathMapping;
//!
//! let mapping = PathMapping {
//!     bucket_segments: 2,
//!     bucket_separator: ":".into(),
//!     display_bucket_segments: 1,
//! };
//!
//! // Path-based: two segments become one internal bucket
//! let mapped = mapping.parse("/acme/data/report.csv").unwrap();
//! assert_eq!(mapped.bucket, "acme:data");
//! assert_eq!(mapped.key, Some("report.csv".to_string()));
//! assert_eq!(mapped.display_bucket, "acme");
//!
//! // Full request rewrite (path + query)
//! let (path, query) = mapping.rewrite_request(
//!     "/acme/data/report.csv",
//!     None,
//! );
//! assert_eq!(path, "/acme:data/report.csv");
//! assert_eq!(query, None);
//!
//! // Prefix-based list rewrite
//! let (path, query) = mapping.rewrite_request(
//!     "/acme",
//!     Some("list-type=2&prefix=data/subdir/"),
//! );
//! assert_eq!(path, "/acme:data");
//! assert_eq!(query, Some("list-type=2&prefix=subdir/".to_string()));
//! ```
57
58use multistore::api::list_rewrite::ListRewrite;
59use multistore::registry::{BucketRegistry, ResolvedBucket};
60
/// Configuration describing how leading URL path segments collapse into a
/// single internal bucket name.
#[derive(Debug, Clone)]
pub struct PathMapping {
    /// How many leading path segments make up the "bucket" portion of a URL.
    /// For a layout like `/{account}/{product}/...` this is 2.
    pub bucket_segments: usize,

    /// String placed between the bucket segments when they are joined into
    /// the internal bucket name. With ":" the result looks like
    /// `account:product`.
    pub bucket_separator: String,

    /// How many of the leading bucket segments make up the "display bucket"
    /// name used in XML responses. With 1, `<Name>` shows just `account`.
    /// Expected to be at most `bucket_segments`.
    pub display_bucket_segments: usize,
}
76
/// Outcome of applying a [`PathMapping`] to a request path or bucket name.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MappedPath {
    /// Internal bucket name: the bucket segments joined by the mapping's
    /// separator (e.g., "account:product").
    pub bucket: String,
    /// Whatever follows the bucket segments in the path (e.g.,
    /// "file.parquet"), or `None` when nothing follows.
    pub key: Option<String>,
    /// Bucket name to show in XML responses (e.g., "account").
    pub display_bucket: String,
    /// Prefix to put in front of keys in XML responses (e.g., "product/");
    /// empty when every bucket segment is part of the display name.
    pub key_prefix: String,
    /// The individual segments the bucket name was built from, in order.
    pub segments: Vec<String>,
}
91
92impl PathMapping {
93    /// Parse a URL path into a [`MappedPath`].
94    ///
95    /// The path is expected to start with `/`. Segments are split on `/`,
96    /// and the first `bucket_segments` segments form the internal bucket name.
97    /// Any remaining content becomes the key.
98    ///
99    /// Returns `None` if there are fewer than `bucket_segments` non-empty segments.
100    pub fn parse(&self, path: &str) -> Option<MappedPath> {
101        let trimmed = path.strip_prefix('/').unwrap_or(path);
102        if trimmed.is_empty() {
103            return None;
104        }
105
106        // Split into at most bucket_segments + 1 parts so the key portion
107        // preserves any internal `/` characters.
108        let parts: Vec<&str> = trimmed.splitn(self.bucket_segments + 1, '/').collect();
109
110        if parts.len() < self.bucket_segments {
111            return None;
112        }
113
114        // Verify none of the bucket segments are empty.
115        for part in &parts[..self.bucket_segments] {
116            if part.is_empty() {
117                return None;
118            }
119        }
120
121        let segments: Vec<String> = parts[..self.bucket_segments]
122            .iter()
123            .map(|s| s.to_string())
124            .collect();
125
126        let bucket = segments.join(&self.bucket_separator);
127
128        let key = if parts.len() > self.bucket_segments {
129            let k = parts[self.bucket_segments];
130            if k.is_empty() {
131                None
132            } else {
133                Some(k.to_string())
134            }
135        } else {
136            None
137        };
138
139        let display_bucket = segments[..self.display_bucket_segments].join("/");
140
141        let key_prefix = if self.display_bucket_segments < self.bucket_segments {
142            let prefix_parts = &segments[self.display_bucket_segments..self.bucket_segments];
143            format!("{}/", prefix_parts.join("/"))
144        } else {
145            String::new()
146        };
147
148        Some(MappedPath {
149            bucket,
150            key,
151            display_bucket,
152            key_prefix,
153            segments,
154        })
155    }
156
157    /// Parse a bucket name (e.g., "account:product") back into a [`MappedPath`].
158    ///
159    /// Used by [`MappedRegistry`] when it receives an already-mapped bucket name.
160    /// Returns `None` if the bucket name does not split into exactly `bucket_segments` parts.
161    pub fn parse_bucket_name(&self, bucket_name: &str) -> Option<MappedPath> {
162        let segments: Vec<String> = bucket_name
163            .split(&self.bucket_separator)
164            .map(|s| s.to_string())
165            .collect();
166
167        if segments.len() != self.bucket_segments {
168            return None;
169        }
170
171        // Verify none of the segments are empty.
172        for seg in &segments {
173            if seg.is_empty() {
174                return None;
175            }
176        }
177
178        let display_bucket = segments[..self.display_bucket_segments].join("/");
179
180        let key_prefix = if self.display_bucket_segments < self.bucket_segments {
181            let prefix_parts = &segments[self.display_bucket_segments..self.bucket_segments];
182            format!("{}/", prefix_parts.join("/"))
183        } else {
184            String::new()
185        };
186
187        Some(MappedPath {
188            bucket: bucket_name.to_string(),
189            key: None,
190            display_bucket,
191            key_prefix,
192            segments,
193        })
194    }
195
196    /// Rewrite an incoming request path and query string for the gateway.
197    ///
198    /// Translates hierarchical paths into internal single-segment bucket paths:
199    ///
200    /// 1. **Path-based**: if the path has enough segments, they are joined into
201    ///    a single bucket name.
202    ///    `/{a}/{b}/{key}` → `/{a:b}/{key}`
203    ///
204    /// 2. **Prefix-based**: if the path has fewer segments than required but the
205    ///    query string contains a `list-type=` param with a non-empty `prefix=`,
206    ///    the first component of the prefix is folded into the bucket name.
207    ///    `/{a}?list-type=2&prefix=b/sub/` → `/{a:b}?list-type=2&prefix=sub/`
208    ///
209    /// 3. **Pass-through**: all other paths are returned unchanged. Route handlers
210    ///    or the gateway itself will handle them.
211    pub fn rewrite_request(&self, path: &str, query: Option<&str>) -> (String, Option<String>) {
212        // Case 1: enough path segments to map directly
213        if let Some(mapped) = self.parse(path) {
214            let rewritten_path = match mapped.key {
215                Some(ref key) => format!("/{}/{}", mapped.bucket, key),
216                None => format!("/{}", mapped.bucket),
217            };
218            return (rewritten_path, query.map(|q| q.to_string()));
219        }
220
221        // Case 2: single-segment path with a list-type query and non-empty prefix
222        let trimmed = path.trim_matches('/');
223        if !trimmed.is_empty() && !trimmed.contains('/') {
224            let query_str = query.unwrap_or("");
225            if is_list_request(query_str) {
226                if let Some(prefix) = extract_query_param(query_str, "prefix") {
227                    if !prefix.is_empty() {
228                        return self.rewrite_prefix_to_bucket(trimmed, &prefix, query_str);
229                    }
230                }
231            }
232        }
233
234        // Case 3: pass through unchanged
235        (path.to_string(), query.map(|q| q.to_string()))
236    }
237
238    /// Fold the first prefix component into the bucket name.
239    ///
240    /// `/{account}?prefix=product/sub/` → `/{account:product}?prefix=sub/`
241    fn rewrite_prefix_to_bucket(
242        &self,
243        account: &str,
244        prefix: &str,
245        query_str: &str,
246    ) -> (String, Option<String>) {
247        let (product, remaining_prefix) = if let Some(slash_pos) = prefix.find('/') {
248            (&prefix[..slash_pos], &prefix[slash_pos + 1..])
249        } else {
250            (prefix, "")
251        };
252
253        let bucket = format!("{}{}{}", account, self.bucket_separator, product);
254        let new_query = rewrite_prefix_in_query(query_str, remaining_prefix);
255        (format!("/{}", bucket), Some(new_query))
256    }
257}
258
259// ── Query-string helpers (private) ──────────────────────────────────
260
/// Report whether any `&`-separated parameter of `query` begins with
/// `list-type=`.
fn is_list_request(query: &str) -> bool {
    for param in query.split('&') {
        if param.starts_with("list-type=") {
            return true;
        }
    }
    false
}
265
266/// Extract and percent-decode a single query parameter value.
267fn extract_query_param(query: &str, key: &str) -> Option<String> {
268    query.split('&').find_map(|pair| {
269        pair.split_once('=')
270            .filter(|(k, _)| *k == key)
271            .map(|(_, v)| {
272                percent_encoding::percent_decode_str(v)
273                    .decode_utf8_lossy()
274                    .into_owned()
275            })
276    })
277}
278
279/// Characters that must be percent-encoded when placed in a query parameter value.
280const QUERY_VALUE_ENCODE: &percent_encoding::AsciiSet = &percent_encoding::CONTROLS
281    .add(b' ')
282    .add(b'#')
283    .add(b'&')
284    .add(b'=')
285    .add(b'+');
286
287/// Replace the `prefix=` value in a query string, percent-encoding the new value.
288fn rewrite_prefix_in_query(query: &str, new_prefix: &str) -> String {
289    let encoded: String =
290        percent_encoding::utf8_percent_encode(new_prefix, QUERY_VALUE_ENCODE).to_string();
291    query
292        .split('&')
293        .map(|pair| {
294            if pair.starts_with("prefix=") {
295                format!("prefix={}", encoded)
296            } else {
297                pair.to_string()
298            }
299        })
300        .collect::<Vec<_>>()
301        .join("&")
302}
303
#[cfg(test)]
mod tests {
    use super::*;

    /// Queries carrying a `list-type=` parameter are recognised; others are not.
    #[test]
    fn is_list_request_detects_list_type() {
        for query in ["list-type=2", "foo=bar&list-type=2&baz=qux"] {
            assert!(is_list_request(query));
        }
        for query in ["foo=bar", ""] {
            assert!(!is_list_request(query));
        }
    }

    /// A parameter whose name merely ends in `list-type` must not match.
    #[test]
    fn is_list_request_rejects_substring_match() {
        for query in ["not-list-type=2", "foo=bar&not-list-type=2"] {
            assert!(!is_list_request(query));
        }
    }

    /// The value of a present parameter is returned.
    #[test]
    fn extract_query_param_finds_value() {
        let found = extract_query_param("list-type=2&prefix=foo/", "prefix");
        assert_eq!(found, Some("foo/".to_string()));
    }

    /// An absent parameter yields `None`.
    #[test]
    fn extract_query_param_missing() {
        let found = extract_query_param("list-type=2", "prefix");
        assert_eq!(found, None);
    }

    /// Percent escapes in the value are decoded.
    #[test]
    fn extract_query_param_decodes_percent() {
        let found = extract_query_param("prefix=hello%20world", "prefix");
        assert_eq!(found, Some("hello world".to_string()));
    }

    /// The `prefix=` value is swapped while other parameters are kept.
    #[test]
    fn rewrite_prefix_replaces_value() {
        let rewritten = rewrite_prefix_in_query("list-type=2&prefix=old/", "new/");
        assert_eq!(rewritten, "list-type=2&prefix=new/");
    }

    /// An empty replacement leaves a bare `prefix=` in place.
    #[test]
    fn rewrite_prefix_to_empty() {
        let rewritten = rewrite_prefix_in_query("prefix=old/&max-keys=100", "");
        assert_eq!(rewritten, "prefix=&max-keys=100");
    }

    /// Characters in the encode set (here a space) are percent-encoded.
    #[test]
    fn rewrite_prefix_encodes_special_chars() {
        let rewritten = rewrite_prefix_in_query("list-type=2&prefix=old/", "sub dir/");
        assert_eq!(rewritten, "list-type=2&prefix=sub%20dir/");
    }
}
367
368// ── MappedRegistry ──────────────────────────────────────────────────
369
370/// Wraps a [`BucketRegistry`] to add path-based routing.
371///
372/// When `get_bucket` is called, the bucket name is parsed via
373/// [`PathMapping::parse_bucket_name`] and the resulting [`ListRewrite`]
374/// and `display_name` are applied to the resolved bucket. This allows the
375/// gateway to present hierarchical names in S3 XML responses while storing
376/// data in flat internal buckets.
377#[derive(Debug, Clone)]
378pub struct MappedRegistry<R> {
379    inner: R,
380    mapping: PathMapping,
381}
382
383impl<R> MappedRegistry<R> {
384    /// Create a new `MappedRegistry` wrapping the given registry with a path mapping.
385    pub fn new(inner: R, mapping: PathMapping) -> Self {
386        Self { inner, mapping }
387    }
388}
389
390impl<R: BucketRegistry> BucketRegistry for MappedRegistry<R> {
391    async fn get_bucket(
392        &self,
393        name: &str,
394        identity: &multistore::types::ResolvedIdentity,
395        operation: &multistore::types::S3Operation,
396    ) -> Result<ResolvedBucket, multistore::error::ProxyError> {
397        let mapped = self.mapping.parse_bucket_name(name);
398
399        let mut resolved = self.inner.get_bucket(name, identity, operation).await?;
400
401        if let Some(mapped) = mapped {
402            tracing::debug!(
403                bucket = %name,
404                display_name = %mapped.display_bucket,
405                key_prefix = %mapped.key_prefix,
406                "Applying path mapping to resolved bucket"
407            );
408
409            resolved.display_name = Some(mapped.display_bucket);
410
411            if !mapped.key_prefix.is_empty() {
412                resolved.list_rewrite = Some(ListRewrite {
413                    strip_prefix: String::new(),
414                    add_prefix: mapped.key_prefix,
415                });
416            }
417        }
418
419        Ok(resolved)
420    }
421
422    async fn list_buckets(
423        &self,
424        identity: &multistore::types::ResolvedIdentity,
425    ) -> Result<Vec<multistore::api::response::BucketEntry>, multistore::error::ProxyError> {
426        self.inner.list_buckets(identity).await
427    }
428
429    fn bucket_owner(&self) -> multistore::types::BucketOwner {
430        self.inner.bucket_owner()
431    }
432}