multistore_path_mapping/lib.rs
1//! Hierarchical path mapping for the multistore S3 proxy gateway.
2//!
3//! S3 uses a flat namespace: each bucket is an independent container resolved
4//! to a single backend. Some applications need a *hierarchical* URL scheme
5//! where multiple path segments determine which backend to use. For example,
6//! a data catalog might expose `/{account}/{product}/{key}` but store each
7//! account/product pair in its own backend bucket.
8//!
9//! This crate bridges those two worlds:
10//!
11//! - **[`PathMapping`]** defines *how many* leading URL segments form the
12//! logical "bucket", what separator joins them into an internal name, and
13//! how many segments appear as the display name in S3 XML responses.
14//!
15//! - **[`PathMapping::rewrite_request`]** rewrites an incoming `(path, query)`
16//! pair so the gateway sees a single-segment bucket. It handles both
17//! path-based routing (`/{a}/{b}/{key}` → `/{a:b}/{key}`) and query-based
18//! prefix routing (`/{a}?prefix=b/sub/` → `/{a:b}?prefix=sub/`).
19//!
20//! - **[`MappedRegistry`]** wraps any [`BucketRegistry`] and automatically
21//! applies display-name and list-rewrite rules so XML responses show the
22//! original hierarchical names to clients.
23//!
24//! # Example
25//!
26//! ```rust
27//! use multistore_path_mapping::PathMapping;
28//!
29//! let mapping = PathMapping {
30//! bucket_segments: 2,
31//! bucket_separator: ":".into(),
32//! display_bucket_segments: 1,
33//! };
34//!
35//! // Path-based: two segments become one internal bucket
36//! let mapped = mapping.parse("/acme/data/report.csv").unwrap();
37//! assert_eq!(mapped.bucket, "acme:data");
38//! assert_eq!(mapped.key, Some("report.csv".to_string()));
39//! assert_eq!(mapped.display_bucket, "acme");
40//!
41//! // Full request rewrite (path + query)
42//! let (path, query) = mapping.rewrite_request(
43//! "/acme/data/report.csv",
44//! None,
45//! );
46//! assert_eq!(path, "/acme:data/report.csv");
47//! assert_eq!(query, None);
48//!
49//! // Prefix-based list rewrite
50//! let (path, query) = mapping.rewrite_request(
51//! "/acme",
52//! Some("list-type=2&prefix=data/subdir/"),
53//! );
54//! assert_eq!(path, "/acme:data");
55//! assert_eq!(query, Some("list-type=2&prefix=subdir/".to_string()));
56//! ```
57
58use multistore::api::list_rewrite::ListRewrite;
59use multistore::registry::{BucketRegistry, ResolvedBucket};
60
/// Configuration describing how leading URL path segments collapse into a
/// single internal bucket name.
#[derive(Clone, Debug)]
pub struct PathMapping {
    /// Count of leading path segments that together identify the bucket.
    /// For a layout such as `/{account}/{product}/...` this is `2`.
    pub bucket_segments: usize,

    /// String placed between the bucket segments when they are joined into
    /// the internal bucket name; `":"` yields `account:product`.
    pub bucket_separator: String,

    /// Count of leading bucket segments shown as the bucket name in XML
    /// responses; `1` makes `<Name>` contain only `account`. Expected to be
    /// no greater than `bucket_segments`.
    pub display_bucket_segments: usize,
}
76
/// Outcome of mapping a request path (or an internal bucket name) through a
/// [`PathMapping`].
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct MappedPath {
    /// Bucket name used internally, e.g. `"account:product"`.
    pub bucket: String,
    /// Object key left over once the bucket segments are removed,
    /// e.g. `"file.parquet"`; `None` when nothing follows the bucket.
    pub key: Option<String>,
    /// Bucket name presented in XML responses, e.g. `"account"`.
    pub display_bucket: String,
    /// Prefix prepended to keys in XML responses, e.g. `"product/"`.
    pub key_prefix: String,
    /// The path segments that were combined to form the bucket, in order.
    pub segments: Vec<String>,
}
91
92impl PathMapping {
93 /// Parse a URL path into a [`MappedPath`].
94 ///
95 /// The path is expected to start with `/`. Segments are split on `/`,
96 /// and the first `bucket_segments` segments form the internal bucket name.
97 /// Any remaining content becomes the key.
98 ///
99 /// Returns `None` if there are fewer than `bucket_segments` non-empty segments.
100 pub fn parse(&self, path: &str) -> Option<MappedPath> {
101 let trimmed = path.strip_prefix('/').unwrap_or(path);
102 if trimmed.is_empty() {
103 return None;
104 }
105
106 // Split into at most bucket_segments + 1 parts so the key portion
107 // preserves any internal `/` characters.
108 let parts: Vec<&str> = trimmed.splitn(self.bucket_segments + 1, '/').collect();
109
110 if parts.len() < self.bucket_segments {
111 return None;
112 }
113
114 // Verify none of the bucket segments are empty.
115 for part in &parts[..self.bucket_segments] {
116 if part.is_empty() {
117 return None;
118 }
119 }
120
121 let segments: Vec<String> = parts[..self.bucket_segments]
122 .iter()
123 .map(|s| s.to_string())
124 .collect();
125
126 let bucket = segments.join(&self.bucket_separator);
127
128 let key = if parts.len() > self.bucket_segments {
129 let k = parts[self.bucket_segments];
130 if k.is_empty() {
131 None
132 } else {
133 Some(k.to_string())
134 }
135 } else {
136 None
137 };
138
139 let display_bucket = segments[..self.display_bucket_segments].join("/");
140
141 let key_prefix = if self.display_bucket_segments < self.bucket_segments {
142 let prefix_parts = &segments[self.display_bucket_segments..self.bucket_segments];
143 format!("{}/", prefix_parts.join("/"))
144 } else {
145 String::new()
146 };
147
148 Some(MappedPath {
149 bucket,
150 key,
151 display_bucket,
152 key_prefix,
153 segments,
154 })
155 }
156
157 /// Parse a bucket name (e.g., "account:product") back into a [`MappedPath`].
158 ///
159 /// Used by [`MappedRegistry`] when it receives an already-mapped bucket name.
160 /// Returns `None` if the bucket name does not split into exactly `bucket_segments` parts.
161 pub fn parse_bucket_name(&self, bucket_name: &str) -> Option<MappedPath> {
162 let segments: Vec<String> = bucket_name
163 .split(&self.bucket_separator)
164 .map(|s| s.to_string())
165 .collect();
166
167 if segments.len() != self.bucket_segments {
168 return None;
169 }
170
171 // Verify none of the segments are empty.
172 for seg in &segments {
173 if seg.is_empty() {
174 return None;
175 }
176 }
177
178 let display_bucket = segments[..self.display_bucket_segments].join("/");
179
180 let key_prefix = if self.display_bucket_segments < self.bucket_segments {
181 let prefix_parts = &segments[self.display_bucket_segments..self.bucket_segments];
182 format!("{}/", prefix_parts.join("/"))
183 } else {
184 String::new()
185 };
186
187 Some(MappedPath {
188 bucket: bucket_name.to_string(),
189 key: None,
190 display_bucket,
191 key_prefix,
192 segments,
193 })
194 }
195
196 /// Rewrite an incoming request path and query string for the gateway.
197 ///
198 /// Translates hierarchical paths into internal single-segment bucket paths:
199 ///
200 /// 1. **Path-based**: if the path has enough segments, they are joined into
201 /// a single bucket name.
202 /// `/{a}/{b}/{key}` → `/{a:b}/{key}`
203 ///
204 /// 2. **Prefix-based**: if the path has fewer segments than required but the
205 /// query string contains a `list-type=` param with a non-empty `prefix=`,
206 /// the first component of the prefix is folded into the bucket name.
207 /// `/{a}?list-type=2&prefix=b/sub/` → `/{a:b}?list-type=2&prefix=sub/`
208 ///
209 /// 3. **Pass-through**: all other paths are returned unchanged. Route handlers
210 /// or the gateway itself will handle them.
211 pub fn rewrite_request(&self, path: &str, query: Option<&str>) -> (String, Option<String>) {
212 // Case 1: enough path segments to map directly
213 if let Some(mapped) = self.parse(path) {
214 let rewritten_path = match mapped.key {
215 Some(ref key) => format!("/{}/{}", mapped.bucket, key),
216 None => format!("/{}", mapped.bucket),
217 };
218 return (rewritten_path, query.map(|q| q.to_string()));
219 }
220
221 // Case 2: single-segment path with a list-type query and non-empty prefix
222 let trimmed = path.trim_matches('/');
223 if !trimmed.is_empty() && !trimmed.contains('/') {
224 let query_str = query.unwrap_or("");
225 if is_list_request(query_str) {
226 if let Some(prefix) = extract_query_param(query_str, "prefix") {
227 if !prefix.is_empty() {
228 return self.rewrite_prefix_to_bucket(trimmed, &prefix, query_str);
229 }
230 }
231 }
232 }
233
234 // Case 3: pass through unchanged
235 (path.to_string(), query.map(|q| q.to_string()))
236 }
237
238 /// Fold the first prefix component into the bucket name.
239 ///
240 /// `/{account}?prefix=product/sub/` → `/{account:product}?prefix=sub/`
241 fn rewrite_prefix_to_bucket(
242 &self,
243 account: &str,
244 prefix: &str,
245 query_str: &str,
246 ) -> (String, Option<String>) {
247 let (product, remaining_prefix) = if let Some(slash_pos) = prefix.find('/') {
248 (&prefix[..slash_pos], &prefix[slash_pos + 1..])
249 } else {
250 (prefix, "")
251 };
252
253 let bucket = format!("{}{}{}", account, self.bucket_separator, product);
254 let new_query = rewrite_prefix_in_query(query_str, remaining_prefix);
255 (format!("/{}", bucket), Some(new_query))
256 }
257}
258
259// ── Query-string helpers (private) ──────────────────────────────────
260
/// Report whether any `&`-separated parameter in `query` begins with
/// `list-type=`.
///
/// The match is anchored at the start of each parameter, so a name that
/// merely ends in `list-type` (e.g. `not-list-type=2`) does not count.
fn is_list_request(query: &str) -> bool {
    for param in query.split('&') {
        if param.starts_with("list-type=") {
            return true;
        }
    }
    false
}
265
266/// Extract and percent-decode a single query parameter value.
267fn extract_query_param(query: &str, key: &str) -> Option<String> {
268 query.split('&').find_map(|pair| {
269 pair.split_once('=')
270 .filter(|(k, _)| *k == key)
271 .map(|(_, v)| {
272 percent_encoding::percent_decode_str(v)
273 .decode_utf8_lossy()
274 .into_owned()
275 })
276 })
277}
278
279/// Characters that must be percent-encoded when placed in a query parameter value.
280const QUERY_VALUE_ENCODE: &percent_encoding::AsciiSet = &percent_encoding::CONTROLS
281 .add(b' ')
282 .add(b'#')
283 .add(b'&')
284 .add(b'=')
285 .add(b'+');
286
287/// Replace the `prefix=` value in a query string, percent-encoding the new value.
288fn rewrite_prefix_in_query(query: &str, new_prefix: &str) -> String {
289 let encoded: String =
290 percent_encoding::utf8_percent_encode(new_prefix, QUERY_VALUE_ENCODE).to_string();
291 query
292 .split('&')
293 .map(|pair| {
294 if pair.starts_with("prefix=") {
295 format!("prefix={}", encoded)
296 } else {
297 pair.to_string()
298 }
299 })
300 .collect::<Vec<_>>()
301 .join("&")
302}
303
#[cfg(test)]
mod tests {
    //! Unit tests for the private query-string helpers.

    use super::*;

    #[test]
    fn is_list_request_detects_list_type() {
        assert!(is_list_request("list-type=2"));
        assert!(is_list_request("foo=bar&list-type=2&baz=qux"));
        assert!(!is_list_request("foo=bar"));
        assert!(!is_list_request(""));
    }

    #[test]
    fn is_list_request_rejects_substring_match() {
        // A parameter whose name merely ends in `list-type` must not match,
        // whether it is the first parameter or follows an `&`.
        assert!(!is_list_request("not-list-type=2"));
        assert!(!is_list_request("foo=bar&not-list-type=2"));
    }

    #[test]
    fn extract_query_param_finds_value() {
        assert_eq!(
            extract_query_param("list-type=2&prefix=foo/", "prefix"),
            Some("foo/".to_string())
        );
    }

    #[test]
    fn extract_query_param_missing() {
        assert_eq!(extract_query_param("list-type=2", "prefix"), None);
    }

    #[test]
    fn extract_query_param_decodes_percent() {
        assert_eq!(
            extract_query_param("prefix=hello%20world", "prefix"),
            Some("hello world".to_string())
        );
    }

    #[test]
    fn rewrite_prefix_replaces_value() {
        assert_eq!(
            rewrite_prefix_in_query("list-type=2&prefix=old/", "new/"),
            "list-type=2&prefix=new/"
        );
    }

    #[test]
    fn rewrite_prefix_to_empty() {
        assert_eq!(
            rewrite_prefix_in_query("prefix=old/&max-keys=100", ""),
            "prefix=&max-keys=100"
        );
    }

    #[test]
    fn rewrite_prefix_encodes_special_chars() {
        assert_eq!(
            rewrite_prefix_in_query("list-type=2&prefix=old/", "sub dir/"),
            "list-type=2&prefix=sub%20dir/"
        );
    }
}
367
368// ── MappedRegistry ──────────────────────────────────────────────────
369
370/// Wraps a [`BucketRegistry`] to add path-based routing.
371///
372/// When `get_bucket` is called, the bucket name is parsed via
373/// [`PathMapping::parse_bucket_name`] and the resulting [`ListRewrite`]
374/// and `display_name` are applied to the resolved bucket. This allows the
375/// gateway to present hierarchical names in S3 XML responses while storing
376/// data in flat internal buckets.
377#[derive(Debug, Clone)]
378pub struct MappedRegistry<R> {
379 inner: R,
380 mapping: PathMapping,
381}
382
383impl<R> MappedRegistry<R> {
384 /// Create a new `MappedRegistry` wrapping the given registry with a path mapping.
385 pub fn new(inner: R, mapping: PathMapping) -> Self {
386 Self { inner, mapping }
387 }
388}
389
390impl<R: BucketRegistry> BucketRegistry for MappedRegistry<R> {
391 async fn get_bucket(
392 &self,
393 name: &str,
394 identity: &multistore::types::ResolvedIdentity,
395 operation: &multistore::types::S3Operation,
396 ) -> Result<ResolvedBucket, multistore::error::ProxyError> {
397 let mapped = self.mapping.parse_bucket_name(name);
398
399 let mut resolved = self.inner.get_bucket(name, identity, operation).await?;
400
401 if let Some(mapped) = mapped {
402 tracing::debug!(
403 bucket = %name,
404 display_name = %mapped.display_bucket,
405 key_prefix = %mapped.key_prefix,
406 "Applying path mapping to resolved bucket"
407 );
408
409 resolved.display_name = Some(mapped.display_bucket);
410
411 if !mapped.key_prefix.is_empty() {
412 resolved.list_rewrite = Some(ListRewrite {
413 strip_prefix: String::new(),
414 add_prefix: mapped.key_prefix,
415 });
416 }
417 }
418
419 Ok(resolved)
420 }
421
422 async fn list_buckets(
423 &self,
424 identity: &multistore::types::ResolvedIdentity,
425 ) -> Result<Vec<multistore::api::response::BucketEntry>, multistore::error::ProxyError> {
426 self.inner.list_buckets(identity).await
427 }
428
429 fn bucket_owner(&self) -> multistore::types::BucketOwner {
430 self.inner.bucket_owner()
431 }
432}