Skip to main content

openjd_expr/
uri_path.rs

1// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2// Copyright by contributors to this project.
3// SPDX-License-Identifier: (Apache-2.0 OR MIT)
4
5//! URI-aware path operations.
6//!
7//! Mirrors Python `openjd.expr._uri_path`. When a path value contains a URI
8//! (scheme://authority/path), these functions handle path operations instead of
9//! `std::path`. The scheme+authority prefix is preserved as an opaque root.
10
/// A URI split into its opaque root and its path segments.
///
/// Produced by [`parse`]. Values compare by content, so two parses of the
/// same text are equal.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct UriParts {
    /// The `scheme://authority` prefix (for example `s3://bucket`), kept
    /// verbatim and without a trailing slash.
    pub authority: String,
    /// Path segments that follow the authority, split on `/`. Empty when the
    /// URI has no path; a trailing slash after a non-empty path shows up as a
    /// final empty segment.
    pub path_parts: Vec<String>,
}
17
/// Return `true` if `path` starts with a well-formed `scheme://` prefix.
///
/// The scheme is the text before the first `://`. It must begin with an
/// ASCII letter and otherwise contain only ASCII letters, digits, `+`, `.`
/// or `-`. This is the same acceptance rule [`parse`] applies, so the result
/// matches `parse(path).is_some()`; it is checked here directly so that a
/// simple yes/no query does not allocate the authority and segment strings.
/// Keep the two rules in sync.
pub fn is_uri(path: &str) -> bool {
    let Some((scheme, _)) = path.split_once("://") else {
        return false;
    };
    let mut chars = scheme.chars();
    // `next()` is `None` for an empty scheme, which is rejected as well.
    chars.next().is_some_and(|c| c.is_ascii_alphabetic())
        && chars.all(|c| c.is_ascii_alphanumeric() || matches!(c, '+' | '.' | '-'))
}
22
23/// Parse a URI into authority + path parts, or `None` if not a URI.
24pub fn parse(path: &str) -> Option<UriParts> {
25    let scheme_end = path.find("://")?;
26    let scheme = &path[..scheme_end];
27    if scheme.is_empty() || !scheme.as_bytes()[0].is_ascii_alphabetic() {
28        return None;
29    }
30    if !scheme
31        .chars()
32        .all(|c| c.is_ascii_alphanumeric() || c == '+' || c == '.' || c == '-')
33    {
34        return None;
35    }
36    let after_scheme = &path[scheme_end + 3..];
37    let (authority_part, path_part) = match after_scheme.find('/') {
38        Some(i) => (&after_scheme[..i], &after_scheme[i + 1..]),
39        None => (after_scheme, ""),
40    };
41    let authority = format!("{}://{}", scheme, authority_part);
42    let path_parts = if path_part.is_empty() {
43        Vec::new()
44    } else {
45        path_part.split('/').map(|s| s.to_string()).collect()
46    };
47    Some(UriParts {
48        authority,
49        path_parts,
50    })
51}
52
53/// Final component of a URI path (empty string if no path segments).
54pub fn name(path: &str) -> String {
55    parse(path)
56        .and_then(|u| u.path_parts.last().cloned())
57        .unwrap_or_default()
58}
59
60/// Parent of a URI path.
61pub fn parent(path: &str) -> String {
62    let Some(uri) = parse(path) else {
63        return path.to_string();
64    };
65    if uri.path_parts.is_empty() {
66        return uri.authority;
67    }
68    let parent_parts = &uri.path_parts[..uri.path_parts.len() - 1];
69    if parent_parts.is_empty() {
70        uri.authority
71    } else {
72        format!("{}/{}", uri.authority, parent_parts.join("/"))
73    }
74}
75
76/// File extension of the final component (including the dot), or empty string.
77///
78/// Matches Python pathlib's rule (see
79/// `crate::functions::path_parse::extension`): the suffix is empty
80/// when the rightmost `.` is at the start of the name (`.hidden`)
81/// or at the end (`foo.`).
82pub fn suffix(path: &str) -> String {
83    let n = name(path);
84    n.rfind('.')
85        .filter(|&i| i > 0 && i + 1 < n.len())
86        .map(|i| n[i..].to_string())
87        .unwrap_or_default()
88}
89
90/// All file extensions of the final component.
91///
92/// Matches Python pathlib's algorithm exactly — see the doc on
93/// `crate::functions::path_parse::suffixes` for the algorithm and
94/// its corollaries (trailing-dot names, leading-dot names, the
95/// `..foo` quirk).
96pub fn suffixes(path: &str) -> Vec<String> {
97    let n = name(path);
98    if n.ends_with('.') {
99        return Vec::new();
100    }
101    let trimmed = n.trim_start_matches('.');
102    let parts: Vec<&str> = trimmed.split('.').collect();
103    if parts.len() <= 1 {
104        return Vec::new();
105    }
106    parts[1..].iter().map(|p| format!(".{p}")).collect()
107}
108
109/// Final component without the last extension.
110///
111/// Same trailing-dot rule as `suffix` — see that function.
112pub fn stem(path: &str) -> String {
113    let n = name(path);
114    n.rfind('.')
115        .filter(|&i| i > 0 && i + 1 < n.len())
116        .map(|i| n[..i].to_string())
117        .unwrap_or(n)
118}
119
120/// Split into parts: first element is `scheme://authority`, rest are path segments.
121pub fn parts(path: &str) -> Vec<String> {
122    let Some(uri) = parse(path) else {
123        return vec![path.to_string()];
124    };
125    let mut result = vec![uri.authority];
126    result.extend(uri.path_parts);
127    result
128}
129
130/// Join a URI path with child segments.
131pub fn join(path: &str, child: &str) -> String {
132    let Some(uri) = parse(path) else {
133        return format!("{path}/{child}");
134    };
135    let mut p = uri.path_parts;
136    // Remove trailing empty part (from trailing slash) before appending
137    if p.last().is_some_and(|s| s.is_empty()) {
138        p.pop();
139    }
140    format!("{}/{}/{child}", uri.authority, p.join("/"))
141}
142
/// Rebuild a URI string from its components.
///
/// `parts[0]` is expected to be the `scheme://authority` root and the
/// remaining entries the path segments; all entries are joined with `/`.
/// An empty slice gives an empty string and a single entry is returned
/// as is.
pub fn from_parts(parts: &[String]) -> String {
    parts.join("/")
}
153
/// Unit tests for the URI path helpers.
///
/// Covers recognition and parsing, each accessor on plain, bare
/// (`scheme://authority` only) and trailing-slash inputs, the pathlib
/// edge cases for extensions, `join`, and the non-URI fallbacks.
#[cfg(test)]
mod tests {
    use super::*;

    // --- is_uri ---------------------------------------------------------

    #[test]
    fn not_uri() {
        assert!(!is_uri("/local/path"));
    }
    #[test]
    fn not_uri_windows() {
        assert!(!is_uri("C:\\path"));
    }
    #[test]
    fn not_uri_bad_scheme() {
        // Scheme must be non-empty and start with an ASCII letter.
        assert!(!is_uri("://host"));
        assert!(!is_uri("1abc://host"));
        assert!(!is_uri("/dir/a://b"));
    }
    #[test]
    fn s3_is_uri() {
        assert!(is_uri("s3://bucket/key"));
    }
    #[test]
    fn https_is_uri() {
        assert!(is_uri("https://host/path"));
    }

    // --- parse ----------------------------------------------------------

    #[test]
    fn parse_s3() {
        let u = parse("s3://bucket/dir/file.txt").unwrap();
        assert_eq!(u.authority, "s3://bucket");
        assert_eq!(u.path_parts, vec!["dir", "file.txt"]);
    }
    #[test]
    fn parse_bare() {
        let u = parse("s3://bucket").unwrap();
        assert_eq!(u.authority, "s3://bucket");
        assert!(u.path_parts.is_empty());
    }
    #[test]
    fn parse_trailing_slash() {
        // A trailing slash is kept as a final empty segment.
        let u = parse("s3://bucket/dir/").unwrap();
        assert_eq!(u.authority, "s3://bucket");
        assert_eq!(u.path_parts, vec!["dir", ""]);
    }

    // --- name -----------------------------------------------------------

    #[test]
    fn name_basic() {
        assert_eq!(name("s3://bucket/dir/file.txt"), "file.txt");
    }
    #[test]
    fn name_bare() {
        assert_eq!(name("s3://bucket"), "");
    }
    #[test]
    fn name_trailing_slash() {
        assert_eq!(name("s3://bucket/dir/"), "");
    }
    #[test]
    fn name_non_uri() {
        assert_eq!(name("/local/file.txt"), "");
    }

    // --- parent ---------------------------------------------------------

    #[test]
    fn parent_basic() {
        assert_eq!(parent("s3://bucket/dir/file.txt"), "s3://bucket/dir");
    }
    #[test]
    fn parent_single() {
        assert_eq!(parent("s3://bucket/file.txt"), "s3://bucket");
    }
    #[test]
    fn parent_bare() {
        assert_eq!(parent("s3://bucket"), "s3://bucket");
    }
    #[test]
    fn parent_non_uri() {
        assert_eq!(parent("/local/path"), "/local/path");
    }

    // --- suffix / suffixes / stem -----------------------------------------

    #[test]
    fn suffix_basic() {
        assert_eq!(suffix("s3://bucket/file.tar.gz"), ".gz");
    }
    #[test]
    fn suffix_none() {
        assert_eq!(suffix("s3://bucket/file"), "");
    }
    #[test]
    fn suffix_hidden() {
        assert_eq!(suffix("s3://bucket/.hidden"), "");
    }
    #[test]
    fn suffix_trailing_dot() {
        assert_eq!(suffix("s3://bucket/foo."), "");
    }

    #[test]
    fn suffixes_compound() {
        assert_eq!(suffixes("s3://bucket/file.tar.gz"), vec![".tar", ".gz"]);
    }
    #[test]
    fn suffixes_none() {
        assert_eq!(suffixes("s3://bucket/file"), Vec::<String>::new());
    }
    #[test]
    fn suffixes_leading_dots() {
        assert_eq!(suffixes("s3://bucket/..foo"), Vec::<String>::new());
        assert_eq!(
            suffixes("s3://bucket/.hidden.tar.gz"),
            vec![".tar", ".gz"]
        );
    }
    #[test]
    fn suffixes_trailing_dot() {
        assert_eq!(suffixes("s3://bucket/foo."), Vec::<String>::new());
    }

    #[test]
    fn stem_basic() {
        assert_eq!(stem("s3://bucket/file.tar.gz"), "file.tar");
    }
    #[test]
    fn stem_no_ext() {
        assert_eq!(stem("s3://bucket/file"), "file");
    }
    #[test]
    fn stem_hidden() {
        assert_eq!(stem("s3://bucket/.hidden"), ".hidden");
    }
    #[test]
    fn stem_trailing_dot() {
        assert_eq!(stem("s3://bucket/foo."), "foo.");
    }

    // --- parts / from_parts -----------------------------------------------

    #[test]
    fn parts_basic() {
        assert_eq!(
            parts("s3://bucket/dir/file"),
            vec!["s3://bucket", "dir", "file"]
        );
    }
    #[test]
    fn parts_bare() {
        assert_eq!(parts("s3://bucket"), vec!["s3://bucket"]);
    }
    #[test]
    fn parts_non_uri() {
        assert_eq!(parts("/local/path"), vec!["/local/path"]);
    }

    #[test]
    fn from_parts_basic() {
        assert_eq!(
            from_parts(&["s3://bucket".into(), "dir".into(), "file".into()]),
            "s3://bucket/dir/file"
        );
    }
    #[test]
    fn from_parts_bare() {
        assert_eq!(from_parts(&["s3://bucket".into()]), "s3://bucket");
    }
    #[test]
    fn from_parts_empty() {
        assert_eq!(from_parts(&[]), "");
    }

    // --- join -------------------------------------------------------------

    #[test]
    fn join_basic() {
        assert_eq!(join("s3://bucket/dir", "file"), "s3://bucket/dir/file");
    }
    #[test]
    fn join_trailing_slash() {
        assert_eq!(join("s3://bucket/dir/", "file"), "s3://bucket/dir/file");
    }
    #[test]
    fn join_non_uri() {
        assert_eq!(join("/local/dir", "file"), "/local/dir/file");
    }
}