Skip to main content

openjd_expr/functions/
path_parse.rs

1// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2// Copyright by contributors to this project.
3// SPDX-License-Identifier: (Apache-2.0 OR MIT)
4
5//! Format-aware path parsing — replaces `std::path::Path` for cross-platform correctness.
6//!
7//! `std::path::Path` uses the host OS's path rules, which gives wrong results when
8//! parsing POSIX paths on Windows or vice versa. These functions use the path's
9//! `PathFormat` to determine separator handling.
10
11use crate::path_mapping::PathFormat;
12
13/// Check if a character is a path separator for the given format.
14fn is_sep(c: char, fmt: PathFormat) -> bool {
15    match fmt {
16        PathFormat::Windows => c == '\\' || c == '/',
17        PathFormat::Posix | PathFormat::Uri => c == '/',
18    }
19}
20
21/// Get the canonical separator for the given format.
22pub fn sep(fmt: PathFormat) -> char {
23    match fmt {
24        PathFormat::Windows => '\\',
25        _ => '/',
26    }
27}
28
29/// Strip trailing separators, but not if they are part of the root/anchor.
30/// Returns the normalized path and the anchor length (bytes that must not be stripped).
31fn normalize(path: &str, fmt: PathFormat) -> &str {
32    if path.is_empty() || path == "." {
33        return path;
34    }
35    let anchor_len = anchor_len(path, fmt);
36    let mut end = path.len();
37    while end > anchor_len && is_sep(path.as_bytes()[end - 1] as char, fmt) {
38        end -= 1;
39    }
40    &path[..end]
41}
42
43/// Canonicalize a filesystem path the same way Python's `pathlib`
44/// does on construction.
45///
46/// Mirrors `pathlib.PurePath`'s `_format_parsed_parts` /
47/// `_load_parts` machinery for filesystem inputs (POSIX and
48/// Windows). The transformation is intentionally distinct from
49/// `normalize` (the private trailing-separator stripper above) —
50/// `pathlib_normalize` is what the public `path()` constructor
51/// runs on its argument so that downstream operations see a
52/// canonical form, exactly as pathlib does.
53///
54/// Rules (POSIX):
55/// - empty input → `"."`
56/// - exactly two leading `/` → preserved (POSIX double-slash root)
57/// - 1 or 3+ leading `/` → collapsed to a single `/`
58/// - drop interior `.` segments (a/./b → a/b)
59/// - drop empty segments from runs of separators (a//b → a/b)
60/// - strip trailing separators (a/b/ → a/b)
61/// - `..` segments are kept verbatim (a/../b is NOT simplified;
62///   pathlib does not resolve `..` because it cannot do so safely
63///   without filesystem access)
64/// - dot path `.` and empty input both render as `"."`
65///
66/// Rules (Windows):
67/// - same dot/empty rules as POSIX
68/// - all `/` are converted to `\`
69/// - drive-relative anchors `C:` (no separator) are preserved as
70///   `C:`, drive-absolute anchors `C:\` keep their trailing `\`
71/// - UNC anchors `\\server\share` always end with `\`
72/// - a single leading `\` (rooted-no-drive) is preserved
73///
74/// URIs are NOT handled here — the caller routes URI inputs
75/// (`uri_path::is_uri`) to the URI codepath which preserves
76/// every byte verbatim. URIs are opaque per the OpenJD spec
77/// (see specifications wiki §1.2.1 path types).
78pub fn pathlib_normalize(path: &str, fmt: PathFormat) -> String {
79    if path.is_empty() {
80        return ".".to_string();
81    }
82    if fmt == PathFormat::Uri {
83        // URIs are opaque — never normalize. The caller is
84        // expected to keep URI inputs out of this function, but
85        // guard defensively in case PathFormat::Uri sneaks in.
86        return path.to_string();
87    }
88
89    // On Windows, normalize forward slashes to backslashes
90    // before processing. Pathlib treats `/` and `\` as
91    // equivalent separators on Windows but emits `\`.
92    let buf;
93    let working: &str = if fmt == PathFormat::Windows {
94        buf = path.replace('/', "\\");
95        &buf
96    } else {
97        path
98    };
99
100    // Compute the anchor and the remainder.
101    let anchor = anchor_len(working, fmt).min(working.len());
102    let mut anchor_str = working[..anchor].to_string();
103
104    // Normalize the anchor:
105    //
106    // POSIX:
107    //   - collapse 3+ leading `/` to a single `/` (anchor was
108    //     already detected as the leading `/`-run; adjust).
109    //   - exactly 2 leading slashes → preserve as `//`.
110    //
111    // Windows:
112    //   - UNC anchors must end with `\` (already enforced by
113    //     `anchor_len`, but a UNC-without-separator-after-share
114    //     like `\\srv\share` returns anchor_len equal to the
115    //     full string; we add the trailing `\` here for parity
116    //     with pathlib which always renders UNC anchors with a
117    //     trailing separator).
118    //   - drive anchors `C:` and `C:\` unchanged.
119    if fmt != PathFormat::Windows {
120        // POSIX: collapse the leading slash run.
121        let leading_slashes = working
122            .as_bytes()
123            .iter()
124            .take_while(|&&b| b == b'/')
125            .count();
126        if leading_slashes >= 3 {
127            anchor_str = "/".to_string();
128        } else if leading_slashes == 2 {
129            anchor_str = "//".to_string();
130        } else if leading_slashes == 1 {
131            anchor_str = "/".to_string();
132        } else {
133            anchor_str = String::new();
134        }
135    } else {
136        // Windows: ensure UNC anchors end with `\`.
137        if anchor_str.starts_with("\\\\")
138            && anchor_str.matches('\\').count() >= 3
139            && !anchor_str.ends_with('\\')
140        {
141            anchor_str.push('\\');
142        }
143    }
144
145    // Re-derive the leading-anchor offset in `working` so we know
146    // where the remainder starts. For POSIX, the anchor always
147    // covers exactly the leading slash run.
148    let remainder_start = if fmt == PathFormat::Windows {
149        anchor
150    } else {
151        working
152            .as_bytes()
153            .iter()
154            .take_while(|&&b| b == b'/')
155            .count()
156    };
157    let remainder = &working[remainder_start..];
158
159    // Split, drop empty and `.` segments. `..` is kept.
160    let segments: Vec<&str> = remainder
161        .split(|c: char| is_sep(c, fmt))
162        .filter(|s| !s.is_empty() && *s != ".")
163        .collect();
164
165    if anchor_str.is_empty() && segments.is_empty() {
166        return ".".to_string();
167    }
168
169    let sep = match fmt {
170        PathFormat::Windows => "\\",
171        PathFormat::Posix | PathFormat::Uri => "/",
172    };
173    if segments.is_empty() {
174        return anchor_str;
175    }
176    if anchor_str.is_empty() {
177        return segments.join(sep);
178    }
179    // If anchor already ends with the separator (filesystem
180    // root, drive-absolute, UNC), join directly. Otherwise
181    // (drive-relative `C:`), join directly too — pathlib does
182    // not insert an extra separator after `C:`.
183    let anchor_terminated = match fmt {
184        PathFormat::Windows => {
185            anchor_str.ends_with('\\') || anchor_str.ends_with('/') || {
186                // Drive-relative anchor like `C:` (no separator
187                // after the colon). Pathlib joins these without
188                // inserting a separator: `C:` + `foo` → `C:foo`.
189                let b = anchor_str.as_bytes();
190                b.len() == 2 && b[0].is_ascii_alphabetic() && b[1] == b':'
191            }
192        }
193        PathFormat::Posix | PathFormat::Uri => anchor_str.ends_with('/'),
194    };
195    if anchor_terminated {
196        format!("{anchor_str}{}", segments.join(sep))
197    } else {
198        format!("{anchor_str}{sep}{}", segments.join(sep))
199    }
200}
201
202/// Return the byte length of the anchor (root/drive/UNC prefix) in a path.
203fn anchor_len(path: &str, fmt: PathFormat) -> usize {
204    let bytes = path.as_bytes();
205    match fmt {
206        PathFormat::Windows => {
207            if bytes.len() >= 2 && bytes[0].is_ascii_alphabetic() && bytes[1] == b':' {
208                // C:\ or C:
209                if bytes.len() > 2 && is_sep(bytes[2] as char, fmt) {
210                    3
211                } else {
212                    2
213                }
214            } else if bytes.len() >= 2
215                && is_sep(bytes[0] as char, fmt)
216                && is_sep(bytes[1] as char, fmt)
217            {
218                // UNC: \\server\share\ is the full anchor
219                let rest = &path[2..];
220                let server_end = rest.find(|c: char| is_sep(c, fmt)).unwrap_or(rest.len());
221                let after_server = 2 + server_end;
222                if after_server < path.len() {
223                    // Have share part
224                    let share_rest = &path[after_server + 1..];
225                    let share_end = share_rest
226                        .find(|c: char| is_sep(c, fmt))
227                        .unwrap_or(share_rest.len());
228                    let end = after_server + 1 + share_end;
229                    // anchor = \\server\share\ — always include trailing sep conceptually
230                    // If there's a sep after share, include it; otherwise anchor extends to end
231                    // (the trailing sep will be added by parts/parent as needed)
232                    if end < path.len() {
233                        end + 1
234                    } else {
235                        end
236                    }
237                } else {
238                    // Just \\server, no share
239                    after_server
240                }
241            } else if !bytes.is_empty() && is_sep(bytes[0] as char, fmt) {
242                1
243            } else {
244                0
245            }
246        }
247        PathFormat::Posix | PathFormat::Uri => {
248            if bytes.len() >= 2
249                && bytes[0] == b'/'
250                && bytes[1] == b'/'
251                && (bytes.len() < 3 || bytes[2] != b'/')
252            {
253                2 // POSIX // is special
254            } else if !bytes.is_empty() && bytes[0] == b'/' {
255                1
256            } else {
257                0
258            }
259        }
260    }
261}
262
263/// Split a path into (parent, file_name), matching pathlib behavior.
264pub fn split(path: &str, fmt: PathFormat) -> (&str, &str) {
265    let path = normalize(path, fmt);
266    if path == "." {
267        return (".", "");
268    }
269    let anchor = anchor_len(path, fmt);
270    // For Windows UNC, the anchor includes the trailing sep; clamp to path length
271    let anchor = anchor.min(path.len());
272
273    // If path is entirely the anchor, name is empty
274    if path.len() <= anchor {
275        return (path, "");
276    }
277
278    let last_sep = path[anchor..].rfind(|c: char| is_sep(c, fmt));
279    match last_sep {
280        Some(i) => {
281            let sep_pos = anchor + i;
282            let parent = &path[..sep_pos];
283            let name = &path[sep_pos + 1..];
284            // If parent would be empty or shorter than anchor, use anchor
285            if parent.len() < anchor {
286                (&path[..anchor], name)
287            } else {
288                (parent, name)
289            }
290        }
291        None => {
292            // No separator after anchor — name is everything after anchor
293            (&path[..anchor], &path[anchor..])
294        }
295    }
296}
297
298/// Get the file name (last component after the last separator).
299pub fn file_name(path: &str, fmt: PathFormat) -> &str {
300    split(path, fmt).1
301}
302
303/// Get the parent (everything before the last separator).
304/// Returns the path itself if there's no parent (like "/" returns "/").
305pub fn parent(path: &str, fmt: PathFormat) -> String {
306    let (p, _name) = split(path, fmt);
307    let base = p;
308    // For Windows UNC with share, ensure trailing backslash
309    if fmt == PathFormat::Windows {
310        let s = base.replace('/', "\\");
311        if s.starts_with("\\\\") && s.matches('\\').count() >= 3 && !s.ends_with('\\') {
312            return format!("{}\\", s);
313        }
314        // Pathlib parent of a relative single-component path is
315        // `.` (not the empty string). E.g.
316        // `PureWindowsPath("foo").parent == "."`. Apply the same
317        // here when the parent is empty AND the original path is
318        // non-empty (we still preserve the empty-parent contract
319        // when the input itself was empty / `.`).
320        if s.is_empty() && !path.is_empty() && path != "." {
321            return ".".to_string();
322        }
323        return s;
324    }
325    if base.is_empty() && !path.is_empty() && path != "." {
326        return ".".to_string();
327    }
328    base.to_string()
329}
330
331/// Get the file stem (file_name without the last extension).
332///
333/// Matches Python pathlib's rule: a suffix exists only when the
334/// rightmost `.` is neither at the start of the name (which marks
335/// a hidden-file name like `.hidden`) nor at the end (which is a
336/// trailing dot that does not delimit a real extension — `foo.`
337/// has stem `foo.` and no suffix).
338pub fn file_stem(path: &str, fmt: PathFormat) -> &str {
339    let name = file_name(path, fmt);
340    match name.rfind('.') {
341        // No dot, dot at start (`.hidden`), or dot at end (`foo.`)
342        // → the whole name is the stem.
343        Some(0) | None => name,
344        Some(i) if i + 1 == name.len() => name,
345        Some(i) => &name[..i],
346    }
347}
348
349/// Get the extension (last ".ext" including the dot).
350///
351/// Matches Python pathlib's rule: see `file_stem` for the
352/// detailed criteria. The extension is empty for names with no
353/// dot, names starting with a dot (`.hidden`), and names ending
354/// with a dot (`foo.`).
355pub fn extension(path: &str, fmt: PathFormat) -> &str {
356    let name = file_name(path, fmt);
357    match name.rfind('.') {
358        Some(0) | None => "",
359        Some(i) if i + 1 == name.len() => "",
360        Some(i) => &name[i..],
361    }
362}
363
364/// Get the extension without the dot (for compatibility with std::path).
365pub fn extension_no_dot(path: &str, fmt: PathFormat) -> &str {
366    let ext = extension(path, fmt);
367    ext.strip_prefix('.').unwrap_or("")
368}
369
370/// Split a path into its components (like Python's PurePath.parts).
371///
372/// For POSIX: "/mnt/renders/scene.exr" → ["/", "mnt", "renders", "scene.exr"]
373/// For Windows: "C:\mnt\file" → ["C:\", "mnt", "file"]
374/// For Windows: "\mnt\file" → ["\", "mnt", "file"]
375pub fn parts(path: &str, fmt: PathFormat) -> Vec<String> {
376    let path = normalize(path, fmt);
377    if path.is_empty() || path == "." {
378        return Vec::new();
379    }
380
381    let mut result = Vec::new();
382    let anchor = anchor_len(path, fmt).min(path.len());
383
384    if anchor > 0 {
385        let mut anchor_str = path[..anchor].to_string();
386        // Normalize separators in anchor for Windows
387        if fmt == PathFormat::Windows {
388            anchor_str = anchor_str.replace('/', "\\");
389            // UNC root (\\server\share) must have trailing backslash
390            if anchor_str.starts_with("\\\\")
391                && anchor_str.matches('\\').count() >= 3
392                && !anchor_str.ends_with('\\')
393            {
394                anchor_str.push('\\');
395            }
396        }
397        result.push(anchor_str);
398    }
399
400    let remaining = &path[anchor..];
401    for part in remaining.split(|c: char| is_sep(c, fmt)) {
402        if !part.is_empty() {
403            result.push(part.to_string());
404        }
405    }
406
407    result
408}
409
410/// Get all suffixes (like Python's PurePath.suffixes).
411/// "file.tar.gz" → [".tar", ".gz"]
412/// Get all suffixes (extensions) of the file name.
413///
414/// Matches Python pathlib's algorithm:
415/// ```text
416///   name = self.name
417///   if name.endswith('.'):
418///       return []
419///   name = name.lstrip('.')
420///   return ['.' + suffix for suffix in name.split('.')[1:]]
421/// ```
422///
423/// Notable corollaries:
424/// - Trailing-dot names have no suffixes (`foo.` → `[]`).
425/// - Names that consist entirely of leading dots (`.`, `..`,
426///   `...`) yield `[]` because `lstrip('.')` produces the empty
427///   string.
428/// - `.tar.gz` (a name that is itself a suffix-looking
429///   filename) yields `['.gz']`, not `['.tar', '.gz']` —
430///   the leading-dot prefix is stripped before splitting.
431/// - `..foo` yields `[]` even though `suffix` returns `.foo`,
432///   because `lstrip('.')` produces `'foo'` which has no inner
433///   dot. This is a pathlib quirk we preserve for parity.
434pub fn suffixes(path: &str, fmt: PathFormat) -> Vec<String> {
435    let name = file_name(path, fmt);
436    if name.ends_with('.') {
437        return Vec::new();
438    }
439    let trimmed = name.trim_start_matches('.');
440    let parts: Vec<&str> = trimmed.split('.').collect();
441    if parts.len() <= 1 {
442        return Vec::new();
443    }
444    parts[1..].iter().map(|s| format!(".{s}")).collect()
445}
446
447/// Join path parts using Python pathlib constructor semantics.
448///
449/// Matches `PurePosixPath(*parts)` or `PureWindowsPath(*parts)` behavior:
450/// - Absolute components reset the accumulator
451/// - Empty strings and `.` segments are removed
452/// - Duplicate separators are collapsed
453/// - `..` is preserved (not resolved)
454pub fn join_pathlib(parts: &[String], fmt: PathFormat) -> String {
455    match fmt {
456        PathFormat::Posix | PathFormat::Uri => join_pathlib_posix(parts),
457        PathFormat::Windows => join_pathlib_windows(parts),
458    }
459}
460
/// Join `parts` like `PurePosixPath(*parts)`.
///
/// - A part that starts with `/` is absolute and replaces everything
///   accumulated so far. Its root is `//` when it starts with exactly two
///   slashes (the special POSIX double-slash root) and `/` otherwise.
/// - Empty parts, empty segments and `.` segments are dropped.
/// - `..` segments are kept verbatim.
/// - When nothing is left the result is `"."`.
fn join_pathlib_posix(parts: &[String]) -> String {
    let mut root = "";
    let mut segments: Vec<&str> = Vec::new();

    for part in parts {
        let rel = part.trim_start_matches('/');
        let leading_slashes = part.len() - rel.len();
        if leading_slashes > 0 {
            // Absolute part: start over from its root.
            root = if leading_slashes == 2 { "//" } else { "/" };
            segments.clear();
        }
        segments.extend(
            rel.split('/')
                .filter(|segment| !segment.is_empty() && *segment != "."),
        );
    }

    if root.is_empty() && segments.is_empty() {
        return ".".to_string();
    }
    format!("{root}{}", segments.join("/"))
}
495
496/// Parse Windows drive from a path string. Returns (drive, rest).
497/// Drive can be "C:" or "\\\\server\\share" (UNC).
498fn win_parse_drive(s: &str) -> (&str, &str) {
499    let bytes = s.as_bytes();
500    if bytes.len() >= 2 && bytes[0].is_ascii_alphabetic() && bytes[1] == b':' {
501        (&s[..2], &s[2..])
502    } else if bytes.len() >= 2
503        && is_sep(bytes[0] as char, PathFormat::Windows)
504        && is_sep(bytes[1] as char, PathFormat::Windows)
505    {
506        // UNC: \\server\share
507        let rest = &s[2..];
508        let server_end = rest
509            .find(|c: char| is_sep(c, PathFormat::Windows))
510            .unwrap_or(rest.len());
511        let after_server = 2 + server_end;
512        if after_server < s.len() {
513            let share_rest = &s[after_server + 1..];
514            let share_end = share_rest
515                .find(|c: char| is_sep(c, PathFormat::Windows))
516                .unwrap_or(share_rest.len());
517            let end = after_server + 1 + share_end;
518            (&s[..end], &s[end..])
519        } else {
520            (s, "")
521        }
522    } else {
523        ("", s)
524    }
525}
526
527fn join_pathlib_windows(parts: &[String]) -> String {
528    // Track accumulated drive, root, and relative parts separately.
529    // For each new part, parse its drive and root, then apply pathlib rules.
530    let mut drive = String::new();
531    let mut root = String::new();
532    let mut segments: Vec<String> = Vec::new();
533
534    for part in parts {
535        let (new_drive, after_drive) = win_parse_drive(part);
536        let has_root = !after_drive.is_empty()
537            && is_sep(after_drive.as_bytes()[0] as char, PathFormat::Windows);
538        let new_root = if has_root { "\\" } else { "" };
539        let rel = if has_root {
540            &after_drive[1..]
541        } else {
542            after_drive
543        };
544
545        if !new_drive.is_empty() {
546            if !new_drive.is_empty() && !drive.is_empty() && !new_drive.eq_ignore_ascii_case(&drive)
547            {
548                // Different drive → replace everything
549                drive = new_drive.to_string();
550                root = new_root.to_string();
551                segments.clear();
552            } else {
553                // Same drive (or first drive)
554                drive = new_drive.to_string();
555                if !new_root.is_empty() {
556                    root = new_root.to_string();
557                    segments.clear();
558                }
559            }
560        } else if !new_root.is_empty() {
561            // Root without drive → keep existing drive, replace from root
562            root = new_root.to_string();
563            segments.clear();
564        }
565        // Append relative components, filtering empty and '.'
566        for c in rel.split(|c: char| is_sep(c, PathFormat::Windows)) {
567            if c == "." || c.is_empty() {
568                continue;
569            }
570            segments.push(c.to_string());
571        }
572    }
573
574    // Reconstruct
575    let mut result = format!("{}{}", drive, root);
576    if !segments.is_empty() {
577        if !result.is_empty() && !result.ends_with('\\') && !result.ends_with(':') {
578            result.push('\\');
579        }
580        result.push_str(&segments.join("\\"));
581    }
582
583    if result.is_empty() {
584        ".".to_string()
585    } else {
586        result
587    }
588}
589
#[cfg(test)]
mod tests {
    //! Unit tests for the path parsing helpers. Expected values follow
    //! Python's `PurePosixPath` / `PureWindowsPath`, as noted per test.

    use super::*;

    const POSIX: PathFormat = PathFormat::Posix;
    const WINDOWS: PathFormat = PathFormat::Windows;

    // ── Existing tests ──

    #[test]
    fn posix_parts_absolute() {
        let got = parts("/mnt/renders/scene.exr", POSIX);
        assert_eq!(got, ["/", "mnt", "renders", "scene.exr"]);
    }

    #[test]
    fn posix_parts_relative() {
        let got = parts("sub/file.exr", POSIX);
        assert_eq!(got, ["sub", "file.exr"]);
    }

    #[test]
    fn posix_parts_root() {
        let got = parts("/", POSIX);
        assert_eq!(got, ["/"]);
    }

    #[test]
    fn windows_parts_drive() {
        let got = parts(r"C:\mnt\file.txt", WINDOWS);
        assert_eq!(got, [r"C:\", "mnt", "file.txt"]);
    }

    #[test]
    fn windows_parts_root_backslash() {
        let got = parts(r"\mnt\data\file.txt", WINDOWS);
        assert_eq!(got, [r"\", "mnt", "data", "file.txt"]);
    }

    #[test]
    fn windows_parts_unc() {
        let got = parts(r"\\server\share\dir", WINDOWS);
        assert_eq!(got, [r"\\server\share\", "dir"]);
    }

    #[test]
    fn posix_file_name() {
        let got = file_name("/mnt/renders/scene.exr", POSIX);
        assert_eq!(got, "scene.exr");
    }

    #[test]
    fn posix_parent() {
        let got = parent("/mnt/renders/scene.exr", POSIX);
        assert_eq!(got, "/mnt/renders");
    }

    #[test]
    fn posix_parent_root() {
        let got = parent("/", POSIX);
        assert_eq!(got, "/");
    }

    #[test]
    fn posix_file_stem() {
        let got = file_stem("/mnt/renders/scene.exr", POSIX);
        assert_eq!(got, "scene");
    }

    #[test]
    fn posix_extension() {
        let got = extension("/mnt/renders/scene.exr", POSIX);
        assert_eq!(got, ".exr");
    }

    #[test]
    fn no_extension() {
        let got = extension("/mnt/renders/Makefile", POSIX);
        assert_eq!(got, "");
    }

    #[test]
    fn posix_suffixes_single() {
        let got = suffixes("scene.exr", POSIX);
        assert_eq!(got, [".exr"]);
    }

    #[test]
    fn posix_suffixes_compound() {
        let got = suffixes("archive.tar.gz", POSIX);
        assert_eq!(got, [".tar", ".gz"]);
    }

    #[test]
    fn posix_suffixes_none() {
        let got = suffixes("Makefile", POSIX);
        assert_eq!(got, Vec::<String>::new());
    }

    #[test]
    fn windows_parent_backslash() {
        let got = parent(r"\mnt\renders\scene.exr", WINDOWS);
        assert_eq!(got, r"\mnt\renders");
    }

    #[test]
    fn windows_file_name_mixed_sep() {
        let got = file_name(r"C:\mnt/renders\scene.exr", WINDOWS);
        assert_eq!(got, "scene.exr");
    }

    // ── POSIX parts: pathlib ground truth ──

    #[test]
    fn posix_parts_single_component() {
        let got = parts("/mnt", POSIX);
        assert_eq!(got, ["/", "mnt"]);
    }

    #[test]
    fn posix_parts_dot() {
        // PurePosixPath('.').parts == ()
        let got = parts(".", POSIX);
        assert_eq!(got, Vec::<String>::new());
    }

    #[test]
    fn posix_parts_dotdot() {
        let got = parts("..", POSIX);
        assert_eq!(got, [".."]);
    }

    #[test]
    fn posix_parts_dotdot_foo() {
        let got = parts("../foo", POSIX);
        assert_eq!(got, ["..", "foo"]);
    }

    #[test]
    fn posix_parts_repeated_separators() {
        // Runs of `/` collapse.
        let got = parts("/mnt//renders///scene.exr", POSIX);
        assert_eq!(got, ["/", "mnt", "renders", "scene.exr"]);
    }

    #[test]
    fn posix_parts_double_slash_root() {
        // pathlib treats `//` as a special root.
        let got = parts("//mnt/file", POSIX);
        assert_eq!(got, ["//", "mnt", "file"]);
    }

    #[test]
    fn posix_parts_trailing_slash() {
        // The trailing slash is stripped.
        let got = parts("/mnt/renders/", POSIX);
        assert_eq!(got, ["/", "mnt", "renders"]);
    }

    #[test]
    fn posix_parts_deep() {
        let got = parts("/a/b/c/d/e", POSIX);
        assert_eq!(got, ["/", "a", "b", "c", "d", "e"]);
    }

    #[test]
    fn posix_parts_bare_file() {
        let got = parts("file.txt", POSIX);
        assert_eq!(got, ["file.txt"]);
    }

    #[test]
    fn posix_parts_empty() {
        let got = parts("", POSIX);
        assert_eq!(got, Vec::<String>::new());
    }

    // ── POSIX properties: pathlib ground truth ──

    #[test]
    fn posix_dot_name() {
        let got = file_name(".", POSIX);
        assert_eq!(got, "");
    }

    #[test]
    fn posix_dot_stem() {
        let got = file_stem(".", POSIX);
        assert_eq!(got, "");
    }

    #[test]
    fn posix_dot_suffix() {
        let got = extension(".", POSIX);
        assert_eq!(got, "");
    }

    #[test]
    fn posix_dot_parent() {
        let got = parent(".", POSIX);
        assert_eq!(got, ".");
    }

    #[test]
    fn posix_trailing_slash_name() {
        // PurePosixPath('/mnt/renders/').name == 'renders'
        let got = file_name("/mnt/renders/", POSIX);
        assert_eq!(got, "renders");
    }

    #[test]
    fn posix_trailing_slash_stem() {
        let got = file_stem("/mnt/renders/", POSIX);
        assert_eq!(got, "renders");
    }

    #[test]
    fn posix_trailing_slash_suffix() {
        let got = extension("/mnt/renders/", POSIX);
        assert_eq!(got, "");
    }

    #[test]
    fn posix_trailing_slash_parent() {
        // PurePosixPath('/mnt/renders/').parent == PurePosixPath('/mnt')
        let got = parent("/mnt/renders/", POSIX);
        assert_eq!(got, "/mnt");
    }

    #[test]
    fn posix_hidden_tar_gz_suffixes() {
        let got = suffixes(".hidden.tar.gz", POSIX);
        assert_eq!(got, [".tar", ".gz"]);
    }

    #[test]
    fn posix_hidden_tar_gz_stem() {
        let got = file_stem(".hidden.tar.gz", POSIX);
        assert_eq!(got, ".hidden.tar");
    }

    #[test]
    fn posix_hidden_tar_gz_suffix() {
        let got = extension(".hidden.tar.gz", POSIX);
        assert_eq!(got, ".gz");
    }

    // ── Windows parts: pathlib ground truth ──

    #[test]
    fn windows_parts_drive_root() {
        // PureWindowsPath('C:\\').parts == ('C:\\',)
        let got = parts(r"C:\", WINDOWS);
        assert_eq!(got, [r"C:\"]);
    }

    #[test]
    fn windows_parts_drive_file() {
        let got = parts(r"C:\mnt\file.txt", WINDOWS);
        assert_eq!(got, [r"C:\", "mnt", "file.txt"]);
    }

    #[test]
    fn windows_parts_forward_slash() {
        // Forward slashes are accepted and become a backslash in the root.
        let got = parts("C:/path/to/file", WINDOWS);
        assert_eq!(got, [r"C:\", "path", "to", "file"]);
    }

    #[test]
    fn windows_parts_repeated_separators() {
        let got = parts("C:/path//to///file", WINDOWS);
        assert_eq!(got, [r"C:\", "path", "to", "file"]);
    }

    #[test]
    fn windows_parts_unc_root() {
        // PureWindowsPath('\\\\server\\share').parts == ('\\\\server\\share\\',)
        let got = parts(r"\\server\share", WINDOWS);
        assert_eq!(got, [r"\\server\share\"]);
    }

    #[test]
    fn windows_parts_unc_dir() {
        let got = parts(r"\\server\share\dir", WINDOWS);
        assert_eq!(got, [r"\\server\share\", "dir"]);
    }

    #[test]
    fn windows_parts_unc_dir_file() {
        let got = parts(r"\\server\share\dir\file.txt", WINDOWS);
        assert_eq!(got, [r"\\server\share\", "dir", "file.txt"]);
    }

    #[test]
    fn windows_parts_root_only() {
        // PureWindowsPath('\\mnt\\data\\file.txt').parts == ('\\', 'mnt', 'data', 'file.txt')
        let got = parts(r"\mnt\data\file.txt", WINDOWS);
        assert_eq!(got, [r"\", "mnt", "data", "file.txt"]);
    }

    #[test]
    fn windows_parts_unc_no_share() {
        // PureWindowsPath('\\\\server').parts == ('\\\\server',)
        // Note: no trailing backslash when there is no share.
        let got = parts(r"\\server", WINDOWS);
        assert_eq!(got, [r"\\server"]);
    }

    #[test]
    fn windows_parts_relative_drive() {
        // PureWindowsPath('C:').parts == ('C:',)
        let got = parts("C:", WINDOWS);
        assert_eq!(got, ["C:"]);
    }

    #[test]
    fn windows_parts_trailing_slash() {
        // PureWindowsPath('C:\\mnt\\').parts == ('C:\\', 'mnt')
        let got = parts(r"C:\mnt\", WINDOWS);
        assert_eq!(got, [r"C:\", "mnt"]);
    }

    #[test]
    fn windows_parts_unc_trailing_slash() {
        // PureWindowsPath('\\\\server\\share\\').parts == ('\\\\server\\share\\',)
        let got = parts(r"\\server\share\", WINDOWS);
        assert_eq!(got, [r"\\server\share\"]);
    }

    // ── Windows properties: pathlib ground truth ──

    #[test]
    fn windows_drive_root_name() {
        // PureWindowsPath('C:\\').name == ''
        let got = file_name(r"C:\", WINDOWS);
        assert_eq!(got, "");
    }

    #[test]
    fn windows_drive_root_parent() {
        // PureWindowsPath('C:\\').parent == PureWindowsPath('C:\\')
        let got = parent(r"C:\", WINDOWS);
        assert_eq!(got, r"C:\");
    }

    #[test]
    fn windows_unc_name() {
        // PureWindowsPath('\\\\server\\share').name == ''
        let got = file_name(r"\\server\share", WINDOWS);
        assert_eq!(got, "");
    }

    #[test]
    fn windows_unc_parent() {
        // PureWindowsPath('\\\\server\\share').parent == PureWindowsPath('\\\\server\\share\\')
        // The parent of a UNC root is itself (with trailing backslash).
        let got = parent(r"\\server\share", WINDOWS);
        assert_eq!(got, r"\\server\share\");
    }

    #[test]
    fn windows_unc_dir_name() {
        // PureWindowsPath('\\\\server\\share\\dir').name == 'dir'
        let got = file_name(r"\\server\share\dir", WINDOWS);
        assert_eq!(got, "dir");
    }

    #[test]
    fn windows_unc_dir_parent() {
        // PureWindowsPath('\\\\server\\share\\dir').parent == PureWindowsPath('\\\\server\\share\\')
        let got = parent(r"\\server\share\dir", WINDOWS);
        assert_eq!(got, r"\\server\share\");
    }

    // ── Forward-slash paths with Windows format ──
    // Forward slashes are valid separators on Windows. These paths must
    // parse identically to their backslash equivalents.

    #[test]
    fn windows_forward_slash_file_name() {
        let got = file_name("/input/scene.exr", WINDOWS);
        assert_eq!(got, "scene.exr");
    }

    #[test]
    fn windows_forward_slash_file_stem() {
        let got = file_stem("/input/scene.exr", WINDOWS);
        assert_eq!(got, "scene");
    }

    #[test]
    fn windows_forward_slash_extension() {
        let got = extension("/input/scene.exr", WINDOWS);
        assert_eq!(got, ".exr");
    }

    #[test]
    fn windows_forward_slash_parent() {
        let got = parent("/input/scene.exr", WINDOWS);
        assert_eq!(got, r"\input");
    }

    #[test]
    fn windows_forward_slash_parts() {
        let got = parts("/input/scene.exr", WINDOWS);
        assert_eq!(got, [r"\", "input", "scene.exr"]);
    }
}