gix_path/
convert.rs

1use std::{
2    borrow::Cow,
3    ffi::{OsStr, OsString},
4    path::{Component, Path, PathBuf},
5};
6
7use bstr::{BStr, BString};
8
/// The error returned by [`try_into_bstr()`] and the other fallible conversions in this module
/// when a path or byte sequence cannot be converted to or from UTF-8.
#[derive(Debug)]
pub struct Utf8Error;

impl std::fmt::Display for Utf8Error {
    /// Writes a fixed, human-readable description of the failed conversion.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "Could not convert to UTF8 or from UTF8 due to ill-formed input")
    }
}

impl std::error::Error for Utf8Error {}
20
21/// Like [`into_bstr()`], but takes `OsStr` as input for a lossless, but fallible, conversion.
22pub fn os_str_into_bstr(path: &OsStr) -> Result<&BStr, Utf8Error> {
23    let path = try_into_bstr(Cow::Borrowed(path.as_ref()))?;
24    match path {
25        Cow::Borrowed(path) => Ok(path),
26        Cow::Owned(_) => unreachable!("borrowed cows stay borrowed"),
27    }
28}
29
30/// Like [`into_bstr()`], but takes `OsString` as input for a lossless, but fallible, conversion.
31pub fn os_string_into_bstring(path: OsString) -> Result<BString, Utf8Error> {
32    let path = try_into_bstr(Cow::Owned(path.into()))?;
33    match path {
34        Cow::Borrowed(_path) => unreachable!("borrowed cows stay borrowed"),
35        Cow::Owned(path) => Ok(path),
36    }
37}
38
39/// Like [`into_bstr()`], but takes `Cow<OsStr>` as input for a lossless, but fallible, conversion.
40pub fn try_os_str_into_bstr(path: Cow<'_, OsStr>) -> Result<Cow<'_, BStr>, Utf8Error> {
41    match path {
42        Cow::Borrowed(path) => os_str_into_bstr(path).map(Cow::Borrowed),
43        Cow::Owned(path) => os_string_into_bstring(path).map(Cow::Owned),
44    }
45}
46
47/// Convert the given path either into its raw bytes on Unix or its UTF-8 encoded counterpart on Windows.
48///
49/// On Windows, if the source `Path`` contains ill-formed, lone surrogates, the UTF-8 conversion will fail
50/// causing `Utf8Error` to be returned.
51pub fn try_into_bstr<'a>(path: impl Into<Cow<'a, Path>>) -> Result<Cow<'a, BStr>, Utf8Error> {
52    let path = path.into();
53    let path_str = match path {
54        Cow::Owned(path) => Cow::Owned({
55            #[cfg(unix)]
56            let p: BString = {
57                use std::os::unix::ffi::OsStringExt;
58                path.into_os_string().into_vec().into()
59            };
60            #[cfg(target_os = "wasi")]
61            let p: BString = {
62                use std::os::wasi::ffi::OsStringExt;
63                path.into_os_string().into_vec().into()
64            };
65            #[cfg(not(any(unix, target_os = "wasi")))]
66            let p: BString = path.into_os_string().into_string().map_err(|_| Utf8Error)?.into();
67            p
68        }),
69        Cow::Borrowed(path) => Cow::Borrowed({
70            #[cfg(unix)]
71            let p: &BStr = {
72                use std::os::unix::ffi::OsStrExt;
73                path.as_os_str().as_bytes().into()
74            };
75            #[cfg(target_os = "wasi")]
76            let p: &BStr = {
77                use std::os::wasi::ffi::OsStrExt;
78                path.as_os_str().as_bytes().into()
79            };
80            #[cfg(not(any(unix, target_os = "wasi")))]
81            let p: &BStr = path.to_str().ok_or(Utf8Error)?.as_bytes().into();
82            p
83        }),
84    };
85    Ok(path_str)
86}
87
88/// Similar to [`try_into_bstr()`] but **panics** if malformed surrogates are encountered on Windows.
89pub fn into_bstr<'a>(path: impl Into<Cow<'a, Path>>) -> Cow<'a, BStr> {
90    try_into_bstr(path).expect("prefix path doesn't contain ill-formed UTF-8")
91}
92
93/// Join `path` to `base` such that they are separated with a `/`, i.e. `base/path`.
94pub fn join_bstr_unix_pathsep<'a, 'b>(base: impl Into<Cow<'a, BStr>>, path: impl Into<&'b BStr>) -> Cow<'a, BStr> {
95    let mut base = base.into();
96    if !base.is_empty() && base.last() != Some(&b'/') {
97        base.to_mut().push(b'/');
98    }
99    base.to_mut().extend_from_slice(path.into());
100    base
101}
102
103/// Given `input` bytes, produce a `Path` from them ignoring encoding entirely if on Unix.
104///
105/// On Windows, the input is required to be valid UTF-8, which is guaranteed if we wrote it before.
106/// There are some potential Git versions and Windows installations which produce malformed UTF-16
107/// if certain emojis are in the path. It's as rare as it sounds, but possible.
108pub fn try_from_byte_slice(input: &[u8]) -> Result<&Path, Utf8Error> {
109    #[cfg(unix)]
110    let p = {
111        use std::os::unix::ffi::OsStrExt;
112        OsStr::from_bytes(input).as_ref()
113    };
114    #[cfg(target_os = "wasi")]
115    let p: &Path = {
116        use std::os::wasi::ffi::OsStrExt;
117        OsStr::from_bytes(input).as_ref()
118    };
119    #[cfg(not(any(unix, target_os = "wasi")))]
120    let p = Path::new(std::str::from_utf8(input).map_err(|_| Utf8Error)?);
121    Ok(p)
122}
123
124/// Similar to [`from_byte_slice()`], but takes either borrowed or owned `input`.
125pub fn try_from_bstr<'a>(input: impl Into<Cow<'a, BStr>>) -> Result<Cow<'a, Path>, Utf8Error> {
126    let input = input.into();
127    match input {
128        Cow::Borrowed(input) => try_from_byte_slice(input).map(Cow::Borrowed),
129        Cow::Owned(input) => try_from_bstring(input).map(Cow::Owned),
130    }
131}
132
133/// Similar to [`try_from_bstr()`], but **panics** if malformed surrogates are encountered on Windows.
134pub fn from_bstr<'a>(input: impl Into<Cow<'a, BStr>>) -> Cow<'a, Path> {
135    try_from_bstr(input).expect("prefix path doesn't contain ill-formed UTF-8")
136}
137
138/// Similar to [`try_from_bstr()`], but takes and produces owned data.
139pub fn try_from_bstring(input: impl Into<BString>) -> Result<PathBuf, Utf8Error> {
140    let input = input.into();
141    #[cfg(unix)]
142    let p = {
143        use std::os::unix::ffi::OsStringExt;
144        std::ffi::OsString::from_vec(input.into()).into()
145    };
146    #[cfg(target_os = "wasi")]
147    let p: PathBuf = {
148        use std::os::wasi::ffi::OsStringExt;
149        std::ffi::OsString::from_vec(input.into()).into()
150    };
151    #[cfg(not(any(unix, target_os = "wasi")))]
152    let p = {
153        use bstr::ByteVec;
154        PathBuf::from(
155            {
156                let v: Vec<_> = input.into();
157                v
158            }
159            .into_string()
160            .map_err(|_| Utf8Error)?,
161        )
162    };
163    Ok(p)
164}
165
166/// Similar to [`try_from_bstring()`], but will **panic** if there is ill-formed UTF-8 in the `input`.
167pub fn from_bstring(input: impl Into<BString>) -> PathBuf {
168    try_from_bstring(input).expect("well-formed UTF-8 on windows")
169}
170
171/// Similar to [`try_from_byte_slice()`], but will **panic** if there is ill-formed UTF-8 in the `input`.
172pub fn from_byte_slice(input: &[u8]) -> &Path {
173    try_from_byte_slice(input).expect("well-formed UTF-8 on windows")
174}
175
176fn replace<'a>(path: impl Into<Cow<'a, BStr>>, find: u8, replace: u8) -> Cow<'a, BStr> {
177    let path = path.into();
178    match path {
179        Cow::Owned(mut path) => {
180            for b in path.iter_mut().filter(|b| **b == find) {
181                *b = replace;
182            }
183            path.into()
184        }
185        Cow::Borrowed(path) => {
186            if !path.contains(&find) {
187                return path.into();
188            }
189            let mut path = path.to_owned();
190            for b in path.iter_mut().filter(|b| **b == find) {
191                *b = replace;
192            }
193            path.into()
194        }
195    }
196}
197
198/// Assures the given bytes use the native path separator.
199pub fn to_native_separators<'a>(path: impl Into<Cow<'a, BStr>>) -> Cow<'a, BStr> {
200    #[cfg(not(windows))]
201    let p = to_unix_separators(path);
202    #[cfg(windows)]
203    let p = to_windows_separators(path);
204    p
205}
206
207/// Convert paths with slashes to backslashes on Windows and do nothing on Unix,
208/// but **panic** if unpaired surrogates are encountered on Windows.
209pub fn to_native_path_on_windows<'a>(path: impl Into<Cow<'a, BStr>>) -> Cow<'a, std::path::Path> {
210    #[cfg(not(windows))]
211    {
212        crate::from_bstr(path)
213    }
214    #[cfg(windows)]
215    {
216        crate::from_bstr(to_windows_separators(path))
217    }
218}
219
220/// Replace Windows path separators with slashes, but only do so on Windows.
221pub fn to_unix_separators_on_windows<'a>(path: impl Into<Cow<'a, BStr>>) -> Cow<'a, BStr> {
222    #[cfg(windows)]
223    {
224        to_unix_separators(path)
225    }
226    #[cfg(not(windows))]
227    {
228        path.into()
229    }
230}
231
232/// Replace Windows path separators with slashes, which typically resembles a Unix path, unconditionally.
233///
234/// **Note** Do not use these and prefer the conditional versions of this method.
235pub fn to_unix_separators<'a>(path: impl Into<Cow<'a, BStr>>) -> Cow<'a, BStr> {
236    replace(path, b'\\', b'/')
237}
238
239/// Find slashes and replace them with backslashes, unconditionally.
240///
241/// **Note** Do not use these and prefer the conditional versions of this method.
242pub fn to_windows_separators<'a>(path: impl Into<Cow<'a, BStr>>) -> Cow<'a, BStr> {
243    replace(path, b'/', b'\\')
244}
245
/// Resolve `..` components virtually, i.e. without accessing the filesystem or following
/// symlinks.
///
/// For example, this turns `a/./b/c/.././..` into `a`, and turns `/a/../b/..` into `/`.
///
/// A `path` without any `..` component is returned unchanged. Otherwise it is rebuilt from its
/// components, and if a relative input ends up empty or equal to `current_dir`, `.` is returned
/// instead of the full path to `current_dir`.
///
/// This is particularly useful when manipulating paths that are based on user input, as not
/// resolving intermediate symlinks keeps the path similar to what the user provided. If that's
/// not desirable, use [`realpath()`](crate::realpath()) instead.
///
/// `current_dir` is consulted at most once, namely when a `..` is encountered while the path
/// built so far is empty or `.`. It is expected to be absolute, like the typical return value of
/// `std::env::current_dir()` or `gix_fs::current_dir(…)` when `core.precomposeUnicode` is known.
///
/// Returns `None` if no logically consistent path can be produced, which happens if there is
/// nothing left to pop, e.g. when a `current_dir` like `/c` is exhausted by a path like
/// `../../r`.
pub fn normalize<'a>(path: Cow<'a, Path>, current_dir: &Path) -> Option<Cow<'a, Path>> {
    let has_parent_dir = path.components().any(|c| c == Component::ParentDir);
    if !has_parent_dir {
        return Some(path);
    }

    let was_relative = path.is_relative();
    // Taken on first use so that `current_dir` can only be spliced in once.
    let mut unused_current_dir = Some(current_dir);
    let mut resolved = PathBuf::new();
    for component in path.components() {
        match component {
            Component::ParentDir => {
                let nothing_to_pop = resolved.as_os_str().is_empty() || resolved == Path::new(".");
                if nothing_to_pop {
                    resolved.push(unused_current_dir.take()?);
                }
                if !resolved.pop() {
                    return None;
                }
            }
            other => resolved.push(other),
        }
    }

    let is_current_dir = resolved.as_os_str().is_empty() || resolved == current_dir;
    Some(if is_current_dir && was_relative {
        Cow::Borrowed(Path::new("."))
    } else {
        Cow::Owned(resolved)
    })
}
295
/// Rebuild the worktree-relative `relative_path` to be relative to `prefix`, which is the
/// worktree-relative path equivalent to the position of the user, or current working directory.
///
/// Leading components shared by both paths are dropped, and each remaining component of `prefix`
/// becomes a `..` in front of what's left of `relative_path`. If nothing is left at all, `.` is
/// returned.
///
/// This is a no-op if `prefix` is empty.
///
/// Note that both `relative_path` and `prefix` are assumed to be [normalized](normalize()), and
/// failure to do so will lead to incorrect results, along with a failed assertion in debug builds.
///
/// Note that both input paths are expected to be equal in terms of case too, as comparisons will
/// be case-sensitive.
pub fn relativize_with_prefix<'a>(relative_path: &'a Path, prefix: &Path) -> Cow<'a, Path> {
    if prefix.as_os_str().is_empty() {
        return Cow::Borrowed(relative_path);
    }
    debug_assert!(
        relative_path.components().all(|c| matches!(c, Component::Normal(_))),
        "BUG: all input is expected to be normalized, but relative_path was not"
    );
    debug_assert!(
        prefix.components().all(|c| matches!(c, Component::Normal(_))),
        "BUG: all input is expected to be normalized, but prefix was not"
    );

    let mut remaining = relative_path.components().peekable();
    let mut still_matching = true;
    let mut rebuilt = PathBuf::new();
    for prefix_component in prefix.components() {
        if still_matching {
            // `None` means that there was nothing comparable, which leaves the flag untouched.
            let names_equal = match (prefix_component, remaining.peek()) {
                (Component::Normal(expected), Some(Component::Normal(actual))) => Some(expected == *actual),
                _ => None,
            };
            match names_equal {
                Some(true) => {
                    // Shared leading component: drop it from both sides.
                    remaining.next();
                    continue;
                }
                Some(false) => still_matching = false,
                None => {}
            }
        }
        // Each prefix component that isn't shared has to be left by going up one level.
        rebuilt.push(Component::ParentDir);
    }
    rebuilt.extend(remaining);

    if rebuilt.as_os_str().is_empty() {
        Cow::Borrowed(Path::new("."))
    } else {
        Cow::Owned(rebuilt)
    }
}
339        Cow::Owned(buf)
340    }
341}