git_path/
convert.rs

1use std::{
2    borrow::Cow,
3    ffi::{OsStr, OsString},
4    path::{Path, PathBuf},
5};
6
7use bstr::{BStr, BString};
8
/// The error returned by the fallible conversions of this module, like [`try_into_bstr()`],
/// if the input is ill-formed and thus can't be converted to or from UTF-8.
#[derive(Debug)]
pub struct Utf8Error;

impl std::error::Error for Utf8Error {}

impl std::fmt::Display for Utf8Error {
    /// Writes a fixed, human-readable description of the failed conversion.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "Could not convert to UTF8 or from UTF8 due to ill-formed input")
    }
}
20
21/// Like [`into_bstr()`], but takes `OsStr` as input for a lossless, but fallible, conversion.
22pub fn os_str_into_bstr(path: &OsStr) -> Result<&BStr, Utf8Error> {
23    let path = try_into_bstr(Cow::Borrowed(path.as_ref()))?;
24    match path {
25        Cow::Borrowed(path) => Ok(path),
26        Cow::Owned(_) => unreachable!("borrowed cows stay borrowed"),
27    }
28}
29
30/// Like [`into_bstr()`], but takes `OsString` as input for a lossless, but fallible, conversion.
31pub fn os_string_into_bstring(path: OsString) -> Result<BString, Utf8Error> {
32    let path = try_into_bstr(Cow::Owned(path.into()))?;
33    match path {
34        Cow::Borrowed(_path) => unreachable!("borrowed cows stay borrowed"),
35        Cow::Owned(path) => Ok(path),
36    }
37}
38
39/// Convert the given path either into its raw bytes on unix or its UTF8 encoded counterpart on windows.
40///
41/// On windows, if the source Path contains ill-formed, lone surrogates, the UTF-8 conversion will fail
42/// causing `Utf8Error` to be returned.
43pub fn try_into_bstr<'a>(path: impl Into<Cow<'a, Path>>) -> Result<Cow<'a, BStr>, Utf8Error> {
44    let path = path.into();
45    let path_str = match path {
46        Cow::Owned(path) => Cow::Owned({
47            #[cfg(unix)]
48            let p: BString = {
49                use std::os::unix::ffi::OsStringExt;
50                path.into_os_string().into_vec().into()
51            };
52            #[cfg(target_os = "wasi")]
53            let p: BString = {
54                use std::os::wasi::ffi::OsStringExt;
55                path.into_os_string().into_vec().into()
56            };
57            #[cfg(not(any(unix, target_os = "wasi")))]
58            let p: BString = path.into_os_string().into_string().map_err(|_| Utf8Error)?.into();
59            p
60        }),
61        Cow::Borrowed(path) => Cow::Borrowed({
62            #[cfg(unix)]
63            let p: &BStr = {
64                use std::os::unix::ffi::OsStrExt;
65                path.as_os_str().as_bytes().into()
66            };
67            #[cfg(target_os = "wasi")]
68            let p: &BStr = {
69                use std::os::wasi::ffi::OsStrExt;
70                path.as_os_str().as_bytes().into()
71            };
72            #[cfg(not(any(unix, target_os = "wasi")))]
73            let p: &BStr = path.to_str().ok_or(Utf8Error)?.as_bytes().into();
74            p
75        }),
76    };
77    Ok(path_str)
78}
79
80/// Similar to [`try_into_bstr()`] but **panics** if malformed surrogates are encountered on windows.
81pub fn into_bstr<'a>(path: impl Into<Cow<'a, Path>>) -> Cow<'a, BStr> {
82    try_into_bstr(path).expect("prefix path doesn't contain ill-formed UTF-8")
83}
84
85/// Given `input` bytes, produce a `Path` from them ignoring encoding entirely if on unix.
86///
87/// On windows, the input is required to be valid UTF-8, which is guaranteed if we wrote it before. There are some potential
88/// git versions and windows installation which produce mal-formed UTF-16 if certain emojies are in the path. It's as rare as
89/// it sounds, but possible.
90pub fn try_from_byte_slice(input: &[u8]) -> Result<&Path, Utf8Error> {
91    #[cfg(unix)]
92    let p = {
93        use std::os::unix::ffi::OsStrExt;
94        OsStr::from_bytes(input).as_ref()
95    };
96    #[cfg(target_os = "wasi")]
97    let p: &Path = {
98        use std::os::wasi::ffi::OsStrExt;
99        OsStr::from_bytes(input).as_ref()
100    };
101    #[cfg(not(any(unix, target_os = "wasi")))]
102    let p = Path::new(std::str::from_utf8(input).map_err(|_| Utf8Error)?);
103    Ok(p)
104}
105
106/// Similar to [`from_byte_slice()`], but takes either borrowed or owned `input`.
107pub fn try_from_bstr<'a>(input: impl Into<Cow<'a, BStr>>) -> Result<Cow<'a, Path>, Utf8Error> {
108    let input = input.into();
109    match input {
110        Cow::Borrowed(input) => try_from_byte_slice(input).map(Cow::Borrowed),
111        Cow::Owned(input) => try_from_bstring(input).map(Cow::Owned),
112    }
113}
114
115/// Similar to [`try_from_bstr()`], but **panics** if malformed surrogates are encountered on windows.
116pub fn from_bstr<'a>(input: impl Into<Cow<'a, BStr>>) -> Cow<'a, Path> {
117    try_from_bstr(input).expect("prefix path doesn't contain ill-formed UTF-8")
118}
119
120/// Similar to [`try_from_bstr()`], but takes and produces owned data.
121pub fn try_from_bstring(input: impl Into<BString>) -> Result<PathBuf, Utf8Error> {
122    let input = input.into();
123    #[cfg(unix)]
124    let p = {
125        use std::os::unix::ffi::OsStringExt;
126        std::ffi::OsString::from_vec(input.into()).into()
127    };
128    #[cfg(target_os = "wasi")]
129    let p: PathBuf = {
130        use std::os::wasi::ffi::OsStringExt;
131        std::ffi::OsString::from_vec(input.into()).into()
132    };
133    #[cfg(not(any(unix, target_os = "wasi")))]
134    let p = {
135        use bstr::ByteVec;
136        PathBuf::from(
137            {
138                let v: Vec<_> = input.into();
139                v
140            }
141            .into_string()
142            .map_err(|_| Utf8Error)?,
143        )
144    };
145    Ok(p)
146}
147
148/// Similar to [`try_from_bstring()`], but will **panic** if there is ill-formed UTF-8 in the `input`.
149pub fn from_bstring(input: impl Into<BString>) -> PathBuf {
150    try_from_bstring(input).expect("well-formed UTF-8 on windows")
151}
152
153/// Similar to [`try_from_byte_slice()`], but will **panic** if there is ill-formed UTF-8 in the `input`.
154pub fn from_byte_slice(input: &[u8]) -> &Path {
155    try_from_byte_slice(input).expect("well-formed UTF-8 on windows")
156}
157
158fn replace<'a>(path: impl Into<Cow<'a, BStr>>, find: u8, replace: u8) -> Cow<'a, BStr> {
159    let path = path.into();
160    match path {
161        Cow::Owned(mut path) => {
162            for b in path.iter_mut().filter(|b| **b == find) {
163                *b = replace;
164            }
165            path.into()
166        }
167        Cow::Borrowed(path) => {
168            if !path.contains(&find) {
169                return path.into();
170            }
171            let mut path = path.to_owned();
172            for b in path.iter_mut().filter(|b| **b == find) {
173                *b = replace;
174            }
175            path.into()
176        }
177    }
178}
179
180/// Assures the given bytes use the native path separator.
181pub fn to_native_separators<'a>(path: impl Into<Cow<'a, BStr>>) -> Cow<'a, BStr> {
182    #[cfg(not(windows))]
183    let p = to_unix_separators(path);
184    #[cfg(windows)]
185    let p = to_windows_separators(path);
186    p
187}
188
189/// Convert paths with slashes to backslashes on windows and do nothing on unix, but **panics** if malformed surrogates are encountered on windows.
190pub fn to_native_path_on_windows<'a>(path: impl Into<Cow<'a, BStr>>) -> Cow<'a, std::path::Path> {
191    #[cfg(not(windows))]
192    {
193        crate::from_bstr(path)
194    }
195    #[cfg(windows)]
196    {
197        crate::from_bstr(to_windows_separators(path))
198    }
199}
200
201/// Replaces windows path separators with slashes, but only do so on windows.
202pub fn to_unix_separators_on_windows<'a>(path: impl Into<Cow<'a, BStr>>) -> Cow<'a, BStr> {
203    #[cfg(windows)]
204    {
205        replace(path, b'\\', b'/')
206    }
207    #[cfg(not(windows))]
208    {
209        path.into()
210    }
211}
212
213/// Replaces windows path separators with slashes, unconditionally.
214///
215/// **Note** Do not use these and prefer the conditional versions of this method.
216// TODO: use https://lib.rs/crates/path-slash to handle escapes
217pub fn to_unix_separators<'a>(path: impl Into<Cow<'a, BStr>>) -> Cow<'a, BStr> {
218    replace(path, b'\\', b'/')
219}
220
221/// Find backslashes and replace them with slashes, which typically resembles a unix path, unconditionally.
222///
223/// **Note** Do not use these and prefer the conditional versions of this method.
224// TODO: use https://lib.rs/crates/path-slash to handle escapes
225pub fn to_windows_separators<'a>(path: impl Into<Cow<'a, BStr>>) -> Cow<'a, BStr> {
226    replace(path, b'/', b'\\')
227}
228
/// Resolve relative components virtually, without accessing the file system. For example, `a/./b/c/.././..`
/// turns into `a` without keeping intermediate `..`, and `/a/../b/..` becomes `/`.
/// If the input path was relative and ends up being the `current_dir`, `.` is returned instead of the
/// full path to `current_dir`.
///
/// This is particularly useful when manipulating paths that are based on user input, and not resolving
/// intermediate symlinks keeps the path similar to what the user provided. If that's not desirable, use
/// [`realpath()`][crate::realpath()] instead.
///
/// A `path` without any `..` component is returned unchanged.
///
/// Note that `current_dir` is used if we run out of path components to pop off. It is expected to be absolute,
/// as is typical for the return value of `std::env::current_dir()`.
/// As a `current_dir` like `/c` can be exhausted by paths like `../../r`, `None` will be returned to indicate
/// the inability to produce a logically consistent path.
pub fn normalize<'a>(path: impl Into<Cow<'a, Path>>, current_dir: impl AsRef<Path>) -> Option<Cow<'a, Path>> {
    use std::path::Component;

    let input = path.into();
    let has_parent_dir = input.components().any(|c| c == Component::ParentDir);
    if !has_parent_dir {
        return Some(input);
    }

    let current_dir = current_dir.as_ref();
    let input_is_relative = input.is_relative();
    // The current directory can serve as base only once, a second request yields `None`.
    let mut unused_current_dir = Some(current_dir);
    let mut resolved = PathBuf::new();
    for component in input.components() {
        match component {
            Component::ParentDir => {
                let nothing_to_pop = resolved.as_os_str().is_empty() || resolved == Path::new(".");
                if nothing_to_pop {
                    let base = unused_current_dir.take()?;
                    resolved.push(base);
                }
                if !resolved.pop() {
                    return None;
                }
            }
            other => resolved.push(other),
        }
    }

    let is_current_dir = resolved.as_os_str().is_empty() || resolved == current_dir;
    Some(if is_current_dir && input_is_relative {
        Cow::Borrowed(Path::new("."))
    } else {
        Cow::Owned(resolved)
    })
}