git_url/
parse.rs

1use std::{borrow::Cow, convert::Infallible};
2
3pub use bstr;
4use bstr::{BStr, BString, ByteSlice};
5
6use crate::Scheme;
7
8/// The Error returned by [`parse()`]
9#[derive(Debug, thiserror::Error)]
10#[allow(missing_docs)]
11pub enum Error {
12    #[error("Could not decode URL as UTF8")]
13    Utf8(#[from] std::str::Utf8Error),
14    #[error(transparent)]
15    Url(#[from] url::ParseError),
16    #[error("urls require the path to the repository")]
17    MissingResourceLocation,
18    #[error("file urls require an absolute or relative path to the repository")]
19    MissingRepositoryPath,
20    #[error("\"{url}\" is not a valid local path")]
21    NotALocalFile { url: BString },
22    #[error("Relative URLs are not permitted: {url:?}")]
23    RelativeUrl { url: String },
24}
25
26impl From<Infallible> for Error {
27    fn from(_: Infallible) -> Self {
28        unreachable!("Cannot actually happen, but it seems there can't be a blanket impl for this")
29    }
30}
31
32fn str_to_protocol(s: &str) -> Scheme {
33    Scheme::from(s)
34}
35
/// Guess the protocol of `url` by looking at its first colon.
///
/// * Without any colon the result is `"file"`.
/// * If the part in front of the first colon contains `@` or `.`, the result is `"ssh"`.
/// * Otherwise the result is `"file"` if a `/` or `\` follows the first colon,
///   and `None` if there is no such separator.
fn guess_protocol(url: &[u8]) -> Option<&str> {
    let colon_pos = match url.iter().position(|&byte| byte == b':') {
        Some(pos) => pos,
        None => return Some("file"),
    };
    // `colon_pos` is a valid index, hence `colon_pos + 1` is at most `url.len()` and both slices are in bounds.
    let (before_colon, after_colon) = (&url[..colon_pos], &url[colon_pos + 1..]);

    if before_colon.iter().any(|&byte| matches!(byte, b'@' | b'.')) {
        Some("ssh")
    } else if after_colon.iter().any(|&byte| matches!(byte, b'/' | b'\\')) {
        Some("file")
    } else {
        None
    }
}
51
/// Extract the path part from an SCP-like URL `[user@]host.xz:path/to/repo.git/`
///
/// The path is everything after the first `:`. If `url` contains no colon at all, the whole
/// input is returned, which means the result is never `None`.
fn extract_scp_path(url: &str) -> Option<&str> {
    Some(url.split_once(':').map_or(url, |(_host, path)| path))
}
56
/// Prepare `url` for being appended to `<protocol>://`.
///
/// For the `"ssh"` protocol the first `:` is replaced with `/` and an owned string is returned.
/// For every other protocol `url` is returned unchanged and without allocation.
fn sanitize_for_protocol<'a>(protocol: &str, url: &'a str) -> Cow<'a, str> {
    if protocol == "ssh" {
        Cow::Owned(url.replacen(':', "/", 1))
    } else {
        Cow::Borrowed(url)
    }
}
63
64fn has_no_explicit_protocol(url: &[u8]) -> bool {
65    url.find(b"://").is_none()
66}
67
68fn to_owned_url(url: url::Url) -> Result<crate::Url, Error> {
69    Ok(crate::Url {
70        serialize_alternative_form: false,
71        scheme: str_to_protocol(url.scheme()),
72        user: if url.username().is_empty() {
73            None
74        } else {
75            Some(url.username().into())
76        },
77        host: url.host_str().map(Into::into),
78        port: url.port(),
79        path: url.path().into(),
80    })
81}
82
83/// Parse the given `bytes` as git url.
84///
85/// # Note
86///
87/// We cannot and should never have to deal with UTF-16 encoded windows strings, so bytes input is acceptable.
88/// For file-paths, we don't expect UTF8 encoding either.
89pub fn parse(input: &BStr) -> Result<crate::Url, Error> {
90    let guessed_protocol = guess_protocol(input).ok_or_else(|| Error::NotALocalFile { url: input.into() })?;
91    let path_without_file_protocol = input.strip_prefix(b"file://");
92    if path_without_file_protocol.is_some() || (has_no_explicit_protocol(input) && guessed_protocol == "file") {
93        let path: BString = path_without_file_protocol
94            .map(|stripped_path| {
95                #[cfg(windows)]
96                {
97                    if stripped_path.starts_with(b"/") {
98                        input
99                            .to_str()
100                            .ok()
101                            .and_then(|url| {
102                                let path = url::Url::parse(url).ok()?.to_file_path().ok()?;
103                                path.is_absolute().then(|| git_path::into_bstr(path).into_owned())
104                            })
105                            .unwrap_or_else(|| stripped_path.into())
106                    } else {
107                        stripped_path.into()
108                    }
109                }
110                #[cfg(not(windows))]
111                {
112                    stripped_path.into()
113                }
114            })
115            .unwrap_or_else(|| input.into());
116        if path.is_empty() {
117            return Err(Error::MissingRepositoryPath);
118        }
119        let input_starts_with_file_protocol = input.starts_with(b"file://");
120        if input_starts_with_file_protocol {
121            let wanted = cfg!(windows).then(|| &[b'\\', b'/'] as &[_]).unwrap_or(&[b'/']);
122            if !wanted.iter().any(|w| path.contains(w)) {
123                return Err(Error::MissingRepositoryPath);
124            }
125        }
126        return Ok(crate::Url {
127            scheme: Scheme::File,
128            path,
129            serialize_alternative_form: !input_starts_with_file_protocol,
130            ..Default::default()
131        });
132    }
133
134    let url_str = std::str::from_utf8(input)?;
135    let (mut url, mut scp_path) = match url::Url::parse(url_str) {
136        Ok(url) => (url, None),
137        Err(url::ParseError::RelativeUrlWithoutBase) => {
138            // happens with bare paths as well as scp like paths. The latter contain a ':' past the host portion,
139            // which we are trying to detect.
140            (
141                url::Url::parse(&format!(
142                    "{}://{}",
143                    guessed_protocol,
144                    sanitize_for_protocol(guessed_protocol, url_str)
145                ))?,
146                extract_scp_path(url_str),
147            )
148        }
149        Err(err) => return Err(err.into()),
150    };
151    // SCP like URLs without user parse as 'something' with the scheme being the 'host'. Hosts always have dots.
152    if url.scheme().find('.').is_some() {
153        // try again with prefixed protocol
154        url = url::Url::parse(&format!("ssh://{}", sanitize_for_protocol("ssh", url_str)))?;
155        scp_path = extract_scp_path(url_str);
156    }
157    if url.path().is_empty() && ["ssh", "git"].contains(&url.scheme()) {
158        return Err(Error::MissingResourceLocation);
159    }
160    if url.cannot_be_a_base() {
161        return Err(Error::RelativeUrl { url: url.into() });
162    }
163
164    let mut url = to_owned_url(url)?;
165    if let Some(path) = scp_path {
166        url.path = path.into();
167        url.serialize_alternative_form = true;
168    }
169    Ok(url)
170}