use std::convert::Infallible;
use bstr::{BStr, BString, ByteSlice};
use crate::Scheme;
#[derive(Debug, thiserror::Error)]
#[allow(missing_docs)]
pub enum Error {
#[error("{} \"{url}\" is not valid UTF-8", kind.as_str())]
Utf8 {
url: BString,
kind: UrlKind,
source: std::str::Utf8Error,
},
#[error("{} {url:?} can not be parsed as valid URL", kind.as_str())]
Url {
url: String,
kind: UrlKind,
source: crate::simple_url::UrlParseError,
},
#[error("The host portion of the following URL is too long ({} bytes, {len} bytes total): {truncated_url:?}", truncated_url.len())]
TooLong { truncated_url: BString, len: usize },
#[error("{} \"{url}\" does not specify a path to a repository", kind.as_str())]
MissingRepositoryPath { url: BString, kind: UrlKind },
#[error("URL {url:?} is relative which is not allowed in this context")]
RelativeUrl { url: String },
}
impl From<Infallible> for Error {
fn from(_: Infallible) -> Self {
unreachable!("Cannot actually happen, but it seems there can't be a blanket impl for this")
}
}
#[derive(Debug, Clone, Copy)]
pub enum UrlKind {
Url,
Scp,
Local,
}
impl UrlKind {
fn as_str(&self) -> &'static str {
match self {
UrlKind::Url => "URL",
UrlKind::Scp => "SCP-like target",
UrlKind::Local => "local path",
}
}
}
pub(crate) enum InputScheme {
Url { protocol_end: usize },
Scp { colon: usize },
Local,
}
pub(crate) fn find_scheme(input: &BStr) -> InputScheme {
if let Some(protocol_end) = input.find("://") {
return InputScheme::Url { protocol_end };
}
let colon = if input.starts_with(b"[") {
if let Some(bracket_end) = input.find_byte(b']') {
input[bracket_end + 1..]
.find_byte(b':')
.map(|pos| bracket_end + 1 + pos)
} else {
input.find_byte(b':')
}
} else {
input.find_byte(b':')
};
if let Some(colon) = colon {
let explicitly_local = &input[..colon].contains(&b'/');
let dos_driver_letter = cfg!(windows) && input[..colon].len() == 1;
if !explicitly_local && !dos_driver_letter {
return InputScheme::Scp { colon };
}
}
InputScheme::Local
}
pub(crate) fn url(input: &BStr, protocol_end: usize) -> Result<crate::Url, Error> {
const MAX_LEN: usize = 1024;
let bytes_to_path = input[protocol_end + "://".len()..]
.iter()
.filter(|b| !b.is_ascii_whitespace())
.skip_while(|b| **b == b'/' || **b == b'\\')
.position(|b| *b == b'/')
.unwrap_or(input.len() - protocol_end);
if bytes_to_path > MAX_LEN || protocol_end > MAX_LEN {
return Err(Error::TooLong {
truncated_url: input[..(protocol_end + "://".len() + MAX_LEN).min(input.len())].into(),
len: input.len(),
});
}
let (input, url) = input_to_utf8_and_url(input, UrlKind::Url)?;
let scheme = Scheme::from(url.scheme.as_str());
if matches!(scheme, Scheme::Git | Scheme::Ssh) && url.path.is_empty() {
return Err(Error::MissingRepositoryPath {
url: input.into(),
kind: UrlKind::Url,
});
}
let path: BString = if url.path.is_empty() && matches!(scheme, Scheme::Http | Scheme::Https) {
"/".into()
} else if matches!(scheme, Scheme::Ssh | Scheme::Git) && url.path.starts_with("/~") {
url.path[1..].into()
} else {
url.path.into()
};
let user = if url.username.is_empty() && url.password.is_none() {
None
} else {
Some(url.username)
};
let password = url.password;
let port = url.port;
let host = if scheme == Scheme::Ssh {
url.host.map(|mut h| {
if let Some(h2) = h.strip_prefix('[') {
if let Some(inner) = h2.strip_suffix("]:") {
h = inner.to_string();
} else if let Some(inner) = h2.strip_suffix(']') {
h = inner.to_string();
}
} else {
let colon_count = h.chars().filter(|&c| c == ':').take(2).count();
if colon_count == 1 {
if let Some(inner) = h.strip_suffix(':') {
h = inner.to_string();
}
}
}
h
})
} else {
url.host
};
Ok(crate::Url {
serialize_alternative_form: false,
scheme,
user,
password,
host,
port,
path,
})
}
pub(crate) fn scp(input: &BStr, colon: usize) -> Result<crate::Url, Error> {
let input = input_to_utf8(input, UrlKind::Scp)?;
let (host, path) = input.split_at(colon);
debug_assert_eq!(path.get(..1), Some(":"), "{path} should start with :");
let path = &path[1..];
if path.is_empty() {
return Err(Error::MissingRepositoryPath {
url: input.to_owned().into(),
kind: UrlKind::Scp,
});
}
let url_string = format!("ssh://{host}");
let url = crate::simple_url::ParsedUrl::parse(&url_string).map_err(|source| Error::Url {
url: input.to_owned(),
kind: UrlKind::Scp,
source,
})?;
let path = if path.starts_with("/~") { &path[1..] } else { path };
let user = if url.username.is_empty() && url.password.is_none() {
None
} else {
Some(url.username)
};
let password = url.password;
let port = url.port;
let host = url.host.map(|h| {
if let Some(h) = h.strip_prefix("[").and_then(|h| h.strip_suffix("]")) {
h.to_string()
} else {
h
}
});
Ok(crate::Url {
serialize_alternative_form: true,
scheme: Scheme::from(url.scheme.as_str()),
user,
password,
host,
port,
path: path.into(),
})
}
pub(crate) fn file_url(input: &BStr, protocol_colon: usize) -> Result<crate::Url, Error> {
let input = input_to_utf8(input, UrlKind::Url)?;
let input_after_protocol = &input[protocol_colon + "://".len()..];
let Some(first_slash) = input_after_protocol
.find('/')
.or_else(|| cfg!(windows).then(|| input_after_protocol.find('\\')).flatten())
else {
return Err(Error::MissingRepositoryPath {
url: input.to_owned().into(),
kind: UrlKind::Url,
});
};
let windows_special_path = if cfg!(windows) {
let input_after_protocol = if first_slash == 0 {
&input_after_protocol[1..]
} else {
input_after_protocol
};
if input_after_protocol.chars().nth(1) == Some(':') {
Some(input_after_protocol)
} else {
None
}
} else {
None
};
let host = if windows_special_path.is_some() || first_slash == 0 {
None
} else {
Some(&input_after_protocol[..first_slash])
};
let path = windows_special_path.unwrap_or(&input_after_protocol[first_slash..]);
Ok(crate::Url {
serialize_alternative_form: false,
host: host.map(Into::into),
..local(path.into())?
})
}
pub(crate) fn local(input: &BStr) -> Result<crate::Url, Error> {
if input.is_empty() {
return Err(Error::MissingRepositoryPath {
url: input.to_owned(),
kind: UrlKind::Local,
});
}
Ok(crate::Url {
serialize_alternative_form: true,
scheme: Scheme::File,
password: None,
user: None,
host: None,
port: None,
path: input.to_owned(),
})
}
fn input_to_utf8(input: &BStr, kind: UrlKind) -> Result<&str, Error> {
std::str::from_utf8(input).map_err(|source| Error::Utf8 {
url: input.to_owned(),
kind,
source,
})
}
fn input_to_utf8_and_url(input: &BStr, kind: UrlKind) -> Result<(&str, crate::simple_url::ParsedUrl), Error> {
let input = input_to_utf8(input, kind)?;
crate::simple_url::ParsedUrl::parse(input)
.map(|url| (input, url))
.map_err(|source| {
match source {
crate::simple_url::UrlParseError::RelativeUrlWithoutBase => {
Error::RelativeUrl { url: input.to_owned() }
}
_ => Error::Url {
url: input.to_owned(),
kind,
source,
},
}
})
}