1use std::convert::Infallible;
2
3use bstr::{BStr, BString, ByteSlice};
4use percent_encoding::percent_decode_str;
5
6use crate::Scheme;
7
8#[derive(Debug, thiserror::Error)]
10#[allow(missing_docs)]
11pub enum Error {
12 #[error("{} \"{url}\" is not valid UTF-8", kind.as_str())]
13 Utf8 {
14 url: BString,
15 kind: UrlKind,
16 source: std::str::Utf8Error,
17 },
18 #[error("{} {url:?} can not be parsed as valid URL", kind.as_str())]
19 Url {
20 url: String,
21 kind: UrlKind,
22 source: url::ParseError,
23 },
24
25 #[error("The host portion of the following URL is too long ({} bytes, {len} bytes total): {truncated_url:?}", truncated_url.len())]
26 TooLong { truncated_url: BString, len: usize },
27 #[error("{} \"{url}\" does not specify a path to a repository", kind.as_str())]
28 MissingRepositoryPath { url: BString, kind: UrlKind },
29 #[error("URL {url:?} is relative which is not allowed in this context")]
30 RelativeUrl { url: String },
31}
32
33impl From<Infallible> for Error {
34 fn from(_: Infallible) -> Self {
35 unreachable!("Cannot actually happen, but it seems there can't be a blanket impl for this")
36 }
37}
38
/// The flavour of input an [`Error`] refers to; it selects the wording used in error messages.
#[derive(Debug, Clone, Copy)]
pub enum UrlKind {
    /// Input containing a `://` separator.
    Url,
    /// Input of the `host:path` form.
    Scp,
    /// Input treated as a path on the local file system.
    Local,
}
49
50impl UrlKind {
51 fn as_str(&self) -> &'static str {
52 match self {
53 UrlKind::Url => "URL",
54 UrlKind::Scp => "SCP-like target",
55 UrlKind::Local => "local path",
56 }
57 }
58}
59
/// Result of classifying raw input with [`find_scheme`].
pub(crate) enum InputScheme {
    /// The input contains `://`; `protocol_end` is the byte offset at which that separator starts.
    Url { protocol_end: usize },
    /// The input looks like `host:path`; `colon` is the byte offset of the first `:`.
    Scp { colon: usize },
    /// Anything else: the input is treated as a local path.
    Local,
}
65
66pub(crate) fn find_scheme(input: &BStr) -> InputScheme {
67 if let Some(protocol_end) = input.find("://") {
70 return InputScheme::Url { protocol_end };
71 }
72
73 if let Some(colon) = input.find_byte(b':') {
74 let explicitly_local = &input[..colon].contains(&b'/');
77 let dos_driver_letter = cfg!(windows) && input[..colon].len() == 1;
78
79 if !explicitly_local && !dos_driver_letter {
80 return InputScheme::Scp { colon };
81 }
82 }
83
84 InputScheme::Local
85}
86
87pub(crate) fn url(input: &BStr, protocol_end: usize) -> Result<crate::Url, Error> {
88 const MAX_LEN: usize = 1024;
89 let bytes_to_path = input[protocol_end + "://".len()..]
90 .iter()
91 .filter(|b| !b.is_ascii_whitespace())
92 .skip_while(|b| **b == b'/' || **b == b'\\')
93 .position(|b| *b == b'/')
94 .unwrap_or(input.len() - protocol_end);
95 if bytes_to_path > MAX_LEN || protocol_end > MAX_LEN {
96 return Err(Error::TooLong {
97 truncated_url: input[..(protocol_end + "://".len() + MAX_LEN).min(input.len())].into(),
98 len: input.len(),
99 });
100 }
101 let (input, url) = input_to_utf8_and_url(input, UrlKind::Url)?;
102 let scheme = url.scheme().into();
103
104 if matches!(scheme, Scheme::Git | Scheme::Ssh) && url.path().is_empty() {
105 return Err(Error::MissingRepositoryPath {
106 url: input.into(),
107 kind: UrlKind::Url,
108 });
109 }
110
111 if url.cannot_be_a_base() {
112 return Err(Error::RelativeUrl { url: input.to_owned() });
113 }
114
115 Ok(crate::Url {
116 serialize_alternative_form: false,
117 scheme,
118 user: url_user(&url, UrlKind::Url)?,
119 password: url
120 .password()
121 .map(|s| percent_decoded_utf8(s, UrlKind::Url))
122 .transpose()?,
123 host: url.host_str().map(Into::into),
124 port: url.port(),
125 path: url.path().into(),
126 })
127}
128
129fn percent_decoded_utf8(s: &str, kind: UrlKind) -> Result<String, Error> {
130 Ok(percent_decode_str(s)
131 .decode_utf8()
132 .map_err(|err| Error::Utf8 {
133 url: s.into(),
134 kind,
135 source: err,
136 })?
137 .into_owned())
138}
139
140pub(crate) fn scp(input: &BStr, colon: usize) -> Result<crate::Url, Error> {
141 let input = input_to_utf8(input, UrlKind::Scp)?;
142
143 let (host, path) = input.split_at(colon);
145 debug_assert_eq!(path.get(..1), Some(":"), "{path} should start with :");
146 let path = &path[1..];
147
148 if path.is_empty() {
149 return Err(Error::MissingRepositoryPath {
150 url: input.to_owned().into(),
151 kind: UrlKind::Scp,
152 });
153 }
154
155 let url = url::Url::parse(&format!("ssh://{host}")).map_err(|source| Error::Url {
160 url: input.to_owned(),
161 kind: UrlKind::Scp,
162 source,
163 })?;
164
165 Ok(crate::Url {
166 serialize_alternative_form: true,
167 scheme: url.scheme().into(),
168 user: url_user(&url, UrlKind::Scp)?,
169 password: url
170 .password()
171 .map(|s| percent_decoded_utf8(s, UrlKind::Scp))
172 .transpose()?,
173 host: url.host_str().map(Into::into),
174 port: url.port(),
175 path: path.into(),
176 })
177}
178
179fn url_user(url: &url::Url, kind: UrlKind) -> Result<Option<String>, Error> {
180 if url.username().is_empty() && url.password().is_none() {
181 Ok(None)
182 } else {
183 Ok(Some(percent_decoded_utf8(url.username(), kind)?))
184 }
185}
186
187pub(crate) fn file_url(input: &BStr, protocol_colon: usize) -> Result<crate::Url, Error> {
188 let input = input_to_utf8(input, UrlKind::Url)?;
189 let input_after_protocol = &input[protocol_colon + "://".len()..];
190
191 let Some(first_slash) = input_after_protocol
192 .find('/')
193 .or_else(|| cfg!(windows).then(|| input_after_protocol.find('\\')).flatten())
194 else {
195 return Err(Error::MissingRepositoryPath {
196 url: input.to_owned().into(),
197 kind: UrlKind::Url,
198 });
199 };
200
201 let windows_special_path = if cfg!(windows) {
209 let input_after_protocol = if first_slash == 0 {
213 &input_after_protocol[1..]
214 } else {
215 input_after_protocol
216 };
217 if input_after_protocol.chars().nth(1) == Some(':') {
219 Some(input_after_protocol)
220 } else {
221 None
222 }
223 } else {
224 None
225 };
226
227 let host = if windows_special_path.is_some() || first_slash == 0 {
228 None
230 } else {
231 Some(&input_after_protocol[..first_slash])
233 };
234
235 let path = windows_special_path.unwrap_or(&input_after_protocol[first_slash..]);
237
238 Ok(crate::Url {
239 serialize_alternative_form: false,
240 host: host.map(Into::into),
241 ..local(path.into())?
242 })
243}
244
245pub(crate) fn local(input: &BStr) -> Result<crate::Url, Error> {
246 if input.is_empty() {
247 return Err(Error::MissingRepositoryPath {
248 url: input.to_owned(),
249 kind: UrlKind::Local,
250 });
251 }
252
253 Ok(crate::Url {
254 serialize_alternative_form: true,
255 scheme: Scheme::File,
256 password: None,
257 user: None,
258 host: None,
259 port: None,
260 path: input.to_owned(),
261 })
262}
263
264fn input_to_utf8(input: &BStr, kind: UrlKind) -> Result<&str, Error> {
265 std::str::from_utf8(input).map_err(|source| Error::Utf8 {
266 url: input.to_owned(),
267 kind,
268 source,
269 })
270}
271
272fn input_to_utf8_and_url(input: &BStr, kind: UrlKind) -> Result<(&str, url::Url), Error> {
273 let input = input_to_utf8(input, kind)?;
274 url::Url::parse(input)
275 .map(|url| (input, url))
276 .map_err(|source| Error::Url {
277 url: input.to_owned(),
278 kind,
279 source,
280 })
281}