1use std::convert::Infallible;
2
3use bstr::{BStr, BString, ByteSlice};
4use percent_encoding::percent_decode_str;
5
6use crate::Scheme;
7
8#[derive(Debug, thiserror::Error)]
10#[allow(missing_docs)]
11pub enum Error {
12 #[error("{} \"{url}\" is not valid UTF-8", kind.as_str())]
13 Utf8 {
14 url: BString,
15 kind: UrlKind,
16 source: std::str::Utf8Error,
17 },
18 #[error("{} {url:?} can not be parsed as valid URL", kind.as_str())]
19 Url {
20 url: String,
21 kind: UrlKind,
22 source: crate::simple_url::UrlParseError,
23 },
24
25 #[error("The host portion of the following URL is too long ({} bytes, {len} bytes total): {truncated_url:?}", truncated_url.len())]
26 TooLong { truncated_url: BString, len: usize },
27 #[error("{} \"{url}\" does not specify a path to a repository", kind.as_str())]
28 MissingRepositoryPath { url: BString, kind: UrlKind },
29 #[error("URL {url:?} is relative which is not allowed in this context")]
30 RelativeUrl { url: String },
31}
32
33impl From<Infallible> for Error {
34 fn from(_: Infallible) -> Self {
35 unreachable!("Cannot actually happen, but it seems there can't be a blanket impl for this")
36 }
37}
38
/// The flavor of input that was being parsed when an error occurred.
#[derive(Debug, Clone, Copy)]
pub enum UrlKind {
    /// An input with an explicit `<scheme>://` prefix.
    Url,
    /// An SCP-like `host:path` input.
    Scp,
    /// A path on the local file system.
    Local,
}

impl UrlKind {
    /// The human-readable label of this kind, as embedded into error messages.
    fn as_str(&self) -> &'static str {
        match *self {
            Self::Url => "URL",
            Self::Scp => "SCP-like target",
            Self::Local => "local path",
        }
    }
}
59
/// The classification of an input as determined by `find_scheme()`.
///
/// All offsets are byte offsets into the classified input.
pub(crate) enum InputScheme {
    /// The input contains `://`, which starts at `protocol_end`.
    Url { protocol_end: usize },
    /// The input looks like `host:path`, with the separating `:` located at `colon`.
    Scp { colon: usize },
    /// Neither of the above applies.
    Local,
}
65
66pub(crate) fn find_scheme(input: &BStr) -> InputScheme {
67 if let Some(protocol_end) = input.find("://") {
70 return InputScheme::Url { protocol_end };
71 }
72
73 let colon = if input.starts_with(b"[") {
75 if let Some(bracket_end) = input.find_byte(b']') {
77 input[bracket_end + 1..]
79 .find_byte(b':')
80 .map(|pos| bracket_end + 1 + pos)
81 } else {
82 input.find_byte(b':')
84 }
85 } else {
86 input.find_byte(b':')
87 };
88
89 if let Some(colon) = colon {
90 let explicitly_local = &input[..colon].contains(&b'/');
93 let dos_driver_letter = cfg!(windows) && input[..colon].len() == 1;
94
95 if !explicitly_local && !dos_driver_letter {
96 return InputScheme::Scp { colon };
97 }
98 }
99
100 InputScheme::Local
101}
102
103pub(crate) fn url(input: &BStr, protocol_end: usize) -> Result<crate::Url, Error> {
104 const MAX_LEN: usize = 1024;
105 let bytes_to_path = input[protocol_end + "://".len()..]
106 .iter()
107 .filter(|b| !b.is_ascii_whitespace())
108 .skip_while(|b| **b == b'/' || **b == b'\\')
109 .position(|b| *b == b'/')
110 .unwrap_or(input.len() - protocol_end);
111 if bytes_to_path > MAX_LEN || protocol_end > MAX_LEN {
112 return Err(Error::TooLong {
113 truncated_url: input[..(protocol_end + "://".len() + MAX_LEN).min(input.len())].into(),
114 len: input.len(),
115 });
116 }
117 let (input, url) = input_to_utf8_and_url(input, UrlKind::Url)?;
118 let scheme = Scheme::from(url.scheme.as_str());
119
120 if matches!(scheme, Scheme::Git | Scheme::Ssh) && url.path.is_empty() {
121 return Err(Error::MissingRepositoryPath {
122 url: input.into(),
123 kind: UrlKind::Url,
124 });
125 }
126
127 let path = if url.path.is_empty() && matches!(scheme, Scheme::Http | Scheme::Https) {
129 "/".into()
130 } else if matches!(scheme, Scheme::Ssh | Scheme::Git) && url.path.starts_with("/~") {
131 url.path[1..].into()
134 } else {
135 url.path.into()
136 };
137
138 let user = url_user(&url, UrlKind::Url)?;
139 let password = url
140 .password
141 .map(|s| percent_decoded_utf8(s, UrlKind::Url))
142 .transpose()?;
143 let port = url.port;
144
145 let host = if scheme == Scheme::Ssh {
147 url.host.map(|mut h| {
148 if let Some(h2) = h.strip_prefix('[') {
150 if let Some(inner) = h2.strip_suffix("]:") {
151 h = inner.to_string();
153 } else if let Some(inner) = h2.strip_suffix(']') {
154 h = inner.to_string();
156 }
157 } else {
158 let colon_count = h.chars().filter(|&c| c == ':').take(2).count();
160 if colon_count == 1 {
161 if let Some(inner) = h.strip_suffix(':') {
162 h = inner.to_string();
163 }
164 }
165 }
166 h
167 })
168 } else {
169 url.host
170 };
171 Ok(crate::Url {
172 serialize_alternative_form: false,
173 scheme,
174 user,
175 password,
176 host,
177 port,
178 path,
179 })
180}
181
182fn percent_decoded_utf8(s: &str, kind: UrlKind) -> Result<String, Error> {
183 Ok(percent_decode_str(s)
184 .decode_utf8()
185 .map_err(|err| Error::Utf8 {
186 url: s.into(),
187 kind,
188 source: err,
189 })?
190 .into_owned())
191}
192
193pub(crate) fn scp(input: &BStr, colon: usize) -> Result<crate::Url, Error> {
194 let input = input_to_utf8(input, UrlKind::Scp)?;
195
196 let (host, path) = input.split_at(colon);
198 debug_assert_eq!(path.get(..1), Some(":"), "{path} should start with :");
199 let path = &path[1..];
200
201 if path.is_empty() {
202 return Err(Error::MissingRepositoryPath {
203 url: input.to_owned().into(),
204 kind: UrlKind::Scp,
205 });
206 }
207
208 let url_string = format!("ssh://{host}");
213 let url = crate::simple_url::ParsedUrl::parse(&url_string).map_err(|source| Error::Url {
214 url: input.to_owned(),
215 kind: UrlKind::Scp,
216 source,
217 })?;
218
219 let path = if path.starts_with("/~") { &path[1..] } else { path };
222
223 let user = url_user(&url, UrlKind::Scp)?;
224 let password = url
225 .password
226 .map(|s| percent_decoded_utf8(s, UrlKind::Scp))
227 .transpose()?;
228 let port = url.port;
229
230 let host = url.host.map(|h| {
232 if let Some(h) = h.strip_prefix("[").and_then(|h| h.strip_suffix("]")) {
233 h.to_string()
234 } else {
235 h
236 }
237 });
238
239 Ok(crate::Url {
240 serialize_alternative_form: true,
241 scheme: Scheme::from(url.scheme.as_str()),
242 user,
243 password,
244 host,
245 port,
246 path: path.into(),
247 })
248}
249
250fn url_user(url: &crate::simple_url::ParsedUrl<'_>, kind: UrlKind) -> Result<Option<String>, Error> {
251 if url.username.is_empty() && url.password.is_none() {
252 Ok(None)
253 } else {
254 Ok(Some(percent_decoded_utf8(url.username, kind)?))
255 }
256}
257
258pub(crate) fn file_url(input: &BStr, protocol_colon: usize) -> Result<crate::Url, Error> {
259 let input = input_to_utf8(input, UrlKind::Url)?;
260 let input_after_protocol = &input[protocol_colon + "://".len()..];
261
262 let Some(first_slash) = input_after_protocol
263 .find('/')
264 .or_else(|| cfg!(windows).then(|| input_after_protocol.find('\\')).flatten())
265 else {
266 return Err(Error::MissingRepositoryPath {
267 url: input.to_owned().into(),
268 kind: UrlKind::Url,
269 });
270 };
271
272 let windows_special_path = if cfg!(windows) {
280 let input_after_protocol = if first_slash == 0 {
284 &input_after_protocol[1..]
285 } else {
286 input_after_protocol
287 };
288 if input_after_protocol.chars().nth(1) == Some(':') {
290 Some(input_after_protocol)
291 } else {
292 None
293 }
294 } else {
295 None
296 };
297
298 let host = if windows_special_path.is_some() || first_slash == 0 {
299 None
301 } else {
302 Some(&input_after_protocol[..first_slash])
304 };
305
306 let path = windows_special_path.unwrap_or(&input_after_protocol[first_slash..]);
308
309 Ok(crate::Url {
310 serialize_alternative_form: false,
311 host: host.map(Into::into),
312 ..local(path.into())?
313 })
314}
315
316pub(crate) fn local(input: &BStr) -> Result<crate::Url, Error> {
317 if input.is_empty() {
318 return Err(Error::MissingRepositoryPath {
319 url: input.to_owned(),
320 kind: UrlKind::Local,
321 });
322 }
323
324 Ok(crate::Url {
325 serialize_alternative_form: true,
326 scheme: Scheme::File,
327 password: None,
328 user: None,
329 host: None,
330 port: None,
331 path: input.to_owned(),
332 })
333}
334
335fn input_to_utf8(input: &BStr, kind: UrlKind) -> Result<&str, Error> {
336 std::str::from_utf8(input).map_err(|source| Error::Utf8 {
337 url: input.to_owned(),
338 kind,
339 source,
340 })
341}
342
343fn input_to_utf8_and_url(input: &BStr, kind: UrlKind) -> Result<(&str, crate::simple_url::ParsedUrl<'_>), Error> {
344 let input = input_to_utf8(input, kind)?;
345 crate::simple_url::ParsedUrl::parse(input)
346 .map(|url| (input, url))
347 .map_err(|source| {
348 match source {
351 crate::simple_url::UrlParseError::RelativeUrlWithoutBase => {
352 Error::RelativeUrl { url: input.to_owned() }
353 }
354 _ => Error::Url {
355 url: input.to_owned(),
356 kind,
357 source,
358 },
359 }
360 })
361}