1use std::convert::Infallible;
2
3use bstr::{BStr, BString, ByteSlice};
4use percent_encoding::percent_decode_str;
5
6use crate::Scheme;
7
8#[derive(Debug, thiserror::Error)]
10#[allow(missing_docs)]
11pub enum Error {
12 #[error("{} \"{url}\" is not valid UTF-8", kind.as_str())]
13 Utf8 {
14 url: BString,
15 kind: UrlKind,
16 source: std::str::Utf8Error,
17 },
18 #[error("{} {url:?} can not be parsed as valid URL", kind.as_str())]
19 Url {
20 url: String,
21 kind: UrlKind,
22 source: crate::simple_url::UrlParseError,
23 },
24
25 #[error("The host portion of the following URL is too long ({} bytes, {len} bytes total): {truncated_url:?}", truncated_url.len())]
26 TooLong { truncated_url: BString, len: usize },
27 #[error("{} \"{url}\" does not specify a path to a repository", kind.as_str())]
28 MissingRepositoryPath { url: BString, kind: UrlKind },
29 #[error("URL {url:?} is relative which is not allowed in this context")]
30 RelativeUrl { url: String },
31}
32
33impl From<Infallible> for Error {
34 fn from(_: Infallible) -> Self {
35 unreachable!("Cannot actually happen, but it seems there can't be a blanket impl for this")
36 }
37}
38
/// The kind of input an [`Error`] refers to; it only influences how error messages are worded.
#[derive(Debug, Clone, Copy)]
pub enum UrlKind {
    /// Labelled "URL" in messages.
    Url,
    /// Labelled "SCP-like target" in messages.
    Scp,
    /// Labelled "local path" in messages.
    Local,
}

impl UrlKind {
    /// Return the human-readable label used as the prefix of error messages.
    fn as_str(&self) -> &'static str {
        match *self {
            Self::Url => "URL",
            Self::Scp => "SCP-like target",
            Self::Local => "local path",
        }
    }
}
59
/// The outcome of classifying raw input, as produced by `find_scheme`.
pub(crate) enum InputScheme {
    /// The input contains `://`; `protocol_end` is the byte offset at which it starts.
    Url { protocol_end: usize },
    /// The input looks like an SCP-style target; `colon` is the byte offset of its first `:`.
    Scp { colon: usize },
    /// Anything else is treated as a local path.
    Local,
}
65
66pub(crate) fn find_scheme(input: &BStr) -> InputScheme {
67 if let Some(protocol_end) = input.find("://") {
70 return InputScheme::Url { protocol_end };
71 }
72
73 if let Some(colon) = input.find_byte(b':') {
74 let explicitly_local = &input[..colon].contains(&b'/');
77 let dos_driver_letter = cfg!(windows) && input[..colon].len() == 1;
78
79 if !explicitly_local && !dos_driver_letter {
80 return InputScheme::Scp { colon };
81 }
82 }
83
84 InputScheme::Local
85}
86
87pub(crate) fn url(input: &BStr, protocol_end: usize) -> Result<crate::Url, Error> {
88 const MAX_LEN: usize = 1024;
89 let bytes_to_path = input[protocol_end + "://".len()..]
90 .iter()
91 .filter(|b| !b.is_ascii_whitespace())
92 .skip_while(|b| **b == b'/' || **b == b'\\')
93 .position(|b| *b == b'/')
94 .unwrap_or(input.len() - protocol_end);
95 if bytes_to_path > MAX_LEN || protocol_end > MAX_LEN {
96 return Err(Error::TooLong {
97 truncated_url: input[..(protocol_end + "://".len() + MAX_LEN).min(input.len())].into(),
98 len: input.len(),
99 });
100 }
101 let (input, url) = input_to_utf8_and_url(input, UrlKind::Url)?;
102 let scheme = Scheme::from(url.scheme.as_str());
103
104 if matches!(scheme, Scheme::Git | Scheme::Ssh) && url.path.is_empty() {
105 return Err(Error::MissingRepositoryPath {
106 url: input.into(),
107 kind: UrlKind::Url,
108 });
109 }
110
111 let path = if url.path.is_empty() && matches!(scheme, Scheme::Http | Scheme::Https) {
113 "/".into()
114 } else {
115 url.path.into()
116 };
117
118 Ok(crate::Url {
119 serialize_alternative_form: false,
120 scheme,
121 user: url_user(&url, UrlKind::Url)?,
122 password: url
123 .password
124 .map(|s| percent_decoded_utf8(s, UrlKind::Url))
125 .transpose()?,
126 host: url.host,
127 port: url.port,
128 path,
129 })
130}
131
132fn percent_decoded_utf8(s: &str, kind: UrlKind) -> Result<String, Error> {
133 Ok(percent_decode_str(s)
134 .decode_utf8()
135 .map_err(|err| Error::Utf8 {
136 url: s.into(),
137 kind,
138 source: err,
139 })?
140 .into_owned())
141}
142
143pub(crate) fn scp(input: &BStr, colon: usize) -> Result<crate::Url, Error> {
144 let input = input_to_utf8(input, UrlKind::Scp)?;
145
146 let (host, path) = input.split_at(colon);
148 debug_assert_eq!(path.get(..1), Some(":"), "{path} should start with :");
149 let path = &path[1..];
150
151 if path.is_empty() {
152 return Err(Error::MissingRepositoryPath {
153 url: input.to_owned().into(),
154 kind: UrlKind::Scp,
155 });
156 }
157
158 let url_string = format!("ssh://{host}");
163 let url = crate::simple_url::ParsedUrl::parse(&url_string).map_err(|source| Error::Url {
164 url: input.to_owned(),
165 kind: UrlKind::Scp,
166 source,
167 })?;
168
169 Ok(crate::Url {
170 serialize_alternative_form: true,
171 scheme: Scheme::from(url.scheme.as_str()),
172 user: url_user(&url, UrlKind::Scp)?,
173 password: url
174 .password
175 .map(|s| percent_decoded_utf8(s, UrlKind::Scp))
176 .transpose()?,
177 host: url.host,
178 port: url.port,
179 path: path.into(),
180 })
181}
182
183fn url_user(url: &crate::simple_url::ParsedUrl<'_>, kind: UrlKind) -> Result<Option<String>, Error> {
184 if url.username.is_empty() && url.password.is_none() {
185 Ok(None)
186 } else {
187 Ok(Some(percent_decoded_utf8(url.username, kind)?))
188 }
189}
190
191pub(crate) fn file_url(input: &BStr, protocol_colon: usize) -> Result<crate::Url, Error> {
192 let input = input_to_utf8(input, UrlKind::Url)?;
193 let input_after_protocol = &input[protocol_colon + "://".len()..];
194
195 let Some(first_slash) = input_after_protocol
196 .find('/')
197 .or_else(|| cfg!(windows).then(|| input_after_protocol.find('\\')).flatten())
198 else {
199 return Err(Error::MissingRepositoryPath {
200 url: input.to_owned().into(),
201 kind: UrlKind::Url,
202 });
203 };
204
205 let windows_special_path = if cfg!(windows) {
213 let input_after_protocol = if first_slash == 0 {
217 &input_after_protocol[1..]
218 } else {
219 input_after_protocol
220 };
221 if input_after_protocol.chars().nth(1) == Some(':') {
223 Some(input_after_protocol)
224 } else {
225 None
226 }
227 } else {
228 None
229 };
230
231 let host = if windows_special_path.is_some() || first_slash == 0 {
232 None
234 } else {
235 Some(&input_after_protocol[..first_slash])
237 };
238
239 let path = windows_special_path.unwrap_or(&input_after_protocol[first_slash..]);
241
242 Ok(crate::Url {
243 serialize_alternative_form: false,
244 host: host.map(Into::into),
245 ..local(path.into())?
246 })
247}
248
249pub(crate) fn local(input: &BStr) -> Result<crate::Url, Error> {
250 if input.is_empty() {
251 return Err(Error::MissingRepositoryPath {
252 url: input.to_owned(),
253 kind: UrlKind::Local,
254 });
255 }
256
257 Ok(crate::Url {
258 serialize_alternative_form: true,
259 scheme: Scheme::File,
260 password: None,
261 user: None,
262 host: None,
263 port: None,
264 path: input.to_owned(),
265 })
266}
267
268fn input_to_utf8(input: &BStr, kind: UrlKind) -> Result<&str, Error> {
269 std::str::from_utf8(input).map_err(|source| Error::Utf8 {
270 url: input.to_owned(),
271 kind,
272 source,
273 })
274}
275
276fn input_to_utf8_and_url(input: &BStr, kind: UrlKind) -> Result<(&str, crate::simple_url::ParsedUrl<'_>), Error> {
277 let input = input_to_utf8(input, kind)?;
278 crate::simple_url::ParsedUrl::parse(input)
279 .map(|url| (input, url))
280 .map_err(|source| {
281 match source {
284 crate::simple_url::UrlParseError::RelativeUrlWithoutBase => {
285 Error::RelativeUrl { url: input.to_owned() }
286 }
287 _ => Error::Url {
288 url: input.to_owned(),
289 kind,
290 source,
291 },
292 }
293 })
294}