1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
use crate::Scheme;
use bstr::ByteSlice;
use quick_error::quick_error;
use std::borrow::Cow;
quick_error! {
/// The errors that [`parse`] can return.
#[derive(Debug)]
pub enum Error {
/// The input was not valid UTF-8.
///
/// The `from()` clause allows `?` to convert a `std::str::Utf8Error` into this variant.
Utf8(err: std::str::Utf8Error) {
display("Could not decode URL as UTF8")
from()
source(err)
}
/// The `url` crate rejected the input; the payload is its error rendered as a string.
Url(err: String) {
display("the URL could not be parsed: {}", err)
}
/// The URL was parsed, but its scheme is not one of `ssh`, `file`, `git`, `http` or `https`.
/// The payload is the offending scheme.
UnsupportedProtocol(protocol: String) {
display("Protocol '{}' is not supported", protocol)
}
/// The parsed URL has an empty path.
EmptyPath {
display("Paths cannot be empty")
}
/// The parsed URL is one the `url` crate reports as "cannot be a base".
/// The payload is the URL rendered as a string.
RelativeURL(url: String) {
display("Relative URLs are not permitted: '{}'", url)
}
}
}
fn str_to_protocol(s: &str) -> Result<Scheme, Error> {
Ok(match s {
"ssh" => Scheme::Ssh,
"file" => Scheme::File,
"git" => Scheme::Git,
"http" => Scheme::Http,
"https" => Scheme::Https,
_ => return Err(Error::UnsupportedProtocol(s.into())),
})
}
/// Guesses a protocol name for a URL by looking at what precedes its first `:`.
///
/// Returns `"ssh"` if there is a `:` and at least one `.` occurs before it
/// (presumably because that looks like `host.tld:path`), and `"file"` in every
/// other case, including input without any `:`.
fn guess_protocol(url: &[u8]) -> &str {
    let dot_before_first_colon = url
        .find_byte(b':')
        .map_or(false, |colon_pos| url[..colon_pos].find_byte(b'.').is_some());
    if dot_before_first_colon {
        "ssh"
    } else {
        "file"
    }
}
/// Rewrites `url` so it can be appended to `<protocol>://`.
///
/// For `"ssh"` the first `:` is replaced by `/` and a new string is returned.
/// For any other protocol the input is handed back borrowed and unchanged.
fn sanitize_for_protocol<'a>(protocol: &str, url: &'a str) -> Cow<'a, str> {
    if protocol == "ssh" {
        // Only the first colon is touched; later ones stay as they are.
        Cow::Owned(url.replacen(":", "/", 1))
    } else {
        Cow::Borrowed(url)
    }
}
/// Returns `true` if the byte sequence `://` does not occur anywhere in `url`.
fn has_no_explicit_protocol(url: &[u8]) -> bool {
    let separator_pos = url.find(b"://");
    separator_pos.is_none()
}
/// Returns `url` without a leading `file://`, or `url` unchanged if it does not
/// start with that prefix.
///
/// The returned slice always borrows from `url`; nothing is copied.
fn possibly_strip_file_protocol(url: &[u8]) -> &[u8] {
    // `strip_prefix` tests for and removes the prefix in one step, so no slice offset is hand-computed.
    url.strip_prefix(b"file://").unwrap_or(url)
}
/// Converts a parsed `url::Url` into this crate's own `Url` type.
///
/// Scheme, host, port and path are carried over; an empty user name becomes `None`.
///
/// # Errors
///
/// Propagates the error of `str_to_protocol` if the scheme cannot be mapped.
fn to_owned_url(url: url::Url) -> Result<crate::Url, Error> {
    let scheme = str_to_protocol(url.scheme())?;
    let username = url.username();
    Ok(crate::Url {
        scheme,
        user: match username {
            "" => None,
            name => Some(name.into()),
        },
        host: url.host_str().map(Into::into),
        port: url.port(),
        path: url.path().into(),
    })
}
pub fn parse(url: &[u8]) -> Result<crate::Url, Error> {
let guessed_protocol = guess_protocol(url);
if possibly_strip_file_protocol(url) != url || (has_no_explicit_protocol(url) && guessed_protocol == "file") {
return Ok(crate::Url {
scheme: Scheme::File,
path: possibly_strip_file_protocol(url).into(),
..Default::default()
});
}
let url_str = std::str::from_utf8(url)?;
let mut url = match url::Url::parse(url_str) {
Ok(url) => url,
Err(::url::ParseError::RelativeUrlWithoutBase) => {
url::Url::parse(&format!(
"{}://{}",
guessed_protocol,
sanitize_for_protocol(guessed_protocol, url_str)
))
.map_err(|err| Error::Url(err.to_string()))?
}
Err(err) => return Err(Error::Url(err.to_string())),
};
if url.scheme().find('.').is_some() {
url = url::Url::parse(&format!("ssh://{}", sanitize_for_protocol("ssh", url_str)))
.map_err(|err| Error::Url(err.to_string()))?;
}
if url.path().is_empty() {
return Err(Error::EmptyPath);
}
if url.cannot_be_a_base() {
return Err(Error::RelativeURL(url.into_string()));
}
to_owned_url(url)
}