1use std::fmt::{Display, Formatter};
4
5use ::url::Url;
6
/// Classification of a URL scheme.
///
/// The three named variants are exactly the schemes that
/// `Scheme::is_initially_allowed` accepts; every other scheme is carried in
/// [`Scheme::Other`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Scheme {
    /// The `http` scheme.
    Http,
    /// The `https` scheme.
    Https,
    /// The `file` scheme.
    File,
    /// Any other scheme. `Scheme::parse` stores the ASCII-lowercased scheme
    /// text here.
    Other(String),
}
19
20impl Scheme {
21 #[must_use]
23 pub fn parse(input: &str) -> Self {
24 match input.to_ascii_lowercase().as_str() {
25 "http" => Self::Http,
26 "https" => Self::Https,
27 "file" => Self::File,
28 other => Self::Other(other.to_owned()),
29 }
30 }
31
32 #[must_use]
34 pub const fn is_initially_allowed(&self) -> bool {
35 matches!(self, Self::Http | Self::Https | Self::File)
36 }
37}
38
/// Reasons why `IndexUrl::parse` rejects an input.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum UrlError {
    /// The input is empty once surrounding whitespace is trimmed.
    Empty,
    /// The trimmed input still contains a whitespace character.
    ContainsWhitespace,
    /// No scheme could be identified in the input.
    MissingScheme,
    /// The scheme is not on the allowlist; carries the scheme text as it was
    /// written in the input.
    DisallowedScheme(String),
    /// The URL parser rejected the input; carries the parser's error message.
    Invalid(String),
    /// An `http`/`https` URL does not name a host.
    MissingHost,
}
55
56impl Display for UrlError {
57 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
58 match self {
59 Self::Empty => f.write_str("URL is empty"),
60 Self::ContainsWhitespace => f.write_str("URL contains whitespace"),
61 Self::MissingScheme => f.write_str("URL is missing a scheme"),
62 Self::DisallowedScheme(scheme) => write!(f, "URL scheme is not allowed: {scheme}"),
63 Self::Invalid(reason) => write!(f, "URL is invalid: {reason}"),
64 Self::MissingHost => f.write_str("HTTP URL is missing a host"),
65 }
66 }
67}
68
// Marker implementation: `UrlError` derives `Debug` and implements `Display`,
// and carries no underlying source error, so the default `Error` methods suffice.
impl std::error::Error for UrlError {}
70
/// A validated URL kept in its serialized, normalized text form.
///
/// Instances are built by `IndexUrl::parse`, which validates the input,
/// runs it through the URL parser and drops the fragment before storing it.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct IndexUrl {
    /// The URL parser's serialization of the input, with the fragment removed.
    raw: String,
    /// The scheme reported by the URL parser for this URL.
    scheme: String,
}
77
78impl IndexUrl {
79 pub fn parse(input: impl AsRef<str>) -> Result<Self, UrlError> {
82 let trimmed = input.as_ref().trim();
83 if trimmed.is_empty() {
84 return Err(UrlError::Empty);
85 }
86 if trimmed.chars().any(char::is_whitespace) {
87 return Err(UrlError::ContainsWhitespace);
88 }
89
90 let Some((scheme, rest)) = trimmed.split_once(':') else {
91 return Err(UrlError::MissingScheme);
92 };
93
94 let parsed_scheme = Scheme::parse(scheme);
95 if !parsed_scheme.is_initially_allowed() {
96 return Err(UrlError::DisallowedScheme(scheme.to_owned()));
97 }
98 if matches!(parsed_scheme, Scheme::Http | Scheme::Https)
99 && (!rest.starts_with("//") || rest.starts_with("///"))
100 {
101 return Err(UrlError::MissingHost);
102 }
103
104 let mut parsed =
105 Url::parse(trimmed).map_err(|error| UrlError::Invalid(error.to_string()))?;
106 if matches!(parsed_scheme, Scheme::Http | Scheme::Https) && parsed.host_str().is_none() {
107 return Err(UrlError::MissingHost);
108 }
109 parsed.set_fragment(None);
110
111 Ok(Self {
112 raw: parsed.to_string(),
113 scheme: parsed.scheme().to_owned(),
114 })
115 }
116
117 #[must_use]
119 pub fn as_str(&self) -> &str {
120 &self.raw
121 }
122
123 #[must_use]
125 pub fn scheme(&self) -> &str {
126 &self.scheme
127 }
128
129 #[must_use]
131 pub fn origin(&self) -> Option<Origin> {
132 Origin::from_url(self)
133 }
134
135 #[must_use]
137 pub fn cache_key(&self) -> String {
138 let mut key = String::with_capacity(self.raw.len());
139 let mut previous_was_separator = false;
140 for ch in self.raw.chars() {
141 if ch.is_ascii_alphanumeric() {
142 key.push(ch.to_ascii_lowercase());
143 previous_was_separator = false;
144 } else if !previous_was_separator {
145 key.push('_');
146 previous_was_separator = true;
147 }
148 }
149 let trimmed = key.trim_matches('_');
150 if trimmed.is_empty() {
151 "url".to_owned()
152 } else {
153 trimmed.to_owned()
154 }
155 }
156}
157
158impl Display for IndexUrl {
159 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
160 f.write_str(self.as_str())
161 }
162}
163
/// An origin held as a plain string, usable as a comparable, orderable and
/// hashable key.
///
/// `Origin::from_url` produces `scheme://host` (plus `:port` when the parser
/// reports one) for `http`/`https` URLs and the fixed string `file://` for
/// `file` URLs; `Origin::from_stored` wraps any string unchanged.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Origin(String);
167
168impl Origin {
169 #[must_use]
171 pub fn from_url(url: &IndexUrl) -> Option<Self> {
172 let parsed = Url::parse(url.as_str()).ok()?;
173 match parsed.scheme() {
174 "http" | "https" => {
175 let host = parsed.host_str()?;
176 let port = parsed
177 .port()
178 .map(|port| format!(":{port}"))
179 .unwrap_or_default();
180 Some(Self(format!("{}://{}{}", parsed.scheme(), host, port)))
181 }
182 "file" => Some(Self("file://".to_owned())),
183 _ => None,
184 }
185 }
186
187 #[must_use]
189 pub fn from_stored(input: impl Into<String>) -> Self {
190 Self(input.into())
191 }
192
193 #[must_use]
195 pub fn as_str(&self) -> &str {
196 &self.0
197 }
198}
199
200impl Display for Origin {
201 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
202 f.write_str(self.as_str())
203 }
204}
205
#[cfg(test)]
mod tests {
    use super::{IndexUrl, Origin, Scheme, UrlError};

    /// Result type for tests that propagate parse failures with `?`.
    type TestResult = Result<(), Box<dyn std::error::Error>>;

    // --- IndexUrl::parse: accepted input -----------------------------------

    #[test]
    fn parses_https_url() -> TestResult {
        let url = IndexUrl::parse("https://example.com/docs")?;
        assert_eq!(url.scheme(), "https");
        Ok(())
    }

    // --- IndexUrl::parse: rejected input -----------------------------------

    #[test]
    fn rejects_empty_url() {
        let outcome = IndexUrl::parse("");
        assert_eq!(outcome, Err(UrlError::Empty));
    }

    #[test]
    fn rejects_whitespace() {
        let outcome = IndexUrl::parse("https://example.com/a b");
        assert_eq!(outcome, Err(UrlError::ContainsWhitespace));
    }

    #[test]
    fn rejects_disallowed_scheme() {
        let expected = UrlError::DisallowedScheme(String::from("javascript"));
        assert_eq!(IndexUrl::parse("javascript:alert(1)"), Err(expected));
    }

    #[test]
    fn rejects_missing_scheme() {
        let outcome = IndexUrl::parse("example.com/path");
        assert_eq!(outcome, Err(UrlError::MissingScheme));
    }

    #[test]
    fn supports_file_scheme_and_display_roundtrip() -> TestResult {
        let text = "file:///tmp/example.txt";
        let url = IndexUrl::parse(text)?;
        assert_eq!(url.scheme(), "file");
        assert_eq!(url.to_string(), text);
        assert_eq!(url.as_str(), text);
        Ok(())
    }

    // --- Scheme --------------------------------------------------------------

    #[test]
    fn scheme_parser_distinguishes_known_and_other_values() {
        let cases = [
            ("HTTP", Scheme::Http),
            ("https", Scheme::Https),
            ("file", Scheme::File),
            ("mailto", Scheme::Other(String::from("mailto"))),
        ];
        for (input, expected) in cases {
            assert_eq!(Scheme::parse(input), expected);
        }
    }

    #[test]
    fn only_initial_allowlist_schemes_are_marked_allowed() {
        for allowed in [Scheme::Http, Scheme::Https, Scheme::File] {
            assert!(allowed.is_initially_allowed());
        }
        let other = Scheme::Other(String::from("ssh"));
        assert!(!other.is_initially_allowed());
    }

    // --- Normalization, origins and cache keys ------------------------------

    #[test]
    fn normalizes_scheme_host_default_port_and_fragment() -> TestResult {
        let url = IndexUrl::parse("HTTP://EXAMPLE.COM:80/docs#part")?;
        assert_eq!(url.to_string(), "http://example.com/docs");
        Ok(())
    }

    #[test]
    fn derives_http_origin() -> TestResult {
        let expected = Origin::from_stored("https://example.com:8443");
        let url = IndexUrl::parse("https://example.com:8443/docs")?;
        assert_eq!(url.origin(), Some(expected));
        Ok(())
    }

    #[test]
    fn cache_keys_are_normalized_and_fragment_independent() -> TestResult {
        let first = IndexUrl::parse("https://EXAMPLE.com:443/docs?q=1#one")?;
        let second = IndexUrl::parse("https://example.com/docs?q=1#two")?;
        assert_eq!(first.as_str(), second.as_str());

        let key = first.cache_key();
        assert_eq!(key, second.cache_key());
        assert!(!key.contains('/'));
        assert!(!key.contains('?'));
        Ok(())
    }

    #[test]
    fn rejects_http_urls_without_hosts() {
        let outcome = IndexUrl::parse("https:///docs");
        assert_eq!(outcome, Err(UrlError::MissingHost));
    }

    #[test]
    fn rejects_parser_invalid_urls() {
        let outcome = IndexUrl::parse("http://[::1");
        assert!(matches!(outcome, Err(UrlError::Invalid(_))));
    }

    #[test]
    fn derives_file_origin_and_displays_stored_origin() -> TestResult {
        let url = IndexUrl::parse("file:///tmp/index.html")?;
        let stored = Origin::from_stored("file://");

        assert_eq!(stored.to_string(), "file://");
        assert_eq!(Origin::from_url(&url), Some(stored));
        Ok(())
    }

    #[test]
    fn cache_key_has_fallback_for_non_alphanumeric_urls() -> TestResult {
        let key = IndexUrl::parse("file:///")?.cache_key();
        assert_eq!(key, "file");
        Ok(())
    }
}