1#![crate_name = "minurl"]
9#![deny(missing_docs)]
10#![deny(rustdoc::broken_intra_doc_links)]
11#![deny(rustdoc::private_intra_doc_links)]
12#![allow(bare_trait_objects)]
13#![allow(ellipsis_inclusive_range_patterns)]
14#![cfg_attr(docsrs, feature(doc_cfg))]
15
16use std::ops::Range;
19
/// Looks up the well-known default port for a URL scheme.
///
/// The comparison is exact (and therefore case-sensitive). Schemes that are
/// not in the table yield `None`.
fn default_port_for_scheme(scheme: &str) -> Option<u16> {
    // (scheme, default port) pairs known to this crate.
    const KNOWN_DEFAULTS: [(&str, u16); 5] = [
        ("http", 80),
        ("ws", 80),
        ("https", 443),
        ("wss", 443),
        ("ftp", 21),
    ];

    KNOWN_DEFAULTS
        .iter()
        .find(|(name, _)| *name == scheme)
        .map(|&(_, port)| port)
}
29
/// The reasons for which [`Url::parse`] can reject its input.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ParseError {
    /// The input string was empty.
    EmptyInput,
    /// The input contained a non-ASCII or ASCII control character; the
    /// offending character is attached.
    InvalidCharacter(char),
    /// The input did not contain the `://` separator that ends the scheme.
    MissingScheme,
    /// The scheme was empty, did not start with an ASCII letter, or contained
    /// a character other than ASCII letters, digits, `+`, `-` and `.`.
    InvalidScheme,
    /// The host component was empty.
    EmptyHost,
    /// The port could not be parsed as a 16-bit port number.
    InvalidPort,
}

impl std::fmt::Display for ParseError {
    /// Writes a short, lowercase, human-readable description of the error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let message = match self {
            // The only variant carrying data; it is rendered with `{:?}` so
            // that control characters show up escaped.
            Self::InvalidCharacter(c) => return write!(f, "invalid character: {:?}", c),
            Self::EmptyInput => "empty input",
            Self::MissingScheme => "missing scheme",
            Self::InvalidScheme => "invalid scheme",
            Self::EmptyHost => "empty host",
            Self::InvalidPort => "invalid port",
        };
        f.write_str(message)
    }
}

// No underlying source error is exposed, so the default methods suffice.
impl std::error::Error for ParseError {}
61
62#[derive(Debug, Clone, PartialEq, Eq)]
70pub struct Url {
71 serialization: String,
73 scheme: Range<usize>,
75 username: Range<usize>,
77 password: Option<Range<usize>>,
79 host: Range<usize>,
81 port: Option<u16>,
83 path: Range<usize>,
85 query: Option<Range<usize>>,
87 fragment: Option<Range<usize>>,
89}
90
91impl Url {
92 pub fn parse(url_str: &str) -> Result<Self, ParseError> {
96 if url_str.is_empty() {
97 return Err(ParseError::EmptyInput);
98 }
99
100 for c in url_str.chars() {
102 if !c.is_ascii() || c.is_ascii_control() {
103 return Err(ParseError::InvalidCharacter(c));
104 }
105 }
106
107 let scheme_end = url_str.find("://").ok_or(ParseError::MissingScheme)?;
109
110 if scheme_end == 0 {
111 return Err(ParseError::InvalidScheme);
112 }
113
114 let scheme = &url_str[..scheme_end];
115
116 let mut scheme_chars = scheme.chars();
119 let first_char = scheme_chars.next().ok_or(ParseError::InvalidScheme)?;
120 if !first_char.is_ascii_alphabetic() {
121 return Err(ParseError::InvalidScheme);
122 }
123
124 for c in scheme_chars {
125 if !c.is_ascii_alphanumeric() && c != '+' && c != '-' && c != '.' {
126 return Err(ParseError::InvalidScheme);
127 }
128 }
129
130 let after_scheme_pos = scheme_end + 3;
132 let after_scheme = &url_str[after_scheme_pos..];
133
134 let authority_end =
136 after_scheme.find(|c| c == '/' || c == '?' || c == '#').unwrap_or(after_scheme.len());
137
138 let authority = &after_scheme[..authority_end];
139 let after_authority = &after_scheme[authority_end..];
140
141 let (userinfo, host_and_port) = if let Some(at_pos) = authority.rfind('@') {
143 (Some(&authority[..at_pos]), &authority[at_pos + 1..])
144 } else {
145 (None, authority)
146 };
147
148 let host_start = if let Some(at_pos) = authority.rfind('@') {
150 after_scheme_pos + at_pos + 1
151 } else {
152 after_scheme_pos
153 };
154
155 let (username, password) = if let Some(info) = userinfo {
157 if let Some(colon_pos) = info.find(':') {
158 let username = after_scheme_pos..(after_scheme_pos + colon_pos);
159 let password = Some((after_scheme_pos + colon_pos + 1)..(host_start - 1));
160 (username, password)
161 } else {
162 let username = after_scheme_pos..(after_scheme_pos + info.len());
163 (username, None)
164 }
165 } else {
166 (after_scheme_pos..after_scheme_pos, None) };
168
169 let (host_len, port) = if host_and_port.starts_with('[') {
172 if let Some(bracket_pos) = host_and_port.find(']') {
174 let after_bracket = &host_and_port[bracket_pos + 1..];
175 if after_bracket.starts_with(':') && after_bracket.len() > 1 {
176 let potential_port = &after_bracket[1..];
178 if potential_port.chars().all(|c| c.is_ascii_digit()) {
179 let port_num: u16 =
180 potential_port.parse().map_err(|_| ParseError::InvalidPort)?;
181 (bracket_pos + 1, Some(port_num))
182 } else {
183 (host_and_port.len(), None)
184 }
185 } else if after_bracket.is_empty() {
186 (host_and_port.len(), None)
188 } else {
189 (host_and_port.len(), None)
191 }
192 } else {
193 (host_and_port.len(), None)
195 }
196 } else if let Some(colon_pos) = host_and_port.rfind(':') {
197 let potential_port = &host_and_port[colon_pos + 1..];
198 if !potential_port.is_empty() && potential_port.chars().all(|c| c.is_ascii_digit()) {
200 let port_num: u16 = potential_port.parse().map_err(|_| ParseError::InvalidPort)?;
201 (colon_pos, Some(port_num))
202 } else {
203 (host_and_port.len(), None)
204 }
205 } else {
206 (host_and_port.len(), None)
207 };
208
209 let host_end = host_start + host_len;
210
211 if host_len == 0 {
213 return Err(ParseError::EmptyHost);
214 }
215
216 let path_start = after_scheme_pos + authority_end;
218
219 let mut serialization = url_str.to_string();
221 serialization[..scheme_end].make_ascii_lowercase();
222 let url_len = serialization.len();
223
224 let (path, query, fragment) = {
226 let mut query = None;
227 let mut fragment = None;
228 let mut path_end = url_len;
229
230 if after_authority.starts_with('/') {
231 if let Some(q_pos) = after_authority.find('?') {
233 let query_start = path_start + q_pos;
234 path_end = query_start;
235 if let Some(f_pos) = after_authority[q_pos..].find('#') {
237 let fragment_start = query_start + f_pos;
238 query = Some((query_start + 1)..fragment_start);
239 fragment = Some((fragment_start + 1)..url_len);
240 } else {
241 query = Some((query_start + 1)..url_len);
242 }
243 } else if let Some(f_pos) = after_authority.find('#') {
244 let fragment_start = path_start + f_pos;
245 path_end = fragment_start;
246 fragment = Some((fragment_start + 1)..url_len);
247 }
248 } else {
249 if after_authority.starts_with('?') {
251 let query_start = path_start;
252 path_end = query_start;
253 if let Some(f_pos) = after_authority.find('#') {
254 let fragment_start = path_start + f_pos;
255 query = Some((query_start + 1)..fragment_start);
256 fragment = Some((fragment_start + 1)..url_len);
257 } else {
258 query = Some((query_start + 1)..url_len);
259 }
260 } else if after_authority.starts_with('#') {
261 let fragment_start = path_start;
262 path_end = fragment_start;
263 fragment = Some((fragment_start + 1)..url_len);
264 }
265 }
266
267 (path_start..path_end, query, fragment)
268 };
269
270 Ok(Url {
271 serialization,
272 scheme: 0..scheme_end,
273 username,
274 password,
275 host: host_start..host_end,
276 port,
277 path,
278 query,
279 fragment,
280 })
281 }
282
283 pub fn scheme(&self) -> &str {
285 &self.serialization[self.scheme.clone()]
286 }
287
288 pub fn username(&self) -> &str {
292 &self.serialization[self.username.clone()]
293 }
294
295 pub fn password(&self) -> Option<&str> {
297 self.password.as_ref().map(|r| &self.serialization[r.clone()])
298 }
299
300 pub fn base_url(&self) -> &str {
302 &self.serialization[self.host.clone()]
303 }
304
305 pub fn port(&self) -> Option<u16> {
311 match self.port {
312 Some(port) if Some(port) == default_port_for_scheme(self.scheme()) => None,
313 port => port,
314 }
315 }
316
317 pub fn port_or_known_default(&self) -> Option<u16> {
323 self.port.or_else(|| default_port_for_scheme(self.scheme()))
324 }
325
326 pub fn path(&self) -> &str {
331 &self.serialization[self.path.clone()]
332 }
333
334 pub fn path_segments(&self) -> impl Iterator<Item = &str> {
339 let path = self.path();
340 let path = if path.starts_with('/') { &path[1..] } else { path };
341 path.split('/')
342 }
343
344 pub fn query(&self) -> Option<&str> {
348 self.query.as_ref().map(|r| &self.serialization[r.clone()])
349 }
350
351 pub fn query_pairs(&self) -> impl Iterator<Item = (&str, &str)> {
356 self.query().into_iter().flat_map(|q| {
357 q.split('&').map(|pair| {
358 if let Some(eq_pos) = pair.find('=') {
359 (&pair[..eq_pos], &pair[eq_pos + 1..])
360 } else {
361 (pair, "")
362 }
363 })
364 })
365 }
366
367 pub fn fragment(&self) -> Option<&str> {
371 self.fragment.as_ref().map(|r| &self.serialization[r.clone()])
372 }
373
374 pub fn as_str(&self) -> &str {
376 &self.serialization
377 }
378}
379
380impl std::fmt::Display for Url {
381 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
382 f.write_str(self.as_str())
383 }
384}
385
#[cfg(test)]
mod tests {
    // Unit tests for `Url::parse`, the `Url` accessors and `ParseError`.
    use super::*;

    /// Parses `input`, panicking if the parser rejects it.
    fn must_parse(input: &str) -> Url {
        Url::parse(input).unwrap()
    }

    // ---- basic parsing ----

    #[test]
    fn parse_simple_url() {
        let parsed = must_parse("http://example.com");
        assert_eq!(parsed.scheme(), "http");
        assert_eq!(parsed.base_url(), "example.com");
        assert_eq!(parsed.port(), None);
    }

    #[test]
    fn parse_url_with_port() {
        let parsed = must_parse("https://example.com:8080");
        assert_eq!(parsed.scheme(), "https");
        assert_eq!(parsed.base_url(), "example.com");
        assert_eq!(parsed.port(), Some(8080));
    }

    #[test]
    fn parse_url_with_path() {
        let parsed = must_parse("http://example.com/path/to/resource");
        assert_eq!(parsed.scheme(), "http");
        assert_eq!(parsed.base_url(), "example.com");
        assert_eq!(parsed.port(), None);
    }

    // ---- rejected input ----

    #[test]
    fn empty_input_returns_error() {
        assert_eq!(Url::parse("").unwrap_err(), ParseError::EmptyInput);
    }

    #[test]
    fn missing_scheme_returns_error() {
        assert_eq!(Url::parse("example.com").unwrap_err(), ParseError::MissingScheme);
    }

    #[test]
    fn invalid_character_returns_error() {
        // A control character.
        assert_eq!(
            Url::parse("http://example\x00.com"),
            Err(ParseError::InvalidCharacter('\x00'))
        );

        // A non-ASCII character.
        assert_eq!(Url::parse("http://exämple.com"), Err(ParseError::InvalidCharacter('ä')));
    }

    // ---- scheme ----

    #[test]
    fn scheme_is_lowercased() {
        assert_eq!(must_parse("HTTP://EXAMPLE.COM").scheme(), "http");
    }

    // ---- path ----

    #[test]
    fn path_returns_full_path() {
        assert_eq!(must_parse("http://example.com/path/to/resource").path(), "/path/to/resource");
    }

    #[test]
    fn path_is_empty_when_not_specified() {
        assert_eq!(must_parse("http://example.com").path(), "");
    }

    #[test]
    fn path_segments_splits_correctly() {
        let parsed = must_parse("http://example.com/path/to/resource");
        let segments = parsed.path_segments().collect::<Vec<_>>();
        assert_eq!(segments, ["path", "to", "resource"]);
    }

    #[test]
    fn path_segments_handles_empty_path() {
        let parsed = must_parse("http://example.com");
        let segments = parsed.path_segments().collect::<Vec<_>>();
        assert_eq!(segments, [""]);
    }

    #[test]
    fn path_stops_at_query_string() {
        assert_eq!(must_parse("http://example.com/path?query=value").path(), "/path");
    }

    #[test]
    fn path_stops_at_fragment() {
        assert_eq!(must_parse("http://example.com/path#section").path(), "/path");
    }

    // ---- query ----

    #[test]
    fn query_returns_query_string() {
        let parsed = must_parse("http://example.com/path?foo=bar&baz=qux");
        assert_eq!(parsed.query(), Some("foo=bar&baz=qux"));
    }

    #[test]
    fn query_is_none_when_not_present() {
        assert_eq!(must_parse("http://example.com/path").query(), None);
    }

    #[test]
    fn query_stops_at_fragment() {
        let parsed = must_parse("http://example.com/path?query=value#section");
        assert_eq!(parsed.query(), Some("query=value"));
    }

    #[test]
    fn query_pairs_parses_key_value_pairs() {
        let parsed = must_parse("http://example.com?foo=bar&baz=qux");
        let pairs = parsed.query_pairs().collect::<Vec<_>>();
        assert_eq!(pairs, [("foo", "bar"), ("baz", "qux")]);
    }

    #[test]
    fn query_pairs_handles_missing_value() {
        let parsed = must_parse("http://example.com?foo&bar=baz");
        let pairs = parsed.query_pairs().collect::<Vec<_>>();
        assert_eq!(pairs, [("foo", ""), ("bar", "baz")]);
    }

    #[test]
    fn query_pairs_is_empty_when_no_query() {
        let parsed = must_parse("http://example.com");
        let pairs = parsed.query_pairs().collect::<Vec<_>>();
        assert!(pairs.is_empty());
    }

    // ---- fragment ----

    #[test]
    fn fragment_returns_fragment() {
        assert_eq!(must_parse("http://example.com/path#section").fragment(), Some("section"));
    }

    #[test]
    fn fragment_is_none_when_not_present() {
        assert_eq!(must_parse("http://example.com/path").fragment(), None);
    }

    #[test]
    fn fragment_with_query() {
        let parsed = must_parse("http://example.com/path?query=value#section");
        assert_eq!(parsed.query(), Some("query=value"));
        assert_eq!(parsed.fragment(), Some("section"));
    }

    #[test]
    fn fragment_without_path_or_query() {
        let parsed = must_parse("http://example.com#section");
        assert_eq!(parsed.path(), "");
        assert_eq!(parsed.query(), None);
        assert_eq!(parsed.fragment(), Some("section"));
    }

    // ---- serialization ----

    #[test]
    fn as_str_returns_full_url() {
        let input = "http://example.com/path?query=value#section";
        assert_eq!(must_parse(input).as_str(), "http://example.com/path?query=value#section");
    }

    #[test]
    fn as_str_with_port() {
        let input = "https://example.com:8080/path";
        assert_eq!(must_parse(input).as_str(), "https://example.com:8080/path");
    }

    #[test]
    fn as_str_normalizes_scheme_to_lowercase() {
        // Only the scheme is lowercased; the host keeps its case.
        assert_eq!(must_parse("HTTP://EXAMPLE.COM/path").as_str(), "http://EXAMPLE.COM/path");
    }

    #[test]
    fn as_str_minimal_url() {
        assert_eq!(must_parse("http://example.com").as_str(), "http://example.com");
    }

    #[test]
    fn display_matches_as_str() {
        let parsed = must_parse("http://example.com/path?query=value#section");
        assert_eq!(parsed.to_string(), parsed.as_str());
    }

    #[test]
    fn display_can_be_used_in_format_string() {
        let parsed = must_parse("http://example.com");
        assert_eq!(format!("URL: {}", parsed), "URL: http://example.com");
    }

    // ---- IPv6 hosts ----

    #[test]
    fn ipv6_without_port() {
        let parsed = must_parse("http://[::1]/path");
        assert_eq!(parsed.scheme(), "http");
        assert_eq!(parsed.base_url(), "[::1]");
        assert_eq!(parsed.port(), None);
        assert_eq!(parsed.path(), "/path");
    }

    #[test]
    fn ipv6_with_port() {
        let parsed = must_parse("http://[::1]:8080/path");
        assert_eq!(parsed.scheme(), "http");
        assert_eq!(parsed.base_url(), "[::1]");
        assert_eq!(parsed.port(), Some(8080));
        assert_eq!(parsed.path(), "/path");
    }

    #[test]
    fn ipv6_full_address_with_port() {
        let parsed = must_parse("http://[2001:db8::1]:443/");
        assert_eq!(parsed.base_url(), "[2001:db8::1]");
        assert_eq!(parsed.port(), Some(443));
    }

    #[test]
    fn ipv6_as_str_roundtrip() {
        assert_eq!(must_parse("http://[::1]:8080/path").as_str(), "http://[::1]:8080/path");
    }

    // ---- userinfo ----

    #[test]
    fn userinfo_with_username_only() {
        let parsed = must_parse("http://user@example.com/path");
        assert_eq!(parsed.username(), "user");
        assert_eq!(parsed.password(), None);
        assert_eq!(parsed.base_url(), "example.com");
        assert_eq!(parsed.path(), "/path");
    }

    #[test]
    fn userinfo_with_username_and_password() {
        let parsed = must_parse("http://user:pass@example.com/path");
        assert_eq!(parsed.username(), "user");
        assert_eq!(parsed.password(), Some("pass"));
        assert_eq!(parsed.base_url(), "example.com");
        assert_eq!(parsed.path(), "/path");
    }

    #[test]
    fn userinfo_with_port() {
        let parsed = must_parse("http://user:pass@example.com:8080/path");
        assert_eq!(parsed.username(), "user");
        assert_eq!(parsed.password(), Some("pass"));
        assert_eq!(parsed.base_url(), "example.com");
        assert_eq!(parsed.port(), Some(8080));
    }

    #[test]
    fn userinfo_empty_when_not_present() {
        let parsed = must_parse("http://example.com/path");
        assert_eq!(parsed.username(), "");
        assert_eq!(parsed.password(), None);
    }

    #[test]
    fn userinfo_as_str_roundtrip() {
        let input = "http://user:pass@example.com:8080/path";
        assert_eq!(must_parse(input).as_str(), "http://user:pass@example.com:8080/path");
    }

    #[test]
    fn userinfo_with_empty_password() {
        let parsed = must_parse("http://user:@example.com");
        assert_eq!(parsed.username(), "user");
        assert_eq!(parsed.password(), Some(""));
        assert_eq!(parsed.base_url(), "example.com");
    }

    // ---- ParseError ----

    #[test]
    fn parse_error_display() {
        assert_eq!(ParseError::EmptyInput.to_string(), "empty input");
        assert_eq!(ParseError::InvalidCharacter('\x00').to_string(), "invalid character: '\\0'");
        assert_eq!(ParseError::MissingScheme.to_string(), "missing scheme");
        assert_eq!(ParseError::InvalidScheme.to_string(), "invalid scheme");
        assert_eq!(ParseError::EmptyHost.to_string(), "empty host");
        assert_eq!(ParseError::InvalidPort.to_string(), "invalid port");
    }

    #[test]
    fn empty_host_returns_error() {
        for input in ["http:///path", "http://:8080/path", "http://user@/path"].iter() {
            assert_eq!(Url::parse(input), Err(ParseError::EmptyHost));
        }
    }

    #[test]
    fn parse_error_is_std_error() {
        // Compiles only if `ParseError` implements `std::error::Error`.
        let _as_error: &dyn std::error::Error = &ParseError::EmptyInput;
    }
}