faup_rs/lib.rs
1//! # faup-rs: Fast URL Parser for Rust
2//!
//! [crates.io](https://crates.io/crates/faup-rs)
//! [docs.rs](https://docs.rs/faup-rs)
5//! 
6//!
7//! A high-performance, zero-allocation URL parser for Rust that handles:
8//! - Hostnames (with subdomains, custom TLDs, and IDNs)
9//! - IPv4/IPv6 addresses
10//! - User credentials (username/password)
11//! - Ports, paths, queries, and fragments
12//! - UTF-8 and URL-encoded characters
13//!
14//! ## Features
15//!
16//! ✅ **Zero-allocation parsing**: Borrows input strings where possible
17//!
18//! ✅ **Public Suffix List (PSL)**: Correctly identifies domain suffixes
19//!
20//! ✅ **Custom TLDs**: Extendable via the `CUSTOM_TLDS` constant
21//!
22//! ✅ **Comprehensive error handling**: Clear, actionable error types
23//!
24//! ✅ **UTF-8 support**: Full Unicode handling for all URL components
25//!
26//! ## Installation
27//!
28//! Add to your `Cargo.toml`:
29//! ```toml
30//! [dependencies]
31//! faup-rs = "0.1"
32//!```
33//!
34//! ## Usage
35//!
36//! ### Basic Parsing
37//! ```
38//! use faup_rs::Url;
39//!
40//! let url = Url::parse("https://user:pass@sub.example.com:8080/path?query=value#fragment").unwrap();
41//! assert_eq!(url.scheme(), "https");
42//! assert_eq!(url.host().to_string(), "sub.example.com");
43//! assert_eq!(url.port(), Some(8080));
44//! assert_eq!(url.path(), Some("/path"));
45//! assert_eq!(url.query(), Some("query=value"));
46//! assert_eq!(url.fragment(), Some("fragment"));
47//!```
48//!
49//! ### Hostname Components
50//! ```
51//! use faup_rs::{Url, Host};
52//!
53//! let url = Url::parse("https://sub.example.co.uk").unwrap();
//! if let Host::Hostname(hostname) = url.host() {
//!     assert_eq!(hostname.full_name(), "sub.example.co.uk");
//!     assert_eq!(hostname.suffix(), Some("co.uk"));
//!     assert_eq!(hostname.domain(), Some("example.co.uk"));
//!     assert_eq!(hostname.subdomain(), Some("sub"));
//! }
60//!```
61//!
62//! ### IP Addresses
63//! ```
64//! use faup_rs::Url;
65//!
66//! let url = Url::parse("http://[::1]").unwrap();
67//! assert!(matches!(url.host(), faup_rs::Host::Ip(ip) if ip.is_loopback()));
68//!```
69//!
70//! ### User Info (UTF-8 Support)
71//! ```
72//! use faup_rs::Url;
73//!
74//! let url = Url::parse("https://用户:密码@example.com").unwrap();
75//! let user_info = url.userinfo().unwrap();
76//! assert_eq!(user_info.username(), "用户");
77//! assert_eq!(user_info.password(), Some("密码"));
78//!```
79//!
80//! ### Custom TLDs
81//! ```
82//! use faup_rs::Url;
83//!
84//! let url = Url::parse("http://example.b32.i2p").unwrap();
85//! assert_eq!(url.suffix(), Some("b32.i2p"));
86//!```
87//!
88//! ## Examples
89//!
90//! ### Real-World URLs
91//! ```
92//! use faup_rs::Url;
93//!
//! let urls = [
//!     "https://www.example.co.uk",
//!     "http://sub.domain.example.com/path/to/page",
//!     "https://例子.测试",
//!     "http://toaster.dyrøy.no",
//!     "http://full.custom-tld.test.b32.i2p",
//! ];
//! for url_str in urls {
//!     let url = Url::parse(url_str).unwrap();
//!     println!("Parsed: {}", url);
//! }
105//!```
106//!
107//! ## License
108//!
//! This project is licensed under the GNU General Public License v3.0 (GPLv3).
110//!
111//! ## Acknowledgement
112//!
113//! Thanks to Sebastien Tricaud for [the original work on faup](https://github.com/stricaud/faup).
114//!
115use std::{
116 borrow::Cow,
117 fmt,
118 net::{IpAddr, Ipv4Addr, Ipv6Addr},
119 str::FromStr,
120};
121
122use pest::{Parser, iterators::Pair};
123use pest_derive::Parser;
124use thiserror::Error;
125
/// Suffixes recognized in addition to the Public Suffix List.
///
/// The `suffix` helper checks these entries before it asks the `psl` crate.
static CUSTOM_TLDS: &[&str] = &["b32.i2p"];
127
/// Errors reported while parsing a URL or a host.
#[derive(Debug, Error)]
pub enum Error {
    /// The port component could not be converted to a `u16`.
    #[error("invalid port")]
    InvalidPort,
    /// The host looked like an IPv4 address but `Ipv4Addr` rejected it.
    #[error("invalid ipv4 address")]
    InvalidIPv4,
    /// The host matched the IPv6 rule but `Ipv6Addr` rejected it.
    #[error("invalid ipv6 address")]
    InvalidIPv6,
    /// The input given to `Host::parse` did not match the host grammar.
    #[error("invalid host")]
    InvalidHost,
    /// Any other failure, described by a free-form message.
    #[error("{0}")]
    Other(String),
    /// The grammar rejected the input; wraps the boxed `pest` error.
    #[error("parser error: {0}")]
    Parse(#[from] Box<pest::error::Error<Rule>>),
}
143
144impl Error {
145 fn other<S: AsRef<str>>(s: S) -> Self {
146 Error::Other(s.as_ref().to_string())
147 }
148}
149
/// Parser type whose implementation is derived by `pest_derive` from the
/// rules declared in `grammar.pest`.
#[derive(Parser)]
#[grammar = "grammar.pest"]
pub(crate) struct UrlParser;
153
154#[inline(always)]
155fn suffix(hostname: &str) -> Option<&str> {
156 for tld in CUSTOM_TLDS {
157 if hostname.ends_with(tld) {
158 return Some(tld);
159 }
160 }
161 psl::suffix_str(hostname)
162}
163
/// Represents a parsed hostname with its components (subdomain, domain, and suffix).
///
/// The `Hostname` struct provides access to the different parts of a domain name,
/// including support for internationalized domain names (IDNs), custom top-level domains (TLDs),
/// and subdomains. It uses the Public Suffix List (via the `psl` crate) to properly identify
/// domain suffixes, with additional support for custom TLDs.
///
/// # Examples
///
/// ```
/// use faup_rs::{Url, Host};
///
/// // Parse a simple domain
/// let url = Url::parse("https://example.com").unwrap();
/// if let Host::Hostname(hostname) = url.host() {
///     assert_eq!(hostname.full_name(), "example.com");
///     assert_eq!(hostname.suffix(), Some("com"));
///     assert_eq!(hostname.domain(), Some("example.com"));
///     assert_eq!(hostname.subdomain(), None);
/// }
///
/// // Parse a domain with subdomains
/// let url = Url::parse("https://sub.example.co.uk").unwrap();
/// if let Host::Hostname(hostname) = url.host() {
///     assert_eq!(hostname.full_name(), "sub.example.co.uk");
///     assert_eq!(hostname.suffix(), Some("co.uk"));
///     assert_eq!(hostname.domain(), Some("example.co.uk"));
///     assert_eq!(hostname.subdomain(), Some("sub"));
/// }
///
/// // Parse a domain with UTF-8 characters
/// let url = Url::parse("https://例子.测试").unwrap();
/// if let Host::Hostname(hostname) = url.host() {
///     assert_eq!(hostname.full_name(), "例子.测试");
///     assert_eq!(hostname.suffix(), Some("测试"));
///     assert_eq!(hostname.domain(), Some("例子.测试"));
///     assert_eq!(hostname.subdomain(), None);
/// }
///
/// // Parse a domain with custom TLD
/// let url = Url::parse("http://example.b32.i2p").unwrap();
/// if let Host::Hostname(hostname) = url.host() {
///     assert_eq!(hostname.suffix(), Some("b32.i2p"));
/// }
/// ```
#[derive(Debug)]
pub struct Hostname<'url> {
    // The complete hostname, as returned by `full_name`.
    hostname: Cow<'url, str>,
    // Everything in front of the domain, without the separating dot.
    subdomain: Option<Cow<'url, str>>,
    // The label right before the suffix, followed by the suffix itself.
    domain: Option<Cow<'url, str>>,
    // The suffix found by the `suffix` helper (custom TLDs first, then PSL).
    suffix: Option<Cow<'url, str>>,
}
216
217impl<'url> Hostname<'url> {
218 fn into_owned<'owned>(self) -> Hostname<'owned> {
219 Hostname {
220 hostname: Cow::Owned(self.hostname.into_owned()),
221 subdomain: self.subdomain.map(|s| Cow::Owned(s.into_owned())),
222 domain: self.domain.map(|d| Cow::Owned(d.into_owned())),
223 suffix: self.suffix.map(|s| Cow::Owned(s.into_owned())),
224 }
225 }
226
227 fn from_str(hostname: &'url str) -> Self {
228 let suffix = suffix(hostname).map(Cow::Borrowed);
229
230 let domain = if let Some(suffix) = suffix.as_ref() {
231 let i = hostname.rfind(suffix.as_ref()).unwrap();
232 let dom_start = hostname[..i]
233 .trim_end_matches('.')
234 .rfind('.')
235 .map(|i| i + 1)
236 .unwrap_or_default();
237 Some(Cow::Borrowed(&hostname[dom_start..]))
238 } else {
239 None
240 };
241
242 let subdomain = if let Some(domain) = domain.as_ref() {
243 // cannot panic domain must be in hostname
244 let i = hostname.find(domain.as_ref()).unwrap().saturating_sub(1); // we get index after dot so we need to jump over it in order to process string backward
245 let subdomain = &hostname[..i];
246 if subdomain.is_empty() {
247 None
248 } else {
249 Some(Cow::Borrowed(subdomain))
250 }
251 } else {
252 None
253 };
254
255 Hostname {
256 hostname: Cow::Borrowed(hostname),
257 subdomain,
258 domain,
259 suffix,
260 }
261 }
262
263 /// Returns the complete hostname as a string.
264 ///
265 /// # Returns
266 ///
267 /// * `&str` - The full hostname.
268 ///
269 /// # Examples
270 ///
271 /// ```
272 /// use faup_rs::{Url, Host};
273 ///
274 /// let url = Url::parse("https://sub.example.com").unwrap();
275 /// if let Host::Hostname(hostname) = url.host() {
276 /// assert_eq!(hostname.full_name(), "sub.example.com");
277 /// }
278 /// ```
279 #[inline(always)]
280 pub fn full_name(&self) -> &str {
281 &self.hostname
282 }
283
284 /// Returns the suffix (top-level domain) of the hostname, if recognized.
285 ///
286 /// The suffix is determined using the Public Suffix List, with additional support
287 /// for custom TLDs defined in the `CUSTOM_TLDS` constant.
288 ///
289 /// # Returns
290 ///
291 /// * `Option<&str>` - The suffix (TLD), or `None` if not recognized.
292 ///
293 /// # Examples
294 ///
295 /// ```
296 /// use faup_rs::{Url, Host};
297 ///
298 /// // Standard TLD
299 /// let url = Url::parse("https://example.com").unwrap();
300 /// if let Host::Hostname(hostname) = url.host() {
301 /// assert_eq!(hostname.suffix(), Some("com"));
302 /// }
303 ///
304 /// // Multi-level TLD
305 /// let url = Url::parse("https://example.co.uk").unwrap();
306 /// if let Host::Hostname(hostname) = url.host() {
307 /// assert_eq!(hostname.suffix(), Some("co.uk"));
308 /// }
309 ///
310 /// // Custom TLD
311 /// let url = Url::parse("http://example.b32.i2p").unwrap();
312 /// if let Host::Hostname(hostname) = url.host() {
313 /// assert_eq!(hostname.suffix(), Some("b32.i2p"));
314 /// }
315 /// ```
316 #[inline(always)]
317 pub fn suffix(&self) -> Option<&str> {
318 self.suffix.as_ref().map(|p| p.as_ref())
319 }
320
321 /// Returns the domain part of the hostname, if recognized.
322 ///
323 /// The domain is the registrable part of the hostname, excluding any subdomains
324 /// and including the suffix.
325 ///
326 /// # Returns
327 ///
328 /// * `Option<&str>` - The domain, or `None` if not recognized.
329 ///
330 /// # Examples
331 ///
332 /// ```
333 /// use faup_rs::{Url, Host};
334 ///
335 /// // Simple domain
336 /// let url = Url::parse("https://example.com").unwrap();
337 /// if let Host::Hostname(hostname) = url.host() {
338 /// assert_eq!(hostname.domain(), Some("example.com"));
339 /// }
340 ///
341 /// // Domain with multi-level TLD
342 /// let url = Url::parse("https://example.co.uk").unwrap();
343 /// if let Host::Hostname(hostname) = url.host() {
344 /// assert_eq!(hostname.domain(), Some("example.co.uk"));
345 /// }
346 /// ```
347 #[inline(always)]
348 pub fn domain(&self) -> Option<&str> {
349 self.domain.as_ref().map(|p| p.as_ref())
350 }
351
352 /// Returns the subdomain part of the hostname, if present.
353 ///
354 /// The subdomain is everything before the domain. For example, in "sub.example.com",
355 /// "sub" is the subdomain.
356 ///
357 /// # Returns
358 ///
359 /// * `Option<&str>` - The subdomain, or `None` if not present.
360 ///
361 /// # Examples
362 ///
363 /// ```
364 /// use faup_rs::{Url, Host};
365 ///
366 /// // Single-level subdomain
367 /// let url = Url::parse("https://sub.example.com").unwrap();
368 /// if let Host::Hostname(hostname) = url.host() {
369 /// assert_eq!(hostname.subdomain(), Some("sub"));
370 /// }
371 ///
372 /// // Multi-level subdomain
373 /// let url = Url::parse("https://a.b.example.com").unwrap();
374 /// if let Host::Hostname(hostname) = url.host() {
375 /// assert_eq!(hostname.subdomain(), Some("a.b"));
376 /// }
377 ///
378 /// // No subdomain
379 /// let url = Url::parse("https://example.com").unwrap();
380 /// if let Host::Hostname(hostname) = url.host() {
381 /// assert_eq!(hostname.subdomain(), None);
382 /// }
383 /// ```
384 #[inline(always)]
385 pub fn subdomain(&self) -> Option<&str> {
386 self.subdomain.as_ref().map(|p| p.as_ref())
387 }
388}
389
/// Represents the host component of a URL, which can be either a hostname or an IP address.
///
/// The `Display` implementation prints the full hostname, or the IP address
/// as formatted by `std::net::IpAddr`.
#[derive(Debug)]
pub enum Host<'url> {
    /// A hostname (domain name).
    Hostname(Hostname<'url>),
    /// An IP address (either IPv4 or IPv6).
    Ip(IpAddr),
}
398
399impl fmt::Display for Host<'_> {
400 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
401 match self {
402 Host::Hostname(hostname) => write!(f, "{}", hostname.full_name()),
403 Host::Ip(ip) => write!(f, "{ip}"),
404 }
405 }
406}
407
408impl<'host> Host<'host> {
409 fn into_owned<'owned>(self) -> Host<'owned> {
410 match self {
411 Host::Hostname(h) => Host::Hostname(h.into_owned()),
412 Host::Ip(ip) => Host::Ip(ip),
413 }
414 }
415
416 #[inline(always)]
417 fn from_pair(host_pair: Pair<'host, Rule>) -> Result<Self, Error> {
418 match host_pair.as_rule() {
419 Rule::hostname => {
420 if let Ok(ipv4) =
421 UrlParser::parse(Rule::ipv4, host_pair.as_str()).map(|p| p.as_str())
422 {
423 Ok(Ipv4Addr::from_str(ipv4)
424 .map(IpAddr::from)
425 .map(Host::Ip)
426 .map_err(|_| Error::InvalidIPv4)?)
427 } else {
428 Ok(Host::Hostname(Hostname::from_str(host_pair.as_str())))
429 }
430 }
431
432 Rule::ipv6 => Ok(Ipv6Addr::from_str(
433 host_pair.as_str().trim_matches(|c| c == '[' || c == ']'),
434 )
435 .map(IpAddr::from)
436 .map(Host::Ip)
437 .map_err(|_| Error::InvalidIPv6)?),
438 _ => Err(Error::other(format!(
439 "unexpected parsing rule: {:?}",
440 host_pair.as_rule()
441 ))),
442 }
443 }
444
445 /// Parses a string into a `Host` enum.
446 ///
447 /// This function expects the input string to be a URL host, which can be either
448 /// an IPv4 address, an IPv6 address, or a hostname.
449 ///
450 /// # Arguments
451 ///
452 /// * `host` - A string slice that holds the host to parse (e.g., `"example.com"`, `"127.0.0.1"`, `"::1"`).
453 ///
454 /// # Returns
455 ///
456 /// * `Result<Host, Error>` - A [`Host`] enum if parsing is successful, or an [`Error`] if parsing fails.
457 ///
458 /// # Examples
459 ///
460 /// ```
461 /// use faup_rs::Host;
462 ///
463 /// // Parse an IPv4 address
464 /// let host = Host::parse("127.0.0.1").unwrap();
465 /// assert!(matches!(host, Host::Ip(std::net::IpAddr::V4(_))));
466 ///
467 /// // Parse an IPv6 address
468 /// let host = Host::parse("::1").unwrap();
469 /// assert!(matches!(host, Host::Ip(std::net::IpAddr::V6(_))));
470 ///
471 /// // Parse a hostname
472 /// let host = Host::parse("example.com").unwrap();
473 /// assert!(matches!(host, Host::Hostname(_)));
474 ///
475 /// // Parse a hostname with a subdomain
476 /// let host = Host::parse("sub.example.com").unwrap();
477 /// assert!(matches!(host, Host::Hostname(_)));
478 ///
479 /// // Parse a hostname with a custom TLD
480 /// let host = Host::parse("example.b32.i2p").unwrap();
481 /// assert!(matches!(host, Host::Hostname(_)));
482 ///
483 /// // Attempt to parse an invalid host
484 /// let result = Host::parse("invalid..host");
485 /// assert!(matches!(result, Err(faup_rs::Error::InvalidHost)));
486 /// ```
487 #[inline]
488 pub fn parse(host: &'host str) -> Result<Self, Error> {
489 Self::from_pair(
490 UrlParser::parse(Rule::checked_host, host)
491 .map_err(|_| Error::InvalidHost)?
492 .next()
493 // this should not panic as parser guarantee some pair exist
494 .expect("expecting host pair"),
495 )
496 }
497
498 /// Returns the hostname component if this is a `Host::Hostname` variant.
499 ///
500 /// # Returns
501 ///
502 /// * `Option<&Hostname>` - The hostname, or `None` if this is an IP address.
503 pub fn as_hostname(&self) -> Option<&Hostname<'_>> {
504 match self {
505 Host::Hostname(h) => Some(h),
506 _ => None,
507 }
508 }
509}
510
/// Represents user information (username and password) in a URL.
///
/// This struct stores the credentials that may be present in a URL's authority component.
/// It supports both ASCII and UTF-8 characters in usernames and passwords.
///
/// # Examples
///
/// ```
/// use faup_rs::{Url, UserInfo};
///
/// // Parse a URL with user info
/// let url = Url::parse("https://user:pass@example.com").unwrap();
/// let user_info = url.userinfo().unwrap();
///
/// // Access username and password
/// assert_eq!(user_info.username(), "user");
/// assert_eq!(user_info.password(), Some("pass"));
///
/// // Parse a URL with only username
/// let url = Url::parse("https://user@example.com").unwrap();
/// let user_info = url.userinfo().unwrap();
/// assert_eq!(user_info.username(), "user");
/// assert_eq!(user_info.password(), None);
///
/// // Parse a URL with UTF-8 user info
/// let url = Url::parse("https://用户:密码@example.com").unwrap();
/// let user_info = url.userinfo().unwrap();
/// assert_eq!(user_info.username(), "用户");
/// assert_eq!(user_info.password(), Some("密码"));
/// ```
#[derive(Debug)]
pub struct UserInfo<'url> {
    // Text matched by the grammar's `username` rule.
    username: Cow<'url, str>,
    // Text matched by the grammar's `password` rule, when one was present.
    password: Option<Cow<'url, str>>,
}
546
547impl<'url> UserInfo<'url> {
548 #[inline]
549 fn into_owned<'owned>(self) -> UserInfo<'owned> {
550 UserInfo {
551 username: Cow::Owned(self.username.into_owned()),
552 password: self.password.map(|p| Cow::Owned(p.into_owned())),
553 }
554 }
555
556 #[inline(always)]
557 fn from_pair(pair: Pair<'url, Rule>) -> Self {
558 let mut username = None;
559 let mut password = None;
560 for p in pair.into_inner() {
561 match p.as_rule() {
562 Rule::username => username = Some(Cow::Borrowed(p.as_str())),
563 Rule::password => password = Some(Cow::Borrowed(p.as_str())),
564 _ => {}
565 }
566 }
567 Self {
568 username: username.expect("username is guaranteed by parser"),
569 password,
570 }
571 }
572}
573
574impl UserInfo<'_> {
575 /// Returns the username component of the user information.
576 ///
577 /// # Returns
578 ///
579 /// * `&str` - The username.
580 ///
581 /// # Examples
582 ///
583 /// ```
584 /// use faup_rs::Url;
585 ///
586 /// let url = Url::parse("https://user@example.com").unwrap();
587 /// assert_eq!(url.userinfo().unwrap().username(), "user");
588 ///
589 /// // UTF-8 username
590 /// let url = Url::parse("https://用户@example.com").unwrap();
591 /// assert_eq!(url.userinfo().unwrap().username(), "用户");
592 /// ```
593 #[inline(always)]
594 pub fn username(&self) -> &str {
595 &self.username
596 }
597
598 /// Returns the password component of the user information, if present.
599 ///
600 /// # Returns
601 ///
602 /// * `Option<&str>` - The password, or `None` if not present.
603 ///
604 /// # Examples
605 ///
606 /// ```
607 /// use faup_rs::Url;
608 ///
609 /// // With password
610 /// let url = Url::parse("https://user:pass@example.com").unwrap();
611 /// assert_eq!(url.userinfo().unwrap().password(), Some("pass"));
612 ///
613 /// // Without password
614 /// let url = Url::parse("https://user@example.com").unwrap();
615 /// assert_eq!(url.userinfo().unwrap().password(), None);
616 ///
617 /// // UTF-8 password
618 /// let url = Url::parse("https://user:密码@example.com").unwrap();
619 /// assert_eq!(url.userinfo().unwrap().password(), Some("密码"));
620 /// ```
621 #[inline(always)]
622 pub fn password(&self) -> Option<&str> {
623 self.password.as_ref().map(|p| p.as_ref())
624 }
625}
626
/// A parsed URL with support for hostnames, IPv4/IPv6 addresses, userinfo, ports, paths, queries, and fragments.
///
/// This struct represents a URL parsed from a string, with all components accessible individually.
/// It supports both ASCII and UTF-8 characters in all components, and properly handles subdomains,
/// custom TLDs, and internationalized domain names (IDNs).
///
/// # Examples
///
/// ```
/// use faup_rs::Url;
///
/// // Parse a simple URL
/// let url = Url::parse("https://example.com").unwrap();
/// assert_eq!(url.scheme(), "https");
/// assert_eq!(url.host().as_hostname().unwrap().full_name(), "example.com");
///
/// // Parse a URL with all components
/// let url = Url::parse("https://user:pass@sub.example.com:8080/path?query=value#fragment").unwrap();
/// assert_eq!(url.scheme(), "https");
/// assert_eq!(url.userinfo().unwrap().username(), "user");
/// assert_eq!(url.port(), Some(8080));
/// assert_eq!(url.path(), Some("/path"));
/// assert_eq!(url.query(), Some("query=value"));
/// assert_eq!(url.fragment(), Some("fragment"));
/// ```
#[derive(Debug)]
pub struct Url<'url> {
    // The text matched by the grammar's `url` rule; returned by `as_str`.
    orig: Cow<'url, str>,
    // Text matched by the `scheme` rule.
    scheme: Cow<'url, str>,
    // Credentials, when the URL contains a `userinfo` part.
    userinfo: Option<UserInfo<'url>>,
    // Hostname or IP address.
    host: Host<'url>,
    // Port converted to `u16`, when one was given.
    port: Option<u16>,
    // Text matched by the `path` rule, when present.
    path: Option<Cow<'url, str>>,
    // Text matched by the `query` rule with its first character removed
    // (the doc examples show the leading `?` being dropped).
    query: Option<Cow<'url, str>>,
    // Text matched by the `fragment` rule with its first character removed
    // (the doc examples show the leading `#` being dropped).
    fragment: Option<Cow<'url, str>>,
}
663
664impl fmt::Display for Url<'_> {
665 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
666 write!(f, "{}", self.as_str())
667 }
668}
669
670impl<'url> Url<'url> {
671 fn from_pair(pair: Pair<'url, Rule>) -> Result<Self, Error> {
672 let orig = Cow::Borrowed(pair.as_str());
673 let mut scheme = None;
674 let mut userinfo = None;
675 let mut host = None;
676 let mut port = None;
677 let mut path = None;
678 let mut query = None;
679 let mut fragment = None;
680
681 for p in pair.into_inner() {
682 match p.as_rule() {
683 Rule::scheme => {
684 scheme = Some(Cow::Borrowed(p.as_str()));
685 }
686 Rule::userinfo => userinfo = Some(UserInfo::from_pair(p)),
687 Rule::host => {
688 // cannot panic guarantee by parser
689 let host_pair = p.into_inner().next().unwrap();
690 host = Some(Host::from_pair(host_pair)?)
691 }
692 Rule::port => {
693 port = Some(u16::from_str(p.as_str()).map_err(|_| Error::InvalidPort)?)
694 }
695 Rule::path => {
696 path = Some(Cow::Borrowed(p.as_str()));
697 }
698
699 Rule::query => {
700 query = Some(Cow::Borrowed(&p.as_str()[1..]));
701 }
702
703 Rule::fragment => {
704 fragment = Some(Cow::Borrowed(&p.as_str()[1..]));
705 }
706 _ => {}
707 }
708 }
709
710 Ok(Url {
711 orig,
712 scheme: scheme.unwrap(),
713 userinfo,
714 host: host.unwrap(),
715 port,
716 path,
717 query,
718 fragment,
719 })
720 }
721
722 /// Creates a new `Url` by parsing a string slice.
723 ///
724 /// # Arguments
725 ///
726 /// * `s` - A string slice containing the URL to parse.
727 ///
728 /// # Returns
729 ///
730 /// * `Result<Url, Error>` - A parsed `Url` if successful, or an `Error` if parsing fails.
731 ///
732 /// # Examples
733 ///
734 /// ```
735 /// use faup_rs::Url;
736 ///
737 /// let url = Url::parse("https://example.com").unwrap();
738 /// assert_eq!(url.scheme(), "https");
739 /// assert_eq!(url.domain(), Some("example.com"));
740 /// assert_eq!(url.suffix(), Some("com"));
741 /// ```
742 pub fn parse(s: &'url str) -> Result<Self, Error> {
743 let mut pairs = UrlParser::parse(Rule::url, s).map_err(Box::new)?;
744 Self::from_pair(pairs.next().unwrap())
745 }
746
747 /// Returns the original URL string.
748 ///
749 /// # Returns
750 ///
751 /// * `&str` - The original URL string.
752 ///
753 /// # Examples
754 ///
755 /// ```
756 /// use faup_rs::Url;
757 ///
758 /// let url = Url::parse("https://example.com").unwrap();
759 /// assert_eq!(url.as_str(), "https://example.com");
760 /// ```
761 #[inline(always)]
762 pub fn as_str(&self) -> &str {
763 &self.orig
764 }
765
766 /// Returns the scheme of the URL.
767 ///
768 /// # Returns
769 ///
770 /// * `&str` - The URL scheme (e.g., "http", "https").
771 ///
772 /// # Examples
773 ///
774 /// ```
775 /// use faup_rs::Url;
776 ///
777 /// let url = Url::parse("https://example.com").unwrap();
778 /// assert_eq!(url.scheme(), "https");
779 /// ```
780 #[inline(always)]
781 pub fn scheme(&self) -> &str {
782 &self.scheme
783 }
784
785 /// Returns the user information component of the URL, if present.
786 ///
787 /// # Returns
788 ///
789 /// * `Option<&UserInfo>` - The user information, or `None` if not present.
790 ///
791 /// # Examples
792 ///
793 /// ```
794 /// use faup_rs::Url;
795 ///
796 /// let url = Url::parse("https://user:pass@example.com").unwrap();
797 /// assert_eq!(url.userinfo().unwrap().username(), "user");
798 /// assert_eq!(url.userinfo().unwrap().password(), Some("pass"));
799 /// ```
800 #[inline(always)]
801 pub fn userinfo(&self) -> Option<&UserInfo<'_>> {
802 self.userinfo.as_ref()
803 }
804
805 /// Returns the host component of the URL.
806 ///
807 /// # Returns
808 ///
809 /// * `&Host` - The host, which can be either a hostname or an IP address.
810 ///
811 /// # Examples
812 ///
813 /// ```
814 /// use faup_rs::Url;
815 ///
816 /// let url = Url::parse("https://sub2.sub1.example.com").unwrap();
817 /// let hostname = url.host().as_hostname().unwrap();
818 /// assert_eq!(hostname.full_name(), "sub2.sub1.example.com");
819 /// assert_eq!(hostname.domain(), Some("example.com"));
820 /// assert_eq!(hostname.suffix(), Some("com"));
821 /// assert_eq!(hostname.subdomain(), Some("sub2.sub1"));
822 /// ```
823 #[inline(always)]
824 pub fn host(&self) -> &Host<'_> {
825 &self.host
826 }
827
828 /// Returns the domain part of the hostname, if present.
829 ///
830 /// This is a convenience method that directly accesses the domain component
831 /// of the hostname, if the host is a hostname (not an IP address).
832 ///
833 /// # Returns
834 ///
835 /// * `Option<&str>` - The domain part of the hostname, or `None` if:
836 /// - The host is an IP address
837 /// - The hostname doesn't have a recognized domain
838 ///
839 /// # Examples
840 ///
841 /// ```
842 /// use faup_rs::Url;
843 ///
844 /// // With a domain name
845 /// let url = Url::parse("https://sub.example.com").unwrap();
846 /// assert_eq!(url.domain(), Some("example.com"));
847 ///
848 /// // With an IP address
849 /// let url = Url::parse("https://127.0.0.1").unwrap();
850 /// assert_eq!(url.domain(), None);
851 /// ```
852 #[inline(always)]
853 pub fn domain(&self) -> Option<&str> {
854 self.host.as_hostname().and_then(|h| h.domain())
855 }
856
857 /// Returns the subdomain part of the hostname, if present.
858 ///
859 /// This is a convenience method that directly accesses the subdomain component
860 /// of the hostname, if the host is a hostname (not an IP address).
861 ///
862 /// # Returns
863 ///
864 /// * `Option<&str>` - The subdomain part of the hostname, or `None` if:
865 /// - The host is an IP address
866 /// - The hostname doesn't have a subdomain
867 ///
868 /// # Examples
869 ///
870 /// ```
871 /// use faup_rs::Url;
872 ///
873 /// // With a subdomain
874 /// let url = Url::parse("https://sub.example.com").unwrap();
875 /// assert_eq!(url.subdomain(), Some("sub"));
876 ///
877 /// // Without a subdomain
878 /// let url = Url::parse("https://example.com").unwrap();
879 /// assert_eq!(url.subdomain(), None);
880 ///
881 /// // With an IP address
882 /// let url = Url::parse("https://127.0.0.1").unwrap();
883 /// assert_eq!(url.subdomain(), None);
884 /// ```
885 #[inline(always)]
886 pub fn subdomain(&self) -> Option<&str> {
887 self.host.as_hostname().and_then(|h| h.subdomain())
888 }
889
890 /// Returns the suffix (top-level domain) of the hostname, if present.
891 ///
892 /// This is a convenience method that directly accesses the suffix component
893 /// of the hostname, if the host is a hostname (not an IP address).
894 ///
895 /// # Returns
896 ///
897 /// * `Option<&str>` - The suffix (TLD) of the hostname, or `None` if:
898 /// - The host is an IP address
899 /// - The hostname doesn't have a recognized suffix
900 ///
901 /// # Examples
902 ///
903 /// ```
904 /// use faup_rs::Url;
905 ///
906 /// // With a standard TLD
907 /// let url = Url::parse("https://example.com").unwrap();
908 /// assert_eq!(url.suffix(), Some("com"));
909 ///
910 /// // With a custom TLD
911 /// let url = Url::parse("http://example.b32.i2p").unwrap();
912 /// assert_eq!(url.suffix(), Some("b32.i2p"));
913 ///
914 /// // With an IP address
915 /// let url = Url::parse("https://127.0.0.1").unwrap();
916 /// assert_eq!(url.suffix(), None);
917 /// ```
918 #[inline(always)]
919 pub fn suffix(&self) -> Option<&str> {
920 self.host.as_hostname().and_then(|h| h.suffix())
921 }
922
923 /// Returns the port number of the URL, if present.
924 ///
925 /// # Returns
926 ///
927 /// * `Option<u16>` - The port number, or `None` if not specified.
928 ///
929 /// # Examples
930 ///
931 /// ```
932 /// use faup_rs::Url;
933 ///
934 /// let url = Url::parse("https://example.com:8080").unwrap();
935 /// assert_eq!(url.port(), Some(8080));
936 /// ```
937 #[inline(always)]
938 pub fn port(&self) -> Option<u16> {
939 self.port
940 }
941
942 /// Returns the path component of the URL, if present.
943 ///
944 /// # Returns
945 ///
946 /// * `Option<&str>` - The path, or `None` if not present.
947 ///
948 /// # Examples
949 ///
950 /// ```
951 /// use faup_rs::Url;
952 ///
953 /// let url = Url::parse("https://example.com/path").unwrap();
954 /// assert_eq!(url.path(), Some("/path"));
955 /// ```
956 #[inline(always)]
957 pub fn path(&self) -> Option<&str> {
958 self.path.as_ref().map(|p| p.as_ref())
959 }
960
961 /// Returns the query component of the URL, if present.
962 ///
963 /// # Returns
964 ///
965 /// * `Option<&str>` - The query string, or `None` if not present.
966 ///
967 /// # Examples
968 ///
969 /// ```
970 /// use faup_rs::Url;
971 ///
972 /// let url = Url::parse("https://example.com?query=value").unwrap();
973 /// assert_eq!(url.query(), Some("query=value"));
974 /// ```
975 #[inline(always)]
976 pub fn query(&self) -> Option<&str> {
977 self.query.as_ref().map(|p| p.as_ref())
978 }
979
980 /// Returns the fragment component of the URL, if present.
981 ///
982 /// # Returns
983 ///
984 /// * `Option<&str>` - The fragment, or `None` if not present.
985 ///
986 /// # Examples
987 ///
988 /// ```
989 /// use faup_rs::Url;
990 ///
991 /// let url = Url::parse("https://example.com#fragment").unwrap();
992 /// assert_eq!(url.fragment(), Some("fragment"));
993 /// ```
994 #[inline(always)]
995 pub fn fragment(&self) -> Option<&str> {
996 self.fragment.as_ref().map(|p| p.as_ref())
997 }
998
999 /// Converts this borrowed `Url` into an owned `Url`.
1000 ///
1001 /// This is useful when you need to store the `Url` for longer than the lifetime of the input string.
1002 ///
1003 /// # Performance
1004 ///
1005 /// When using this method strings will be cloned.
1006 ///
1007 /// # Returns
1008 ///
1009 /// * `Url<'owned>` - An owned version of the URL.
1010 ///
1011 /// # Examples
1012 ///
1013 /// ```
1014 /// use faup_rs::Url;
1015 ///
1016 /// let url = Url::parse("https://example.com").unwrap();
1017 /// let owned_url = url.into_owned();
1018 /// ```
1019 pub fn into_owned<'owned>(self) -> Url<'owned> {
1020 Url {
1021 orig: Cow::Owned(self.orig.into_owned()),
1022 scheme: Cow::Owned(self.scheme.into_owned()),
1023 userinfo: self.userinfo.map(|u| u.into_owned()),
1024 host: self.host.into_owned(),
1025 port: self.port,
1026 path: self.path.map(|p| Cow::Owned(p.into_owned())),
1027 query: self.query.map(|q| Cow::Owned(q.into_owned())),
1028 fragment: self.fragment.map(|f| Cow::Owned(f.into_owned())),
1029 }
1030 }
1031}
1032
1033#[cfg(test)]
1034mod tests {
1035 use super::*;
1036 use std::net::{IpAddr, Ipv4Addr, Ipv6Addr};
1037
1038 /// Test basic URL parsing with various real-world examples
1039 #[test]
1040 fn test_real_world_examples() {
1041 let test_urls = [
1042 "https://www.example.co.uk",
1043 "http://sub.domain.example.com/path/to/page",
1044 "ftp://files.example.org/downloads/archive.zip",
1045 "https://www.example.com/search?q=rust+programming&page=1",
1046 "http://api.example.net/data?user=123&sort=desc",
1047 "https://docs.example.com/guide#installation",
1048 "http://example.com/page#section-1",
1049 "https://example.com/path%20with%20spaces",
1050 "http://localhost:3000/api/v1",
1051 "http://toaster.dyrøy.no",
1052 "http://full.custom-tld.test.b32.i2p",
1053 "https://alex:adore-la-quiche@avec-des-œufs.be#et-des-lardons",
1054 "https://%40lex:adore:la:quiche@%61vec-des-œufs.be/../../..some/directory/traversal/../#et-des-lardons",
1055 "https://44.129.205.92.host.secureserver.net",
1056 ];
1057
1058 for url in test_urls {
1059 println!("Testing: {url}");
1060 let _ = Url::parse(url)
1061 .inspect_err(|e| println!("Error parsing '{url}': {e}"))
1062 .unwrap();
1063 }
1064 }
1065
/// A bare `scheme://host` URL yields a scheme and a host, and nothing else.
#[test]
fn test_minimal_url() {
    let parsed = Url::parse("https://example.com").unwrap();

    // Components that are present.
    assert_eq!(parsed.scheme(), "https");
    assert_eq!(parsed.host().to_string(), "example.com");

    // Every optional component must be absent.
    assert!(parsed.port().is_none());
    assert!(parsed.path().is_none());
    assert!(parsed.query().is_none());
    assert!(parsed.fragment().is_none());
    assert!(parsed.userinfo().is_none());

    // The host is a hostname made of a registrable domain and no subdomain.
    let hostname = parsed.host().as_hostname().unwrap();
    assert_eq!(hostname.full_name(), "example.com");
    assert_eq!(hostname.suffix(), Some("com"));
    assert_eq!(hostname.domain(), Some("example.com"));
    assert!(hostname.subdomain().is_none());
}
1084
/// Credentials placed before `@` are exposed through `userinfo()`.
///
/// Covers a username/password pair, a lone username, and non-ASCII
/// credentials.
#[test]
fn test_user_info() {
    // Username and password together.
    {
        let parsed = Url::parse("https://user:pass@example.com").unwrap();
        assert_eq!(parsed.scheme(), "https");
        assert_eq!(parsed.host().to_string(), "example.com");

        let credentials = parsed.userinfo().unwrap();
        assert_eq!(credentials.username(), "user");
        assert_eq!(credentials.password(), Some("pass"));
    }

    // Username without a password.
    {
        let parsed = Url::parse("ftp://user@example.com").unwrap();
        assert_eq!(parsed.scheme(), "ftp");

        let credentials = parsed.userinfo().unwrap();
        assert_eq!(credentials.username(), "user");
        assert_eq!(credentials.password(), None);
    }

    // UTF-8 username and password.
    {
        let parsed = Url::parse("https://用户:密码@example.com").unwrap();

        let credentials = parsed.userinfo().unwrap();
        assert_eq!(credentials.username(), "用户");
        assert_eq!(credentials.password(), Some("密码"));
    }
}
1109
/// Explicit ports are parsed as numbers; an out-of-range port is an error.
#[test]
fn test_ports() {
    // (input, expected port): a standard port and a custom one.
    let accepted = [
        ("http://example.com:80", 80),
        ("http://example.com:8080", 8080),
    ];
    for (input, expected) in accepted {
        let parsed = Url::parse(input).unwrap();
        assert_eq!(parsed.port(), Some(expected));
    }

    // Invalid port.
    assert!(matches!(
        Url::parse("http://example.com:99999"),
        Err(Error::InvalidPort)
    ));
}
1125
/// The path component is returned verbatim, or `None` when the URL has none.
#[test]
fn test_paths() {
    let cases: [(&str, Option<&str>); 4] = [
        // Simple path
        (
            "https://example.com/path/to/resource",
            Some("/path/to/resource"),
        ),
        // Path ending in a file name
        ("http://example.com/a/b/c.html", Some("/a/b/c.html")),
        // UTF-8 path
        ("https://example.com/路径/资源", Some("/路径/资源")),
        // No path
        ("https://example.com", None),
    ];

    for (input, expected) in cases {
        let parsed = Url::parse(input).unwrap();
        assert_eq!(parsed.path(), expected);
    }
}
1145
/// The query component is returned without its leading `?`, or `None` when
/// the URL has no query.
#[test]
fn test_queries() {
    let cases: [(&str, Option<&str>); 3] = [
        // Simple query
        ("https://example.com?key=value", Some("key=value")),
        // UTF-8 query
        ("https://example.com?查询=值", Some("查询=值")),
        // No query
        ("https://example.com", None),
    ];

    for (input, expected) in cases {
        let parsed = Url::parse(input).unwrap();
        assert_eq!(parsed.query(), expected);
    }
}
1161
/// The fragment component is returned without its leading `#`, or `None`
/// when the URL has no fragment.
#[test]
fn test_fragments() {
    let cases: [(&str, Option<&str>); 3] = [
        // Simple fragment
        ("https://example.com#section1", Some("section1")),
        // UTF-8 fragment
        ("https://example.com#片段", Some("片段")),
        // No fragment
        ("https://example.com", None),
    ];

    for (input, expected) in cases {
        let parsed = Url::parse(input).unwrap();
        assert_eq!(parsed.fragment(), expected);
    }
}
1177
/// A URL carrying every component exposes each one through its accessor.
#[test]
fn test_all_components() {
    const INPUT: &str =
        "https://user:pass@sub.example.com:8080/path/to/resource?key=value#section1";

    let parsed = Url::parse(INPUT).unwrap();

    // Scheme and credentials.
    assert_eq!(parsed.scheme(), "https");
    let credentials = parsed.userinfo().unwrap();
    assert_eq!(credentials.username(), "user");
    assert_eq!(credentials.password(), Some("pass"));

    // Authority: host and port.
    assert_eq!(parsed.host().to_string(), "sub.example.com");
    assert_eq!(parsed.port(), Some(8080));

    // Everything after the authority.
    assert_eq!(parsed.path(), Some("/path/to/resource"));
    assert_eq!(parsed.query(), Some("key=value"));
    assert_eq!(parsed.fragment(), Some("section1"));
}
1196
/// Hostnames are split into suffix, registrable domain and subdomain.
///
/// Each case lists the URL to parse and the values expected from
/// `full_name()`, `suffix()`, `domain()` and `subdomain()`.
#[test]
fn test_hostnames() {
    struct Case {
        url: &'static str,
        full_name: &'static str,
        suffix: &'static str,
        domain: &'static str,
        subdomain: Option<&'static str>,
    }

    let cases = [
        // Basic hostname
        Case {
            url: "https://example.com",
            full_name: "example.com",
            suffix: "com",
            domain: "example.com",
            subdomain: None,
        },
        // Single-level subdomain
        Case {
            url: "https://sub.example.com",
            full_name: "sub.example.com",
            suffix: "com",
            domain: "example.com",
            subdomain: Some("sub"),
        },
        // Multi-level subdomain
        Case {
            url: "https://a.b.example.com",
            full_name: "a.b.example.com",
            suffix: "com",
            domain: "example.com",
            subdomain: Some("a.b"),
        },
        // Multi-level subdomain surrounded by every other URL component
        Case {
            url: "https://user:pass@sub1.sub2.example.com:8080/path/to/resource?key=value#section1",
            full_name: "sub1.sub2.example.com",
            suffix: "com",
            domain: "example.com",
            subdomain: Some("sub1.sub2"),
        },
        // Custom TLD
        Case {
            url: "http://example.b32.i2p",
            full_name: "example.b32.i2p",
            suffix: "b32.i2p",
            domain: "example.b32.i2p",
            subdomain: None,
        },
        // UTF-8 hostname
        Case {
            url: "https://例子.测试",
            full_name: "例子.测试",
            suffix: "测试",
            domain: "例子.测试",
            subdomain: None,
        },
        // UTF-8 subdomain
        Case {
            url: "https://子域.例子.测试",
            full_name: "子域.例子.测试",
            suffix: "测试",
            domain: "例子.测试",
            subdomain: Some("子域"),
        },
    ];

    for case in cases {
        let parsed = Url::parse(case.url).unwrap();
        let hostname = parsed.host().as_hostname().unwrap();

        assert_eq!(hostname.full_name(), case.full_name);
        assert_eq!(hostname.suffix(), Some(case.suffix));
        assert_eq!(hostname.domain(), Some(case.domain));
        assert_eq!(hostname.subdomain(), case.subdomain);
    }
}
1259
/// IP literals in the host position become `Host::Ip`; malformed ones are
/// rejected with the matching error variant.
#[test]
fn test_ip_hosts() {
    // IPv4 loopback.
    let v4_url = Url::parse("http://127.0.0.1").unwrap();
    let Host::Ip(IpAddr::V4(v4)) = v4_url.host() else {
        panic!("Expected IPv4 address")
    };
    assert_eq!(v4, &Ipv4Addr::LOCALHOST);

    // IPv6 loopback, written in brackets inside the URL.
    let v6_url = Url::parse("http://[::1]").unwrap();
    let Host::Ip(IpAddr::V6(v6)) = v6_url.host() else {
        panic!("Expected IPv6 address")
    };
    assert_eq!(v6, &Ipv6Addr::LOCALHOST);

    // Invalid IPv4
    assert!(matches!(
        Url::parse("http://999.999.999.999"),
        Err(Error::InvalidIPv4)
    ));

    // Invalid IPv6
    assert!(matches!(
        Url::parse("http://[::::]"),
        Err(Error::InvalidIPv6)
    ));
}
1285
/// Delimiters followed by nothing still produce a (possibly empty) component.
#[test]
fn test_edge_cases() {
    // A lone `/` is a path of its own.
    assert_eq!(
        Url::parse("https://example.com/").unwrap().path(),
        Some("/")
    );

    // A trailing `?` yields an empty query rather than no query.
    assert_eq!(
        Url::parse("https://example.com?").unwrap().query(),
        Some("")
    );

    // A trailing `#` yields an empty fragment rather than no fragment.
    assert_eq!(
        Url::parse("https://example.com#").unwrap().fragment(),
        Some("")
    );

    // A host that is exactly the registrable domain has no subdomain.
    let bare = Url::parse("https://example.com").unwrap();
    let hostname = bare.host().as_hostname().unwrap();
    assert_eq!(hostname.subdomain(), None);
}
1306
/// Percent-encoded sequences and dot segments are kept exactly as written.
#[test]
fn test_special_characters() {
    // Percent-encoded characters in the credentials and the host are not decoded.
    let encoded =
        Url::parse("https://%40lex:adore:la:quiche@%61vec-des-œufs.be#et-des-lardons").unwrap();
    assert_eq!(encoded.host().to_string(), "%61vec-des-œufs.be");

    // Only the first `:` separates username from password.
    let credentials = encoded.userinfo().unwrap();
    assert_eq!(credentials.username(), "%40lex");
    assert_eq!(credentials.password(), Some("adore:la:quiche"));

    assert_eq!(encoded.fragment(), Some("et-des-lardons"));

    // `..` segments in the path are not collapsed.
    let traversal =
        Url::parse("https://example.com/../../..some/directory/traversal/../").unwrap();
    assert_eq!(
        traversal.path(),
        Some("/../../..some/directory/traversal/../")
    );
}
1323
/// `Host::parse` classifies a standalone host string as an IP address or a
/// hostname, and reports malformed input with a specific error.
#[test]
fn test_host_from_str() {
    // Valid IPv4
    assert!(matches!(
        Host::parse("127.0.0.1"),
        Ok(Host::Ip(IpAddr::V4(_)))
    ));

    // Valid IPv6 (written without brackets)
    assert!(matches!(Host::parse("::1"), Ok(Host::Ip(IpAddr::V6(_)))));

    // The bracketed form is rejected when parsing a standalone host.
    assert!(matches!(Host::parse("[::1]"), Err(Error::InvalidHost)));

    // Invalid IPv6
    assert!(matches!(Host::parse("::::"), Err(Error::InvalidIPv6)));

    // Plain hostname, hostname with subdomain, hostname with custom TLD.
    for name in ["example.com", "sub.example.com", "example.b32.i2p"] {
        assert!(matches!(Host::parse(name), Ok(Host::Hostname(_))));
    }

    // Invalid hostname: two consecutive dots.
    assert!(matches!(
        Host::parse("example..com"),
        Err(Error::InvalidHost)
    ));
}
1357}