faup_rs/
lib.rs

1//! # faup-rs: Fast URL Parser for Rust
2//!
3//! A high-performance, zero-allocation URL parser for Rust that handles:
4//! - Hostnames (with subdomains, custom TLDs, and IDNs)
5//! - IPv4/IPv6 addresses
6//! - User credentials (username/password)
7//! - Ports, paths, queries, and fragments
8//! - UTF-8 and URL-encoded characters
9//!
10//! ## Features
11//!
12//! ✅ **Zero-allocation parsing**: Borrows input strings where possible
13//!
14//! ✅ **Public Suffix List (PSL)**: Correctly identifies domain suffixes
15//!
16//! ✅ **Custom TLDs**: Extendable via the `CUSTOM_TLDS` constant
17//!
18//! ✅ **Comprehensive error handling**: Clear, actionable error types
19//!
20//! ✅ **UTF-8 support**: Full Unicode handling for all URL components
21//!
22//! ## Installation
23//!
24//! Add to your `Cargo.toml`:
25//! ```toml
26//! [dependencies]
27//! faup-rs = "0.1"
28//!```
29//!
30//! ## Usage
31//!
32//! ### Basic Parsing
33//! ```
34//! use faup_rs::Url;
35//!
36//! let url = Url::parse("https://user:pass@sub.example.com:8080/path?query=value#fragment").unwrap();
37//! assert_eq!(url.scheme(), "https");
38//! assert_eq!(url.host().to_string(), "sub.example.com");
39//! assert_eq!(url.port(), Some(8080));
40//! assert_eq!(url.path(), Some("/path"));
41//! assert_eq!(url.query(), Some("query=value"));
42//! assert_eq!(url.fragment(), Some("fragment"));
43//!```
44//!
45//! ### Hostname Components
46//! ```
47//! use faup_rs::{Url, Host};
48//!
49//! let url = Url::parse("https://sub.example.co.uk").unwrap();
50//! if let Host::Hostname(hostname) = url.host() {
51//!     assert_eq!(hostname.full_name(), "sub.example.co.uk");
52//!     assert_eq!(hostname.suffix(), Some("co.uk"));
53//!     assert_eq!(hostname.domain(), Some("example.co.uk"));
54//!     assert_eq!(hostname.subdomain(), Some("sub"));
55//! }
56//!```
57//!
58//! ### IP Addresses
59//! ```
60//! use faup_rs::Url;
61//!
62//! let url = Url::parse("http://[::1]").unwrap();
63//! assert!(matches!(url.host(), faup_rs::Host::Ip(ip) if ip.is_loopback()));
64//!```
65//!
66//! ### User Info (UTF-8 Support)
67//! ```
68//! use faup_rs::Url;
69//!
70//! let url = Url::parse("https://用户:密码@example.com").unwrap();
71//! let user_info = url.userinfo().unwrap();
72//! assert_eq!(user_info.username(), "用户");
73//! assert_eq!(user_info.password(), Some("密码"));
74//!```
75//!
76//! ### Custom TLDs
77//! ```
78//! use faup_rs::Url;
79//!
80//! let url = Url::parse("http://example.b32.i2p").unwrap();
81//! assert_eq!(url.suffix(), Some("b32.i2p"));
82//!```
83//!
84//! ## Examples
85//!
86//! ### Real-World URLs
87//! ```
88//! use faup_rs::Url;
89//!
90//! let urls = [
91//!     "https://www.example.co.uk",
92//!     "http://sub.domain.example.com/path/to/page",
93//!     "https://例子.测试",
94//!     "http://toaster.dyrøy.no",
95//!     "http://full.custom-tld.test.b32.i2p",
96//! ];
97//! for url_str in urls {
98//!     let url = Url::parse(url_str).unwrap();
99//!     println!("Parsed: {}", url);
100//! }
101//!```
102//!
103//! ## License
104//!
//! This project is licensed under the GNU General Public License v3.0 (GPLv3).
106//!
107use std::{
108    borrow::Cow,
109    fmt,
110    net::{IpAddr, Ipv4Addr, Ipv6Addr},
111    str::FromStr,
112};
113
114use pest::{Parser, iterators::Pair};
115use pest_derive::Parser;
116use thiserror::Error;
117
/// Extra suffixes that `suffix` checks before falling back to the Public Suffix List lookup.
static CUSTOM_TLDS: &[&str] = &["b32.i2p"];
119
/// Errors that can be returned while parsing a URL.
#[derive(Debug, Error)]
pub enum Error {
    /// The port component could not be parsed as a `u16`.
    #[error("invalid port")]
    InvalidPort,
    /// The host matched the grammar's IPv4 rule but was rejected by `Ipv4Addr::from_str`.
    #[error("invalid ipv4 address")]
    InvalidIPv4,
    /// The host matched the grammar's IPv6 rule but was rejected by `Ipv6Addr::from_str`.
    #[error("invalid ipv6 address")]
    InvalidIPv6,
    /// The input was rejected by the `pest` parser; the underlying error is boxed.
    #[error("parser error: {0}")]
    Parse(#[from] Box<pest::error::Error<Rule>>),
}
131
/// Crate-internal parser type derived from the grammar in `grammar.pest`.
///
/// NOTE(review): the `Rule` enum used throughout this file is presumably generated
/// by this derive; confirm against the `pest_derive` documentation.
#[derive(Parser)]
#[grammar = "grammar.pest"]
pub(crate) struct UrlParser;
135
136#[inline(always)]
137fn suffix(hostname: &str) -> Option<&str> {
138    for tld in CUSTOM_TLDS {
139        if hostname.ends_with(tld) {
140            return Some(tld);
141        }
142    }
143    psl::suffix_str(hostname)
144}
145
146/// Represents a parsed hostname with its components (subdomain, domain, and suffix).
147///
148/// The `Hostname` struct provides access to the different parts of a domain name,
149/// including support for internationalized domain names (IDNs), custom top-level domains (TLDs),
150/// and subdomains. It uses the Public Suffix List (via the `psl` crate) to properly identify
151/// domain suffixes, with additional support for custom TLDs.
152///
153/// # Examples
154///
155/// ```
156/// use faup_rs::{Url, Host};
157///
158/// // Parse a simple domain
159/// let url = Url::parse("https://example.com").unwrap();
160/// if let Host::Hostname(hostname) = url.host() {
161///     assert_eq!(hostname.full_name(), "example.com");
162///     assert_eq!(hostname.suffix(), Some("com"));
163///     assert_eq!(hostname.domain(), Some("example.com"));
164///     assert_eq!(hostname.subdomain(), None);
165/// }
166///
167/// // Parse a domain with subdomains
168/// let url = Url::parse("https://sub.example.co.uk").unwrap();
169/// if let Host::Hostname(hostname) = url.host() {
170///     assert_eq!(hostname.full_name(), "sub.example.co.uk");
171///     assert_eq!(hostname.suffix(), Some("co.uk"));
172///     assert_eq!(hostname.domain(), Some("example.co.uk"));
173///     assert_eq!(hostname.subdomain(), Some("sub"));
174/// }
175///
176/// // Parse a domain with UTF-8 characters
177/// let url = Url::parse("https://例子.测试").unwrap();
178/// if let Host::Hostname(hostname) = url.host() {
179///     assert_eq!(hostname.full_name(), "例子.测试");
180///     assert_eq!(hostname.suffix(), Some("测试"));
181///     assert_eq!(hostname.domain(), Some("例子.测试"));
182///     assert_eq!(hostname.subdomain(), None);
183/// }
184///
185/// // Parse a domain with custom TLD
186/// let url = Url::parse("http://example.b32.i2p").unwrap();
187/// if let Host::Hostname(hostname) = url.host() {
188///     assert_eq!(hostname.suffix(), Some("b32.i2p"));
189/// }
190/// ```
#[derive(Debug)]
pub struct Hostname<'url> {
    /// Complete hostname text this value was built from.
    hostname: Cow<'url, str>,
    /// Part of the hostname located before `domain`, without the separating dot; `None` when empty.
    subdomain: Option<Cow<'url, str>>,
    /// The suffix plus the label right before it (if any); `None` when no suffix was found.
    domain: Option<Cow<'url, str>>,
    /// Suffix found by the custom TLD / Public Suffix List lookup, if any.
    suffix: Option<Cow<'url, str>>,
}
198
199impl<'url> Hostname<'url> {
200    fn into_owned<'owned>(self) -> Hostname<'owned> {
201        Hostname {
202            hostname: Cow::Owned(self.hostname.into_owned()),
203            subdomain: self.subdomain.map(|s| Cow::Owned(s.into_owned())),
204            domain: self.domain.map(|d| Cow::Owned(d.into_owned())),
205            suffix: self.suffix.map(|s| Cow::Owned(s.into_owned())),
206        }
207    }
208
209    fn from_str(hostname: &'url str) -> Self {
210        let suffix = suffix(hostname).map(Cow::Borrowed);
211
212        let domain = if let Some(suffix) = suffix.as_ref() {
213            let i = hostname.rfind(suffix.as_ref()).unwrap();
214            let dom_start = hostname[..i]
215                .trim_end_matches('.')
216                .rfind('.')
217                .map(|i| i + 1)
218                .unwrap_or_default();
219            Some(Cow::Borrowed(&hostname[dom_start..]))
220        } else {
221            None
222        };
223
224        let subdomain = if let Some(domain) = domain.as_ref() {
225            // cannot panic domain must be in hostname
226            let i = hostname.find(domain.as_ref()).unwrap().saturating_sub(1); // we get index after dot so we need to jump over it in order to process string backward
227            let subdomain = &hostname[..i];
228            if subdomain.is_empty() {
229                None
230            } else {
231                Some(Cow::Borrowed(subdomain))
232            }
233        } else {
234            None
235        };
236
237        Hostname {
238            hostname: Cow::Borrowed(hostname),
239            subdomain,
240            domain,
241            suffix,
242        }
243    }
244
245    /// Returns the complete hostname as a string.
246    ///
247    /// # Returns
248    ///
249    /// * `&str` - The full hostname.
250    ///
251    /// # Examples
252    ///
253    /// ```
254    /// use faup_rs::{Url, Host};
255    ///
256    /// let url = Url::parse("https://sub.example.com").unwrap();
257    /// if let Host::Hostname(hostname) = url.host() {
258    ///     assert_eq!(hostname.full_name(), "sub.example.com");
259    /// }
260    /// ```
261    #[inline(always)]
262    pub fn full_name(&self) -> &str {
263        &self.hostname
264    }
265
266    /// Returns the suffix (top-level domain) of the hostname, if recognized.
267    ///
268    /// The suffix is determined using the Public Suffix List, with additional support
269    /// for custom TLDs defined in the `CUSTOM_TLDS` constant.
270    ///
271    /// # Returns
272    ///
273    /// * `Option<&str>` - The suffix (TLD), or `None` if not recognized.
274    ///
275    /// # Examples
276    ///
277    /// ```
278    /// use faup_rs::{Url, Host};
279    ///
280    /// // Standard TLD
281    /// let url = Url::parse("https://example.com").unwrap();
282    /// if let Host::Hostname(hostname) = url.host() {
283    ///     assert_eq!(hostname.suffix(), Some("com"));
284    /// }
285    ///
286    /// // Multi-level TLD
287    /// let url = Url::parse("https://example.co.uk").unwrap();
288    /// if let Host::Hostname(hostname) = url.host() {
289    ///     assert_eq!(hostname.suffix(), Some("co.uk"));
290    /// }
291    ///
292    /// // Custom TLD
293    /// let url = Url::parse("http://example.b32.i2p").unwrap();
294    /// if let Host::Hostname(hostname) = url.host() {
295    ///     assert_eq!(hostname.suffix(), Some("b32.i2p"));
296    /// }
297    /// ```
298    #[inline(always)]
299    pub fn suffix(&self) -> Option<&str> {
300        self.suffix.as_ref().map(|p| p.as_ref())
301    }
302
303    /// Returns the domain part of the hostname, if recognized.
304    ///
305    /// The domain is the registrable part of the hostname, excluding any subdomains
306    /// and including the suffix.
307    ///
308    /// # Returns
309    ///
310    /// * `Option<&str>` - The domain, or `None` if not recognized.
311    ///
312    /// # Examples
313    ///
314    /// ```
315    /// use faup_rs::{Url, Host};
316    ///
317    /// // Simple domain
318    /// let url = Url::parse("https://example.com").unwrap();
319    /// if let Host::Hostname(hostname) = url.host() {
320    ///     assert_eq!(hostname.domain(), Some("example.com"));
321    /// }
322    ///
323    /// // Domain with multi-level TLD
324    /// let url = Url::parse("https://example.co.uk").unwrap();
325    /// if let Host::Hostname(hostname) = url.host() {
326    ///     assert_eq!(hostname.domain(), Some("example.co.uk"));
327    /// }
328    /// ```
329    #[inline(always)]
330    pub fn domain(&self) -> Option<&str> {
331        self.domain.as_ref().map(|p| p.as_ref())
332    }
333
334    /// Returns the subdomain part of the hostname, if present.
335    ///
336    /// The subdomain is everything before the domain. For example, in "sub.example.com",
337    /// "sub" is the subdomain.
338    ///
339    /// # Returns
340    ///
341    /// * `Option<&str>` - The subdomain, or `None` if not present.
342    ///
343    /// # Examples
344    ///
345    /// ```
346    /// use faup_rs::{Url, Host};
347    ///
348    /// // Single-level subdomain
349    /// let url = Url::parse("https://sub.example.com").unwrap();
350    /// if let Host::Hostname(hostname) = url.host() {
351    ///     assert_eq!(hostname.subdomain(), Some("sub"));
352    /// }
353    ///
354    /// // Multi-level subdomain
355    /// let url = Url::parse("https://a.b.example.com").unwrap();
356    /// if let Host::Hostname(hostname) = url.host() {
357    ///     assert_eq!(hostname.subdomain(), Some("a.b"));
358    /// }
359    ///
360    /// // No subdomain
361    /// let url = Url::parse("https://example.com").unwrap();
362    /// if let Host::Hostname(hostname) = url.host() {
363    ///     assert_eq!(hostname.subdomain(), None);
364    /// }
365    /// ```
366    #[inline(always)]
367    pub fn subdomain(&self) -> Option<&str> {
368        self.subdomain.as_ref().map(|p| p.as_ref())
369    }
370}
371
/// Represents the host component of a URL, which can be either a hostname or an IP address.
///
/// Its `Display` implementation prints the full hostname for [`Host::Hostname`]
/// and the address itself for [`Host::Ip`].
#[derive(Debug)]
pub enum Host<'url> {
    /// A hostname (domain name), split into subdomain, domain and suffix.
    Hostname(Hostname<'url>),
    /// An IP address (either IPv4 or IPv6).
    Ip(IpAddr),
}
380
381impl fmt::Display for Host<'_> {
382    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
383        match self {
384            Host::Hostname(hostname) => write!(f, "{}", hostname.full_name()),
385            Host::Ip(ip) => write!(f, "{ip}"),
386        }
387    }
388}
389
390impl<'url> Host<'url> {
391    fn into_owned<'owned>(self) -> Host<'owned> {
392        match self {
393            Host::Hostname(h) => Host::Hostname(h.into_owned()),
394            Host::Ip(ip) => Host::Ip(ip),
395        }
396    }
397
398    /// Returns the hostname component if this is a `Host::Hostname` variant.
399    ///
400    /// # Returns
401    ///
402    /// * `Option<&Hostname>` - The hostname, or `None` if this is an IP address.
403    pub fn as_hostname(&self) -> Option<&Hostname<'_>> {
404        match self {
405            Host::Hostname(h) => Some(h),
406            _ => None,
407        }
408    }
409}
410
411/// Represents user information (username and password) in a URL.
412///
413/// This struct stores the credentials that may be present in a URL's authority component.
414/// It supports both ASCII and UTF-8 characters in usernames and passwords.
415///
416/// # Examples
417///
418/// ```
419/// use faup_rs::{Url, UserInfo};
420///
421/// // Parse a URL with user info
422/// let url = Url::parse("https://user:pass@example.com").unwrap();
423/// let user_info = url.userinfo().unwrap();
424///
425/// // Access username and password
426/// assert_eq!(user_info.username(), "user");
427/// assert_eq!(user_info.password(), Some("pass"));
428///
429/// // Parse a URL with only username
430/// let url = Url::parse("https://user@example.com").unwrap();
431/// let user_info = url.userinfo().unwrap();
432/// assert_eq!(user_info.username(), "user");
433/// assert_eq!(user_info.password(), None);
434///
435/// // Parse a URL with UTF-8 user info
436/// let url = Url::parse("https://用户:密码@example.com").unwrap();
437/// let user_info = url.userinfo().unwrap();
438/// assert_eq!(user_info.username(), "用户");
439/// assert_eq!(user_info.password(), Some("密码"));
440/// ```
#[derive(Debug)]
pub struct UserInfo<'url> {
    /// Text matched by the grammar's `username` rule.
    username: Cow<'url, str>,
    /// Text matched by the grammar's `password` rule, when one was present.
    password: Option<Cow<'url, str>>,
}
446
447impl<'url> UserInfo<'url> {
448    #[inline]
449    fn into_owned<'owned>(self) -> UserInfo<'owned> {
450        UserInfo {
451            username: Cow::Owned(self.username.into_owned()),
452            password: self.password.map(|p| Cow::Owned(p.into_owned())),
453        }
454    }
455
456    #[inline(always)]
457    fn from_pair(pair: Pair<'url, Rule>) -> Self {
458        let mut username = None;
459        let mut password = None;
460        for p in pair.into_inner() {
461            match p.as_rule() {
462                Rule::username => username = Some(Cow::Borrowed(p.as_str())),
463                Rule::password => password = Some(Cow::Borrowed(p.as_str())),
464                _ => {}
465            }
466        }
467        Self {
468            username: username.expect("username is guaranteed by parser"),
469            password,
470        }
471    }
472}
473
474impl UserInfo<'_> {
475    /// Returns the username component of the user information.
476    ///
477    /// # Returns
478    ///
479    /// * `&str` - The username.
480    ///
481    /// # Examples
482    ///
483    /// ```
484    /// use faup_rs::Url;
485    ///
486    /// let url = Url::parse("https://user@example.com").unwrap();
487    /// assert_eq!(url.userinfo().unwrap().username(), "user");
488    ///
489    /// // UTF-8 username
490    /// let url = Url::parse("https://用户@example.com").unwrap();
491    /// assert_eq!(url.userinfo().unwrap().username(), "用户");
492    /// ```
493    #[inline(always)]
494    pub fn username(&self) -> &str {
495        &self.username
496    }
497
498    /// Returns the password component of the user information, if present.
499    ///
500    /// # Returns
501    ///
502    /// * `Option<&str>` - The password, or `None` if not present.
503    ///
504    /// # Examples
505    ///
506    /// ```
507    /// use faup_rs::Url;
508    ///
509    /// // With password
510    /// let url = Url::parse("https://user:pass@example.com").unwrap();
511    /// assert_eq!(url.userinfo().unwrap().password(), Some("pass"));
512    ///
513    /// // Without password
514    /// let url = Url::parse("https://user@example.com").unwrap();
515    /// assert_eq!(url.userinfo().unwrap().password(), None);
516    ///
517    /// // UTF-8 password
518    /// let url = Url::parse("https://user:密码@example.com").unwrap();
519    /// assert_eq!(url.userinfo().unwrap().password(), Some("密码"));
520    /// ```
521    #[inline(always)]
522    pub fn password(&self) -> Option<&str> {
523        self.password.as_ref().map(|p| p.as_ref())
524    }
525}
526
527/// A parsed URL with support for hostnames, IPv4/IPv6 addresses, userinfo, ports, paths, queries, and fragments.
528///
529/// This struct represents a URL parsed from a string, with all components accessible individually.
530/// It supports both ASCII and UTF-8 characters in all components, and properly handles subdomains,
531/// custom TLDs, and internationalized domain names (IDNs).
532///
533/// # Examples
534///
535/// ```
536/// use faup_rs::Url;
537///
538/// // Parse a simple URL
539/// let url = Url::parse("https://example.com").unwrap();
540/// assert_eq!(url.scheme(), "https");
541/// assert_eq!(url.host().as_hostname().unwrap().full_name(), "example.com");
542///
543/// // Parse a URL with all components
544/// let url = Url::parse("https://user:pass@sub.example.com:8080/path?query=value#fragment").unwrap();
545/// assert_eq!(url.scheme(), "https");
546/// assert_eq!(url.userinfo().unwrap().username(), "user");
547/// assert_eq!(url.port(), Some(8080));
548/// assert_eq!(url.path(), Some("/path"));
549/// assert_eq!(url.query(), Some("query=value"));
550/// assert_eq!(url.fragment(), Some("fragment"));
551/// ```
#[derive(Debug)]
pub struct Url<'url> {
    /// Complete text matched by the top-level parser pair; returned by `as_str`.
    orig: Cow<'url, str>,
    /// Text matched by the grammar's `scheme` rule.
    scheme: Cow<'url, str>,
    /// Credentials, when the URL contains a `userinfo` part.
    userinfo: Option<UserInfo<'url>>,
    /// Host, either a hostname or an IP address.
    host: Host<'url>,
    /// Port parsed as a `u16`, when present.
    port: Option<u16>,
    /// Text matched by the grammar's `path` rule, when present.
    path: Option<Cow<'url, str>>,
    /// Text matched by the `query` rule with its first byte (the delimiter) removed.
    query: Option<Cow<'url, str>>,
    /// Text matched by the `fragment` rule with its first byte (the delimiter) removed.
    fragment: Option<Cow<'url, str>>,
}
563
564impl fmt::Display for Url<'_> {
565    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
566        write!(f, "{}", self.as_str())
567    }
568}
569
570impl<'url> Url<'url> {
571    fn from_pair(pair: Pair<'url, Rule>) -> Result<Self, Error> {
572        let orig = Cow::Borrowed(pair.as_str());
573        let mut scheme = None;
574        let mut userinfo = None;
575        let mut host = None;
576        let mut port = None;
577        let mut path = None;
578        let mut query = None;
579        let mut fragment = None;
580
581        for p in pair.into_inner() {
582            match p.as_rule() {
583                Rule::scheme => {
584                    scheme = Some(Cow::Borrowed(p.as_str()));
585                }
586                Rule::userinfo => userinfo = Some(UserInfo::from_pair(p)),
587                Rule::host => {
588                    // cannot panic guarantee by parser
589                    let host_pair = p.into_inner().next().unwrap();
590                    match host_pair.as_rule() {
591                        Rule::hostname => {
592                            host = Some(Host::Hostname(Hostname::from_str(host_pair.as_str())))
593                        }
594                        Rule::ipv4 => {
595                            host = Some(
596                                Ipv4Addr::from_str(host_pair.as_str())
597                                    .map(IpAddr::from)
598                                    .map(Host::Ip)
599                                    .map_err(|_| Error::InvalidIPv4)?,
600                            );
601                        }
602
603                        Rule::ipv6 => {
604                            host = Some(
605                                Ipv6Addr::from_str(
606                                    host_pair.as_str().trim_matches(|c| c == '[' || c == ']'),
607                                )
608                                .map(IpAddr::from)
609                                .map(Host::Ip)
610                                .map_err(|_| Error::InvalidIPv6)?,
611                            );
612                        }
613                        _ => {}
614                    }
615                }
616                Rule::port => {
617                    port = Some(u16::from_str(p.as_str()).map_err(|_| Error::InvalidPort)?)
618                }
619                Rule::path => {
620                    path = Some(Cow::Borrowed(p.as_str()));
621                }
622
623                Rule::query => {
624                    query = Some(Cow::Borrowed(&p.as_str()[1..]));
625                }
626
627                Rule::fragment => {
628                    fragment = Some(Cow::Borrowed(&p.as_str()[1..]));
629                }
630                _ => {}
631            }
632        }
633
634        Ok(Url {
635            orig,
636            scheme: scheme.unwrap(),
637            userinfo,
638            host: host.unwrap(),
639            port,
640            path,
641            query,
642            fragment,
643        })
644    }
645
646    /// Converts this borrowed `Url` into an owned `Url`.
647    ///
648    /// This is useful when you need to store the `Url` for longer than the lifetime of the input string.
649    ///
650    /// # Performance
651    ///
652    /// When using this method strings will be cloned.
653    ///
654    /// # Returns
655    ///
656    /// * `Url<'owned>` - An owned version of the URL.
657    ///
658    /// # Examples
659    ///
660    /// ```
661    /// use faup_rs::Url;
662    ///
663    /// let url = Url::parse("https://example.com").unwrap();
664    /// let owned_url = url.into_owned();
665    /// ```
666    pub fn into_owned<'owned>(self) -> Url<'owned> {
667        Url {
668            orig: Cow::Owned(self.orig.into_owned()),
669            scheme: Cow::Owned(self.scheme.into_owned()),
670            userinfo: self.userinfo.map(|u| u.into_owned()),
671            host: self.host.into_owned(),
672            port: self.port,
673            path: self.path.map(|p| Cow::Owned(p.into_owned())),
674            query: self.query.map(|q| Cow::Owned(q.into_owned())),
675            fragment: self.fragment.map(|f| Cow::Owned(f.into_owned())),
676        }
677    }
678
679    /// Creates a new `Url` by parsing a string slice.
680    ///
681    /// # Arguments
682    ///
683    /// * `s` - A string slice containing the URL to parse.
684    ///
685    /// # Returns
686    ///
687    /// * `Result<Url, Error>` - A parsed `Url` if successful, or an `Error` if parsing fails.
688    ///
689    /// # Examples
690    ///
691    /// ```
692    /// use faup_rs::Url;
693    ///
694    /// let url = Url::parse("https://example.com").unwrap();
695    /// assert_eq!(url.scheme(), "https");
696    /// assert_eq!(url.domain(), Some("example.com"));
697    /// assert_eq!(url.suffix(), Some("com"));
698    /// ```
699    pub fn parse(s: &'url str) -> Result<Self, Error> {
700        let mut pairs = UrlParser::parse(Rule::url, s).map_err(Box::new)?;
701        Self::from_pair(pairs.next().unwrap())
702    }
703
704    /// Returns the original URL string.
705    ///
706    /// # Returns
707    ///
708    /// * `&str` - The original URL string.
709    ///
710    /// # Examples
711    ///
712    /// ```
713    /// use faup_rs::Url;
714    ///
715    /// let url = Url::parse("https://example.com").unwrap();
716    /// assert_eq!(url.as_str(), "https://example.com");
717    /// ```
718    #[inline(always)]
719    pub fn as_str(&self) -> &str {
720        &self.orig
721    }
722
723    /// Returns the scheme of the URL.
724    ///
725    /// # Returns
726    ///
727    /// * `&str` - The URL scheme (e.g., "http", "https").
728    ///
729    /// # Examples
730    ///
731    /// ```
732    /// use faup_rs::Url;
733    ///
734    /// let url = Url::parse("https://example.com").unwrap();
735    /// assert_eq!(url.scheme(), "https");
736    /// ```
737    #[inline(always)]
738    pub fn scheme(&self) -> &str {
739        &self.scheme
740    }
741
742    /// Returns the user information component of the URL, if present.
743    ///
744    /// # Returns
745    ///
746    /// * `Option<&UserInfo>` - The user information, or `None` if not present.
747    ///
748    /// # Examples
749    ///
750    /// ```
751    /// use faup_rs::Url;
752    ///
753    /// let url = Url::parse("https://user:pass@example.com").unwrap();
754    /// assert_eq!(url.userinfo().unwrap().username(), "user");
755    /// assert_eq!(url.userinfo().unwrap().password(), Some("pass"));
756    /// ```
757    #[inline(always)]
758    pub fn userinfo(&self) -> Option<&UserInfo<'_>> {
759        self.userinfo.as_ref()
760    }
761
762    /// Returns the host component of the URL.
763    ///
764    /// # Returns
765    ///
766    /// * `&Host` - The host, which can be either a hostname or an IP address.
767    ///
768    /// # Examples
769    ///
770    /// ```
771    /// use faup_rs::Url;
772    ///
773    /// let url = Url::parse("https://sub2.sub1.example.com").unwrap();
774    /// let hostname = url.host().as_hostname().unwrap();
775    /// assert_eq!(hostname.full_name(), "sub2.sub1.example.com");
776    /// assert_eq!(hostname.domain(), Some("example.com"));
777    /// assert_eq!(hostname.suffix(), Some("com"));
778    /// assert_eq!(hostname.subdomain(), Some("sub2.sub1"));
779    /// ```
780    #[inline(always)]
781    pub fn host(&self) -> &Host<'_> {
782        &self.host
783    }
784
785    /// Returns the domain part of the hostname, if present.
786    ///
787    /// This is a convenience method that directly accesses the domain component
788    /// of the hostname, if the host is a hostname (not an IP address).
789    ///
790    /// # Returns
791    ///
792    /// * `Option<&str>` - The domain part of the hostname, or `None` if:
793    ///   - The host is an IP address
794    ///   - The hostname doesn't have a recognized domain
795    ///
796    /// # Examples
797    ///
798    /// ```
799    /// use faup_rs::Url;
800    ///
801    /// // With a domain name
802    /// let url = Url::parse("https://sub.example.com").unwrap();
803    /// assert_eq!(url.domain(), Some("example.com"));
804    ///
805    /// // With an IP address
806    /// let url = Url::parse("https://127.0.0.1").unwrap();
807    /// assert_eq!(url.domain(), None);
808    /// ```
809    #[inline(always)]
810    pub fn domain(&self) -> Option<&str> {
811        self.host.as_hostname().and_then(|h| h.domain())
812    }
813
814    /// Returns the subdomain part of the hostname, if present.
815    ///
816    /// This is a convenience method that directly accesses the subdomain component
817    /// of the hostname, if the host is a hostname (not an IP address).
818    ///
819    /// # Returns
820    ///
821    /// * `Option<&str>` - The subdomain part of the hostname, or `None` if:
822    ///   - The host is an IP address
823    ///   - The hostname doesn't have a subdomain
824    ///
825    /// # Examples
826    ///
827    /// ```
828    /// use faup_rs::Url;
829    ///
830    /// // With a subdomain
831    /// let url = Url::parse("https://sub.example.com").unwrap();
832    /// assert_eq!(url.subdomain(), Some("sub"));
833    ///
834    /// // Without a subdomain
835    /// let url = Url::parse("https://example.com").unwrap();
836    /// assert_eq!(url.subdomain(), None);
837    ///
838    /// // With an IP address
839    /// let url = Url::parse("https://127.0.0.1").unwrap();
840    /// assert_eq!(url.subdomain(), None);
841    /// ```
842    #[inline(always)]
843    pub fn subdomain(&self) -> Option<&str> {
844        self.host.as_hostname().and_then(|h| h.subdomain())
845    }
846
847    /// Returns the suffix (top-level domain) of the hostname, if present.
848    ///
849    /// This is a convenience method that directly accesses the suffix component
850    /// of the hostname, if the host is a hostname (not an IP address).
851    ///
852    /// # Returns
853    ///
854    /// * `Option<&str>` - The suffix (TLD) of the hostname, or `None` if:
855    ///   - The host is an IP address
856    ///   - The hostname doesn't have a recognized suffix
857    ///
858    /// # Examples
859    ///
860    /// ```
861    /// use faup_rs::Url;
862    ///
863    /// // With a standard TLD
864    /// let url = Url::parse("https://example.com").unwrap();
865    /// assert_eq!(url.suffix(), Some("com"));
866    ///
867    /// // With a custom TLD
868    /// let url = Url::parse("http://example.b32.i2p").unwrap();
869    /// assert_eq!(url.suffix(), Some("b32.i2p"));
870    ///
871    /// // With an IP address
872    /// let url = Url::parse("https://127.0.0.1").unwrap();
873    /// assert_eq!(url.suffix(), None);
874    /// ```
875    #[inline(always)]
876    pub fn suffix(&self) -> Option<&str> {
877        self.host.as_hostname().and_then(|h| h.suffix())
878    }
879
880    /// Returns the port number of the URL, if present.
881    ///
882    /// # Returns
883    ///
884    /// * `Option<u16>` - The port number, or `None` if not specified.
885    ///
886    /// # Examples
887    ///
888    /// ```
889    /// use faup_rs::Url;
890    ///
891    /// let url = Url::parse("https://example.com:8080").unwrap();
892    /// assert_eq!(url.port(), Some(8080));
893    /// ```
894    #[inline(always)]
895    pub fn port(&self) -> Option<u16> {
896        self.port
897    }
898
899    /// Returns the path component of the URL, if present.
900    ///
901    /// # Returns
902    ///
903    /// * `Option<&str>` - The path, or `None` if not present.
904    ///
905    /// # Examples
906    ///
907    /// ```
908    /// use faup_rs::Url;
909    ///
910    /// let url = Url::parse("https://example.com/path").unwrap();
911    /// assert_eq!(url.path(), Some("/path"));
912    /// ```
913    #[inline(always)]
914    pub fn path(&self) -> Option<&str> {
915        self.path.as_ref().map(|p| p.as_ref())
916    }
917
918    /// Returns the query component of the URL, if present.
919    ///
920    /// # Returns
921    ///
922    /// * `Option<&str>` - The query string, or `None` if not present.
923    ///
924    /// # Examples
925    ///
926    /// ```
927    /// use faup_rs::Url;
928    ///
929    /// let url = Url::parse("https://example.com?query=value").unwrap();
930    /// assert_eq!(url.query(), Some("query=value"));
931    /// ```
932    #[inline(always)]
933    pub fn query(&self) -> Option<&str> {
934        self.query.as_ref().map(|p| p.as_ref())
935    }
936
937    /// Returns the fragment component of the URL, if present.
938    ///
939    /// # Returns
940    ///
941    /// * `Option<&str>` - The fragment, or `None` if not present.
942    ///
943    /// # Examples
944    ///
945    /// ```
946    /// use faup_rs::Url;
947    ///
948    /// let url = Url::parse("https://example.com#fragment").unwrap();
949    /// assert_eq!(url.fragment(), Some("fragment"));
950    /// ```
951    #[inline(always)]
952    pub fn fragment(&self) -> Option<&str> {
953        self.fragment.as_ref().map(|p| p.as_ref())
954    }
955}
956
#[cfg(test)]
mod tests {
    use super::*;
    use std::net::{IpAddr, Ipv4Addr, Ipv6Addr};

    /// Parses `input` and compares all four hostname accessors against the
    /// expected values.
    ///
    /// Marked `#[track_caller]` so that a failing assertion is reported at the
    /// line of the test that invoked this helper.
    #[track_caller]
    fn check_hostname(
        input: &str,
        full_name: &str,
        suffix: Option<&str>,
        domain: Option<&str>,
        subdomain: Option<&str>,
    ) {
        let url = Url::parse(input).unwrap();
        let hostname = url.host().as_hostname().unwrap();
        assert_eq!(hostname.full_name(), full_name);
        assert_eq!(hostname.suffix(), suffix);
        assert_eq!(hostname.domain(), domain);
        assert_eq!(hostname.subdomain(), subdomain);
    }

    /// Smoke test: every sample URL below must be accepted by the parser.
    #[test]
    fn test_real_world_examples() {
        const SAMPLES: &[&str] = &[
            "https://www.example.co.uk",
            "http://sub.domain.example.com/path/to/page",
            "ftp://files.example.org/downloads/archive.zip",
            "https://www.example.com/search?q=rust+programming&page=1",
            "http://api.example.net/data?user=123&sort=desc",
            "https://docs.example.com/guide#installation",
            "http://example.com/page#section-1",
            "https://example.com/path%20with%20spaces",
            "http://localhost:3000/api/v1",
            "http://toaster.dyrøy.no",
            "http://full.custom-tld.test.b32.i2p",
            "https://alex:adore-la-quiche@avec-des-œufs.be#et-des-lardons",
            "https://%40lex:adore:la:quiche@%61vec-des-œufs.be/../../..some/directory/traversal/../#et-des-lardons",
        ];

        for url in SAMPLES.iter().copied() {
            println!("Testing: {url}");
            let outcome = Url::parse(url);
            // Print the readable error before `unwrap` aborts the test.
            if let Err(e) = &outcome {
                println!("Error parsing '{url}': {e}");
            }
            let _ = outcome.unwrap();
        }
    }

    /// A bare `scheme://host` URL exposes no optional component.
    #[test]
    fn test_minimal_url() {
        let minimal = Url::parse("https://example.com").unwrap();

        // Mandatory components.
        assert_eq!(minimal.scheme(), "https");
        assert_eq!(minimal.host().to_string(), "example.com");

        // Every optional component is absent.
        assert_eq!(minimal.port(), None);
        assert_eq!(minimal.path(), None);
        assert_eq!(minimal.query(), None);
        assert_eq!(minimal.fragment(), None);
        assert!(minimal.userinfo().is_none());

        // Hostname decomposition.
        let hostname = minimal.host().as_hostname().unwrap();
        assert_eq!(hostname.full_name(), "example.com");
        assert_eq!(hostname.suffix(), Some("com"));
        assert_eq!(hostname.domain(), Some("example.com"));
        assert_eq!(hostname.subdomain(), None);
    }

    /// Credentials placed in front of the host are exposed through `userinfo()`.
    #[test]
    fn test_user_info() {
        // Username together with a password.
        let with_password = Url::parse("https://user:pass@example.com").unwrap();
        assert_eq!(with_password.scheme(), "https");
        assert_eq!(with_password.host().to_string(), "example.com");
        let creds = with_password.userinfo().unwrap();
        assert_eq!(creds.username(), "user");
        assert_eq!(creds.password(), Some("pass"));

        // Username on its own.
        let username_only = Url::parse("ftp://user@example.com").unwrap();
        assert_eq!(username_only.scheme(), "ftp");
        let creds = username_only.userinfo().unwrap();
        assert_eq!(creds.username(), "user");
        assert_eq!(creds.password(), None);

        // Non-ASCII (UTF-8) credentials.
        let utf8 = Url::parse("https://用户:密码@example.com").unwrap();
        let creds = utf8.userinfo().unwrap();
        assert_eq!(creds.username(), "用户");
        assert_eq!(creds.password(), Some("密码"));
    }

    /// Ports are parsed as numbers, and out-of-range values are rejected.
    #[test]
    fn test_ports() {
        // A standard port, then a custom one.
        let valid = [
            ("http://example.com:80", 80_u16),
            ("http://example.com:8080", 8080_u16),
        ];
        for (input, expected) in valid {
            let url = Url::parse(input).unwrap();
            assert_eq!(url.port(), Some(expected));
        }

        // 99999 does not fit the port range.
        assert!(matches!(
            Url::parse("http://example.com:99999"),
            Err(Error::InvalidPort)
        ));
    }

    /// The path accessor for plain, nested, UTF-8 and absent paths.
    #[test]
    fn test_paths() {
        let cases = [
            // Simple path
            (
                "https://example.com/path/to/resource",
                Some("/path/to/resource"),
            ),
            // Path ending in a file name
            ("http://example.com/a/b/c.html", Some("/a/b/c.html")),
            // UTF-8 path
            ("https://example.com/路径/资源", Some("/路径/资源")),
            // No path at all
            ("https://example.com", None),
        ];

        for (input, expected) in cases {
            let url = Url::parse(input).unwrap();
            assert_eq!(url.path(), expected);
        }
    }

    /// The query accessor for ASCII, UTF-8 and absent queries.
    #[test]
    fn test_queries() {
        let cases = [
            // Simple query
            ("https://example.com?key=value", Some("key=value")),
            // UTF-8 query
            ("https://example.com?查询=值", Some("查询=值")),
            // No query at all
            ("https://example.com", None),
        ];

        for (input, expected) in cases {
            let url = Url::parse(input).unwrap();
            assert_eq!(url.query(), expected);
        }
    }

    /// The fragment accessor for ASCII, UTF-8 and absent fragments.
    #[test]
    fn test_fragments() {
        let cases = [
            // Simple fragment
            ("https://example.com#section1", Some("section1")),
            // UTF-8 fragment
            ("https://example.com#片段", Some("片段")),
            // No fragment at all
            ("https://example.com", None),
        ];

        for (input, expected) in cases {
            let url = Url::parse(input).unwrap();
            assert_eq!(url.fragment(), expected);
        }
    }

    /// One URL carrying every component at once.
    #[test]
    fn test_all_components() {
        const FULL: &str =
            "https://user:pass@sub.example.com:8080/path/to/resource?key=value#section1";
        let full = Url::parse(FULL).unwrap();

        assert_eq!(full.scheme(), "https");

        let creds = full.userinfo().unwrap();
        assert_eq!(creds.username(), "user");
        assert_eq!(creds.password(), Some("pass"));

        assert_eq!(full.host().to_string(), "sub.example.com");
        assert_eq!(full.port(), Some(8080));
        assert_eq!(full.path(), Some("/path/to/resource"));
        assert_eq!(full.query(), Some("key=value"));
        assert_eq!(full.fragment(), Some("section1"));
    }

    /// Splitting of hostnames into suffix, domain and subdomain.
    #[test]
    fn test_hostnames() {
        // Basic hostname
        check_hostname(
            "https://example.com",
            "example.com",
            Some("com"),
            Some("example.com"),
            None,
        );

        // Single-level subdomain
        check_hostname(
            "https://sub.example.com",
            "sub.example.com",
            Some("com"),
            Some("example.com"),
            Some("sub"),
        );

        // Multi-level subdomain
        check_hostname(
            "https://a.b.example.com",
            "a.b.example.com",
            Some("com"),
            Some("example.com"),
            Some("a.b"),
        );

        // Multi-level subdomain surrounded by every other URL component
        check_hostname(
            "https://user:pass@sub1.sub2.example.com:8080/path/to/resource?key=value#section1",
            "sub1.sub2.example.com",
            Some("com"),
            Some("example.com"),
            Some("sub1.sub2"),
        );

        // Custom TLD
        check_hostname(
            "http://example.b32.i2p",
            "example.b32.i2p",
            Some("b32.i2p"),
            Some("example.b32.i2p"),
            None,
        );

        // UTF-8 hostname
        check_hostname(
            "https://例子.测试",
            "例子.测试",
            Some("测试"),
            Some("例子.测试"),
            None,
        );

        // UTF-8 subdomain
        check_hostname(
            "https://子域.例子.测试",
            "子域.例子.测试",
            Some("测试"),
            Some("例子.测试"),
            Some("子域"),
        );
    }

    /// Hosts given as IP literals, valid and invalid.
    #[test]
    fn test_ip_hosts() {
        // IPv4 loopback
        let url = Url::parse("http://127.0.0.1").unwrap();
        let Host::Ip(IpAddr::V4(v4)) = url.host() else {
            panic!("Expected IPv4 address")
        };
        assert_eq!(v4, &Ipv4Addr::LOCALHOST);

        // IPv6 loopback, written in brackets
        let url = Url::parse("http://[::1]").unwrap();
        let Host::Ip(IpAddr::V6(v6)) = url.host() else {
            panic!("Expected IPv6 address")
        };
        assert_eq!(v6, &Ipv6Addr::LOCALHOST);

        // Octets above 255 are not a valid IPv4 address.
        assert!(matches!(
            Url::parse("http://999.999.999.999"),
            Err(Error::InvalidIPv4)
        ));

        // Malformed bracketed literal.
        assert!(matches!(
            Url::parse("http://[::::]"),
            Err(Error::InvalidIPv6)
        ));
    }

    /// Components that are present but (nearly) empty.
    #[test]
    fn test_edge_cases() {
        // Path made of the root slash only
        let root_path = Url::parse("https://example.com/").unwrap();
        assert_eq!(root_path.path(), Some("/"));

        // `?` with nothing after it
        let empty_query = Url::parse("https://example.com?").unwrap();
        assert_eq!(empty_query.query(), Some(""));

        // `#` with nothing after it
        let empty_fragment = Url::parse("https://example.com#").unwrap();
        assert_eq!(empty_fragment.fragment(), Some(""));

        // Registrable domain without any subdomain
        let bare_domain = Url::parse("https://example.com").unwrap();
        let hostname = bare_domain.host().as_hostname().unwrap();
        assert_eq!(hostname.subdomain(), None);
    }

    /// Percent-encoding and dot segments are returned exactly as written.
    #[test]
    fn test_special_characters() {
        // Percent-encoded octets stay encoded in both userinfo and host.
        let encoded =
            Url::parse("https://%40lex:adore:la:quiche@%61vec-des-œufs.be#et-des-lardons").unwrap();
        assert_eq!(encoded.host().to_string(), "%61vec-des-œufs.be");
        let creds = encoded.userinfo().unwrap();
        assert_eq!(creds.username(), "%40lex");
        assert_eq!(creds.password(), Some("adore:la:quiche"));
        assert_eq!(encoded.fragment(), Some("et-des-lardons"));

        // Dot segments are reported verbatim rather than being resolved.
        let traversal =
            Url::parse("https://example.com/../../..some/directory/traversal/../").unwrap();
        assert_eq!(
            traversal.path(),
            Some("/../../..some/directory/traversal/../")
        );
    }
}