faup_rs/lib.rs
1//! # faup-rs: Fast URL Parser for Rust
2//!
3//! A high-performance, zero-allocation URL parser for Rust that handles:
4//! - Hostnames (with subdomains, custom TLDs, and IDNs)
5//! - IPv4/IPv6 addresses
6//! - User credentials (username/password)
7//! - Ports, paths, queries, and fragments
8//! - UTF-8 and URL-encoded characters
9//!
10//! ## Features
11//!
12//! ✅ **Zero-allocation parsing**: Borrows input strings where possible
13//!
14//! ✅ **Public Suffix List (PSL)**: Correctly identifies domain suffixes
15//!
16//! ✅ **Custom TLDs**: Extendable via the `CUSTOM_TLDS` constant
17//!
18//! ✅ **Comprehensive error handling**: Clear, actionable error types
19//!
20//! ✅ **UTF-8 support**: Full Unicode handling for all URL components
21//!
22//! ## Installation
23//!
24//! Add to your `Cargo.toml`:
25//! ```toml
26//! [dependencies]
27//! faup-rs = "0.1"
28//!```
29//!
30//! ## Usage
31//!
32//! ### Basic Parsing
33//! ```
34//! use faup_rs::Url;
35//!
36//! let url = Url::parse("https://user:pass@sub.example.com:8080/path?query=value#fragment").unwrap();
37//! assert_eq!(url.scheme(), "https");
38//! assert_eq!(url.host().to_string(), "sub.example.com");
39//! assert_eq!(url.port(), Some(8080));
40//! assert_eq!(url.path(), Some("/path"));
41//! assert_eq!(url.query(), Some("query=value"));
42//! assert_eq!(url.fragment(), Some("fragment"));
43//!```
44//!
45//! ### Hostname Components
46//! ```
47//! use faup_rs::{Url, Host};
48//!
49//! let url = Url::parse("https://sub.example.co.uk").unwrap();
50//! if let Host::Hostname(hostname) = url.host() {
51//! assert_eq!(hostname.full_name(), "sub.example.co.uk");
52//! assert_eq!(hostname.suffix(), Some("co.uk"));
53//! assert_eq!(hostname.domain(), Some("example.co.uk"));
54//! assert_eq!(hostname.subdomain(), Some("sub"));
55//! }
56//!```
57//!
58//! ### IP Addresses
59//! ```
60//! use faup_rs::Url;
61//!
62//! let url = Url::parse("http://[::1]").unwrap();
63//! assert!(matches!(url.host(), faup_rs::Host::Ip(ip) if ip.is_loopback()));
64//!```
65//!
66//! ### User Info (UTF-8 Support)
67//! ```
68//! use faup_rs::Url;
69//!
70//! let url = Url::parse("https://用户:密码@example.com").unwrap();
71//! let user_info = url.userinfo().unwrap();
72//! assert_eq!(user_info.username(), "用户");
73//! assert_eq!(user_info.password(), Some("密码"));
74//!```
75//!
76//! ### Custom TLDs
77//! ```
78//! use faup_rs::Url;
79//!
80//! let url = Url::parse("http://example.b32.i2p").unwrap();
81//! assert_eq!(url.suffix(), Some("b32.i2p"));
82//!```
83//!
84//! ## Examples
85//!
86//! ### Real-World URLs
87//! ```
88//! use faup_rs::Url;
89//!
90//! let urls = [
91//! "https://www.example.co.uk",
92//! "http://sub.domain.example.com/path/to/page",
93//! "https://例子.测试",
94//! "http://toaster.dyrøy.no",
95//! "http://full.custom-tld.test.b32.i2p",
96//! ];
97//! for url_str in urls {
98//! let url = Url::parse(url_str).unwrap();
99//! println!("Parsed: {}", url);
100//! }
101//!```
102//!
103//! ## License
104//!
//! This project is licensed under the GNU General Public License v3.0 (GPLv3).
106//!
107use std::{
108 borrow::Cow,
109 fmt,
110 net::{IpAddr, Ipv4Addr, Ipv6Addr},
111 str::FromStr,
112};
113
114use pest::{Parser, iterators::Pair};
115use pest_derive::Parser;
116use thiserror::Error;
117
/// Extra suffixes that are recognised in addition to the Public Suffix List.
///
/// The `suffix` helper checks these entries first and only falls back to the
/// `psl` lookup when none of them applies.
static CUSTOM_TLDS: &[&str] = &["b32.i2p"];
119
120#[derive(Debug, Error)]
121pub enum Error {
122 #[error("invalid port")]
123 InvalidPort,
124 #[error("invalid ipv4 address")]
125 InvalidIPv4,
126 #[error("invalid ipv6 address")]
127 InvalidIPv6,
128 #[error("parser error: {0}")]
129 Parse(#[from] Box<pest::error::Error<Rule>>),
130}
131
132#[derive(Parser)]
133#[grammar = "grammar.pest"]
134pub(crate) struct UrlParser;
135
136#[inline(always)]
137fn suffix(hostname: &str) -> Option<&str> {
138 for tld in CUSTOM_TLDS {
139 if hostname.ends_with(tld) {
140 return Some(tld);
141 }
142 }
143 psl::suffix_str(hostname)
144}
145
/// A hostname broken down into subdomain, domain and suffix.
///
/// Values of this type are produced while parsing a URL and are reached
/// through [`Url::host`]. The suffix comes either from the crate's custom TLD
/// list or from the Public Suffix List (`psl` crate); the domain is the suffix
/// plus the label directly in front of it, and the subdomain is everything
/// that precedes the domain.
///
/// # Examples
///
/// ```
/// use faup_rs::{Url, Host};
///
/// // Multi-level suffix with a subdomain
/// let url = Url::parse("https://sub.example.co.uk").unwrap();
/// if let Host::Hostname(hostname) = url.host() {
///     assert_eq!(hostname.full_name(), "sub.example.co.uk");
///     assert_eq!(hostname.suffix(), Some("co.uk"));
///     assert_eq!(hostname.domain(), Some("example.co.uk"));
///     assert_eq!(hostname.subdomain(), Some("sub"));
/// }
///
/// // No subdomain
/// let url = Url::parse("https://example.com").unwrap();
/// if let Host::Hostname(hostname) = url.host() {
///     assert_eq!(hostname.domain(), Some("example.com"));
///     assert_eq!(hostname.subdomain(), None);
/// }
///
/// // UTF-8 labels
/// let url = Url::parse("https://例子.测试").unwrap();
/// if let Host::Hostname(hostname) = url.host() {
///     assert_eq!(hostname.suffix(), Some("测试"));
///     assert_eq!(hostname.domain(), Some("例子.测试"));
/// }
///
/// // Custom TLD
/// let url = Url::parse("http://example.b32.i2p").unwrap();
/// if let Host::Hostname(hostname) = url.host() {
///     assert_eq!(hostname.suffix(), Some("b32.i2p"));
/// }
/// ```
#[derive(Debug)]
pub struct Hostname<'url> {
    /// The complete hostname.
    hostname: Cow<'url, str>,
    /// Labels to the left of `domain`, without the separating dot.
    subdomain: Option<Cow<'url, str>>,
    /// The suffix together with the label immediately before it.
    domain: Option<Cow<'url, str>>,
    /// The recognised suffix (custom TLD or Public Suffix List entry).
    suffix: Option<Cow<'url, str>>,
}
198
199impl<'url> Hostname<'url> {
200 fn into_owned<'owned>(self) -> Hostname<'owned> {
201 Hostname {
202 hostname: Cow::Owned(self.hostname.into_owned()),
203 subdomain: self.subdomain.map(|s| Cow::Owned(s.into_owned())),
204 domain: self.domain.map(|d| Cow::Owned(d.into_owned())),
205 suffix: self.suffix.map(|s| Cow::Owned(s.into_owned())),
206 }
207 }
208
209 fn from_str(hostname: &'url str) -> Self {
210 let suffix = suffix(hostname).map(Cow::Borrowed);
211
212 let domain = if let Some(suffix) = suffix.as_ref() {
213 let i = hostname.rfind(suffix.as_ref()).unwrap();
214 let dom_start = hostname[..i]
215 .trim_end_matches('.')
216 .rfind('.')
217 .map(|i| i + 1)
218 .unwrap_or_default();
219 Some(Cow::Borrowed(&hostname[dom_start..]))
220 } else {
221 None
222 };
223
224 let subdomain = if let Some(domain) = domain.as_ref() {
225 // cannot panic domain must be in hostname
226 let i = hostname.find(domain.as_ref()).unwrap().saturating_sub(1); // we get index after dot so we need to jump over it in order to process string backward
227 let subdomain = &hostname[..i];
228 if subdomain.is_empty() {
229 None
230 } else {
231 Some(Cow::Borrowed(subdomain))
232 }
233 } else {
234 None
235 };
236
237 Hostname {
238 hostname: Cow::Borrowed(hostname),
239 subdomain,
240 domain,
241 suffix,
242 }
243 }
244
245 /// Returns the complete hostname as a string.
246 ///
247 /// # Returns
248 ///
249 /// * `&str` - The full hostname.
250 ///
251 /// # Examples
252 ///
253 /// ```
254 /// use faup_rs::{Url, Host};
255 ///
256 /// let url = Url::parse("https://sub.example.com").unwrap();
257 /// if let Host::Hostname(hostname) = url.host() {
258 /// assert_eq!(hostname.full_name(), "sub.example.com");
259 /// }
260 /// ```
261 #[inline(always)]
262 pub fn full_name(&self) -> &str {
263 &self.hostname
264 }
265
266 /// Returns the suffix (top-level domain) of the hostname, if recognized.
267 ///
268 /// The suffix is determined using the Public Suffix List, with additional support
269 /// for custom TLDs defined in the `CUSTOM_TLDS` constant.
270 ///
271 /// # Returns
272 ///
273 /// * `Option<&str>` - The suffix (TLD), or `None` if not recognized.
274 ///
275 /// # Examples
276 ///
277 /// ```
278 /// use faup_rs::{Url, Host};
279 ///
280 /// // Standard TLD
281 /// let url = Url::parse("https://example.com").unwrap();
282 /// if let Host::Hostname(hostname) = url.host() {
283 /// assert_eq!(hostname.suffix(), Some("com"));
284 /// }
285 ///
286 /// // Multi-level TLD
287 /// let url = Url::parse("https://example.co.uk").unwrap();
288 /// if let Host::Hostname(hostname) = url.host() {
289 /// assert_eq!(hostname.suffix(), Some("co.uk"));
290 /// }
291 ///
292 /// // Custom TLD
293 /// let url = Url::parse("http://example.b32.i2p").unwrap();
294 /// if let Host::Hostname(hostname) = url.host() {
295 /// assert_eq!(hostname.suffix(), Some("b32.i2p"));
296 /// }
297 /// ```
298 #[inline(always)]
299 pub fn suffix(&self) -> Option<&str> {
300 self.suffix.as_ref().map(|p| p.as_ref())
301 }
302
303 /// Returns the domain part of the hostname, if recognized.
304 ///
305 /// The domain is the registrable part of the hostname, excluding any subdomains
306 /// and including the suffix.
307 ///
308 /// # Returns
309 ///
310 /// * `Option<&str>` - The domain, or `None` if not recognized.
311 ///
312 /// # Examples
313 ///
314 /// ```
315 /// use faup_rs::{Url, Host};
316 ///
317 /// // Simple domain
318 /// let url = Url::parse("https://example.com").unwrap();
319 /// if let Host::Hostname(hostname) = url.host() {
320 /// assert_eq!(hostname.domain(), Some("example.com"));
321 /// }
322 ///
323 /// // Domain with multi-level TLD
324 /// let url = Url::parse("https://example.co.uk").unwrap();
325 /// if let Host::Hostname(hostname) = url.host() {
326 /// assert_eq!(hostname.domain(), Some("example.co.uk"));
327 /// }
328 /// ```
329 #[inline(always)]
330 pub fn domain(&self) -> Option<&str> {
331 self.domain.as_ref().map(|p| p.as_ref())
332 }
333
334 /// Returns the subdomain part of the hostname, if present.
335 ///
336 /// The subdomain is everything before the domain. For example, in "sub.example.com",
337 /// "sub" is the subdomain.
338 ///
339 /// # Returns
340 ///
341 /// * `Option<&str>` - The subdomain, or `None` if not present.
342 ///
343 /// # Examples
344 ///
345 /// ```
346 /// use faup_rs::{Url, Host};
347 ///
348 /// // Single-level subdomain
349 /// let url = Url::parse("https://sub.example.com").unwrap();
350 /// if let Host::Hostname(hostname) = url.host() {
351 /// assert_eq!(hostname.subdomain(), Some("sub"));
352 /// }
353 ///
354 /// // Multi-level subdomain
355 /// let url = Url::parse("https://a.b.example.com").unwrap();
356 /// if let Host::Hostname(hostname) = url.host() {
357 /// assert_eq!(hostname.subdomain(), Some("a.b"));
358 /// }
359 ///
360 /// // No subdomain
361 /// let url = Url::parse("https://example.com").unwrap();
362 /// if let Host::Hostname(hostname) = url.host() {
363 /// assert_eq!(hostname.subdomain(), None);
364 /// }
365 /// ```
366 #[inline(always)]
367 pub fn subdomain(&self) -> Option<&str> {
368 self.subdomain.as_ref().map(|p| p.as_ref())
369 }
370}
371
372/// Represents the host component of a URL, which can be either a hostname or an IP address.
373#[derive(Debug)]
374pub enum Host<'url> {
375 /// A hostname (domain name).
376 Hostname(Hostname<'url>),
377 /// An IP address (either IPv4 or IPv6).
378 Ip(IpAddr),
379}
380
381impl fmt::Display for Host<'_> {
382 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
383 match self {
384 Host::Hostname(hostname) => write!(f, "{}", hostname.full_name()),
385 Host::Ip(ip) => write!(f, "{ip}"),
386 }
387 }
388}
389
390impl<'url> Host<'url> {
391 fn into_owned<'owned>(self) -> Host<'owned> {
392 match self {
393 Host::Hostname(h) => Host::Hostname(h.into_owned()),
394 Host::Ip(ip) => Host::Ip(ip),
395 }
396 }
397
398 /// Returns the hostname component if this is a `Host::Hostname` variant.
399 ///
400 /// # Returns
401 ///
402 /// * `Option<&Hostname>` - The hostname, or `None` if this is an IP address.
403 pub fn as_hostname(&self) -> Option<&Hostname<'_>> {
404 match self {
405 Host::Hostname(h) => Some(h),
406 _ => None,
407 }
408 }
409}
410
/// Credentials found in front of the host in a URL (`user[:password]@`).
///
/// The username is always present; the password is optional. Both may contain
/// non-ASCII (UTF-8) text.
///
/// # Examples
///
/// ```
/// use faup_rs::{Url, UserInfo};
///
/// // Username and password
/// let url = Url::parse("https://user:pass@example.com").unwrap();
/// let user_info = url.userinfo().unwrap();
/// assert_eq!(user_info.username(), "user");
/// assert_eq!(user_info.password(), Some("pass"));
///
/// // Username only
/// let url = Url::parse("https://user@example.com").unwrap();
/// let user_info = url.userinfo().unwrap();
/// assert_eq!(user_info.username(), "user");
/// assert_eq!(user_info.password(), None);
///
/// // UTF-8 credentials
/// let url = Url::parse("https://用户:密码@example.com").unwrap();
/// let user_info = url.userinfo().unwrap();
/// assert_eq!(user_info.username(), "用户");
/// assert_eq!(user_info.password(), Some("密码"));
/// ```
#[derive(Debug)]
pub struct UserInfo<'url> {
    /// The user name, as matched by the grammar.
    username: Cow<'url, str>,
    /// The password, when the URL carries one.
    password: Option<Cow<'url, str>>,
}
446
447impl<'url> UserInfo<'url> {
448 #[inline]
449 fn into_owned<'owned>(self) -> UserInfo<'owned> {
450 UserInfo {
451 username: Cow::Owned(self.username.into_owned()),
452 password: self.password.map(|p| Cow::Owned(p.into_owned())),
453 }
454 }
455
456 #[inline(always)]
457 fn from_pair(pair: Pair<'url, Rule>) -> Self {
458 let mut username = None;
459 let mut password = None;
460 for p in pair.into_inner() {
461 match p.as_rule() {
462 Rule::username => username = Some(Cow::Borrowed(p.as_str())),
463 Rule::password => password = Some(Cow::Borrowed(p.as_str())),
464 _ => {}
465 }
466 }
467 Self {
468 username: username.expect("username is guaranteed by parser"),
469 password,
470 }
471 }
472}
473
474impl UserInfo<'_> {
475 /// Returns the username component of the user information.
476 ///
477 /// # Returns
478 ///
479 /// * `&str` - The username.
480 ///
481 /// # Examples
482 ///
483 /// ```
484 /// use faup_rs::Url;
485 ///
486 /// let url = Url::parse("https://user@example.com").unwrap();
487 /// assert_eq!(url.userinfo().unwrap().username(), "user");
488 ///
489 /// // UTF-8 username
490 /// let url = Url::parse("https://用户@example.com").unwrap();
491 /// assert_eq!(url.userinfo().unwrap().username(), "用户");
492 /// ```
493 #[inline(always)]
494 pub fn username(&self) -> &str {
495 &self.username
496 }
497
498 /// Returns the password component of the user information, if present.
499 ///
500 /// # Returns
501 ///
502 /// * `Option<&str>` - The password, or `None` if not present.
503 ///
504 /// # Examples
505 ///
506 /// ```
507 /// use faup_rs::Url;
508 ///
509 /// // With password
510 /// let url = Url::parse("https://user:pass@example.com").unwrap();
511 /// assert_eq!(url.userinfo().unwrap().password(), Some("pass"));
512 ///
513 /// // Without password
514 /// let url = Url::parse("https://user@example.com").unwrap();
515 /// assert_eq!(url.userinfo().unwrap().password(), None);
516 ///
517 /// // UTF-8 password
518 /// let url = Url::parse("https://user:密码@example.com").unwrap();
519 /// assert_eq!(url.userinfo().unwrap().password(), Some("密码"));
520 /// ```
521 #[inline(always)]
522 pub fn password(&self) -> Option<&str> {
523 self.password.as_ref().map(|p| p.as_ref())
524 }
525}
526
527/// A parsed URL with support for hostnames, IPv4/IPv6 addresses, userinfo, ports, paths, queries, and fragments.
528///
529/// This struct represents a URL parsed from a string, with all components accessible individually.
530/// It supports both ASCII and UTF-8 characters in all components, and properly handles subdomains,
531/// custom TLDs, and internationalized domain names (IDNs).
532///
533/// # Examples
534///
535/// ```
536/// use faup_rs::Url;
537///
538/// // Parse a simple URL
539/// let url = Url::parse("https://example.com").unwrap();
540/// assert_eq!(url.scheme(), "https");
541/// assert_eq!(url.host().as_hostname().unwrap().full_name(), "example.com");
542///
543/// // Parse a URL with all components
544/// let url = Url::parse("https://user:pass@sub.example.com:8080/path?query=value#fragment").unwrap();
545/// assert_eq!(url.scheme(), "https");
546/// assert_eq!(url.userinfo().unwrap().username(), "user");
547/// assert_eq!(url.port(), Some(8080));
548/// assert_eq!(url.path(), Some("/path"));
549/// assert_eq!(url.query(), Some("query=value"));
550/// assert_eq!(url.fragment(), Some("fragment"));
551/// ```
552#[derive(Debug)]
553pub struct Url<'url> {
554 orig: Cow<'url, str>,
555 scheme: Cow<'url, str>,
556 userinfo: Option<UserInfo<'url>>,
557 host: Host<'url>,
558 port: Option<u16>,
559 path: Option<Cow<'url, str>>,
560 query: Option<Cow<'url, str>>,
561 fragment: Option<Cow<'url, str>>,
562}
563
564impl fmt::Display for Url<'_> {
565 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
566 write!(f, "{}", self.as_str())
567 }
568}
569
570impl<'url> Url<'url> {
571 fn from_pair(pair: Pair<'url, Rule>) -> Result<Self, Error> {
572 let orig = Cow::Borrowed(pair.as_str());
573 let mut scheme = None;
574 let mut userinfo = None;
575 let mut host = None;
576 let mut port = None;
577 let mut path = None;
578 let mut query = None;
579 let mut fragment = None;
580
581 for p in pair.into_inner() {
582 match p.as_rule() {
583 Rule::scheme => {
584 scheme = Some(Cow::Borrowed(p.as_str()));
585 }
586 Rule::userinfo => userinfo = Some(UserInfo::from_pair(p)),
587 Rule::host => {
588 // cannot panic guarantee by parser
589 let host_pair = p.into_inner().next().unwrap();
590 match host_pair.as_rule() {
591 Rule::hostname => {
592 host = Some(Host::Hostname(Hostname::from_str(host_pair.as_str())))
593 }
594 Rule::ipv4 => {
595 host = Some(
596 Ipv4Addr::from_str(host_pair.as_str())
597 .map(IpAddr::from)
598 .map(Host::Ip)
599 .map_err(|_| Error::InvalidIPv4)?,
600 );
601 }
602
603 Rule::ipv6 => {
604 host = Some(
605 Ipv6Addr::from_str(
606 host_pair.as_str().trim_matches(|c| c == '[' || c == ']'),
607 )
608 .map(IpAddr::from)
609 .map(Host::Ip)
610 .map_err(|_| Error::InvalidIPv6)?,
611 );
612 }
613 _ => {}
614 }
615 }
616 Rule::port => {
617 port = Some(u16::from_str(p.as_str()).map_err(|_| Error::InvalidPort)?)
618 }
619 Rule::path => {
620 path = Some(Cow::Borrowed(p.as_str()));
621 }
622
623 Rule::query => {
624 query = Some(Cow::Borrowed(&p.as_str()[1..]));
625 }
626
627 Rule::fragment => {
628 fragment = Some(Cow::Borrowed(&p.as_str()[1..]));
629 }
630 _ => {}
631 }
632 }
633
634 Ok(Url {
635 orig,
636 scheme: scheme.unwrap(),
637 userinfo,
638 host: host.unwrap(),
639 port,
640 path,
641 query,
642 fragment,
643 })
644 }
645
646 /// Converts this borrowed `Url` into an owned `Url`.
647 ///
648 /// This is useful when you need to store the `Url` for longer than the lifetime of the input string.
649 ///
650 /// # Performance
651 ///
652 /// When using this method strings will be cloned.
653 ///
654 /// # Returns
655 ///
656 /// * `Url<'owned>` - An owned version of the URL.
657 ///
658 /// # Examples
659 ///
660 /// ```
661 /// use faup_rs::Url;
662 ///
663 /// let url = Url::parse("https://example.com").unwrap();
664 /// let owned_url = url.into_owned();
665 /// ```
666 pub fn into_owned<'owned>(self) -> Url<'owned> {
667 Url {
668 orig: Cow::Owned(self.orig.into_owned()),
669 scheme: Cow::Owned(self.scheme.into_owned()),
670 userinfo: self.userinfo.map(|u| u.into_owned()),
671 host: self.host.into_owned(),
672 port: self.port,
673 path: self.path.map(|p| Cow::Owned(p.into_owned())),
674 query: self.query.map(|q| Cow::Owned(q.into_owned())),
675 fragment: self.fragment.map(|f| Cow::Owned(f.into_owned())),
676 }
677 }
678
679 /// Creates a new `Url` by parsing a string slice.
680 ///
681 /// # Arguments
682 ///
683 /// * `s` - A string slice containing the URL to parse.
684 ///
685 /// # Returns
686 ///
687 /// * `Result<Url, Error>` - A parsed `Url` if successful, or an `Error` if parsing fails.
688 ///
689 /// # Examples
690 ///
691 /// ```
692 /// use faup_rs::Url;
693 ///
694 /// let url = Url::parse("https://example.com").unwrap();
695 /// assert_eq!(url.scheme(), "https");
696 /// assert_eq!(url.domain(), Some("example.com"));
697 /// assert_eq!(url.suffix(), Some("com"));
698 /// ```
699 pub fn parse(s: &'url str) -> Result<Self, Error> {
700 let mut pairs = UrlParser::parse(Rule::url, s).map_err(Box::new)?;
701 Self::from_pair(pairs.next().unwrap())
702 }
703
704 /// Returns the original URL string.
705 ///
706 /// # Returns
707 ///
708 /// * `&str` - The original URL string.
709 ///
710 /// # Examples
711 ///
712 /// ```
713 /// use faup_rs::Url;
714 ///
715 /// let url = Url::parse("https://example.com").unwrap();
716 /// assert_eq!(url.as_str(), "https://example.com");
717 /// ```
718 #[inline(always)]
719 pub fn as_str(&self) -> &str {
720 &self.orig
721 }
722
723 /// Returns the scheme of the URL.
724 ///
725 /// # Returns
726 ///
727 /// * `&str` - The URL scheme (e.g., "http", "https").
728 ///
729 /// # Examples
730 ///
731 /// ```
732 /// use faup_rs::Url;
733 ///
734 /// let url = Url::parse("https://example.com").unwrap();
735 /// assert_eq!(url.scheme(), "https");
736 /// ```
737 #[inline(always)]
738 pub fn scheme(&self) -> &str {
739 &self.scheme
740 }
741
742 /// Returns the user information component of the URL, if present.
743 ///
744 /// # Returns
745 ///
746 /// * `Option<&UserInfo>` - The user information, or `None` if not present.
747 ///
748 /// # Examples
749 ///
750 /// ```
751 /// use faup_rs::Url;
752 ///
753 /// let url = Url::parse("https://user:pass@example.com").unwrap();
754 /// assert_eq!(url.userinfo().unwrap().username(), "user");
755 /// assert_eq!(url.userinfo().unwrap().password(), Some("pass"));
756 /// ```
757 #[inline(always)]
758 pub fn userinfo(&self) -> Option<&UserInfo<'_>> {
759 self.userinfo.as_ref()
760 }
761
762 /// Returns the host component of the URL.
763 ///
764 /// # Returns
765 ///
766 /// * `&Host` - The host, which can be either a hostname or an IP address.
767 ///
768 /// # Examples
769 ///
770 /// ```
771 /// use faup_rs::Url;
772 ///
773 /// let url = Url::parse("https://sub2.sub1.example.com").unwrap();
774 /// let hostname = url.host().as_hostname().unwrap();
775 /// assert_eq!(hostname.full_name(), "sub2.sub1.example.com");
776 /// assert_eq!(hostname.domain(), Some("example.com"));
777 /// assert_eq!(hostname.suffix(), Some("com"));
778 /// assert_eq!(hostname.subdomain(), Some("sub2.sub1"));
779 /// ```
780 #[inline(always)]
781 pub fn host(&self) -> &Host<'_> {
782 &self.host
783 }
784
785 /// Returns the domain part of the hostname, if present.
786 ///
787 /// This is a convenience method that directly accesses the domain component
788 /// of the hostname, if the host is a hostname (not an IP address).
789 ///
790 /// # Returns
791 ///
792 /// * `Option<&str>` - The domain part of the hostname, or `None` if:
793 /// - The host is an IP address
794 /// - The hostname doesn't have a recognized domain
795 ///
796 /// # Examples
797 ///
798 /// ```
799 /// use faup_rs::Url;
800 ///
801 /// // With a domain name
802 /// let url = Url::parse("https://sub.example.com").unwrap();
803 /// assert_eq!(url.domain(), Some("example.com"));
804 ///
805 /// // With an IP address
806 /// let url = Url::parse("https://127.0.0.1").unwrap();
807 /// assert_eq!(url.domain(), None);
808 /// ```
809 #[inline(always)]
810 pub fn domain(&self) -> Option<&str> {
811 self.host.as_hostname().and_then(|h| h.domain())
812 }
813
814 /// Returns the subdomain part of the hostname, if present.
815 ///
816 /// This is a convenience method that directly accesses the subdomain component
817 /// of the hostname, if the host is a hostname (not an IP address).
818 ///
819 /// # Returns
820 ///
821 /// * `Option<&str>` - The subdomain part of the hostname, or `None` if:
822 /// - The host is an IP address
823 /// - The hostname doesn't have a subdomain
824 ///
825 /// # Examples
826 ///
827 /// ```
828 /// use faup_rs::Url;
829 ///
830 /// // With a subdomain
831 /// let url = Url::parse("https://sub.example.com").unwrap();
832 /// assert_eq!(url.subdomain(), Some("sub"));
833 ///
834 /// // Without a subdomain
835 /// let url = Url::parse("https://example.com").unwrap();
836 /// assert_eq!(url.subdomain(), None);
837 ///
838 /// // With an IP address
839 /// let url = Url::parse("https://127.0.0.1").unwrap();
840 /// assert_eq!(url.subdomain(), None);
841 /// ```
842 #[inline(always)]
843 pub fn subdomain(&self) -> Option<&str> {
844 self.host.as_hostname().and_then(|h| h.subdomain())
845 }
846
847 /// Returns the suffix (top-level domain) of the hostname, if present.
848 ///
849 /// This is a convenience method that directly accesses the suffix component
850 /// of the hostname, if the host is a hostname (not an IP address).
851 ///
852 /// # Returns
853 ///
854 /// * `Option<&str>` - The suffix (TLD) of the hostname, or `None` if:
855 /// - The host is an IP address
856 /// - The hostname doesn't have a recognized suffix
857 ///
858 /// # Examples
859 ///
860 /// ```
861 /// use faup_rs::Url;
862 ///
863 /// // With a standard TLD
864 /// let url = Url::parse("https://example.com").unwrap();
865 /// assert_eq!(url.suffix(), Some("com"));
866 ///
867 /// // With a custom TLD
868 /// let url = Url::parse("http://example.b32.i2p").unwrap();
869 /// assert_eq!(url.suffix(), Some("b32.i2p"));
870 ///
871 /// // With an IP address
872 /// let url = Url::parse("https://127.0.0.1").unwrap();
873 /// assert_eq!(url.suffix(), None);
874 /// ```
875 #[inline(always)]
876 pub fn suffix(&self) -> Option<&str> {
877 self.host.as_hostname().and_then(|h| h.suffix())
878 }
879
880 /// Returns the port number of the URL, if present.
881 ///
882 /// # Returns
883 ///
884 /// * `Option<u16>` - The port number, or `None` if not specified.
885 ///
886 /// # Examples
887 ///
888 /// ```
889 /// use faup_rs::Url;
890 ///
891 /// let url = Url::parse("https://example.com:8080").unwrap();
892 /// assert_eq!(url.port(), Some(8080));
893 /// ```
894 #[inline(always)]
895 pub fn port(&self) -> Option<u16> {
896 self.port
897 }
898
899 /// Returns the path component of the URL, if present.
900 ///
901 /// # Returns
902 ///
903 /// * `Option<&str>` - The path, or `None` if not present.
904 ///
905 /// # Examples
906 ///
907 /// ```
908 /// use faup_rs::Url;
909 ///
910 /// let url = Url::parse("https://example.com/path").unwrap();
911 /// assert_eq!(url.path(), Some("/path"));
912 /// ```
913 #[inline(always)]
914 pub fn path(&self) -> Option<&str> {
915 self.path.as_ref().map(|p| p.as_ref())
916 }
917
918 /// Returns the query component of the URL, if present.
919 ///
920 /// # Returns
921 ///
922 /// * `Option<&str>` - The query string, or `None` if not present.
923 ///
924 /// # Examples
925 ///
926 /// ```
927 /// use faup_rs::Url;
928 ///
929 /// let url = Url::parse("https://example.com?query=value").unwrap();
930 /// assert_eq!(url.query(), Some("query=value"));
931 /// ```
932 #[inline(always)]
933 pub fn query(&self) -> Option<&str> {
934 self.query.as_ref().map(|p| p.as_ref())
935 }
936
937 /// Returns the fragment component of the URL, if present.
938 ///
939 /// # Returns
940 ///
941 /// * `Option<&str>` - The fragment, or `None` if not present.
942 ///
943 /// # Examples
944 ///
945 /// ```
946 /// use faup_rs::Url;
947 ///
948 /// let url = Url::parse("https://example.com#fragment").unwrap();
949 /// assert_eq!(url.fragment(), Some("fragment"));
950 /// ```
951 #[inline(always)]
952 pub fn fragment(&self) -> Option<&str> {
953 self.fragment.as_ref().map(|p| p.as_ref())
954 }
955}
956
957#[cfg(test)]
958mod tests {
959 use super::*;
960 use std::net::{IpAddr, Ipv4Addr, Ipv6Addr};
961
962 /// Test basic URL parsing with various real-world examples
963 #[test]
964 fn test_real_world_examples() {
965 let test_urls = [
966 "https://www.example.co.uk",
967 "http://sub.domain.example.com/path/to/page",
968 "ftp://files.example.org/downloads/archive.zip",
969 "https://www.example.com/search?q=rust+programming&page=1",
970 "http://api.example.net/data?user=123&sort=desc",
971 "https://docs.example.com/guide#installation",
972 "http://example.com/page#section-1",
973 "https://example.com/path%20with%20spaces",
974 "http://localhost:3000/api/v1",
975 "http://toaster.dyrøy.no",
976 "http://full.custom-tld.test.b32.i2p",
977 "https://alex:adore-la-quiche@avec-des-œufs.be#et-des-lardons",
978 "https://%40lex:adore:la:quiche@%61vec-des-œufs.be/../../..some/directory/traversal/../#et-des-lardons",
979 ];
980
981 for url in test_urls {
982 println!("Testing: {url}");
983 let _ = Url::parse(url)
984 .inspect_err(|e| println!("Error parsing '{url}': {e}"))
985 .unwrap();
986 }
987 }
988
989 /// Test minimal URL components
990 #[test]
991 fn test_minimal_url() {
992 let url = Url::parse("https://example.com").unwrap();
993 assert_eq!(url.scheme(), "https");
994 assert_eq!(url.host().to_string(), "example.com");
995 assert_eq!(url.port(), None);
996 assert_eq!(url.path(), None);
997 assert_eq!(url.query(), None);
998 assert_eq!(url.fragment(), None);
999 assert!(url.userinfo().is_none());
1000
1001 let hn = url.host().as_hostname().unwrap();
1002 assert_eq!(hn.full_name(), "example.com");
1003 assert_eq!(hn.suffix(), Some("com"));
1004 assert_eq!(hn.domain(), Some("example.com"));
1005 assert_eq!(hn.subdomain(), None);
1006 }
1007
1008 /// Test URLs with user information
1009 #[test]
1010 fn test_user_info() {
1011 // With both username and password
1012 let url = Url::parse("https://user:pass@example.com").unwrap();
1013 assert_eq!(url.scheme(), "https");
1014 assert_eq!(url.host().to_string(), "example.com");
1015 let userinfo = url.userinfo().unwrap();
1016 assert_eq!(userinfo.username(), "user");
1017 assert_eq!(userinfo.password(), Some("pass"));
1018
1019 // With only username
1020 let url = Url::parse("ftp://user@example.com").unwrap();
1021 assert_eq!(url.scheme(), "ftp");
1022 let userinfo = url.userinfo().unwrap();
1023 assert_eq!(userinfo.username(), "user");
1024 assert_eq!(userinfo.password(), None);
1025
1026 // With UTF-8 user info
1027 let url = Url::parse("https://用户:密码@example.com").unwrap();
1028 let userinfo = url.userinfo().unwrap();
1029 assert_eq!(userinfo.username(), "用户");
1030 assert_eq!(userinfo.password(), Some("密码"));
1031 }
1032
1033 /// Test URLs with ports
1034 #[test]
1035 fn test_ports() {
1036 // With standard port
1037 let url = Url::parse("http://example.com:80").unwrap();
1038 assert_eq!(url.port(), Some(80));
1039
1040 // With custom port
1041 let url = Url::parse("http://example.com:8080").unwrap();
1042 assert_eq!(url.port(), Some(8080));
1043
1044 // Invalid port
1045 let err = Url::parse("http://example.com:99999").unwrap_err();
1046 assert!(matches!(err, Error::InvalidPort));
1047 }
1048
1049 /// Test URLs with paths
1050 #[test]
1051 fn test_paths() {
1052 // Simple path
1053 let url = Url::parse("https://example.com/path/to/resource").unwrap();
1054 assert_eq!(url.path(), Some("/path/to/resource"));
1055
1056 // Complex path
1057 let url = Url::parse("http://example.com/a/b/c.html").unwrap();
1058 assert_eq!(url.path(), Some("/a/b/c.html"));
1059
1060 // UTF-8 path
1061 let url = Url::parse("https://example.com/路径/资源").unwrap();
1062 assert_eq!(url.path(), Some("/路径/资源"));
1063
1064 // No path
1065 let url = Url::parse("https://example.com").unwrap();
1066 assert_eq!(url.path(), None);
1067 }
1068
/// Checks the value returned by `Url::query` for ASCII, UTF-8 and absent
/// query strings.
#[test]
fn test_queries() {
    // Each entry pairs an input URL with the query the parser must report.
    let cases = [
        // Simple key/value query
        ("https://example.com?key=value", Some("key=value")),
        // UTF-8 query
        ("https://example.com?查询=值", Some("查询=值")),
        // No query
        ("https://example.com", None),
    ];

    for (input, expected) in cases {
        let parsed = Url::parse(input).unwrap();
        assert_eq!(parsed.query(), expected);
    }
}
1084
/// Checks the value returned by `Url::fragment` for ASCII, UTF-8 and absent
/// fragments.
#[test]
fn test_fragments() {
    // ASCII fragment: the leading '#' is not part of the reported value.
    let ascii = Url::parse("https://example.com#section1").unwrap();
    assert_eq!(ascii.fragment(), Some("section1"));

    // Non-ASCII fragment is returned verbatim.
    let unicode = Url::parse("https://example.com#片段").unwrap();
    assert_eq!(unicode.fragment(), Some("片段"));

    // Without a '#' there is no fragment at all.
    let bare = Url::parse("https://example.com").unwrap();
    assert!(bare.fragment().is_none());
}
1100
/// Parses one URL that carries every component and checks each accessor.
#[test]
fn test_all_components() {
    let input = "https://user:pass@sub.example.com:8080/path/to/resource?key=value#section1";
    let url = Url::parse(input).unwrap();

    // Scheme and credentials.
    assert_eq!(url.scheme(), "https");
    let credentials = url.userinfo().unwrap();
    assert_eq!(credentials.username(), "user");
    assert_eq!(credentials.password(), Some("pass"));

    // Authority.
    assert_eq!(url.host().to_string(), "sub.example.com");
    assert_eq!(url.port(), Some(8080));

    // Everything after the authority.
    assert_eq!(url.path(), Some("/path/to/resource"));
    assert_eq!(url.query(), Some("key=value"));
    assert_eq!(url.fragment(), Some("section1"));
}
1119
/// Checks how hostnames are split into full name, suffix, domain and
/// subdomain, covering plain, nested, custom-TLD and UTF-8 names.
#[test]
fn test_hostnames() {
    /// Parses `input`, requires its host to be a hostname and compares the
    /// four hostname accessors against the expected values.
    #[track_caller] // report failures at the calling line, not inside the helper
    fn check(
        input: &str,
        full_name: &str,
        suffix: Option<&str>,
        domain: Option<&str>,
        subdomain: Option<&str>,
    ) {
        let url = Url::parse(input).unwrap();
        let hn = url.host().as_hostname().unwrap();
        assert_eq!(hn.full_name(), full_name);
        assert_eq!(hn.suffix(), suffix);
        assert_eq!(hn.domain(), domain);
        assert_eq!(hn.subdomain(), subdomain);
    }

    // Basic hostname
    check(
        "https://example.com",
        "example.com",
        Some("com"),
        Some("example.com"),
        None,
    );

    // Single-level subdomain
    check(
        "https://sub.example.com",
        "sub.example.com",
        Some("com"),
        Some("example.com"),
        Some("sub"),
    );

    // Multi-level subdomain
    check(
        "https://a.b.example.com",
        "a.b.example.com",
        Some("com"),
        Some("example.com"),
        Some("a.b"),
    );

    // Multi-level subdomain inside a URL that has every other component too
    check(
        "https://user:pass@sub1.sub2.example.com:8080/path/to/resource?key=value#section1",
        "sub1.sub2.example.com",
        Some("com"),
        Some("example.com"),
        Some("sub1.sub2"),
    );

    // Custom TLD: "b32.i2p" is treated as the suffix
    check(
        "http://example.b32.i2p",
        "example.b32.i2p",
        Some("b32.i2p"),
        Some("example.b32.i2p"),
        None,
    );

    // UTF-8 hostname
    check(
        "https://例子.测试",
        "例子.测试",
        Some("测试"),
        Some("例子.测试"),
        None,
    );

    // UTF-8 subdomain
    check(
        "https://子域.例子.测试",
        "子域.例子.测试",
        Some("测试"),
        Some("例子.测试"),
        Some("子域"),
    );
}
1182
/// Checks that IPv4 and bracketed IPv6 literals become `Host::Ip` values and
/// that malformed literals are rejected with the matching error variant.
#[test]
fn test_ip_hosts() {
    // IPv4 loopback literal
    let v4_url = Url::parse("http://127.0.0.1").unwrap();
    if let Host::Ip(IpAddr::V4(ip)) = v4_url.host() {
        assert_eq!(ip, &Ipv4Addr::LOCALHOST);
    } else {
        panic!("Expected IPv4 address");
    }

    // IPv6 loopback literal, written in brackets
    let v6_url = Url::parse("http://[::1]").unwrap();
    if let Host::Ip(IpAddr::V6(ip)) = v6_url.host() {
        assert_eq!(ip, &Ipv6Addr::LOCALHOST);
    } else {
        panic!("Expected IPv6 address");
    }

    // Octets above 255 make the IPv4 literal invalid
    assert!(matches!(
        Url::parse("http://999.999.999.999"),
        Err(Error::InvalidIPv4)
    ));

    // "::::" is not a well-formed IPv6 literal
    assert!(matches!(
        Url::parse("http://[::::]"),
        Err(Error::InvalidIPv6)
    ));
}
1208
/// Checks boundary inputs: a bare trailing slash, an empty query, an empty
/// fragment, and a hostname without any subdomain.
#[test]
fn test_edge_cases() {
    // A lone trailing slash is reported as the path "/".
    let slash_only = Url::parse("https://example.com/").unwrap();
    assert_eq!(slash_only.path(), Some("/"));

    // A '?' with nothing after it yields an empty query, not `None`.
    let empty_query = Url::parse("https://example.com?").unwrap();
    assert_eq!(empty_query.query(), Some(""));

    // A '#' with nothing after it yields an empty fragment, not `None`.
    let empty_fragment = Url::parse("https://example.com#").unwrap();
    assert_eq!(empty_fragment.fragment(), Some(""));

    // A two-label hostname has no subdomain.
    let bare = Url::parse("https://example.com").unwrap();
    let hostname = bare.host().as_hostname().unwrap();
    assert!(hostname.subdomain().is_none());
}
1229
/// Checks that percent-escapes, non-ASCII characters, extra colons in the
/// credentials and dot segments in the path come back exactly as written.
#[test]
fn test_special_characters() {
    // Escapes are not decoded, and the password keeps every colon after the first.
    let raw = "https://%40lex:adore:la:quiche@%61vec-des-œufs.be#et-des-lardons";
    let url = Url::parse(raw).unwrap();
    let credentials = url.userinfo().unwrap();
    assert_eq!(url.host().to_string(), "%61vec-des-œufs.be");
    assert_eq!(credentials.username(), "%40lex");
    assert_eq!(credentials.password(), Some("adore:la:quiche"));
    assert_eq!(url.fragment(), Some("et-des-lardons"));

    // ".." segments are reported verbatim rather than normalised away.
    let raw = "https://example.com/../../..some/directory/traversal/../";
    let url = Url::parse(raw).unwrap();
    assert_eq!(url.path(), Some("/../../..some/directory/traversal/../"));
}
1246}