url_parse/core/
domain.rs

1use crate::core::global::Domain;
2use crate::core::Parser;
3use crate::utils::Utils;
4use regex::Regex;
5
6impl Parser {
7    /// Extract the domain fields from the url.
8    ///
9    /// # Example
10    /// ```rust
11    /// use url_parse::core::Parser;
12    /// use url_parse::core::global::Domain;
13    /// let input = "https://www.example.com:443/blog/article/search?docid=720&hl=en#dayone";
14    /// let expected = Domain {
15    ///     subdomain: Some("www"),
16    ///     domain: Some("example"),
17    ///     top_level_domain: Some("com"),
18    /// };
19    /// let result = Parser::new(None).domain(input);
20    /// assert_eq!(result, expected);
21    /// ```
22    pub fn domain<'a>(&self, input: &'a str) -> Domain<'a> {
23        let input = Utils::substring_after_login(self, input);
24        let input = Utils::substring_before_port(self, input);
25        let input = match input.find('/') {
26            Some(pos) => &input[..pos],
27            None => input,
28        };
29        return self
30            .domain_ipv4(input)
31            .or_else(|| self.subdomain_domain_top_level_domain(input))
32            .or_else(|| self.subdomain_domain(input))
33            .or_else(|| self.domain_alias(input))
34            .unwrap_or_else(Domain::empty);
35    }
36
37    /// Mixes out the subdomain.domain part (i.e.: google.com -> subdomain(None), domain(google), top_level_domain(com))
38    fn subdomain_domain<'a>(&self, input: &'a str) -> Option<Domain<'a>> {
39        let re = Regex::new(r"(.*?)\.(.*)").unwrap();
40        let caps = re.captures(input);
41
42        caps.as_ref()?;
43
44        let caps = caps.unwrap();
45        return Some(Domain {
46            subdomain: None,
47            domain: Some(caps.get(1).unwrap().as_str()),
48            top_level_domain: Some(caps.get(2).unwrap().as_str()),
49        });
50    }
51
52    /// Mixes out the subdomain.domain.top_level_domain part (i.e.: www.google.com -> subdomain(www), domain(google), top_level_domain(com))
53    fn subdomain_domain_top_level_domain<'a>(&self, input: &'a str) -> Option<Domain<'a>> {
54        let re = Regex::new(r"(.*?)\.(.*)\.(.*)").unwrap();
55        let caps = re.captures(input);
56
57        caps.as_ref()?;
58
59        let caps = caps.unwrap();
60        return Some(Domain {
61            subdomain: Some(caps.get(1).unwrap().as_str()),
62            domain: Some(caps.get(2).unwrap().as_str()),
63            top_level_domain: Some(caps.get(3).unwrap().as_str()),
64        });
65    }
66
67    /// Mixes out the ip v4 into a Domain structure.
68    fn domain_ipv4<'a>(&self, input: &'a str) -> Option<Domain<'a>> {
69        let re = Regex::new(r"([0-9]+)\.([0-9]+)\.([0-9]+)\.([0-9]+)").unwrap();
70        let caps = re.captures(input);
71        caps.as_ref()?;
72        return Some(Domain {
73            subdomain: None,
74            domain: Some(caps.unwrap().get(0).unwrap().as_str()),
75            top_level_domain: None,
76        });
77    }
78
79    /// Mixes out single-word alias (i.e.: "localhost") into a Domain structure.
80    fn domain_alias<'a>(&self, input: &'a str) -> Option<Domain<'a>> {
81        let re = Regex::new(r".+").unwrap();
82        let caps = re.captures(input);
83        caps.as_ref()?;
84        return Some(Domain {
85            subdomain: None,
86            domain: Some(caps.unwrap().get(0).unwrap().as_str()),
87            top_level_domain: None,
88        });
89    }
90}
91
/// Unit tests for the domain extraction: the public `Parser::domain` entry point and
/// the private matchers it is built from.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_domain_ipv4_when_typical() {
        // The IPv4 matcher is unanchored, so it finds the address inside a full URL.
        let input = "https://192.168.178.242/dir";
        let expected = Domain {
            subdomain: None,
            domain: Some("192.168.178.242"),
            top_level_domain: None,
        };
        let result = Parser::new(None).domain_ipv4(input).unwrap();

        assert_eq!(result, expected);
    }

    #[test]
    fn test_domain_ipv4_when_port() {
        let input = "https://1.2.3.4:443/blog/article/search?docid=720&hl=en#dayone";
        let expected = Domain {
            subdomain: None,
            domain: Some("1.2.3.4"),
            top_level_domain: None,
        };
        let result = Parser::new(None).domain_ipv4(input).unwrap();

        assert_eq!(result, expected);
    }

    #[test]
    fn test_domain_works_when_typical() {
        let input = "https://www.example.com:443/blog/article/search?docid=720&hl=en#dayone";
        let expected = Domain {
            subdomain: Some("www"),
            domain: Some("example"),
            top_level_domain: Some("com"),
        };
        let result = Parser::new(None).domain(input);
        assert_eq!(result, expected);
    }

    #[test]
    fn test_domain_works_when_no_subdomain() {
        let input = "https://example.com:443/blog/article/search?docid=720&hl=en#dayone";
        let expected = Domain {
            subdomain: None,
            domain: Some("example"),
            top_level_domain: Some("com"),
        };
        let result = Parser::new(None).domain(input);
        assert_eq!(result, expected);
    }

    #[test]
    fn test_domain_works_when_typical_long_subdomain() {
        // With more than two dots, everything between the first and last dot is the domain.
        let input = "https://www.example.co.uk:443/blog/article/search?docid=720&hl=en#dayone";
        let expected = Domain {
            subdomain: Some("www"),
            domain: Some("example.co"),
            top_level_domain: Some("uk"),
        };
        let result = Parser::new(None).domain(input);
        assert_eq!(result, expected);
    }

    #[test]
    fn test_domain_works_when_no_port() {
        let input = "https://www.example.co.uk/blog/article/search?docid=720&hl=en#dayone";
        let expected = Domain {
            subdomain: Some("www"),
            domain: Some("example.co"),
            top_level_domain: Some("uk"),
        };
        let result = Parser::new(None).domain(input);
        assert_eq!(result, expected);
    }

    #[test]
    fn test_subdomain_domain_fails_when_garbage() {
        let input = "foobar";
        let expected = None;
        let result = Parser::new(None).subdomain_domain(input);
        assert_eq!(result, expected);
    }

    #[test]
    fn test_domain_ipv4_fails_when_garbage() {
        let input = "foobar";
        let expected = None;
        // Exercise the IPv4 matcher itself, as the test name promises.
        let result = Parser::new(None).domain_ipv4(input);
        assert_eq!(result, expected);
    }

    #[test]
    fn test_parse_works_when_localhost() {
        let domain = Parser::new(None).domain("ssh://user@localhost:2223/file");
        let result = domain.domain.unwrap();
        assert_eq!(result, "localhost");
    }

    #[test]
    fn test_parse_works_when_empty() {
        let domain = Parser::new(None).domain("");
        let result = domain.domain;
        assert!(result.is_none());
    }

    #[test]
    fn test_parse_works_when_localhost_ip() {
        let domain = Parser::new(None)
            .domain("ftp://127.0.0.1:21/subfolder/test_ftp_put_works_when_subfolder");
        let expected = "127.0.0.1";
        let result = domain.domain.unwrap();
        assert_eq!(result, expected);
    }
}