1use std::borrow::Cow;
2
/// Removes wrapper schemes that prefix an ordinary URL.
///
/// A leading `blob:` is dropped first, then a leading `filesystem:`; each is
/// removed at most once and only in that order. Matching is case-sensitive.
/// The returned slice borrows from `url`.
#[inline]
fn strip_special_schemes(url: &str) -> &str {
    ["blob:", "filesystem:"]
        .iter()
        .fold(url, |remaining, prefix| {
            remaining.strip_prefix(*prefix).unwrap_or(remaining)
        })
}
8
9#[inline]
12pub fn host_and_rest(url: &str) -> Option<(&str, &str)> {
13 let url = strip_special_schemes(url);
14
15 let host_start = if let Some(pos) = url.find("://") {
16 pos + 3
17 } else if url.starts_with("//") {
18 2
19 } else {
20 return None;
21 };
22
23 let mut rest_start = url.len();
25 if let Some(i) = url[host_start..].find('/') {
26 rest_start = host_start + i;
27 }
28 if let Some(i) = url[host_start..].find('?') {
29 rest_start = rest_start.min(host_start + i);
30 }
31 if let Some(i) = url[host_start..].find('#') {
32 rest_start = rest_start.min(host_start + i);
33 }
34
35 let authority = &url[host_start..rest_start];
36 if authority.is_empty() {
37 return None;
38 }
39
40 let authority = authority.rsplit('@').next().unwrap_or(authority);
42
43 if authority.as_bytes().first() == Some(&b'[') {
45 let close = authority.find(']')?;
46 let host = &authority[1..close];
47 return Some((host, &url[rest_start..]));
48 }
49
50 let host_end = authority.find(':').unwrap_or(authority.len());
52 let host = &authority[..host_end];
53 if host.is_empty() {
54 return None;
55 }
56
57 Some((host, &url[rest_start..]))
58}
59
/// Returns `true` when `a` and `b` have the same length and are equal once
/// ASCII letters are compared case-insensitively. Non-ASCII bytes must match
/// exactly.
#[inline]
fn eq_ignore_ascii_case(a: &str, b: &str) -> bool {
    a.as_bytes().eq_ignore_ascii_case(b.as_bytes())
}
68
/// Returns `true` when the trailing bytes of `hay` equal `suf`, comparing
/// ASCII letters case-insensitively. An empty `suf` always matches; a `suf`
/// longer than `hay` never does.
#[inline]
fn ends_with_ignore_ascii_case(hay: &str, suf: &str) -> bool {
    let (hay_bytes, suf_bytes) = (hay.as_bytes(), suf.as_bytes());
    match hay_bytes.len().checked_sub(suf_bytes.len()) {
        // `tail_start` is where a suffix of the right length would begin.
        Some(tail_start) => hay_bytes[tail_start..].eq_ignore_ascii_case(suf_bytes),
        None => false,
    }
}
80
81#[inline]
84pub fn host_is_subdomain_of(host: &str, base: &str) -> bool {
85 let host = host.trim_end_matches('.');
86 let base = base.trim_end_matches('.');
87
88 if base.is_empty() {
89 return false;
90 }
91
92 if eq_ignore_ascii_case(host, base) {
93 return true;
94 }
95
96 if host.len() <= base.len() {
97 return false;
98 }
99
100 let dot_pos = host.len() - base.len() - 1;
101 host.as_bytes().get(dot_pos) == Some(&b'.') && ends_with_ignore_ascii_case(host, base)
102}
103
/// Lowercase DNS labels treated as "common" subdomain names.
///
/// Lookups are exact-match, so callers must lowercase the label first
/// (`is_common_subdomain_label` does this before calling `contains`).
static COMMON_SUBDOMAIN_LABELS: phf::Set<&'static str> = phf::phf_set! {
    // Web front ends and static/asset hosting.
    "www","m","amp","api","cdn","static","assets","img","images","media","files",
    // Authentication and accounts.
    "login","auth","sso","id","account","accounts",
    // Application and administration surfaces.
    "app","apps","dashboard","admin","portal","console",
    // Status, support and content.
    "status","support","help","docs","blog",
    // Environments.
    "dev","staging","stage","test","qa","uat","beta","alpha","preview","demo","sandbox",
    // Storage and delivery.
    "uploads","download","storage","origin","edge","cache",
    // Mail.
    "mail","email","smtp","mx","webmail",
    // API transports.
    "graphql","rpc","ws",
};
115
116#[inline]
117fn is_common_subdomain_label(lbl: &str) -> bool {
119 if lbl.is_empty() {
120 return false;
121 }
122 let lower = lbl.to_ascii_lowercase(); COMMON_SUBDOMAIN_LABELS.contains(lower.as_str())
124}
125
126#[inline]
127pub fn base_domain_from_url<'a>(main_url: &'a str) -> Option<&'a str> {
128 let (host, _) = host_and_rest(main_url)?;
129 Some(base_domain_from_host(host))
130}
131
132#[inline]
135pub fn rel_for_ignore_script<'a>(base_domain: &str, url: &'a str) -> Cow<'a, str> {
136 if url.starts_with('/') {
137 return Cow::Borrowed(url);
138 }
139
140 let base = base_domain.trim_end_matches('.');
141 if base.is_empty() {
142 return Cow::Borrowed(url);
143 }
144
145 if let Some((host, rest)) = host_and_rest(url) {
146 if host_is_subdomain_of(host, base) {
147 if rest.starts_with('/') {
149 return Cow::Borrowed(rest);
150 }
151 return Cow::Borrowed("/");
153 }
154 }
155
156 Cow::Borrowed(url)
157}
158
/// Returns `true` when `sld` is one of the second-level labels recognised
/// under two-letter country-code TLDs (for example the `co` in `co.uk` or the
/// `com` in `com.au`). The comparison ignores ASCII case.
#[inline]
fn is_common_cc_sld(sld: &str) -> bool {
    const KNOWN_SLDS: &[&str] = &[
        // two letters
        "co", "ac", "go", "or", "ne", "ed", "gr", "lg", "ad",
        // three letters
        "com", "net", "org", "gov", "edu", "mil", "nic", "sch", "gob",
        // four letters
        "gouv",
    ];

    KNOWN_SLDS
        .iter()
        .any(|known| sld.eq_ignore_ascii_case(known))
}
206
207#[inline]
208pub fn base_domain_from_host(host: &str) -> &str {
215 let mut h = host.trim_end_matches('.');
216 if let Some(x) = h.strip_prefix("www.") {
217 h = x;
218 }
219 if let Some(x) = h.strip_prefix("m.") {
220 h = x;
221 }
222
223 let last_dot = match h.rfind('.') {
225 Some(p) => p,
226 None => return h,
227 };
228 let prev_dot = match h[..last_dot].rfind('.') {
229 Some(p) => p,
230 None => return h, };
232
233 let tld = &h[last_dot + 1..];
234 let sld = &h[prev_dot + 1..last_dot];
235
236 let mut base = &h[prev_dot + 1..]; if tld.len() == 2 && is_common_cc_sld(sld) {
239 if let Some(prev2_dot) = h[..prev_dot].rfind('.') {
240 base = &h[prev2_dot + 1..]; }
242 }
243
244 if h.len() > base.len() + 1 {
245 let base_start = h.len() - base.len();
246 let boundary = base_start - 1;
247 if h.as_bytes().get(boundary) == Some(&b'.') {
248 let left_part = &h[..boundary];
249 let (lbl_start, lbl) = match left_part.rfind('.') {
251 Some(p) => (p + 1, &left_part[p + 1..]),
252 None => (0, left_part),
253 };
254
255 if !lbl.is_empty() && !is_common_subdomain_label(lbl) {
256 return &h[lbl_start..];
258 }
259 }
260 }
261
262 base
263}
264
/// Unit tests pinning host parsing, same-site matching and base-domain
/// derivation.
#[cfg(test)]
mod tests {
    use super::*;

    /// The base itself and any depth of subdomain match.
    #[test]
    fn test_domain_match_basic_and_subdomains() {
        let base = "logrocket.com";

        assert!(host_is_subdomain_of("logrocket.com", base));
        assert!(host_is_subdomain_of("staging.logrocket.com", base));
        assert!(host_is_subdomain_of("a.b.c.logrocket.com", base));

        // Matching ignores ASCII case on both arguments.
        assert!(host_is_subdomain_of(
            "StAgInG.LoGrOcKeT.CoM",
            "LOGROCKET.COM"
        ));
    }

    /// Hosts that merely contain or resemble the base must not match.
    #[test]
    fn test_domain_match_no_false_positives() {
        let base = "logrocket.com";

        // Suffix match without a '.' boundary.
        assert!(!host_is_subdomain_of("evil-logrocket.com", base));
        // Base appears as a prefix, not a suffix.
        assert!(!host_is_subdomain_of("logrocket.com.evil.com", base));
        assert!(!host_is_subdomain_of("staginglogrocket.com", base));
        // Shorter than the base.
        assert!(!host_is_subdomain_of("logrocket.co", base));
    }

    /// Userinfo, port and IPv6 brackets are stripped from the host; the path,
    /// query and fragment are returned as the rest.
    #[test]
    fn test_host_and_rest_handles_userinfo_port_ipv6() {
        let (h, rest) =
            host_and_rest("https://user:pass@staging.logrocket.com:8443/a.js?x=1#y").unwrap();
        assert_eq!(h, "staging.logrocket.com");
        assert_eq!(rest, "/a.js?x=1#y");

        let (h, rest) = host_and_rest("http://[::1]:8080/path").unwrap();
        assert_eq!(h, "::1");
        assert_eq!(rest, "/path");
    }

    /// Same-site URLs are reduced to their path; other inputs pass through.
    #[test]
    fn test_rel_for_ignore_script_logrocket_example() {
        let base = "logrocket.com";

        let main = "https://logrocket.com/careers";
        assert_eq!(rel_for_ignore_script(base, main).as_ref(), "/careers");

        let script = "https://staging.logrocket.com/LogRocket.min.js";
        assert_eq!(
            rel_for_ignore_script(base, script).as_ref(),
            "/LogRocket.min.js"
        );

        let other = "https://cdn.other.com/app.js";
        // A different site is returned unchanged.
        assert_eq!(rel_for_ignore_script(base, other).as_ref(), other);

        // An already root-relative path is returned unchanged.
        assert_eq!(
            rel_for_ignore_script(base, "/static/app.js").as_ref(),
            "/static/app.js"
        );
    }

    /// A same-site URL with a query but no path maps to the root path.
    #[test]
    fn test_rel_for_ignore_script_query_only_same_site() {
        let base = "example.com";
        let u = "https://sub.example.com?x=1";
        assert_eq!(rel_for_ignore_script(base, u).as_ref(), "/");
    }

    /// A `blob:` wrapper is ignored when extracting the host and path.
    #[test]
    fn test_rel_for_ignore_script_special_schemes() {
        let base = "example.com";
        let u = "blob:https://example.com/path/to/blob";
        assert_eq!(rel_for_ignore_script(base, u).as_ref(), "/path/to/blob");
    }

    /// A non-common leading label is kept as part of the base domain, so
    /// sibling hosts under the same parent domain do not match each other.
    #[test]
    fn test_base_domain_tenant_subdomain() {
        let base = base_domain_from_host("logrocket.chilipiper.com");
        assert_eq!(base, "logrocket.chilipiper.com");

        let u = "https://assets.logrocket.chilipiper.com/a.js";
        // A subdomain of the tenant host is same-site.
        assert_eq!(rel_for_ignore_script(base, u).as_ref(), "/a.js");

        let other = "https://othertenant.chilipiper.com/a.js";
        // A different tenant under the same parent is not.
        assert_eq!(rel_for_ignore_script(base, other).as_ref(), other);
    }
}