1use std::borrow::Cow;
2
/// Removes a leading `blob:` wrapper and then a leading `filesystem:` wrapper from `url`.
///
/// Each prefix is removed at most once, in that order, and matching is case-sensitive.
/// Input that carries neither prefix is returned unchanged.
#[inline]
fn strip_special_schemes(url: &str) -> &str {
    ["blob:", "filesystem:"]
        .iter()
        .fold(url, |remaining, prefix| {
            remaining.strip_prefix(*prefix).unwrap_or(remaining)
        })
}
8
9#[inline]
12pub fn host_and_rest(url: &str) -> Option<(&str, &str)> {
13 let url = strip_special_schemes(url);
14
15 let host_start = if let Some(pos) = url.find("://") {
16 pos + 3
17 } else if url.starts_with("//") {
18 2
19 } else {
20 return None;
21 };
22
23 let mut rest_start = url.len();
25 if let Some(i) = url[host_start..].find('/') {
26 rest_start = host_start + i;
27 }
28 if let Some(i) = url[host_start..].find('?') {
29 rest_start = rest_start.min(host_start + i);
30 }
31 if let Some(i) = url[host_start..].find('#') {
32 rest_start = rest_start.min(host_start + i);
33 }
34
35 let authority = &url[host_start..rest_start];
36 if authority.is_empty() {
37 return None;
38 }
39
40 let authority = authority.rsplit('@').next().unwrap_or(authority);
42
43 if authority.as_bytes().first() == Some(&b'[') {
45 let close = authority.find(']')?;
46 let host = &authority[1..close];
47 return Some((host, &url[rest_start..]));
48 }
49
50 let host_end = authority.find(':').unwrap_or(authority.len());
52 let host = &authority[..host_end];
53 if host.is_empty() {
54 return None;
55 }
56
57 Some((host, &url[rest_start..]))
58}
59
/// Returns `true` when `a` and `b` are equal once ASCII letters are compared
/// case-insensitively. Non-ASCII bytes must match exactly.
///
/// Thin wrapper over the standard-library `str::eq_ignore_ascii_case`, kept so call
/// sites in this file read symmetrically with `ends_with_ignore_ascii_case`.
#[inline]
fn eq_ignore_ascii_case(a: &str, b: &str) -> bool {
    a.eq_ignore_ascii_case(b)
}
68
/// Returns `true` when `hay` ends with `suf`, comparing ASCII letters
/// case-insensitively and every other byte exactly.
///
/// The comparison is done on bytes, so the suffix does not have to start on a character
/// boundary of `hay`. An empty `suf` always matches.
#[inline]
pub fn ends_with_ignore_ascii_case(hay: &str, suf: &str) -> bool {
    let (hay_bytes, suf_bytes) = (hay.as_bytes(), suf.as_bytes());
    match hay_bytes.len().checked_sub(suf_bytes.len()) {
        Some(offset) => hay_bytes[offset..].eq_ignore_ascii_case(suf_bytes),
        // The suffix is longer than the haystack.
        None => false,
    }
}
80
81#[inline]
82pub fn base_domain_from_any(s: &str) -> &str {
83 if let Some((h, _)) = host_and_rest(s) {
84 base_domain_from_host(h)
85 } else {
86 base_domain_from_host(s)
87 }
88}
89
/// Returns the left-most dot-separated label of `host`.
///
/// Trailing dots are ignored. A host without any dot is returned whole (minus trailing
/// dots), and a host that starts with a dot yields the empty string.
#[inline]
pub fn first_label(host: &str) -> &str {
    let trimmed = host.trim_end_matches('.');
    trimmed.split('.').next().unwrap_or(trimmed)
}
98
/// Returns `true` when any dot-separated label of `host` equals `label`, ignoring ASCII
/// case.
///
/// Trailing dots on `host` and leading/trailing dots on `label` are ignored. Only whole
/// labels count: `mainr` matches `mainr.vendor.com` but not `evil-mainr.com`. An empty
/// host or label (after trimming) never matches.
#[inline]
pub fn host_contains_label_icase(host: &str, label: &str) -> bool {
    let wanted = label.trim_matches('.');
    let labels = host.trim_end_matches('.');

    // Empty segments produced by consecutive dots can never equal the non-empty
    // `wanted`, so they need no special handling.
    !wanted.is_empty()
        && !labels.is_empty()
        && labels
            .split('.')
            .any(|segment| segment.eq_ignore_ascii_case(wanted))
}
142
143#[inline]
146pub fn host_is_subdomain_of(host: &str, base: &str) -> bool {
147 let host = host.trim_end_matches('.');
148 let base = base.trim_end_matches('.');
149
150 if base.is_empty() {
151 return false;
152 }
153
154 if eq_ignore_ascii_case(host, base) {
155 return true;
156 }
157
158 if host.len() <= base.len() {
159 return false;
160 }
161
162 let dot_pos = host.len() - base.len() - 1;
163 host.as_bytes().get(dot_pos) == Some(&b'.') && ends_with_ignore_ascii_case(host, base)
164}
165
/// Labels treated as generic, non-identifying subdomains (`www`, `cdn`, `api`, ...).
///
/// All entries are lowercase; `is_common_subdomain_label` lowercases its input before
/// looking it up here. `base_domain_from_host` consults that helper to decide whether
/// the label directly left of the base domain should be kept as part of the base.
static COMMON_SUBDOMAIN_LABELS: phf::Set<&'static str> = phf::phf_set! {
    // Web front-ends and static content.
    "www","m","amp","api","cdn","static","assets","img","images","media","files",
    // Authentication and accounts.
    "login","auth","sso","id","account","accounts",
    // Applications and back-office surfaces.
    "app","apps","dashboard","admin","portal","console",
    // Informational sites.
    "status","support","help","docs","blog",
    // Non-production environments.
    "dev","staging","stage","test","qa","uat","beta","alpha","preview","demo","sandbox",
    // Storage and delivery.
    "uploads","download","storage","origin","edge","cache",
    // Mail.
    "mail","email","smtp","mx","webmail",
    // API transports.
    "graphql","rpc","ws",
};
177
178#[inline]
179fn is_common_subdomain_label(lbl: &str) -> bool {
181 if lbl.is_empty() {
182 return false;
183 }
184 let lower = lbl.to_ascii_lowercase(); COMMON_SUBDOMAIN_LABELS.contains(lower.as_str())
186}
187
188#[inline]
189pub fn base_domain_from_url<'a>(main_url: &'a str) -> Option<&'a str> {
190 let (host, _) = host_and_rest(main_url)?;
191 Some(base_domain_from_host(host))
192}
193
194#[inline]
197pub fn rel_for_ignore_script<'a>(main_host_or_base: &str, url: &'a str) -> Cow<'a, str> {
198 if url.starts_with('/') {
199 return Cow::Borrowed(url);
200 }
201
202 let base = base_domain_from_host(main_host_or_base.trim_end_matches('.'));
203 let base = base.trim_end_matches('.');
204 if base.is_empty() {
205 return Cow::Borrowed(url);
206 }
207
208 let brand = first_label(base);
209
210 if let Some((host, rest)) = host_and_rest(url) {
211 if host_is_subdomain_of(host, base) || host_contains_label_icase(host, brand) {
212 if rest.starts_with('/') {
213 return Cow::Borrowed(rest);
214 }
215 return Cow::Borrowed("/");
216 }
217 }
218
219 Cow::Borrowed(url)
220}
221
/// Returns `true` when `sld` equals, ignoring ASCII case, one of a fixed list of
/// two-, three- and four-letter labels (`co`, `ac`, `com`, `gov`, `gouv`, ...).
///
/// `base_domain_from_host` uses this together with a two-letter TLD to recognise
/// suffixes of the `co.uk` kind.
#[inline]
fn is_common_cc_sld(sld: &str) -> bool {
    const KNOWN_SLDS: [&str; 19] = [
        // two letters
        "co", "ac", "go", "or", "ne", "ed", "gr", "lg", "ad",
        // three letters
        "com", "net", "org", "gov", "edu", "mil", "nic", "sch", "gob",
        // four letters
        "gouv",
    ];

    KNOWN_SLDS
        .iter()
        .any(|known| sld.eq_ignore_ascii_case(known))
}
269
270#[inline]
271pub fn base_domain_from_host(host: &str) -> &str {
278 let mut h = host.trim_end_matches('.');
279 if let Some(x) = h.strip_prefix("www.") {
280 h = x;
281 }
282 if let Some(x) = h.strip_prefix("m.") {
283 h = x;
284 }
285
286 let last_dot = match h.rfind('.') {
288 Some(p) => p,
289 None => return h,
290 };
291 let prev_dot = match h[..last_dot].rfind('.') {
292 Some(p) => p,
293 None => return h, };
295
296 let tld = &h[last_dot + 1..];
297 let sld = &h[prev_dot + 1..last_dot];
298
299 let mut base = &h[prev_dot + 1..]; if tld.len() == 2 && is_common_cc_sld(sld) {
302 if let Some(prev2_dot) = h[..prev_dot].rfind('.') {
303 base = &h[prev2_dot + 1..]; }
305 }
306
307 if h.len() > base.len() + 1 {
308 let base_start = h.len() - base.len();
309 let boundary = base_start - 1;
310 if h.as_bytes().get(boundary) == Some(&b'.') {
311 let left_part = &h[..boundary];
312 let (lbl_start, lbl) = match left_part.rfind('.') {
314 Some(p) => (p + 1, &left_part[p + 1..]),
315 None => (0, left_part),
316 };
317
318 if !lbl.is_empty() && !is_common_subdomain_label(lbl) {
319 return &h[lbl_start..];
321 }
322 }
323 }
324
325 base
326}
327
// Unit tests for host parsing, domain matching and script-URL reduction.
#[cfg(test)]
mod tests {
    use super::*;

    /// The base itself and subdomains at any depth match.
    #[test]
    fn test_domain_match_basic_and_subdomains() {
        let base = "mainr.com";

        assert!(host_is_subdomain_of("mainr.com", base));
        assert!(host_is_subdomain_of("staging.mainr.com", base));
        assert!(host_is_subdomain_of("a.b.c.mainr.com", base));

        // Matching ignores ASCII case on both sides.
        assert!(host_is_subdomain_of("StAgInG.mainr.CoM", "mainr.COM"));
    }

    /// Hosts that merely contain or resemble the base must not match.
    #[test]
    fn test_domain_match_no_false_positives() {
        let base = "mainr.com";

        // Suffix match without a dot boundary.
        assert!(!host_is_subdomain_of("evil-mainr.com", base));
        // Base appears as a prefix of a different domain.
        assert!(!host_is_subdomain_of("mainr.com.evil.com", base));
        assert!(!host_is_subdomain_of("stagingmainr.com", base));
        // Shorter than the base.
        assert!(!host_is_subdomain_of("mainr.co", base));
    }

    /// Userinfo and port are stripped; IPv6 literals lose their brackets.
    #[test]
    fn test_host_and_rest_handles_userinfo_port_ipv6() {
        let (h, rest) =
            host_and_rest("https://user:pass@staging.mainr.com:8443/a.js?x=1#y").unwrap();
        assert_eq!(h, "staging.mainr.com");
        assert_eq!(rest, "/a.js?x=1#y");

        let (h, rest) = host_and_rest("http://[::1]:8080/path").unwrap();
        assert_eq!(h, "::1");
        assert_eq!(rest, "/path");
    }

    /// Same-site URLs are reduced to their path; everything else is untouched.
    #[test]
    fn test_rel_for_ignore_script_mainr_example() {
        let base = "mainr.com";

        let main = "https://mainr.com/careers";
        assert_eq!(rel_for_ignore_script(base, main).as_ref(), "/careers");

        let script = "https://staging.mainr.com/mainr.min.js";
        assert_eq!(
            rel_for_ignore_script(base, script).as_ref(),
            "/mainr.min.js"
        );

        // Unrelated host: returned unchanged.
        let other = "https://cdn.other.com/app.js";
        assert_eq!(rel_for_ignore_script(base, other).as_ref(), other);

        // Already a root-relative path: returned unchanged.
        assert_eq!(
            rel_for_ignore_script(base, "/static/app.js").as_ref(),
            "/static/app.js"
        );
    }

    /// A same-site URL with a query but no path collapses to "/".
    #[test]
    fn test_rel_for_ignore_script_query_only_same_site() {
        let base = "example.com";
        let u = "https://sub.example.com?x=1";
        assert_eq!(rel_for_ignore_script(base, u).as_ref(), "/");
    }

    /// A `blob:` wrapper is removed before the host is matched.
    #[test]
    fn test_rel_for_ignore_script_special_schemes() {
        let base = "example.com";
        let u = "blob:https://example.com/path/to/blob";
        assert_eq!(rel_for_ignore_script(base, u).as_ref(), "/path/to/blob");
    }

    /// A non-generic label left of the base is kept as a tenant name.
    #[test]
    fn test_base_domain_tenant_subdomain() {
        let base = base_domain_from_host("mainr.chilipiper.com");
        assert_eq!(base, "mainr.chilipiper.com");

        // Subdomain of the tenant host matches.
        let u = "https://assets.mainr.chilipiper.com/a.js";
        assert_eq!(rel_for_ignore_script(base, u).as_ref(), "/a.js");

        // A different tenant on the same vendor domain does not.
        let other = "https://othertenant.chilipiper.com/a.js";
        assert_eq!(rel_for_ignore_script(base, other).as_ref(), other);
    }

    /// A vendor host carrying the brand as a whole label counts as same-site.
    #[test]
    fn test_brand_label_allows_vendor_subdomain() {
        let base = "mainr.com";
        let u = "https://mainr.chilipiper.com/concierge-js/cjs/concierge.js";
        assert_eq!(
            rel_for_ignore_script(base, u).as_ref(),
            "/concierge-js/cjs/concierge.js"
        );

        // The brand embedded inside a longer label is not a label match.
        let bad = "https://evil-mainr.com/x.js";
        assert_eq!(rel_for_ignore_script(base, bad).as_ref(), bad);
    }

    /// The first argument may be a full host; it is reduced to its base first.
    #[test]
    fn test_allows_vendor_host_when_brand_label_matches_main_site() {
        let main_host = "www.mainr.com";

        let u = "https://mainr.chilipiper.com/concierge-js/cjs/concierge.js";
        assert_eq!(
            rel_for_ignore_script(main_host, u).as_ref(),
            "/concierge-js/cjs/concierge.js"
        );
    }
}