1use std::borrow::Cow;
2
/// Removes wrapper scheme prefixes so the embedded URL can be parsed.
///
/// A leading `blob:` is dropped first, then a leading `filesystem:`; each is
/// removed at most once, in that order, and matching is case-sensitive.
/// Input without either prefix is returned unchanged.
#[inline]
fn strip_special_schemes(url: &str) -> &str {
    let mut remaining = url;
    for wrapper in ["blob:", "filesystem:"].iter() {
        if let Some(inner) = remaining.strip_prefix(*wrapper) {
            remaining = inner;
        }
    }
    remaining
}
8
9#[inline]
12pub fn host_and_rest(url: &str) -> Option<(&str, &str)> {
13 let url = strip_special_schemes(url);
14
15 let host_start = if let Some(pos) = url.find("://") {
16 pos + 3
17 } else if url.starts_with("//") {
18 2
19 } else {
20 return None;
21 };
22
23 let mut rest_start = url.len();
25 if let Some(i) = url[host_start..].find('/') {
26 rest_start = host_start + i;
27 }
28 if let Some(i) = url[host_start..].find('?') {
29 rest_start = rest_start.min(host_start + i);
30 }
31 if let Some(i) = url[host_start..].find('#') {
32 rest_start = rest_start.min(host_start + i);
33 }
34
35 let authority = &url[host_start..rest_start];
36 if authority.is_empty() {
37 return None;
38 }
39
40 let authority = authority.rsplit('@').next().unwrap_or(authority);
42
43 if authority.as_bytes().first() == Some(&b'[') {
45 let close = authority.find(']')?;
46 let host = &authority[1..close];
47 return Some((host, &url[rest_start..]));
48 }
49
50 let host_end = authority.find(':').unwrap_or(authority.len());
52 let host = &authority[..host_end];
53 if host.is_empty() {
54 return None;
55 }
56
57 Some((host, &url[rest_start..]))
58}
59
/// Returns `true` when `a` and `b` are equal after folding ASCII letters to a
/// common case. Non-ASCII bytes must match exactly.
#[inline]
fn eq_ignore_ascii_case(a: &str, b: &str) -> bool {
    let (left, right) = (a.as_bytes(), b.as_bytes());
    left.eq_ignore_ascii_case(right)
}
64
/// Returns `true` when `hay` ends with `suf`, ignoring ASCII case.
///
/// An empty `suf` matches every `hay`. Non-ASCII bytes must match exactly.
///
/// The comparison is done on bytes rather than by slicing `hay` as a `str`:
/// the suffix offset may fall inside a multi-byte UTF-8 character, where a
/// string slice would panic. In that case the result is simply `false`.
#[inline]
pub fn ends_with_ignore_ascii_case(hay: &str, suf: &str) -> bool {
    let (hay, suf) = (hay.as_bytes(), suf.as_bytes());
    hay.len()
        .checked_sub(suf.len())
        .map_or(false, |start| hay[start..].eq_ignore_ascii_case(suf))
}
72
73#[inline]
74pub fn base_domain_from_any(s: &str) -> &str {
75 if let Some((h, _)) = host_and_rest(s) {
76 base_domain_from_host(h)
77 } else {
78 base_domain_from_host(s)
79 }
80}
81
/// Returns the leftmost dot-separated label of `host`.
///
/// Trailing dots are ignored. A host without any dot is returned whole (minus
/// trailing dots); a host that starts with a dot yields an empty label.
#[inline]
pub fn first_label(host: &str) -> &str {
    let trimmed = host.trim_end_matches('.');
    trimmed.split('.').next().unwrap_or(trimmed)
}
90
/// Returns `true` when any dot-separated label of `host` equals `label`,
/// ignoring ASCII case.
///
/// Trailing dots on `host` and leading/trailing dots on `label` are ignored.
/// Only whole labels match: `evil-mainr.com` does not contain the label
/// `mainr`, and a `label` with an inner dot never matches. Returns `false`
/// when either input is empty after trimming.
#[inline]
pub fn host_contains_label_icase(host: &str, label: &str) -> bool {
    let wanted = label.trim_matches('.');
    if wanted.is_empty() {
        return false;
    }

    // Empty segments (from an empty host or doubled dots) can never equal the
    // non-empty `wanted`, so they need no special handling.
    host.trim_end_matches('.')
        .split('.')
        .any(|segment| segment.eq_ignore_ascii_case(wanted))
}
125
126#[inline]
129pub fn host_is_subdomain_of(host: &str, base: &str) -> bool {
130 let host = host.trim_end_matches('.');
131 let base = base.trim_end_matches('.');
132
133 if base.is_empty() {
134 return false;
135 }
136
137 if eq_ignore_ascii_case(host, base) {
138 return true;
139 }
140
141 if host.len() <= base.len() {
142 return false;
143 }
144
145 let dot_pos = host.len() - base.len() - 1;
146 host.as_bytes().get(dot_pos) == Some(&b'.') && ends_with_ignore_ascii_case(host, base)
147}
148
/// Subdomain labels treated as generic rather than as tenant or brand names.
///
/// All entries are lowercase ASCII; `is_common_subdomain_label` lowercases its
/// input before the lookup. Rows are grouped loosely by purpose (web/static
/// delivery, authentication, application surfaces, support content,
/// environments, storage/caching, mail, API transports).
static COMMON_SUBDOMAIN_LABELS: phf::Set<&'static str> = phf::phf_set! {
    "www","m","amp","api","cdn","static","assets","img","images","media","files",
    "login","auth","sso","id","account","accounts",
    "app","apps","dashboard","admin","portal","console",
    "status","support","help","docs","blog",
    "dev","staging","stage","test","qa","uat","beta","alpha","preview","demo","sandbox",
    "uploads","download","storage","origin","edge","cache",
    "mail","email","smtp","mx","webmail",
    "graphql","rpc","ws",
};
160
161#[inline]
162fn is_common_subdomain_label(lbl: &str) -> bool {
164 if lbl.is_empty() {
165 return false;
166 }
167 let lower = lbl.to_ascii_lowercase(); COMMON_SUBDOMAIN_LABELS.contains(lower.as_str())
169}
170
171#[inline]
172pub fn base_domain_from_url(main_url: &str) -> Option<&str> {
173 let (host, _) = host_and_rest(main_url)?;
174 Some(base_domain_from_host(host))
175}
176
177#[inline]
180pub fn rel_for_ignore_script<'a>(main_host_or_base: &str, url: &'a str) -> Cow<'a, str> {
181 if url.starts_with('/') {
182 return Cow::Borrowed(url);
183 }
184
185 let base = base_domain_from_host(main_host_or_base.trim_end_matches('.'));
186 let base = base.trim_end_matches('.');
187 if base.is_empty() {
188 return Cow::Borrowed(url);
189 }
190
191 let brand = first_label(base);
192
193 if let Some((host, rest)) = host_and_rest(url) {
194 if host_is_subdomain_of(host, base) || host_contains_label_icase(host, brand) {
195 if rest.starts_with('/') {
196 return Cow::Borrowed(rest);
197 }
198 return Cow::Borrowed("/");
199 }
200 }
201
202 Cow::Borrowed(url)
203}
204
/// Returns `true` when `sld` is one of the second-level labels this module
/// recognises in front of a two-letter TLD (as in `co.uk` or `com.au`).
///
/// The comparison ignores ASCII case; anything not in the table, including the
/// empty string, yields `false`.
#[inline]
fn is_common_cc_sld(sld: &str) -> bool {
    const KNOWN_SLDS: [&str; 19] = [
        // two letters
        "co", "ac", "go", "or", "ne", "ed", "gr", "lg", "ad",
        // three letters
        "com", "net", "org", "gov", "edu", "mil", "nic", "sch", "gob",
        // four letters
        "gouv",
    ];

    KNOWN_SLDS
        .iter()
        .any(|known| sld.eq_ignore_ascii_case(known))
}
252
253#[inline]
254pub fn base_domain_from_host(host: &str) -> &str {
261 let mut h = host.trim_end_matches('.');
262 if let Some(x) = h.strip_prefix("www.") {
263 h = x;
264 }
265 if let Some(x) = h.strip_prefix("m.") {
266 h = x;
267 }
268
269 let last_dot = match h.rfind('.') {
271 Some(p) => p,
272 None => return h,
273 };
274 let prev_dot = match h[..last_dot].rfind('.') {
275 Some(p) => p,
276 None => return h, };
278
279 let tld = &h[last_dot + 1..];
280 let sld = &h[prev_dot + 1..last_dot];
281
282 let mut base = &h[prev_dot + 1..]; if tld.len() == 2 && is_common_cc_sld(sld) {
285 if let Some(prev2_dot) = h[..prev_dot].rfind('.') {
286 base = &h[prev2_dot + 1..]; }
288 }
289
290 if h.len() > base.len() + 1 {
291 let base_start = h.len() - base.len();
292 let boundary = base_start - 1;
293 if h.as_bytes().get(boundary) == Some(&b'.') {
294 let left_part = &h[..boundary];
295 let (lbl_start, lbl) = match left_part.rfind('.') {
297 Some(p) => (p + 1, &left_part[p + 1..]),
298 None => (0, left_part),
299 };
300
301 if !lbl.is_empty() && !is_common_subdomain_label(lbl) {
302 return &h[lbl_start..];
304 }
305 }
306 }
307
308 base
309}
310
// Unit tests for host parsing, subdomain matching and script-URL reduction.
#[cfg(test)]
mod tests {
    use super::*;

    // The base itself and subdomains at any depth match, ignoring ASCII case.
    #[test]
    fn test_domain_match_basic_and_subdomains() {
        let base = "mainr.com";

        assert!(host_is_subdomain_of("mainr.com", base));
        assert!(host_is_subdomain_of("staging.mainr.com", base));
        assert!(host_is_subdomain_of("a.b.c.mainr.com", base));

        assert!(host_is_subdomain_of("StAgInG.mainr.CoM", "mainr.COM"));
    }

    // Look-alike hosts must not match: a suffix without a dot boundary, the
    // base used as a prefix of another domain, and a different TLD.
    #[test]
    fn test_domain_match_no_false_positives() {
        let base = "mainr.com";

        assert!(!host_is_subdomain_of("evil-mainr.com", base));
        assert!(!host_is_subdomain_of("mainr.com.evil.com", base));
        assert!(!host_is_subdomain_of("stagingmainr.com", base));
        assert!(!host_is_subdomain_of("mainr.co", base));
    }

    // Userinfo and port are removed from the host; IPv6 brackets are dropped.
    #[test]
    fn test_host_and_rest_handles_userinfo_port_ipv6() {
        let (h, rest) =
            host_and_rest("https://user:pass@staging.mainr.com:8443/a.js?x=1#y").unwrap();
        assert_eq!(h, "staging.mainr.com");
        assert_eq!(rest, "/a.js?x=1#y");

        let (h, rest) = host_and_rest("http://[::1]:8080/path").unwrap();
        assert_eq!(h, "::1");
        assert_eq!(rest, "/path");
    }

    // Same-site URLs are reduced to their path; foreign URLs and inputs that
    // are already site-relative come back unchanged.
    #[test]
    fn test_rel_for_ignore_script_mainr_example() {
        let base = "mainr.com";

        let main = "https://mainr.com/careers";
        assert_eq!(rel_for_ignore_script(base, main).as_ref(), "/careers");

        let script = "https://staging.mainr.com/mainr.min.js";
        assert_eq!(
            rel_for_ignore_script(base, script).as_ref(),
            "/mainr.min.js"
        );

        let other = "https://cdn.other.com/app.js";
        assert_eq!(rel_for_ignore_script(base, other).as_ref(), other);

        assert_eq!(
            rel_for_ignore_script(base, "/static/app.js").as_ref(),
            "/static/app.js"
        );
    }

    // With no path the remainder starts with '?', so the result is "/".
    #[test]
    fn test_rel_for_ignore_script_query_only_same_site() {
        let base = "example.com";
        let u = "https://sub.example.com?x=1";
        assert_eq!(rel_for_ignore_script(base, u).as_ref(), "/");
    }

    // A `blob:` wrapper is stripped before the inner URL is parsed.
    #[test]
    fn test_rel_for_ignore_script_special_schemes() {
        let base = "example.com";
        let u = "blob:https://example.com/path/to/blob";
        assert_eq!(rel_for_ignore_script(base, u).as_ref(), "/path/to/blob");
    }

    // A non-generic label left of the registrable part is kept in the base,
    // so sibling tenants on the same vendor domain are not treated as same-site.
    #[test]
    fn test_base_domain_tenant_subdomain() {
        let base = base_domain_from_host("mainr.chilipiper.com");
        assert_eq!(base, "mainr.chilipiper.com");

        let u = "https://assets.mainr.chilipiper.com/a.js";
        assert_eq!(rel_for_ignore_script(base, u).as_ref(), "/a.js");

        let other = "https://othertenant.chilipiper.com/a.js";
        assert_eq!(rel_for_ignore_script(base, other).as_ref(), other);
    }

    // A vendor host carrying the brand as a whole label counts as same-site;
    // a host that merely embeds the brand inside a label does not.
    #[test]
    fn test_brand_label_allows_vendor_subdomain() {
        let base = "mainr.com";
        let u = "https://mainr.chilipiper.com/concierge-js/cjs/concierge.js";
        assert_eq!(
            rel_for_ignore_script(base, u).as_ref(),
            "/concierge-js/cjs/concierge.js"
        );

        let bad = "https://evil-mainr.com/x.js";
        assert_eq!(rel_for_ignore_script(base, bad).as_ref(), bad);
    }

    // The main host may be passed with a `www.` prefix; the brand label is
    // still derived from the base domain.
    #[test]
    fn test_allows_vendor_host_when_brand_label_matches_main_site() {
        let main_host = "www.mainr.com";

        let u = "https://mainr.chilipiper.com/concierge-js/cjs/concierge.js";
        assert_eq!(
            rel_for_ignore_script(main_host, u).as_ref(),
            "/concierge-js/cjs/concierge.js"
        );
    }
}