1use std::borrow::Cow;
2
3use memchr::{memchr, memchr3, memrchr};
4
/// Removes a leading `blob:` wrapper and then a leading `filesystem:` wrapper
/// from `url`, returning the remaining slice.
///
/// Each wrapper is removed at most once and in that fixed order, so
/// `blob:filesystem:x` becomes `x` while `filesystem:blob:x` becomes `blob:x`.
/// Matching is case-sensitive. Input without either wrapper is returned as is.
#[inline]
fn strip_special_schemes(url: &str) -> &str {
    let mut rest = url;
    for wrapper in &["blob:", "filesystem:"] {
        if let Some(inner) = rest.strip_prefix(*wrapper) {
            rest = inner;
        }
    }
    rest
}
10
11#[inline]
14pub fn host_and_rest(url: &str) -> Option<(&str, &str)> {
15 let url = strip_special_schemes(url);
16 let bytes = url.as_bytes();
17
18 let host_start = if let Some(pos) = memchr(b':', bytes) {
19 if bytes.get(pos + 1) == Some(&b'/') && bytes.get(pos + 2) == Some(&b'/') {
20 pos + 3
21 } else if bytes.starts_with(b"//") {
22 2
23 } else {
24 return None;
25 }
26 } else if bytes.starts_with(b"//") {
27 2
28 } else {
29 return None;
30 };
31
32 let rest_start =
34 memchr3(b'/', b'?', b'#', &bytes[host_start..]).map_or(url.len(), |i| host_start + i);
35
36 let authority = &url[host_start..rest_start];
37 if authority.is_empty() {
38 return None;
39 }
40
41 let authority = match memrchr(b'@', authority.as_bytes()) {
43 Some(pos) => &authority[pos + 1..],
44 None => authority,
45 };
46
47 let ab = authority.as_bytes();
48
49 if ab.first() == Some(&b'[') {
51 let close = memchr(b']', ab)?;
52 let host = &authority[1..close];
53 return Some((host, &url[rest_start..]));
54 }
55
56 let host_end = memchr(b':', ab).unwrap_or(ab.len());
58 let host = &authority[..host_end];
59 if host.is_empty() {
60 return None;
61 }
62
63 Some((host, &url[rest_start..]))
64}
65
/// Returns `true` when `a` and `b` have the same length and are equal once
/// ASCII letters are compared case-insensitively. Non-ASCII bytes must match
/// exactly.
#[inline]
fn eq_ignore_ascii_case(a: &str, b: &str) -> bool {
    a.as_bytes().eq_ignore_ascii_case(b.as_bytes())
}
70
/// Returns `true` when `hay` ends with `suf`, comparing ASCII letters
/// case-insensitively and all other bytes exactly.
///
/// An empty `suf` always matches. A `suf` longer than `hay` never matches.
///
/// The comparison is done on bytes rather than on a `str` slice: slicing
/// `hay` at `hay.len() - suf.len()` would panic whenever that offset falls
/// inside a multi-byte UTF-8 character (e.g. `hay = "é"`, `suf = "a"`).
#[inline]
pub fn ends_with_ignore_ascii_case(hay: &str, suf: &str) -> bool {
    let (hay, suf) = (hay.as_bytes(), suf.as_bytes());
    match hay.len().checked_sub(suf.len()) {
        Some(start) => hay[start..].eq_ignore_ascii_case(suf),
        // `suf` is longer than `hay`.
        None => false,
    }
}
78
79#[inline]
80pub fn base_domain_from_any(s: &str) -> &str {
81 if let Some((h, _)) = host_and_rest(s) {
82 base_domain_from_host(h)
83 } else {
84 base_domain_from_host(s)
85 }
86}
87
88#[inline]
89pub fn first_label(host: &str) -> &str {
90 let h = host.trim_end_matches('.');
91 match memchr(b'.', h.as_bytes()) {
92 Some(i) => &h[..i],
93 None => h,
94 }
95}
96
97#[inline]
98pub fn host_contains_label_icase(host: &str, label: &str) -> bool {
99 let host = host.trim_end_matches('.');
100 let label = label.trim_matches('.');
101
102 if host.is_empty() || label.is_empty() {
103 return false;
104 }
105
106 let hb = host.as_bytes();
107 let lb = label.as_bytes();
108
109 let mut start = 0usize;
111
112 while start < hb.len() && hb[start] == b'.' {
114 start += 1;
115 }
116
117 while start < hb.len() {
118 let end = memchr(b'.', &hb[start..]).map_or(hb.len(), |i| start + i);
119
120 if end - start == lb.len() && hb[start..end].eq_ignore_ascii_case(lb) {
121 return true;
122 }
123
124 start = end + 1;
126 while start < hb.len() && hb[start] == b'.' {
127 start += 1;
128 }
129 }
130
131 false
132}
133
134#[inline]
137pub fn host_is_subdomain_of(host: &str, base: &str) -> bool {
138 let host = host.trim_end_matches('.');
139 let base = base.trim_end_matches('.');
140
141 if base.is_empty() {
142 return false;
143 }
144
145 if eq_ignore_ascii_case(host, base) {
146 return true;
147 }
148
149 if host.len() <= base.len() {
150 return false;
151 }
152
153 let dot_pos = host.len() - base.len() - 1;
154 host.as_bytes().get(dot_pos) == Some(&b'.') && ends_with_ignore_ascii_case(host, base)
155}
156
/// Subdomain labels treated as generic infrastructure, environment or service
/// prefixes (for example `www`, `cdn`, `staging`).
///
/// `is_common_subdomain_label` lowercases its input and then does an exact
/// lookup in this set, so every entry must be lowercase ASCII.
static COMMON_SUBDOMAIN_LABELS: phf::Set<&'static str> = phf::phf_set! {
    // Web front-ends and static content.
    "www","m","amp","api","cdn","static","assets","img","images","media","files",
    // Authentication and accounts.
    "login","auth","sso","id","account","accounts",
    // Application surfaces.
    "app","apps","dashboard","admin","portal","console",
    // Informational sites.
    "status","support","help","docs","blog",
    // Deployment environments.
    "dev","staging","stage","test","qa","uat","beta","alpha","preview","demo","sandbox",
    // Storage and delivery.
    "uploads","download","storage","origin","edge","cache",
    // Mail.
    "mail","email","smtp","mx","webmail",
    // API transports.
    "graphql","rpc","ws",
};
168
169#[inline]
170fn is_common_subdomain_label(lbl: &str) -> bool {
172 if lbl.is_empty() {
173 return false;
174 }
175 let lower = lbl.to_ascii_lowercase();
176 COMMON_SUBDOMAIN_LABELS.contains(lower.as_str())
177}
178
179#[inline]
180pub fn base_domain_from_url(main_url: &str) -> Option<&str> {
181 let (host, _) = host_and_rest(main_url)?;
182 Some(base_domain_from_host(host))
183}
184
185#[inline]
188pub fn rel_for_ignore_script<'a>(main_host_or_base: &str, url: &'a str) -> Cow<'a, str> {
189 if url.starts_with('/') {
190 return Cow::Borrowed(url);
191 }
192
193 let base = base_domain_from_host(main_host_or_base.trim_end_matches('.'));
194 let base = base.trim_end_matches('.');
195 if base.is_empty() {
196 return Cow::Borrowed(url);
197 }
198
199 let brand = first_label(base);
200
201 if let Some((host, rest)) = host_and_rest(url) {
202 if host_is_subdomain_of(host, base) || host_contains_label_icase(host, brand) {
203 if rest.starts_with('/') {
204 return Cow::Borrowed(rest);
205 }
206 return Cow::Borrowed("/");
207 }
208 }
209
210 Cow::Borrowed(url)
211}
212
/// Returns `true` when `sld` is one of the hard-coded second-level labels
/// checked under two-letter TLDs (such as `co`, `com`, `gov`, `gouv`).
///
/// The comparison ignores ASCII case and does not allocate: the label is
/// lowercased into a small stack buffer and matched against the fixed list.
#[inline]
fn is_common_cc_sld(sld: &str) -> bool {
    let raw = sld.as_bytes();
    let n = raw.len();
    // Every known entry is 2 to 4 bytes long.
    if n < 2 || n > 4 {
        return false;
    }

    let mut buf = [0u8; 4];
    buf[..n].copy_from_slice(raw);
    buf.make_ascii_lowercase();

    matches!(
        &buf[..n],
        b"co" | b"ac"
            | b"go"
            | b"or"
            | b"ne"
            | b"ed"
            | b"gr"
            | b"lg"
            | b"ad"
            | b"com"
            | b"net"
            | b"org"
            | b"gov"
            | b"edu"
            | b"mil"
            | b"nic"
            | b"sch"
            | b"gob"
            | b"gouv"
    )
}
260
261#[inline]
262pub fn base_domain_from_host(host: &str) -> &str {
269 let mut h = host.trim_end_matches('.');
270 if let Some(x) = h.strip_prefix("www.") {
271 h = x;
272 }
273 if let Some(x) = h.strip_prefix("m.") {
274 h = x;
275 }
276
277 let hb = h.as_bytes();
279 let last_dot = match memrchr(b'.', hb) {
280 Some(p) => p,
281 None => return h,
282 };
283 let prev_dot = match memrchr(b'.', &hb[..last_dot]) {
284 Some(p) => p,
285 None => return h, };
287
288 let tld = &h[last_dot + 1..];
289 let sld = &h[prev_dot + 1..last_dot];
290
291 let mut base = &h[prev_dot + 1..]; if tld.len() == 2 && is_common_cc_sld(sld) {
294 if let Some(prev2_dot) = memrchr(b'.', &hb[..prev_dot]) {
295 base = &h[prev2_dot + 1..]; }
297 }
298
299 if h.len() > base.len() + 1 {
300 let base_start = h.len() - base.len();
301 let boundary = base_start - 1;
302 if hb.get(boundary) == Some(&b'.') {
303 let left_part = &h[..boundary];
304 let (lbl_start, lbl) = match memrchr(b'.', left_part.as_bytes()) {
306 Some(p) => (p + 1, &left_part[p + 1..]),
307 None => (0, left_part),
308 };
309
310 if !lbl.is_empty() && !is_common_subdomain_label(lbl) {
311 return &h[lbl_start..];
313 }
314 }
315 }
316
317 base
318}
319
// Unit tests for the host/URL helpers in this file. `mainr.com` is used as the
// example main site throughout.
#[cfg(test)]
mod tests {
    use super::*;

    // The base itself and subdomains at any depth are accepted.
    #[test]
    fn test_domain_match_basic_and_subdomains() {
        let base = "mainr.com";

        assert!(host_is_subdomain_of("mainr.com", base));
        assert!(host_is_subdomain_of("staging.mainr.com", base));
        assert!(host_is_subdomain_of("a.b.c.mainr.com", base));

        // Matching ignores ASCII case on both arguments.
        assert!(host_is_subdomain_of("StAgInG.mainr.CoM", "mainr.COM"));
    }

    // Hosts that merely contain or resemble the base must be rejected.
    #[test]
    fn test_domain_match_no_false_positives() {
        let base = "mainr.com";

        // Suffix match without a '.' boundary.
        assert!(!host_is_subdomain_of("evil-mainr.com", base));
        // Base appears as a prefix of a different domain.
        assert!(!host_is_subdomain_of("mainr.com.evil.com", base));
        assert!(!host_is_subdomain_of("stagingmainr.com", base));
        assert!(!host_is_subdomain_of("mainr.co", base));
    }

    // Userinfo and port are stripped; IPv6 literals lose their brackets.
    #[test]
    fn test_host_and_rest_handles_userinfo_port_ipv6() {
        let (h, rest) =
            host_and_rest("https://user:pass@staging.mainr.com:8443/a.js?x=1#y").unwrap();
        assert_eq!(h, "staging.mainr.com");
        assert_eq!(rest, "/a.js?x=1#y");

        let (h, rest) = host_and_rest("http://[::1]:8080/path").unwrap();
        assert_eq!(h, "::1");
        assert_eq!(rest, "/path");
    }

    // End-to-end behavior of `rel_for_ignore_script` for the main site.
    #[test]
    fn test_rel_for_ignore_script_mainr_example() {
        let base = "mainr.com";

        let main = "https://mainr.com/careers";
        assert_eq!(rel_for_ignore_script(base, main).as_ref(), "/careers");

        let script = "https://staging.mainr.com/mainr.min.js";
        assert_eq!(
            rel_for_ignore_script(base, script).as_ref(),
            "/mainr.min.js"
        );

        // A third-party host is returned unchanged.
        let other = "https://cdn.other.com/app.js";
        assert_eq!(rel_for_ignore_script(base, other).as_ref(), other);

        // An already-relative path is returned unchanged.
        assert_eq!(
            rel_for_ignore_script(base, "/static/app.js").as_ref(),
            "/static/app.js"
        );
    }

    // A same-site URL with a query but no path collapses to "/".
    #[test]
    fn test_rel_for_ignore_script_query_only_same_site() {
        let base = "example.com";
        let u = "https://sub.example.com?x=1";
        assert_eq!(rel_for_ignore_script(base, u).as_ref(), "/");
    }

    // The `blob:` wrapper is stripped before the host is examined.
    #[test]
    fn test_rel_for_ignore_script_special_schemes() {
        let base = "example.com";
        let u = "blob:https://example.com/path/to/blob";
        assert_eq!(rel_for_ignore_script(base, u).as_ref(), "/path/to/blob");
    }

    // A distinctive label left of the registrable domain is kept as part of the base.
    #[test]
    fn test_base_domain_tenant_subdomain() {
        let base = base_domain_from_host("mainr.chilipiper.com");
        assert_eq!(base, "mainr.chilipiper.com");

        // Subdomain of the tenant host: relativized.
        let u = "https://assets.mainr.chilipiper.com/a.js";
        assert_eq!(rel_for_ignore_script(base, u).as_ref(), "/a.js");

        // A different tenant on the same vendor domain: left unchanged.
        let other = "https://othertenant.chilipiper.com/a.js";
        assert_eq!(rel_for_ignore_script(base, other).as_ref(), other);
    }

    // A vendor host carrying the site's brand label counts as same-site.
    #[test]
    fn test_brand_label_allows_vendor_subdomain() {
        let base = "mainr.com";
        let u = "https://mainr.chilipiper.com/concierge-js/cjs/concierge.js";
        assert_eq!(
            rel_for_ignore_script(base, u).as_ref(),
            "/concierge-js/cjs/concierge.js"
        );

        // The brand must be a whole label, not a substring of one.
        let bad = "https://evil-mainr.com/x.js";
        assert_eq!(rel_for_ignore_script(base, bad).as_ref(), bad);
    }

    // Same as above, but starting from a full main host with a `www.` prefix.
    #[test]
    fn test_allows_vendor_host_when_brand_label_matches_main_site() {
        let main_host = "www.mainr.com";

        let u = "https://mainr.chilipiper.com/concierge-js/cjs/concierge.js";
        assert_eq!(
            rel_for_ignore_script(main_host, u).as_ref(),
            "/concierge-js/cjs/concierge.js"
        );
    }

    // Boundary inputs for `host_and_rest`.
    #[test]
    fn test_host_and_rest_edge_cases() {
        // Protocol-relative URL.
        let (h, rest) = host_and_rest("//example.com/path").unwrap();
        assert_eq!(h, "example.com");
        assert_eq!(rest, "/path");

        // No path, query or fragment.
        let (h, rest) = host_and_rest("https://example.com").unwrap();
        assert_eq!(h, "example.com");
        assert_eq!(rest, "");

        // Query directly after the host.
        let (h, rest) = host_and_rest("https://example.com?q=1").unwrap();
        assert_eq!(h, "example.com");
        assert_eq!(rest, "?q=1");

        // Fragment directly after the host.
        let (h, rest) = host_and_rest("https://example.com#frag").unwrap();
        assert_eq!(h, "example.com");
        assert_eq!(rest, "#frag");

        // Neither a scheme nor a leading "//": not parsed.
        assert!(host_and_rest("example.com/path").is_none());
        // Empty input.
        assert!(host_and_rest("").is_none());

        // The `filesystem:` wrapper is stripped.
        let (h, _) = host_and_rest("filesystem:https://example.com/path").unwrap();
        assert_eq!(h, "example.com");

        // Port without a path.
        let (h, rest) = host_and_rest("https://example.com:8080").unwrap();
        assert_eq!(h, "example.com");
        assert_eq!(rest, "");

        // Userinfo together with a port.
        let (h, _) = host_and_rest("https://user@example.com:443/x").unwrap();
        assert_eq!(h, "example.com");

        // IPv6 literal without a port.
        let (h, rest) = host_and_rest("http://[::1]/path").unwrap();
        assert_eq!(h, "::1");
        assert_eq!(rest, "/path");

        // Empty authority.
        assert!(host_and_rest("http:///path").is_none());
    }

    // Whole-label, case-insensitive matching in `host_contains_label_icase`.
    #[test]
    fn test_host_contains_label_icase_edge_cases() {
        // Any label of the host can match, in any ASCII case.
        assert!(host_contains_label_icase("www.example.com", "example"));
        assert!(host_contains_label_icase("www.example.com", "EXAMPLE"));
        assert!(host_contains_label_icase("www.example.com", "www"));
        assert!(host_contains_label_icase("www.example.com", "com"));

        // Single-label hosts.
        assert!(host_contains_label_icase("localhost", "localhost"));
        assert!(host_contains_label_icase("LOCALHOST", "localhost"));

        // Partial labels do not match.
        assert!(!host_contains_label_icase("www.example.com", "exam"));
        assert!(!host_contains_label_icase("www.example.com", "ample"));

        // Empty host or label never matches.
        assert!(!host_contains_label_icase("", "example"));
        assert!(!host_contains_label_icase("example.com", ""));

        // A trailing dot on the host is ignored.
        assert!(host_contains_label_icase("example.com.", "com"));
        assert!(host_contains_label_icase("example.com.", "example"));
    }

    // `first_label` returns the left-most label and ignores a trailing dot.
    #[test]
    fn test_first_label_edge_cases() {
        assert_eq!(first_label("www.example.com"), "www");
        assert_eq!(first_label("example.com"), "example");
        assert_eq!(first_label("localhost"), "localhost");
        assert_eq!(first_label("example.com."), "example");
    }

    // Prefix stripping, country-code SLDs and degenerate hosts.
    #[test]
    fn test_base_domain_from_host_edge_cases() {
        // Two-label host is already the base.
        assert_eq!(base_domain_from_host("example.com"), "example.com");

        // Leading `www.` / `m.` are removed.
        assert_eq!(base_domain_from_host("www.example.com"), "example.com");
        assert_eq!(base_domain_from_host("m.example.com"), "example.com");

        // Country-code second-level domains keep three labels.
        assert_eq!(base_domain_from_host("example.co.uk"), "example.co.uk");
        assert_eq!(base_domain_from_host("www.example.co.uk"), "example.co.uk");

        // Single-label host.
        assert_eq!(base_domain_from_host("localhost"), "localhost");

        // Trailing dot is removed.
        assert_eq!(base_domain_from_host("example.com."), "example.com");
    }

    // Trailing dots, empty base and short hosts in `host_is_subdomain_of`.
    #[test]
    fn test_host_is_subdomain_of_edge_cases() {
        // Trailing dots on either side are ignored.
        assert!(host_is_subdomain_of("example.com.", "example.com."));
        assert!(host_is_subdomain_of("sub.example.com.", "example.com."));

        // An empty base never matches.
        assert!(!host_is_subdomain_of("example.com", ""));

        // Exact match.
        assert!(host_is_subdomain_of("example.com", "example.com"));

        // Host shorter than the base.
        assert!(!host_is_subdomain_of("com", "example.com"));
    }
}